# In [None]:
import speech_recognition as sr
import pyttsx3
import requests
from transformers import BlipProcessor, BlipForConditionalGeneration, AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import torch
import os
from datetime import datetime
import wikipedia

# Speech recognizer used to capture and transcribe microphone audio
recognizer = sr.Recognizer()
# Text-to-speech engine used for all spoken output
engine = pyttsx3.init()

# Set properties for the text-to-speech engine
engine.setProperty('rate', 150)  # Speaking rate in words per minute
engine.setProperty('volume', 0.9)  # Volume, from 0.0 to 1.0

# NewsAPI key. Set the NEWS_API_KEY environment variable to supply your own
# key without editing this file; the literal is only a fallback.
# NOTE(review): a key committed to source control should be rotated.
NEWS_API_KEY = os.environ.get('NEWS_API_KEY', 'a4b55d5265a344d2ac416684077bc204')
NEWS_API_URL = f'https://newsapi.org/v2/top-headlines?country=us&apiKey={NEWS_API_KEY}'

# OpenWeatherMap key. Set the WEATHER_API_KEY environment variable to supply
# your own key without editing this file; the literal is only a fallback.
# NOTE(review): a key committed to source control should be rotated.
WEATHER_API_KEY = os.environ.get('WEATHER_API_KEY', '20a32eb398ee8070f7d29beb01abfeba')
WEATHER_API_URL = f'https://api.openweathermap.org/data/2.5/weather?q=Hyderabad&appid={WEATHER_API_KEY}'

# Run the models on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the BLIP processor and model for image captioning
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)

# Load the conversational model and tokenizer for friendly conversation
conversational_model_name = "microsoft/DialoGPT-medium"
conv_model = AutoModelForCausalLM.from_pretrained(conversational_model_name).to(device)
conv_tokenizer = AutoTokenizer.from_pretrained(conversational_model_name)

# Token ids of the conversation so far; None until the first exchange
chat_history_ids = None

def speak(text):
    """Say ``text`` aloud through the shared text-to-speech engine.

    Any exception raised while speaking is caught and printed, so a speech
    failure never propagates to the caller.
    """
    try:
        # Hand the text to the engine, then let it process the utterance.
        engine.say(text)
        engine.runAndWait()
    except Exception as exc:
        failure = f"Error in speak function: {exc}"
        print(failure)

def listen():
    """Record one utterance from the microphone and transcribe it.

    Returns:
        The recognized text, or ``None`` when the audio could not be
        understood or the recognition request failed. In both failure cases
        the message is printed and spoken before returning.
    """
    # Hold the microphone only while recording, so the device is released
    # before the recognition request and any spoken error message.
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)

    try:
        query = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        message = "Sorry, I did not understand that."
    except sr.RequestError:
        message = "Sorry, my speech service is down."
    else:
        print(f"You said: {query}")
        return query

    # Shared failure path: report the problem on screen and aloud.
    print(message)
    speak(message)
    return None

def get_news(timeout=10):
    """Fetch up to five top headlines from NewsAPI.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the assistant indefinitely.

    Returns:
        A list of numbered headline strings. When the response holds no
        usable articles, a one-item list with a notice. When the request or
        JSON decoding fails, an empty list (the error is printed and spoken).
    """
    try:
        response = requests.get(NEWS_API_URL, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into HTTPError
        news_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        error_message = f"An error occurred: {e}"
        print(error_message)
        speak(error_message)
        return []

    # 'articles' may be absent or null; entries may lack a usable title.
    articles = news_data.get('articles') or []
    titles = [
        article['title']
        for article in articles
        if isinstance(article, dict) and article.get('title')
    ][:5]
    if not titles:
        return ["No news articles available at the moment."]
    return [f"{i}. {title}" for i, title in enumerate(titles, start=1)]

def generate_caption(image_path, max_new_tokens=50, num_beams=5, repetition_penalty=1.2):
    """Describe the image at ``image_path`` using the captioning model.

    Args:
        image_path: Path of the image file to caption.
        max_new_tokens: Value forwarded to ``model.generate`` as
            ``max_new_tokens``.
        num_beams: Beam-search width forwarded to ``model.generate``.
        repetition_penalty: Repetition penalty forwarded to
            ``model.generate``.

    Returns:
        The decoded caption. If any step raises, the error is printed and
        spoken and an apology string is returned instead.
    """
    try:
        rgb_image = Image.open(image_path).convert("RGB")
        model_inputs = processor(images=rgb_image, return_tensors="pt").to(device)

        generation_options = {
            "max_new_tokens": max_new_tokens,
            "num_beams": num_beams,  # Beam search for better quality
            "repetition_penalty": repetition_penalty,  # Penalize repetition
            "early_stopping": True,  # Stop early when all beams finish
        }
        token_ids = model.generate(**model_inputs, **generation_options)

        # Only the first returned sequence is decoded into the caption.
        return processor.decode(token_ids[0], skip_special_tokens=True)
    except Exception as exc:
        failure = f"Error generating caption: {exc}"
        print(failure)
        speak(failure)
        return "Sorry, I couldn't generate a caption for the image."

def get_weather(timeout=10):
    """Fetch the current weather report for Hyderabad.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the assistant indefinitely.

    Returns:
        A sentence with the weather description and the temperature in
        degrees Celsius, or an apology string when the request fails or the
        response does not have the expected shape (the error is printed and
        spoken).
    """
    try:
        response = requests.get(WEATHER_API_URL, timeout=timeout)
        response.raise_for_status()
        weather_data = response.json()
        description = weather_data['weather'][0]['description']
        temp = weather_data['main']['temp'] - 273.15  # Convert from Kelvin to Celsius
    except (
        requests.exceptions.RequestException,
        ValueError,   # body is not valid JSON
        KeyError,     # expected field missing
        IndexError,   # empty 'weather' list
        TypeError,    # field has an unexpected type
    ) as e:
        error_message = f"An error occurred: {e}"
        print(error_message)
        speak(error_message)
        return "Sorry, I couldn't fetch the weather report."

    return f"The current weather in Hyderabad is {description} with a temperature of {temp:.2f} degrees Celsius."
def converse(query, max_history_tokens=800):
    """Generate a conversational reply to ``query``.

    The running conversation is kept in the module-level
    ``chat_history_ids`` and updated on every successful call.

    Args:
        query: The user's utterance.
        max_history_tokens: Maximum number of most-recent tokens (history
            plus the new input) fed to the model. It should stay below the
            ``max_length`` of 1000 used for generation; otherwise a long
            conversation leaves no room for a reply.

    Returns:
        The model's reply, or an apology string if anything raises (the
        error is printed and spoken).
    """
    global chat_history_ids
    try:
        # Encode the user input, terminated by the end-of-sequence token
        new_user_input_ids = conv_tokenizer.encode(
            query + conv_tokenizer.eos_token, return_tensors='pt'
        ).to(device)

        # Append the new input to the conversation so far, if there is one
        if chat_history_ids is not None:
            bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1)
        else:
            bot_input_ids = new_user_input_ids

        # Keep only the most recent tokens so the prompt cannot grow past
        # the generation limit as the conversation gets longer.
        bot_input_ids = bot_input_ids[:, -max_history_tokens:]

        # Generate a response; the output holds the prompt plus the reply
        chat_history_ids = conv_model.generate(
            bot_input_ids, max_length=1000, pad_token_id=conv_tokenizer.eos_token_id
        )

        # Decode only the tokens generated after the prompt
        response = conv_tokenizer.decode(
            chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True
        )
        return response
    except Exception as e:
        error_message = f"Error generating response: {e}"
        print(error_message)
        speak(error_message)
        return "Sorry, I couldn't generate a response."

def get_wikipedia_summary(query):
    """Return a two-sentence Wikipedia summary for ``query``.

    Returns:
        The summary text. For an ambiguous query, a message listing up to
        five of the suggested options. For a missing page, a "not found"
        message. For any other failure, an apology string (the error is
        printed and spoken).
    """
    try:
        return wikipedia.summary(query, sentences=2)
    except wikipedia.exceptions.DisambiguationError as ambiguous:
        # Offer only the first few candidates to keep the reply short.
        suggestions = ', '.join(ambiguous.options[:5])
        return f"Your query was too ambiguous. Did you mean: {suggestions}?"
    except wikipedia.exceptions.PageError:
        return "Sorry, I couldn't find any information on that topic."
    except Exception as exc:
        failure = f"Error fetching Wikipedia summary: {exc}"
        print(failure)
        speak(failure)
        return "Sorry, I couldn't fetch the Wikipedia summary."

def greet_user():
    """Speak a greeting chosen from the current local hour.

    Hours 5-11 get the morning greeting, hours 12-17 the afternoon greeting,
    and every other hour the evening greeting.
    """
    hour = datetime.now().hour
    if hour < 5 or hour >= 18:
        greeting = "Good evening! This is Jarvis, your friendly assistant. How can I help you today?"
    elif hour < 12:
        greeting = "Good morning! This is Jarvis, your friendly assistant. How can I help you today?"
    else:
        greeting = "Good afternoon! This is Jarvis, your friendly assistant. How can I help you today?"
    speak(greeting)

# File extensions treated as images when picking a file to caption.
_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.webp')


def _first_image_path(folder):
    """Return the path of the first image file in ``folder``, or ``None``.

    Entries are taken in name order. ``None`` is returned when the folder
    cannot be read or holds no file with an image extension.
    """
    try:
        names = sorted(os.listdir(folder))
    except OSError as e:
        print(f"Could not read image folder: {e}")
        return None
    for name in names:
        path = os.path.join(folder, name)
        if name.lower().endswith(_IMAGE_EXTENSIONS) and os.path.isfile(path):
            return path
    return None


def main(screenshots_folder="C:\\Users\\Dell\\Documents\\minor-project\\dataset\\Images (1) (1)\\Augmented_Images"):
    """Run the assistant: greet the user, then answer spoken commands.

    Each recognized utterance is matched by keyword, in this order: news,
    image, weather, wikipedia, stop/exit. Anything else is sent to the
    conversational model. The loop ends on a stop/exit command.

    Args:
        screenshots_folder: Folder searched for an image to caption when the
            user asks about an image.
    """
    greet_user()
    while True:
        query = listen()
        if not query:
            # Nothing recognized; listen() already reported the problem.
            continue

        command = query.lower()  # Match keywords case-insensitively
        if 'news' in command:
            speak("Fetching the latest news for you.")
            for article in get_news():
                speak(article)
        elif 'image' in command:
            speak("Generating caption for the image.")
            image_path = _first_image_path(screenshots_folder)
            if image_path:
                speak(generate_caption(image_path))
            else:
                speak("No images found in the screenshots folder.")
        elif 'weather' in command:
            speak("Fetching the weather report for Hyderabad.")
            speak(get_weather())
        elif 'wikipedia' in command:
            speak("What would you like to know from Wikipedia?")
            wiki_query = listen()
            if wiki_query:
                speak("Searching Wikipedia...")
                speak(get_wikipedia_summary(wiki_query))
        elif 'stop' in command or 'exit' in command:
            speak("Goodbye! Have a great day!")
            break
        else:
            # No keyword matched: pass the original text to the chat model.
            speak(converse(query))

# Start the assistant only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()
