## Libraries

In [None]:
!pip install faiss-gpu
!pip install googletrans
!pip install SpeechRecognition
!pip install sentence-transformers googletrans==4.0.0-rc1 openai gtts
!pip install deep-translator
!pip install langdetect
!pip install google-generativeai
!pip install genai

In [None]:
import faiss
import numpy as np
import json
import torch
from deep_translator import GoogleTranslator
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import speech_recognition as sr
from gtts import gTTS
import os
from huggingface_hub import login
import google.generativeai as genai
from IPython.display import Audio
from langdetect import detect

## Preprocessing & Model initialization 

In [None]:
# Authenticate with the Gemini API and the Hugging Face Hub.
genai.configure(api_key="YOUR_API_TOKEN")
login(token="YOUR_API_TOKEN")

# Load the first-aid question/answer dataset.
qa_path = "/kaggle/input/q-and-a-json/Q-and-A-organized-file.json"
with open(qa_path, "r", encoding="utf-8") as qa_file:
    qa_data = json.load(qa_file)

# Flatten every category into two parallel lists: questions[i] pairs with answers[i].
qa_items = [
    item
    for category in qa_data["first_aid_questions"]
    for item in category["questions"]
]
questions = [item["question"] for item in qa_items]
answers = [item["answer"] for item in qa_items]

from sentence_transformers import SentenceTransformer
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")  # Efficient model

# Embed all stored questions once and put them in an L2 FAISS index
# whose dimensionality matches the embedding width.
question_embeddings = np.array(embedding_model.encode(questions)).astype("float32")
embedding_dim = question_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(question_embeddings)

# Text-generation pipeline used to refine the retrieved answers.
mistral_model = "mistralai/Mistral-7B-Instruct-v0.1"
qa_pipeline = pipeline(
    "text-generation",
    model=mistral_model,
    tokenizer=mistral_model,
    torch_dtype=torch.float16,
    device_map="auto",
)

## Converting the audio format

In [None]:
# Re-encode the sample recording as signed 16-bit little-endian PCM at a
# 16 kHz sample rate, writing the result to burn_ar_fixed.wav.
!ffmpeg -i /kaggle/input/voice-test-chatbot/burn-ar.wav -acodec pcm_s16le -ar 16000 burn_ar_fixed.wav

## Helper functions & Main Chatbot 

In [None]:
def paraphrase_to_egyptian(text):
    """Convert Modern Standard Arabic (MSA) to Egyptian Arabic (العامية المصرية).

    Builds a single rewrite instruction that ends with ``text``, sends it to
    the ``gemini-1.5-flash`` model and returns the text of the model's reply.
    """
    instruction = (
        "Rewrite this text in simple Egyptian Arabic dialect (العامية المصرية) "
        "using common everyday terms. "
        "Provide only ONE clear version without alternatives: "
    )
    gemini = genai.GenerativeModel("gemini-1.5-flash")
    reply = gemini.generate_content(f"{instruction}{text}")
    return reply.text
    ########################################################################################
def translate_text(text, src="ar", dest="en"):
    """Translate ``text`` from language ``src`` to language ``dest``.

    Uses deep_translator's ``GoogleTranslator``; by default translates
    Arabic ("ar") into English ("en").
    """
    translator = GoogleTranslator(source=src, target=dest)
    return translator.translate(text)
    #########################################################################################
def find_top_k_answers(query, k=5, threshold=0.9):
    """Retrieve the stored Q&A pairs whose questions are closest to ``query``.

    Args:
        query: Question text to embed and look up in the FAISS index.
        k: Number of nearest neighbours requested from the index.
        threshold: Largest distance (as reported by the index) that the
            best match may have for the lookup to count as a hit.

    Returns:
        A list of ``(question, answer)`` tuples for the valid neighbours, or
        ``None`` when even the closest match is farther than ``threshold``.
    """
    query_embedding = np.array(embedding_model.encode([query])).astype("float32")
    distances, indices = index.search(query_embedding, k)
    print(distances)
    min_distance = min(distances[0]) if distances[0].size > 0 else float("inf")
    if min_distance > threshold:
        return None
    # FAISS fills result slots it cannot satisfy (fewer than k vectors in the
    # index) with label -1. A bare `i < len(answers)` lets -1 through and
    # silently returns the *last* Q&A pair, so bound the index from below too.
    return [
        (questions[i], answers[i])
        for i in indices[0]
        if 0 <= i < len(answers)
    ]
    ###########################################################################################
def refine_with_mistral(prompt, max_new_tokens=100, temperature=0.5, do_sample=False, top_k=30, top_p=0.8):
    """Run ``prompt`` through the Mistral text-generation pipeline.

    All keyword arguments are forwarded unchanged to ``qa_pipeline``.

    Returns:
        The ``"generated_text"`` field of the first result returned by the
        pipeline.
    """
    generation_options = {
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
        "temperature": temperature,
        "top_k": top_k,
        "top_p": top_p,
    }
    outputs = qa_pipeline(prompt, **generation_options)
    first_result = outputs[0]
    return first_result["generated_text"]
    ##############################################################################################
def text_to_speech(text, lang="ar"):
    """Synthesize ``text`` with gTTS and return it as a notebook audio widget.

    The speech is saved to ``response_burn_ar.mp3`` (overwritten on every
    call) and wrapped in an ``IPython.display.Audio`` object.
    """
    output_path = "response_burn_ar.mp3"
    gTTS(text=text, lang=lang).save(output_path)
    return Audio(output_path)
    #############################################################################################
def chatbot(text):
    """Answer a first-aid question written in Arabic or English.

    The language is detected with ``langdetect``. Arabic questions are
    translated to English for retrieval, the retrieved answers are refined by
    Mistral, translated back to Arabic and finally rewritten in Egyptian
    dialect. English questions are retrieved and refined directly.

    Args:
        text: The user's question.

    Returns:
        The response text; a fixed apology message (in the question's
        language) when no stored question is close enough; or an empty string
        when the detected language is neither Arabic nor English.
    """
    lang = detect(text)
    print(lang)
    response = ""
    if lang == "ar":
        english_text = translate_text(text, src="ar", dest="en")
        top_answers = find_top_k_answers(english_text, k=2)
        if top_answers is None:
            return "معلش مش فاهم السؤال !"
        instruction = "Refine this answer in Arabic: "
        answer_prompt = instruction + " ".join([ans for _, ans in top_answers])
        refined_answer = refine_with_mistral(answer_prompt)
        # The generated text can echo the prompt (the English branch already
        # strips its instruction). Remove it here as well so the instruction
        # is not translated and shown to the user as part of the answer.
        refined_answer = refined_answer.replace(instruction, "")
        standard_arabic = translate_text(refined_answer, src="en", dest="ar")
        response = paraphrase_to_egyptian(standard_arabic)

    elif lang == "en":
        top_answers = find_top_k_answers(text, k=2)
        if top_answers is None:
            return "Sorry, I don't understand"
        instruction = "Refine this answer in English: "
        answer_prompt = instruction + " ".join([ans for _, ans in top_answers])
        refined_answer = refine_with_mistral(answer_prompt)
        response = refined_answer.replace(instruction, "")
    return response
       ##########################################################################################
def voice_chatbot():
    """Transcribe the prepared WAV file, answer it, and speak the answer.

    Recognition is attempted in Arabic first and then in English; the
    language whose recognition succeeds is also used for the spoken reply.

    Returns:
        The audio object produced by ``text_to_speech``, or ``None`` when the
        recording could not be understood in either language.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile("/kaggle/working/burn_ar_fixed.wav") as source:
        audio = recognizer.record(source)

    # Each attempt pairs a language code with the message printed if the
    # recognizer cannot understand the audio in that language.
    attempts = (
        ("ar", "Arabic recognition failed, trying English..."),
        ("en", "Error: Could not understand the audio in English or Arabic."),
    )
    for language_code, failure_message in attempts:
        try:
            text = recognizer.recognize_google(audio, language=language_code)
        except sr.UnknownValueError:
            print(failure_message)
        else:
            detected_lang = language_code
            break
    else:
        # Neither language produced a transcript.
        return

    print(f"Recognized Text: {text} (Detected: {detected_lang})")
    reply = chatbot(text)
    return text_to_speech(reply, lang=detected_lang)
    ###########################################################################################
if __name__ == "__main__":
    # Interactive menu: keep prompting until the user picks "3" (exit).
    mode = None
    while mode != "3":
        mode = input("Choose mode: (1) Text, (2) Voice, (3) Exit: ")
        if mode == "1":
            user_input = input("You: ")
            print(chatbot(user_input))
        elif mode == "2":
            voice_chatbot()
        elif mode != "3":
            print("Invalid option. Try again.")
