<a href="https://colab.research.google.com/github/seriserendipia/HP_QA_system/blob/main/myHarry_Potter_QA_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers sentence-transformers faiss-cpu



In [None]:
!pip install gradio

In [None]:
from google.colab import userdata
from huggingface_hub import login

# Read both API credentials from Colab's secret store instead of hard-coding them.
hugging_face_key = userdata.get('Hugging_Face_Token')
deepseek_key = userdata.get('DeepSeek_Token')

# Authenticate this session with the Hugging Face Hub.
login(token=hugging_face_key)

In [None]:
import logging

import faiss
import gradio as gr
import numpy as np
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from transformers import pipeline


# Sentence encoder used for both the book chunks and incoming questions.
embedder = SentenceTransformer("all-mpnet-base-v2")
# Module-level placeholder; the functions below work with their own local `prompt`.
prompt = None

# Read the whole book into memory as a single UTF-8 string.
book_path = "/content/01 Harry Potter and the Sorcerers Stone.txt"
with open(book_path, mode="r", encoding="utf-8") as book_file:
    raw_text = book_file.read()

# Split text into chunks
def split_into_chunks(text: str, max_length: int = 300, overlap: int = 0) -> list:
    """Split ``text`` into fixed-size character windows.

    Args:
        text: The string to split. An empty string yields an empty list.
        max_length: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters each chunk shares with the previous
            one. The default of 0 produces back-to-back, non-overlapping
            chunks. Must satisfy ``0 <= overlap < max_length``.

    Returns:
        A list of substrings of ``text`` in document order. Every chunk has
        exactly ``max_length`` characters except possibly the last one.

    Raises:
        ValueError: If ``max_length`` is not positive or ``overlap`` is
            outside ``[0, max_length)``.
    """
    if max_length <= 0:
        raise ValueError("max_length must be a positive integer")
    if not 0 <= overlap < max_length:
        raise ValueError("overlap must satisfy 0 <= overlap < max_length")

    step = max_length - overlap
    pieces = []
    for start in range(0, len(text), step):
        pieces.append(text[start:start + max_length])
        # Once a window reaches the end of the text, any further window
        # would only repeat characters already covered by this one.
        if start + max_length >= len(text):
            break
    return pieces

# Cut the book into retrieval chunks using the splitter's default size.
chunks = split_into_chunks(raw_text)

# Embed every chunk; convert_to_tensor=False asks the encoder not to return torch tensors.
chunk_embeddings = embedder.encode(chunks, convert_to_tensor=False)

# Size the flat L2 FAISS index from the first embedding, then add all chunk vectors.
embedding_dim = chunk_embeddings[0].shape[0]
index = faiss.IndexFlatL2(embedding_dim)
index.add(np.array(chunk_embeddings))

# Local Hugging Face text2text generation model used for answering.
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")


# OpenAI client pointed at the DeepSeek base URL.
client = OpenAI(api_key=deepseek_key, base_url="https://api.deepseek.com")

# Function to retrieve context based on the question
def get_context(question, top_k=5):
    """Return the book chunks most similar to ``question`` as one string.

    The question is embedded with the module-level ``embedder``, the
    module-level FAISS ``index`` is searched for the ``top_k`` nearest
    vectors, and the matching entries of ``chunks`` are joined with
    newlines in the order the index returned them.

    Args:
        question: The user's question text.
        top_k: Maximum number of chunks to retrieve.

    Returns:
        A newline-joined string of up to ``top_k`` chunks. Fewer chunks are
        returned when the index holds fewer than ``top_k`` vectors.
    """
    question_vector = embedder.encode([question])[0]
    # Build an explicit (1, dim) float32 query matrix for the index search.
    query = np.asarray([question_vector], dtype=np.float32)
    _, neighbor_ids = index.search(query, top_k)
    # FAISS pads missing results with -1; without this filter a -1 would
    # silently select chunks[-1] and duplicate the last chunk in the context.
    return "\n".join(chunks[i] for i in neighbor_ids[0] if i >= 0)

# Function to get answer from Hugging Face model
def ask_hf(prompt):
    """Run ``prompt`` through the local Hugging Face pipeline.

    Calls the module-level ``qa_pipeline`` with a cap of 100 newly generated
    tokens and returns the first generation's text with surrounding
    whitespace removed.
    """
    generations = qa_pipeline(prompt, max_new_tokens=100)
    first_generation = generations[0]
    answer_text = first_generation['generated_text']
    return answer_text.strip()

# Function to get answer from DeepSeek model
def ask_deepseek(prompt):
    """Send ``prompt`` to the DeepSeek chat model and return its reply.

    Uses the module-level ``client`` to issue a single non-streaming chat
    completion with a generic system message followed by ``prompt`` as the
    user message.

    Args:
        prompt: The full prompt text to send as the user message.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or an empty string when the reply carries no text content.
    """
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
        stream=False
    )
    # The message content may be None; guard so .strip() cannot raise AttributeError.
    content = response.choices[0].message.content
    return (content or "").strip()

# Combined function to get answers from both models
def ask_both(question):
    """Answer ``question`` with both the Hugging Face and DeepSeek models.

    Retrieves context via ``get_context``, builds one shared prompt, prints
    and logs it, then queries ``ask_hf`` and ``ask_deepseek`` with that
    same prompt.

    Args:
        question: The user's question. Leading and trailing whitespace is
            ignored.

    Returns:
        A ``(hf_answer, ds_answer)`` tuple. When the question is empty,
        whitespace-only, or None, both entries are a short notice and no
        retrieval or model call is made.
    """
    question = (question or "").strip()
    if not question:
        # An empty submit from the UI should not trigger embedding or model calls.
        notice = "Please enter a question."
        return notice, notice

    context = get_context(question)
    prompt = f"Answer the question based on the context.\nContext: {context}\nQuestion: {question}"
    print("prompt:", prompt)
    logging.basicConfig(level=logging.INFO)
    logging.info(prompt)

    hf_answer = ask_hf(prompt)
    ds_answer = ask_deepseek(prompt)
    return hf_answer, ds_answer




In [None]:
# Try the retrieval + dual-model flow on one sample question; as the cell's
# last expression, the returned (hf_answer, ds_answer) tuple is displayed.
ask_both("How do you make something float in the air?")

In [None]:
# Build Gradio interface: one question box feeding two answer boxes.
with gr.Blocks() as demo:
    gr.Markdown("# 🧙 Harry Potter Knowledge QA System")

    # The question box and both answer boxes are created inside a single row.
    with gr.Row():
        question_input = gr.Textbox(label="Your Magic Question")
        hf_output = gr.Textbox(label="🧠 Hugging Face Answer")
        ds_output = gr.Textbox(label="🔮 DeepSeek Answer")

    # The question box's submit event calls ask_both; its two return values
    # fill the Hugging Face and DeepSeek boxes, in that order.
    question_input.submit(ask_both, inputs=question_input, outputs=[hf_output, ds_output])

# Start the app.
demo.launch()