In [1]:
import PyPDF2
import json
import re

# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages in reading order, each page followed by a newline.
        A page that yields no text contributes only its newline.
    """
    page_texts = []
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        for page in reader.pages:
            # Guard against a None/empty result (e.g. a page without a text
            # layer) so the concatenation below can never raise TypeError.
            page_texts.append((page.extract_text() or "") + "\n")
    # Join once at the end instead of growing a string page by page.
    return "".join(page_texts)

# Function to structure the data by chapters
def structure_chapters(text):
    """Split book text into a mapping of chapter heading -> chapter body.

    A heading is any match of "Chapter <number>", optionally followed by ':'
    or '-', plus whatever follows up to the end of a line. Because the
    pattern's `\\s*` may step over a newline, a title printed on the line after
    "Chapter <number>" is captured as part of the heading. Text that precedes
    the first heading is discarded, and a heading that occurs more than once
    keeps only its last body.
    """
    heading_regex = r"(Chapter\s+\d+\s*[:\-]?.*)"

    # With one capturing group, re.split interleaves its output as
    # [preamble, heading, body, heading, body, ...].
    pieces = re.split(heading_regex, text)
    headings = pieces[1::2]
    bodies = pieces[2::2]

    return {heading.strip(): body.strip() for heading, body in zip(headings, bodies)}

# Main function to run the extraction and structuring
def main():
    """Extract the book's text from the PDF, split it into chapters and save them as JSON."""
    pdf_path = "3 - Harry Potter and the Prisoner of Azkaban.pdf"  # Update this path
    output_path = "prisoner_of_azkaban2.json"
    extracted_text = extract_text_from_pdf(pdf_path)

    # Verify extraction success
    if extracted_text.strip():
        print("PDF text extraction successful.")
    else:
        print("Error: Could not extract text from PDF. Check the file path and content.")
        return

    # Structure the chapters using the extracted text
    structured_data = structure_chapters(extracted_text)

    # Save the structured data to a JSON file
    with open(output_path, "w", encoding="utf-8") as json_file:
        json.dump(structured_data, json_file, ensure_ascii=False, indent=4)

    # Report the path that was actually written; the message used to name a
    # different directory than the one the file is saved in.
    print(f"Structured data saved successfully in '{output_path}'.")

if __name__ == "__main__":
    main()

PDF text extraction successful.
Structured data saved successfully in 'HogwartsQ&A/prisoner_of_azkaban2.json'.


In [2]:
import json
import os

# Function to split text into chunks with overlap
def split_text_into_chunks(text, chunk_size=100, overlap_size=20):
    """Split text into overlapping chunks of whitespace-separated words.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap_size: Number of words shared by consecutive chunks; must
            satisfy 0 <= overlap_size < chunk_size.

    Returns:
        A list of chunk strings (words re-joined with single spaces). Empty or
        whitespace-only text yields an empty list.

    Raises:
        ValueError: If the sizes would make the window stand still or move
            backwards, which previously caused an endless loop.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap_size < chunk_size:
        raise ValueError("overlap_size must satisfy 0 <= overlap_size < chunk_size")

    words = text.split()
    step = chunk_size - overlap_size
    chunks = []

    for start in range(0, len(words), step):
        end = start + chunk_size
        chunks.append(" ".join(words[start:end]))
        # Once a chunk reaches the last word, any further window would only
        # repeat words already contained in this chunk, so stop here.
        if end >= len(words):
            break

    return chunks

# Function to process and chunk each chapter's content
def process_and_chunk_chapters(json_path, output_path):
    """Chunk every chapter of the structured book JSON and save the result.

    Args:
        json_path: Path of the JSON file mapping chapter title -> chapter text.
        output_path: Path of the JSON file to write, mapping chapter title ->
            list of chunk strings. Its parent directory is created if needed.
    """
    # Load the structured JSON data
    with open(json_path, "r", encoding="utf-8") as file:
        book_data = json.load(file)

    chunked_data = {}

    for chapter, content in book_data.items():
        chapter_chunks = []
        # Paragraphs are separated by blank lines; each one is chunked on its
        # own (with the default chunk sizes) so chunks never span paragraphs.
        for paragraph in content.split("\n\n"):
            paragraph = paragraph.strip()
            if paragraph:
                chapter_chunks.extend(split_text_into_chunks(paragraph))

        chunked_data[chapter] = chapter_chunks

    # os.makedirs("") raises, so only create a directory when the output path
    # actually has a directory component.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the chunked data to a new JSON file
    with open(output_path, "w", encoding="utf-8") as out_file:
        json.dump(chunked_data, out_file, ensure_ascii=False, indent=4)
    print(f"Chunked data saved successfully in '{output_path}'.")

# Main function
def main():
    """Chunk the structured chapter JSON, provided that file is present."""
    json_path = "prisoner_of_azkaban2.json"  # structured chapters from the extraction step
    output_path = "new/prisoner_of_azkaban_chunked.json"  # destination of the chunked chapters

    if os.path.exists(json_path):
        # Input is available: chunk every chapter and write the result
        process_and_chunk_chapters(json_path, output_path)
    else:
        print(f"Error: File '{json_path}' not found.")

if __name__ == "__main__":
    main()

Chunked data saved successfully in 'new/prisoner_of_azkaban_chunked.json'.


In [3]:
from sentence_transformers import SentenceTransformer
import json
import numpy as np
import os

# Function to generate embeddings for each text chunk
def generate_embeddings_for_chunks(json_path, model_name='all-mpnet-base-v2', output_path="HogwartsQ&A/embeddings.npy"):
    """Embed every text chunk and save the vectors plus their text references.

    Args:
        json_path: Path of the JSON file mapping chapter title -> list of chunks.
        model_name: Name of the SentenceTransformer model to load.
        output_path: Path of the .npy file to write. The (chapter, chunk)
            references are written next to it, with ".npy" replaced by
            "_texts.json", in the same order as the saved vectors.
    """
    # Load the structured chunked data
    with open(json_path, "r", encoding="utf-8") as file:
        chunked_data = json.load(file)

    # Load the pre-trained embedding model
    model = SentenceTransformer(model_name)
    print(f"Model '{model_name}' loaded successfully.")

    # Keep track of which chunk belongs to which chapter
    all_texts = [
        (chapter, chunk)
        for chapter, chunks in chunked_data.items()
        for chunk in chunks
    ]

    # Encode all chunks in a single call so the model can batch them, rather
    # than invoking it once per chunk.
    all_embeddings = np.asarray(model.encode([chunk for _, chunk in all_texts]))

    # os.makedirs("") raises, so only create a directory when the output path
    # actually has a directory component.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    np.save(output_path, all_embeddings)

    # Save text references for easy retrieval
    texts_path = output_path.replace(".npy", "_texts.json")
    with open(texts_path, "w", encoding="utf-8") as out_file:
        json.dump(all_texts, out_file, ensure_ascii=False, indent=4)

    print(f"Embeddings and texts saved successfully in '{output_path}' and '{texts_path}'.")

# Main function
def main():
    """Generate embeddings for the chunked book, provided the chunk file is present."""
    chunked_json_path = "new/prisoner_of_azkaban_chunked.json"  # output of the chunking step
    embeddings_output_path = "new/embeddings_new_model.npy"  # where the vectors are written

    if os.path.exists(chunked_json_path):
        # Input is available: embed every chunk and save the results
        generate_embeddings_for_chunks(chunked_json_path, output_path=embeddings_output_path)
    else:
        print(f"Error: File '{chunked_json_path}' not found. Please run the chunking step first.")

if __name__ == "__main__":
    main()

  from tqdm.autonotebook import tqdm, trange


Model 'all-mpnet-base-v2' loaded successfully.
Embeddings and texts saved successfully in 'new/embeddings_new_model.npy' and 'new/embeddings_new_model_texts.json'.


In [5]:
import faiss

# Function to store embeddings in FAISS
def store_embeddings_in_faiss(embeddings_path, texts_path, index_path="new/faiss_index.index"):
    """Build a flat L2 FAISS index from saved embeddings and write it to disk.

    Args:
        embeddings_path: Path of the .npy file holding the embedding vectors.
        texts_path: Path of the JSON file holding one text reference per vector.
        index_path: Path of the index file to write. The text references are
            copied next to it, with ".index" replaced by "_texts.json".

    Raises:
        ValueError: If the embeddings are not a 2-D array, or the number of
            text references differs from the number of vectors.
    """
    # Load embeddings and corresponding texts
    embeddings = np.load(embeddings_path)
    with open(texts_path, "r", encoding="utf-8") as file:
        texts = json.load(file)

    # Search results are row numbers that are later used to look up `texts`,
    # so a mismatch here would return the wrong passage at query time.
    if embeddings.ndim != 2:
        raise ValueError(f"Expected a 2-D embeddings array, got shape {embeddings.shape}.")
    if len(texts) != embeddings.shape[0]:
        raise ValueError(
            f"Found {embeddings.shape[0]} embeddings but {len(texts)} text references."
        )

    # FAISS expects contiguous float32 input.
    embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

    # Create a FAISS index
    dimension = embeddings.shape[1]  # Number of dimensions of embeddings
    index = faiss.IndexFlatL2(dimension)  # Using L2 distance for similarity search

    # Add embeddings to the index
    index.add(embeddings)

    # Save the index to disk
    faiss.write_index(index, index_path)

    # Save texts for retrieval
    with open(index_path.replace(".index", "_texts.json"), "w", encoding="utf-8") as out_file:
        json.dump(texts, out_file, ensure_ascii=False, indent=4)

    print(f"Embeddings stored in FAISS index successfully at '{index_path}'.")

# Main function to run the FAISS embedding storage
def main():
    """Build the FAISS index from the saved embeddings, provided both input files exist."""
    # The unused `chunked_json_path` local was removed; this step only needs
    # the embeddings and their text references.
    embeddings_output_path = "new/embeddings_new_model.npy"
    texts_output_path = "new/embeddings_new_model_texts.json"  # Corresponding texts

    # Ensure the embeddings and texts files exist
    if not os.path.exists(embeddings_output_path) or not os.path.exists(texts_output_path):
        print("Error: Embeddings or texts files not found. Please run the embedding generation step first.")
        return

    # Store embeddings in FAISS
    store_embeddings_in_faiss(embeddings_output_path, texts_output_path)

if __name__ == "__main__":
    main()

Embeddings stored in FAISS index successfully at 'new/faiss_index.index'.


In [11]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import json
import os

# Function to query FAISS
def query_faiss(user_query, model_name='all-mpnet-base-v2', top_n=5, index_path="new/faiss_index.index"):
    """Return the stored chunks closest to `user_query`.

    Args:
        user_query: The query text to embed and search for.
        model_name: Name of the SentenceTransformer model used for the query.
        top_n: Number of neighbours to request from the index.
        index_path: Path of the FAISS index file. The text references are read
            from the same path with ".index" replaced by "_texts.json".

    Returns:
        A list of `(text_reference, distance)` tuples in the order returned by
        the index; it may hold fewer than `top_n` items.
    """
    # Load the pre-trained embedding model
    model = SentenceTransformer(model_name)

    # Convert user query to embedding
    query_embedding = model.encode(user_query)

    # Load the FAISS index
    index = faiss.read_index(index_path)

    # Perform a similarity search
    distances, indices = index.search(np.array([query_embedding]), top_n)

    # Load the corresponding texts
    texts_path = index_path.replace(".index", "_texts.json")
    with open(texts_path, "r", encoding="utf-8") as file:
        texts = json.load(file)

    # FAISS pads the result with -1 when it has fewer than top_n hits; skip
    # those, because a negative index would wrap around to the end of `texts`.
    return [
        (texts[text_idx], distances[0][rank])
        for rank, text_idx in enumerate(indices[0])
        if text_idx >= 0
    ]

# Main function to run the query handling
def main():
    """Prompt for a query and print the closest chunks together with their distances."""
    user_query = input("Enter your query: ")
    top_n = 5  # how many neighbours to show

    matches = query_faiss(user_query, top_n=top_n)
    print("Top relevant chunks:")
    for match in matches:
        chunk, distance = match
        print(f"Chunk: {chunk}, Distance: {distance}\n")

if __name__ == "__main__":
    main()

Enter your query:  who is harry potter


Top relevant chunks:
Chunk: ['Chapter 4\nThe Leaky Cauldron', 'a round-faced, forgetful boy, outside Flourish and Blotts. Harry didn’t stop to chat; Neville appeared to have mislaid his booklist and was being told oﬀ by his very formidable-looking grandmother. Harry hoped she never found out that he’d pretended to be Neville while on the run from the Ministry of Magic. Harry woke on the last day of the holidays, thinking that he would at least meet Ron and Hermione tomorrow, on the Hogwarts Express. He got up, dressed, went for a last look at the Firebolt, and was just wondering where he’d have lunch, when someone yelled his name'], Distance: 0.922834575176239

Chunk: ['Chapter 1\nOwl Post', 'Harry Potter was a highly unusual boy in many ways. For one thing, he hated the summer holidays more than any other time of year. For another, he really wanted to do his homework but was forced to do it in secret, in the dead of night. And he also happened to be a wizard. It was nearly midnight, a

In [32]:
import json
import os

import faiss
import numpy as np
import requests
from sentence_transformers import SentenceTransformer

# Hugging Face API token and model endpoint.
# The token is a secret, so it is read from the HUGGING_FACE_API_TOKEN
# environment variable instead of being stored in the notebook. Any token that
# was previously saved in this file should be revoked.
API_TOKEN = os.getenv("HUGGING_FACE_API_TOKEN", "")
# MODEL_NAME = 'openai-community/gpt2'  # Alternative model
MODEL_NAME = 'meta-llama/Llama-3.2-11B-Vision-Instruct'  # Replace with the desired Hugging Face model
# Function to generate a response from the LLM using the Hugging Face API
def generate_response(prompt):
    """Send `prompt` to the Hugging Face inference endpoint and return the generated text.

    Args:
        prompt: The full prompt text to send as the model input.

    Returns:
        The `generated_text` of the first result, or None when the request
        fails, the status is not 200, or the body has an unexpected shape.
    """
    headers = {
        'Authorization': f'Bearer {API_TOKEN}',
        'Content-Type': 'application/json'
    }
    payload = {
        'inputs': prompt,
        'options': {
            'use_cache': False,  # Disable caching to get real-time responses
        }
    }

    # Make the POST request to the Hugging Face model endpoint. The timeout
    # keeps a stalled connection from blocking forever.
    try:
        response = requests.post(
            f'https://api-inference.huggingface.co/models/{MODEL_NAME}',
            headers=headers,
            json=payload,
            timeout=60,
        )
    except requests.RequestException as e:
        print("Error: request to the Hugging Face API failed:", e)
        return None

    if response.status_code != 200:
        print("Error:", response.status_code, response.text)
        return None

    try:
        # Attempt to extract and return the generated text from the response
        return response.json()[0]['generated_text']
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # Print the raw body: parsing it again could raise inside this handler.
        print("Unexpected response structure:", response.text, "Error:", e)
        return None

# Function to load embeddings and text data
def load_embeddings_and_texts(embeddings_path, texts_path):
    """Read the saved embedding array and its text references.

    Returns:
        A tuple `(embeddings, texts)`: the array loaded from `embeddings_path`
        and the JSON content of `texts_path`.
    """
    vectors = np.load(embeddings_path)
    with open(texts_path, "r", encoding="utf-8") as handle:
        references = json.loads(handle.read())
    return vectors, references

# Function to load the FAISS index from disk
def load_faiss_index(index_path):
    """Read the FAISS index stored at `index_path` and return it."""
    return faiss.read_index(index_path)

# Function to handle user queries and retrieve relevant chunks
def handle_query(query, model, index, texts, top_k=5):
    """Embed `query` and return the closest stored chunks found by `index`.

    Args:
        query: The user's question.
        model: Object whose `encode(query)` returns the query embedding.
        index: Object whose `search(vectors, k)` returns `(distances, indices)`.
        texts: Sequence of `(chapter, chunk)` pairs addressed by the returned
            indices.
        top_k: Number of neighbours requested from the index.

    Returns:
        A list of dicts with keys "chapter", "text_chunk" and "distance", in
        the order returned by the index. It may hold fewer than `top_k` items.
    """
    # Convert the user query into an embedding
    query_embedding = model.encode(query)

    # Use the FAISS index to find the most similar chunks
    distances, indices = index.search(np.array([query_embedding]), top_k)

    results = []
    for idx, dist in zip(indices[0], distances[0]):
        # FAISS pads the result with -1 when it has fewer than top_k hits; a
        # negative index would silently wrap around to the end of `texts`.
        if idx < 0:
            continue
        chapter, chunk = texts[int(idx)]
        results.append({
            "chapter": chapter,
            "text_chunk": chunk,
            "distance": float(dist),  # plain float so the dict is JSON-serialisable
        })

    return results

# Function to create a cohesive prompt using top retrieved chunks
def create_prompt(query, top_chunks):
    """Build the LLM prompt: retrieved chunk texts as context, then the question.

    Args:
        query: The user's question.
        top_chunks: Iterable of dicts, each providing a "text_chunk" entry.

    Returns:
        A string of the form "Context: ...\\nQuestion: ...\\nAnswer:".
    """
    chunk_texts = (entry["text_chunk"] for entry in top_chunks)
    context = " ".join(chunk_texts)
    return "Context: {}\nQuestion: {}\nAnswer:".format(context, query)

# Main function to handle the query, retrieve top chunks, and generate a response
def main():
    """Answer one hard-coded example question: retrieve context, build a prompt, query the LLM."""
    # Artifacts written by the embedding and indexing steps
    embeddings_path = "new/embeddings_new_model.npy"
    texts_path = "new/embeddings_new_model_texts.json"
    faiss_index_path = "new/faiss_index.index"

    # Embedding model used to encode the query
    sentence_model_name = 'all-mpnet-base-v2'
    sentence_model = SentenceTransformer(sentence_model_name)
    print(f"Model '{sentence_model_name}' loaded successfully.")

    # Only the text references are used below; the embeddings are loaded but not needed here
    _embeddings, texts = load_embeddings_and_texts(embeddings_path, texts_path)

    index = load_faiss_index(faiss_index_path)
    print("FAISS index loaded successfully.")

    # Example query
    user_query = "who is harry potter's girlfriend"

    # Retrieve the 5 closest chunks, turn them into a prompt, and ask the model
    top_results = handle_query(user_query, sentence_model, index, texts, top_k=5)
    final_prompt = create_prompt(user_query, top_results)
    generated_answer = generate_response(final_prompt)

    # Display the final answer, or a fallback message when nothing usable came back
    print("LLM Generated Response:")
    print(generated_answer if generated_answer else "No valid response received from the model.")

if __name__ == "__main__":
    main()

Model 'all-mpnet-base-v2' loaded successfully.
FAISS index loaded successfully.
LLM Generated Response:
 Cho Chang
Related Questions: harry potter girlfriend name
" Someone who comes from a particularly good wizarding family — Can't say I've gotten to know her very well, but she seems to be part of that set that fawns over the Malfoys."
His new girlfriend, Cheryl Rodewald. Interestingly, Granger, now known professionally as Hermione, Another Weekend. He asked various relationships Hermione for her help, stressing that he believed in her, Ginny. Toonkey


In [2]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import requests
import nest_asyncio
import numpy as np
import faiss
import json
import uvicorn
from sentence_transformers import SentenceTransformer
import os
from contextlib import asynccontextmanager
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles

nest_asyncio.apply()

# Global configuration variables
# The API token is a secret: it must come from the HUGGING_FACE_API_TOKEN
# environment variable. No token is kept in the source as a fallback; any token
# that was previously saved in this file should be revoked.
API_TOKEN = os.getenv("HUGGING_FACE_API_TOKEN", "")
MODEL_NAME = os.getenv("HUGGING_FACE_MODEL_NAME", "meta-llama/Llama-3.2-11B-Vision-Instruct")  # Replace with model name or set as an environment variable
SENTENCE_MODEL_NAME = 'all-mpnet-base-v2'

EMBEDDINGS_PATH = "embeddings_new_model.npy"
TEXTS_PATH = "embeddings_new_model_texts.json"
FAISS_INDEX_PATH = "faiss_index.index"

# Load the pre-trained embedding model now; the FAISS index and the text
# references are filled in by the application's lifespan handler at startup.
sentence_model = SentenceTransformer(SENTENCE_MODEL_NAME)
index = None
texts = None

def load_embeddings_and_texts(embeddings_path, texts_path):
    """Load the saved text references and embedding array.

    Args:
        embeddings_path: Path of the .npy file holding the embeddings.
        texts_path: Path of the JSON file holding the text references.

    Returns:
        A tuple `(embeddings, texts)`.

    Raises:
        Exception: Any error raised while reading either file is printed and
            then re-raised unchanged.
    """
    try:
        # Decode explicitly as UTF-8: the platform default encoding can differ
        # and would garble or reject non-ASCII characters in the book text.
        with open(texts_path, "r", encoding="utf-8") as f:
            texts = json.load(f)
        embeddings = np.load(embeddings_path)
        return embeddings, texts
    except Exception as e:
        print(f"Error loading embeddings or texts: {e}")
        raise

# Function to load the FAISS index
def load_faiss_index(faiss_index_path):
    """Read the FAISS index at `faiss_index_path`.

    A missing file raises FileNotFoundError. Every failure is printed and then
    re-raised to the caller.
    """
    try:
        if os.path.exists(faiss_index_path):
            return faiss.read_index(faiss_index_path)
        raise FileNotFoundError(f"FAISS index file does not exist at {faiss_index_path}")
    except Exception as load_error:
        print(f"Error loading FAISS index: {load_error}")
        raise

# Use an async context manager to handle startup and shutdown
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Populate the module-level `index` and `texts` before the app serves requests.

    The shutdown message is printed when the context exits, including when
    loading fails during startup.
    """
    global index, texts
    try:
        # The embeddings themselves are not kept; only the text references are.
        _unused_embeddings, texts = load_embeddings_and_texts(EMBEDDINGS_PATH, TEXTS_PATH)
        index = load_faiss_index(FAISS_INDEX_PATH)
        if index is None:
            raise ValueError("Failed to load FAISS index.")
        # Hand control to the application; execution resumes here on shutdown.
        yield
    finally:
        print("Application is shutting down.")

# Define the FastAPI app
app = FastAPI(lifespan=lifespan)

# Cross-origin access is open to every origin, so credentialed requests are
# disabled: a wildcard origin must not be combined with allow_credentials=True.
# To support cookies or auth headers, list the frontend origin(s) explicitly
# in allow_origins and re-enable allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Specify your frontend origin
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Serve the files under ./static (including the chat UI) at /static
app.mount("/static", StaticFiles(directory="static"), name="static")

@app.get("/", response_class=HTMLResponse)
async def serve_html():
    """Serve the chat UI page stored at static/chat-ui.html."""
    html_path = "static/chat-ui.html"
    # Decode the file explicitly as UTF-8 so the served page does not depend on
    # the platform's default encoding.
    with open(html_path, "r", encoding="utf-8") as html_file:
        return HTMLResponse(content=html_file.read(), status_code=200)

# Define the request and response models
class QueryRequest(BaseModel):
    """Request body accepted by the /generate-answer endpoint."""
    user_query: str  # the question text to answer

class ResponseModel(BaseModel):
    """Response body returned by the /generate-answer endpoint."""
    generated_answer: str  # the model's answer after post-processing

# Function to handle query and retrieve relevant chunks
def handle_query(query, model, index, texts, top_k=5):
    """Embed `query` and return the closest stored chunks found by `index`.

    Args:
        query: The user's question.
        model: Object whose `encode(query)` returns the query embedding.
        index: Object whose `search(vectors, k)` returns `(distances, indices)`.
        texts: Sequence of `(chapter, chunk)` pairs addressed by the returned
            indices.
        top_k: Number of neighbours requested from the index.

    Returns:
        A list of dicts with keys "chapter", "text_chunk" and "distance", in
        the order returned by the index. It may hold fewer than `top_k` items.
    """
    query_embedding = model.encode(query)
    distances, indices = index.search(np.array([query_embedding]), top_k)

    results = []
    for idx, dist in zip(indices[0], distances[0]):
        # FAISS pads the result with -1 when it has fewer than top_k hits; a
        # negative index would silently wrap around to the end of `texts`.
        if idx < 0:
            continue
        chapter, chunk = texts[int(idx)]
        results.append({
            "chapter": chapter,
            "text_chunk": chunk,
            "distance": float(dist),  # plain float so the dict is JSON-serialisable
        })

    return results

# Function to generate a response from the LLM using the Hugging Face API
def generate_response(prompt, max_new_tokens=50):
    """Send `prompt` to the Hugging Face inference endpoint and return the generated text.

    Args:
        prompt: The full prompt text to send as the model input.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The `generated_text` of the first result, or None when the request
        fails, the status is not 200, or the body has an unexpected shape.
    """
    headers = {
        'Authorization': f'Bearer {API_TOKEN}',
        'Content-Type': 'application/json'
    }
    payload = {
        'inputs': prompt,
        # Generation settings belong under "parameters"; "options" only
        # carries request-level switches such as caching.
        'parameters': {
            'max_new_tokens': max_new_tokens
        },
        'options': {
            'use_cache': False
        }
    }

    # The timeout keeps a stalled connection from blocking the request forever.
    try:
        response = requests.post(
            f'https://api-inference.huggingface.co/models/{MODEL_NAME}',
            headers=headers,
            json=payload,
            timeout=60,
        )
    except requests.RequestException as e:
        print("Error: request to the Hugging Face API failed:", e)
        return None

    if response.status_code != 200:
        print("Error:", response.status_code, response.text)
        return None

    try:
        return response.json()[0]['generated_text']
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # Print the raw body: parsing it again could raise inside this handler.
        print("Unexpected response structure:", response.text, "Error:", e)
        return None

# Function to create a cohesive prompt using top retrieved chunks
def create_prompt(query, top_chunks, max_tokens=900):
    """Build the LLM prompt from as many leading chunks as fit the token budget.

    A chunk's size is estimated as `len(text) // 5` tokens. Chunks are taken in
    order, and collection stops at the first chunk that would push the running
    estimate above `max_tokens`.

    Returns:
        A string of the form "Context: ...\\nQuestion: ...\\nAnswer:".
    """
    selected = []
    budget_used = 0

    for entry in top_chunks:
        body = entry["text_chunk"]
        estimated_tokens = len(body) // 5
        if budget_used + estimated_tokens > max_tokens:
            break
        selected.append(f"{body} ")
        budget_used += estimated_tokens

    context = "".join(selected).strip()
    return "Context: {}\nQuestion: {}\nAnswer:".format(context, query)

# Define the main endpoint for handling query-response interactions
# @app.post("/generate-answer", response_model=ResponseModel)
# async def generate_answer(query_request: QueryRequest):
#     user_query = query_request.user_query

#     # Retrieve the top 5 chunks from the FAISS index
#     top_results = handle_query(user_query, sentence_model, index, texts, top_k=5)
    
#     if not top_results:
#         raise HTTPException(status_code=404, detail="No relevant chunks found.")

#     # Create a prompt with the top chunks
#     final_prompt = create_prompt(user_query, top_results, max_tokens=900)

#     # Generate a response using the Hugging Face API
#     generated_answer = generate_response(final_prompt, max_new_tokens=50)
    
#     if not generated_answer:
#         raise HTTPException(status_code=500, detail="Failed to generate a response from the model.")

#     return {"generated_answer": generated_answer}


@app.post("/generate-answer", response_model=ResponseModel)
async def generate_answer(query_request: QueryRequest):
    """Answer the submitted question using retrieved chunks as context for the LLM.

    Responds with 404 when retrieval yields nothing and 500 when the model
    yields no text. The answer is cut after its last full stop, if it has one.
    """
    question = query_request.user_query

    # Fetch the 5 nearest chunks from the FAISS index
    retrieved = handle_query(question, sentence_model, index, texts, top_k=5)
    if not retrieved:
        raise HTTPException(status_code=404, detail="No relevant chunks found.")

    # Turn the chunks into a prompt and ask the model
    prompt = create_prompt(question, retrieved, max_tokens=900)
    answer = generate_response(prompt, max_new_tokens=50)
    if not answer:
        raise HTTPException(status_code=500, detail="Failed to generate a response from the model.")

    # Drop any trailing partial sentence; keep the answer whole if it has no full stop
    cut = answer.rfind('.')
    final_answer = answer if cut == -1 else answer[:cut + 1]

    return {"generated_answer": final_answer}


# Run the FastAPI app when this cell/script is executed directly.
# NOTE(review): host "0.0.0.0" listens on all network interfaces — confirm that exposure is intended.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)

INFO:     Started server process [16872]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:59003 - "POST /generate-answer HTTP/1.1" 200 OK
INFO:     127.0.0.1:59066 - "POST /generate-answer HTTP/1.1" 200 OK
INFO:     127.0.0.1:59109 - "POST /generate-answer HTTP/1.1" 200 OK
INFO:     127.0.0.1:59109 - "POST /generate-answer HTTP/1.1" 200 OK
INFO:     127.0.0.1:59189 - "POST /generate-answer HTTP/1.1" 200 OK
INFO:     127.0.0.1:59189 - "POST /generate-answer HTTP/1.1" 200 OK
INFO:     127.0.0.1:59189 - "POST /generate-answer HTTP/1.1" 200 OK
INFO:     127.0.0.1:59189 - "POST /generate-answer HTTP/1.1" 200 OK
INFO:     127.0.0.1:59279 - "POST /generate-answer HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [16872]


Application is shutting down.


In [None]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import requests
import nest_asyncio
import numpy as np
import faiss
import json
import uvicorn
from sentence_transformers import SentenceTransformer
import os
from contextlib import asynccontextmanager
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles

nest_asyncio.apply()

# Global configuration variables
# The API token is a secret: it must come from the HUGGING_FACE_API_TOKEN
# environment variable. No token is kept in the source as a fallback; any token
# that was previously saved in this file should be revoked.
API_TOKEN = os.getenv("HUGGING_FACE_API_TOKEN", "")
MODEL_NAME = os.getenv("HUGGING_FACE_MODEL_NAME", "meta-llama/Llama-3.2-11B-Vision-Instruct")  # Replace with model name or set as an environment variable
SENTENCE_MODEL_NAME = 'all-mpnet-base-v2'

EMBEDDINGS_PATH = "embeddings_new_model.npy"
TEXTS_PATH = "embeddings_new_model_texts.json"
FAISS_INDEX_PATH = "faiss_index.index"

# Load the pre-trained embedding model now; the FAISS index and the text
# references are filled in by the application's lifespan handler at startup.
sentence_model = SentenceTransformer(SENTENCE_MODEL_NAME)
index = None
texts = None

def load_embeddings_and_texts(embeddings_path, texts_path):
    """Load the saved text references and embedding array.

    Args:
        embeddings_path: Path of the .npy file holding the embeddings.
        texts_path: Path of the JSON file holding the text references.

    Returns:
        A tuple `(embeddings, texts)`.

    Raises:
        Exception: Any error raised while reading either file is printed and
            then re-raised unchanged.
    """
    try:
        # Decode explicitly as UTF-8: the platform default encoding can differ
        # and would garble or reject non-ASCII characters in the book text.
        with open(texts_path, "r", encoding="utf-8") as f:
            texts = json.load(f)
        embeddings = np.load(embeddings_path)
        return embeddings, texts
    except Exception as e:
        print(f"Error loading embeddings or texts: {e}")
        raise

# Function to load the FAISS index
def load_faiss_index(faiss_index_path):
    """Read the FAISS index at `faiss_index_path`.

    A missing file raises FileNotFoundError. Every failure is printed and then
    re-raised to the caller.
    """
    try:
        if os.path.exists(faiss_index_path):
            return faiss.read_index(faiss_index_path)
        raise FileNotFoundError(f"FAISS index file does not exist at {faiss_index_path}")
    except Exception as load_error:
        print(f"Error loading FAISS index: {load_error}")
        raise

# Use an async context manager to handle startup and shutdown
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Populate the module-level `index` and `texts` before the app serves requests.

    The shutdown message is printed when the context exits, including when
    loading fails during startup.
    """
    global index, texts
    try:
        # The embeddings themselves are not kept; only the text references are.
        _unused_embeddings, texts = load_embeddings_and_texts(EMBEDDINGS_PATH, TEXTS_PATH)
        index = load_faiss_index(FAISS_INDEX_PATH)
        if index is None:
            raise ValueError("Failed to load FAISS index.")
        # Hand control to the application; execution resumes here on shutdown.
        yield
    finally:
        print("Application is shutting down.")

# Define the FastAPI app
app = FastAPI(lifespan=lifespan)

# Cross-origin access is open to every origin, so credentialed requests are
# disabled: a wildcard origin must not be combined with allow_credentials=True.
# To support cookies or auth headers, list the frontend origin(s) explicitly
# in allow_origins and re-enable allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Specify your frontend origin
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Serve the files under ./static at /static
app.mount("/static", StaticFiles(directory="static"), name="static")

# Define the request and response models
class QueryRequest(BaseModel):
    """Request body accepted by the /generate-answer endpoint."""
    user_query: str  # the question text to answer

class ResponseModel(BaseModel):
    """Response body returned by the /generate-answer endpoint."""
    generated_answer: str  # the model's answer after post-processing

# Function to handle query and retrieve relevant chunks
def handle_query(query, model, index, texts, top_k=5):
    """Embed `query` and return the closest stored chunks found by `index`.

    Args:
        query: The user's question.
        model: Object whose `encode(query)` returns the query embedding.
        index: Object whose `search(vectors, k)` returns `(distances, indices)`.
        texts: Sequence of `(chapter, chunk)` pairs addressed by the returned
            indices.
        top_k: Number of neighbours requested from the index.

    Returns:
        A list of dicts with keys "chapter", "text_chunk" and "distance", in
        the order returned by the index. It may hold fewer than `top_k` items.
    """
    query_embedding = model.encode(query)
    distances, indices = index.search(np.array([query_embedding]), top_k)

    results = []
    for idx, dist in zip(indices[0], distances[0]):
        # FAISS pads the result with -1 when it has fewer than top_k hits; a
        # negative index would silently wrap around to the end of `texts`.
        if idx < 0:
            continue
        chapter, chunk = texts[int(idx)]
        results.append({
            "chapter": chapter,
            "text_chunk": chunk,
            "distance": float(dist),  # plain float so the dict is JSON-serialisable
        })

    return results

# Function to generate a response from the LLM using the Hugging Face API
def generate_response(prompt, max_new_tokens=50):
    """Send `prompt` to the Hugging Face inference endpoint and return the generated text.

    Args:
        prompt: The full prompt text to send as the model input.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The `generated_text` of the first result, or None when the request
        fails, the status is not 200, or the body has an unexpected shape.
    """
    headers = {
        'Authorization': f'Bearer {API_TOKEN}',
        'Content-Type': 'application/json'
    }
    payload = {
        'inputs': prompt,
        # Generation settings belong under "parameters"; "options" only
        # carries request-level switches such as caching.
        'parameters': {
            'max_new_tokens': max_new_tokens
        },
        'options': {
            'use_cache': False
        }
    }

    # The timeout keeps a stalled connection from blocking the request forever.
    try:
        response = requests.post(
            f'https://api-inference.huggingface.co/models/{MODEL_NAME}',
            headers=headers,
            json=payload,
            timeout=60,
        )
    except requests.RequestException as e:
        print("Error: request to the Hugging Face API failed:", e)
        return None

    if response.status_code != 200:
        print("Error:", response.status_code, response.text)
        return None

    try:
        return response.json()[0]['generated_text']
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # Print the raw body: parsing it again could raise inside this handler.
        print("Unexpected response structure:", response.text, "Error:", e)
        return None

# Function to create a cohesive prompt using top retrieved chunks
def create_prompt(query, top_chunks, max_tokens=900):
    """Build a reasoning-oriented LLM prompt from the chunks that fit the token budget.

    A chunk's size is estimated as `len(text) // 5` tokens. Chunks are taken in
    order, and collection stops at the first chunk that would push the running
    estimate above `max_tokens`.

    Returns:
        The prompt: the context, the question, an instruction asking for a
        detailed answer, and a trailing "Answer:" marker, one per line.
    """
    selected = []
    budget_used = 0

    for entry in top_chunks:
        body = entry["text_chunk"]
        estimated_tokens = len(body) // 5
        if budget_used + estimated_tokens > max_tokens:
            break
        selected.append(f"{body} ")
        budget_used += estimated_tokens

    context = "".join(selected).strip()

    # Assemble the prompt line by line; the instruction line asks for reasoning
    prompt_lines = [
        f"Context: {context}",
        f"Question: {query}",
        "Please provide a detailed answer with reasoning and implications:",
        "Answer:",
    ]
    return "\n".join(prompt_lines)

# Define the main endpoint for handling query-response interactions
@app.post("/generate-answer", response_model=ResponseModel)
async def generate_answer(query_request: QueryRequest):
    """Answer the submitted question using retrieved chunks as context for the LLM.

    Responds with 404 when retrieval yields nothing and 500 when the model
    yields no text. The answer is cut after its last full stop, if it has one.
    """
    user_query = query_request.user_query

    # Retrieve the top 5 chunks from the FAISS index
    top_results = handle_query(user_query, sentence_model, index, texts, top_k=5)

    if not top_results:
        raise HTTPException(status_code=404, detail="No relevant chunks found.")

    # Create a prompt with the top chunks
    final_prompt = create_prompt(user_query, top_results, max_tokens=900)

    # Generate a response using the Hugging Face API
    generated_answer = generate_response(final_prompt, max_new_tokens=100)

    if not generated_answer:
        raise HTTPException(status_code=500, detail="Failed to generate a response from the model.")

    # Short answers are returned as generated. Appending an instruction such as
    # "Please elaborate on your answer." here would only show that text to the
    # user (the model never sees it), and its trailing full stop would stop the
    # truncation below from removing a dangling partial sentence.

    # Find the last full stop in the generated answer for truncation
    last_full_stop_idx = generated_answer.rfind('.')

    if last_full_stop_idx != -1:
        # If a full stop exists, truncate the answer up to the last full stop
        truncated_answer = generated_answer[:last_full_stop_idx + 1]
    else:
        # If no full stop is found, return the answer as it is
        truncated_answer = generated_answer

    return {"generated_answer": truncated_answer}

# Run the FastAPI app when this cell/script is executed directly.
# NOTE(review): host "0.0.0.0" listens on all network interfaces — confirm that exposure is intended.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)