In [None]:
import os
import json
import openai
import faiss
import numpy as np
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

In [None]:
# Read the API key from the environment so it never has to be written into the notebook.
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    # os.getenv returns None when the variable is unset; surface that here
    # instead of letting it show up later only as a failed completion request.
    print("Warning: OPENAI_API_KEY is not set; OpenAI requests will fail.")

In [None]:
def load_pdf_text(pdf_path):
    """Return the text of every page in a PDF, joined by newlines.

    Args:
        pdf_path: path (or file-like object) handed to ``PdfReader``.

    Returns:
        A single string containing the extracted text of each page, one page
        per newline-separated segment. Pages for which ``extract_text()``
        returns an empty/falsy value are skipped.
    """
    reader = PdfReader(pdf_path)
    page_texts = []
    for page in reader.pages:
        # Extract once per page: text extraction is the expensive step, and
        # the previous version ran it twice for every non-empty page.
        page_text = page.extract_text()
        if page_text:
            page_texts.append(page_text)
    return "\n".join(page_texts)

def chunk_text(text, chunk_size=300):
    """Split text into consecutive chunks of at most ``chunk_size`` words.

    Words are whatever ``str.split()`` yields (whitespace-delimited); each
    chunk is those words re-joined with single spaces.

    Args:
        text: the text to split.
        chunk_size: maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunk strings in document order (empty for blank text).

    Raises:
        ValueError: if ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        # A negative step would make range() empty and silently drop all text.
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]

In [None]:
# Load earlier question/answer pairs that are later passed to generate_response.
MEMORY_PATH = "memory.json"
try:
    # JSON is UTF-8 by specification; do not depend on the platform default encoding.
    with open(MEMORY_PATH, "r", encoding="utf-8") as f:
        past_interactions = json.load(f)
except FileNotFoundError:
    # No memory file yet: continue with an empty history instead of aborting the notebook.
    print(f"Memory file '{MEMORY_PATH}' not found. Starting with empty memory.")
    past_interactions = []

In [None]:
# Embedding model shared by VectorIndex for encoding both chunks and queries.
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

class VectorIndex:
    """Exact L2 nearest-neighbour index over text chunks.

    Chunks and queries are embedded with the module-level ``model`` and
    stored in a ``faiss.IndexFlatL2``.

    Attributes:
        chunks: the chunk strings, in the order they were indexed.
        embeddings: float32 matrix of chunk embeddings (empty when there are
            no chunks).
        index: the FAISS index, or ``None`` when there was nothing to index.
    """

    def __init__(self, chunks):
        """Embed ``chunks`` and add them to a new FAISS index."""
        self.chunks = chunks
        if len(chunks) == 0:
            # Nothing to embed: there is no embedding dimension to size the
            # index with, so leave it unset and let search() return [].
            self.embeddings = np.empty((0, 0), dtype=np.float32)
            self.index = None
            return
        # FAISS expects C-contiguous float32 matrices.
        self.embeddings = np.ascontiguousarray(model.encode(chunks), dtype=np.float32)
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(self.embeddings)

    def search(self, query, top_k=3):
        """Return up to ``top_k`` chunks closest to ``query``, nearest first.

        Fewer than ``top_k`` chunks are returned when the index holds fewer
        vectors; an empty list is returned for an empty index or a
        non-positive ``top_k``.
        """
        if self.index is None:
            return []
        # Never ask for more neighbours than there are chunks.
        k = min(top_k, len(self.chunks))
        if k <= 0:
            return []
        query_embedding = np.ascontiguousarray(model.encode([query]), dtype=np.float32)
        _distances, indices = self.index.search(query_embedding, k)
        # FAISS pads missing results with -1, which as a Python index would
        # wrongly select the last chunk; drop those slots.
        return [self.chunks[i] for i in indices[0] if i >= 0]

In [None]:
def generate_response(query, context_chunks, memory):
    """Ask an OpenAI chat model to answer ``query`` using context and past Q&A.

    Args:
        query: the user's question.
        context_chunks: iterable of text chunks inserted under "Context:";
            ``None`` is treated as no context.
        memory: iterable of dicts with ``'question'`` and ``'answer'`` keys,
            inserted under "Memory:"; ``None`` is treated as no memory.

    Returns:
        The model's reply with surrounding whitespace stripped. If the request
        (or reading its result) raises, the error is printed and a fixed
        fallback sentence is returned instead.

    Raises:
        KeyError: if a memory entry lacks ``'question'`` or ``'answer'``.
    """
    memory_text = "\n".join(
        f"Q: {m['question']}\nA: {m['answer']}" for m in (memory or [])
    )
    context_text = "\n".join(context_chunks or [])
    prompt = (
        "You are a helpful assistant. Use both context and past Q&A to answer.\n\n"
        f"Memory:\n{memory_text}\n\n"
        f"Context:\n{context_text}\n\n"
        f"User: {query}\nAssistant:"
    )
    messages = [{"role": "user", "content": prompt}]

    try:
        # openai>=1.0 removed openai.ChatCompletion in favour of
        # openai.chat.completions; older releases only have the former.
        # Use whichever interface the installed package exposes.
        chat_api = getattr(openai, "chat", None)
        if chat_api is not None:
            response = chat_api.completions.create(
                model="gpt-3.5-turbo",
                messages=messages,
            )
        else:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=messages,
            )
        return response.choices[0].message.content.strip()
    except Exception as e:
        # Deliberate best-effort: report the failure and return a canned reply.
        print("[OpenAI Fallback]", e)
        return "Based on the available information, it seems related to futuristic virtual worlds."

In [None]:
# End-to-end run: load the PDF, index its chunks, ask one question, print the answer.
pdf_path = "movie_script.pdf"
if not os.path.exists(pdf_path):
    print(f"PDF file '{pdf_path}' not found. Please place it in the same directory.")
else:
    print("\nLoading document...")
    text = load_pdf_text(pdf_path)
    chunks = chunk_text(text)

    if not chunks:
        # A PDF without extractable text (e.g. scanned images) produces no
        # chunks, and an empty chunk list cannot be indexed or searched.
        print(f"No extractable text found in '{pdf_path}'. Nothing to index.")
    else:
        print("Indexing...\n")
        index = VectorIndex(chunks)

        query = input("Ask a question about the document: ")
        top_chunks = index.search(query)

        print("\nGenerating response...")
        answer = generate_response(query, top_chunks, past_interactions)

        print("\nFinal Answer:")
        print(answer)