In [1]:
from fastapi import FastAPI, UploadFile, File
import fitz  # PyMuPDF
import os
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_openai import OpenAI

# ASGI application object; the route decorators in the following cells register on it.
app = FastAPI()

# Only fall back to the placeholder when no key is configured. A plain
# assignment here would overwrite a real OPENAI_API_KEY already exported in
# the environment every time this cell is run.
os.environ.setdefault("OPENAI_API_KEY", "your-api-key-here")

# RetrievalQA chain built by upload_pdf(); stays None until a PDF has been uploaded.
qa_chain = None

In [2]:
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        A single string made of each page's ``get_text()`` output,
        concatenated in page order. Empty if no page yields any text.

    Raises:
        Whatever ``fitz.open`` or ``page.get_text`` raise (e.g. for a missing
        or corrupt file); the document is still closed in that case.
    """
    # The context manager closes the document even when a page fails to
    # parse, so the underlying file handle is never leaked.
    with fitz.open(pdf_path) as pdf_document:
        # Join once instead of growing a string with += on every page.
        return "".join(page.get_text() for page in pdf_document)

@app.post("/upload-pdf/")
async def upload_pdf(file: UploadFile = File(...)):
    """Upload a PDF, extract its text, and (re)build the global QA chain.

    Args:
        file: The uploaded PDF, sent as multipart form data.

    Returns:
        On success, a dict with a ``"message"`` and a ``"text_preview"``
        holding the first 500 characters of the extracted text.
        If no text could be extracted, a dict with a single ``"error"`` key;
        the previously built QA chain (if any) is left untouched.

    Side effects:
        Replaces the module-level ``qa_chain`` with a chain backed by the
        newly uploaded document.
    """
    global qa_chain

    # Function-scope import so this cell does not depend on edits to the
    # notebook's import cell.
    import tempfile

    # Save PDF Temporarily.
    # The client-supplied filename is deliberately NOT used to build the
    # path: it is untrusted (may be missing or contain path separators) and
    # two uploads with the same name would overwrite each other.
    payload = await file.read()
    with tempfile.NamedTemporaryFile(
        prefix="temp_", suffix=".pdf", delete=False
    ) as buffer:
        buffer.write(payload)
        file_path = buffer.name

    # Extract Text. The temp file is closed before it is reopened by the
    # extractor, and removed afterwards whether or not extraction succeeds.
    try:
        extracted_text = extract_text_from_pdf(file_path)
    finally:
        os.remove(file_path)

    if not extracted_text.strip():
        return {"error": "PDF extraction failed. The text is empty."}

    # Split Text into Chunks
    documents = [Document(page_content=extracted_text)]
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    texts = text_splitter.split_documents(documents)

    # Convert to FAISS Vector Store
    embeddings = OpenAIEmbeddings()
    vector_store = FAISS.from_documents(texts, embeddings)
    retriever = vector_store.as_retriever()

    # Initialize QA System.
    # No key literal here: like OpenAIEmbeddings() above, the LLM client
    # picks the key up from the OPENAI_API_KEY environment variable, so the
    # credential is configured in exactly one place.
    llm = OpenAI()
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm, chain_type="stuff", retriever=retriever
    )

    return {
        "message": "PDF uploaded and QA system is ready",
        "text_preview": extracted_text[:500],  # Show first 500 characters
    }

In [3]:
@app.post("/ask-question/")
async def ask_question(query: str):
    """Run a question through the QA chain built from the uploaded PDF.

    Args:
        query: The question text.

    Returns:
        A dict echoing ``"query"`` together with the chain's output under
        ``"answer"``, or a dict with a single ``"error"`` key when no chain
        has been built yet.
    """
    if qa_chain:
        # A chain exists, so a PDF has already been uploaded and indexed.
        return {"query": query, "answer": qa_chain.invoke(query)}

    return {"error": "No PDF uploaded. Please upload a PDF first."}

In [None]:
# Launch cell: starts the API defined in the cells above from inside the notebook.
import nest_asyncio
import uvicorn

# Must run before uvicorn.run(). Presumably needed because the notebook
# kernel already has a running asyncio event loop and uvicorn starts its
# own -- NOTE(review): confirm against the nest_asyncio docs.
nest_asyncio.apply()
# Serves `app` on port 8000. NOTE(review): host "0.0.0.0" listens on every
# network interface, not just localhost -- confirm that exposure is intended.
uvicorn.run(app, host="0.0.0.0", port=8000)