In [None]:
from flask import Flask, request, render_template, jsonify, session
import os
import fitz  # PyMuPDF

app = Flask(__name__)
# Key used by Flask to sign the session cookie. It is read from the
# FLASK_SECRET_KEY environment variable so that no real secret is committed to
# source control. When the variable is unset, a random per-process key is used
# instead of a well-known placeholder: sessions then do not survive a restart
# and are not shared between worker processes, so set the variable for any
# real deployment.
app.secret_key = os.environ.get('FLASK_SECRET_KEY') or os.urandom(32)

# Configuration for file uploads
UPLOAD_FOLDER = 'uploads'      # directory (relative to the CWD) where uploads are stored
ALLOWED_EXTENSIONS = {'pdf'}   # lower-case extensions accepted by allowed_file()
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

def allowed_file(filename):
    """Return True when *filename* carries an extension from ALLOWED_EXTENSIONS.

    The extension is whatever follows the last dot in the name and is
    compared case-insensitively. Names without a dot are rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*, concatenated.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        A single string holding each page's ``get_text()`` output in page
        order (empty when the document has no pages or no text).

    Any exception raised while opening or reading the document propagates to
    the caller; the document handle is closed either way.
    """
    doc = fitz.open(pdf_path)
    try:
        # join() builds the result in one pass instead of repeated `+=`.
        return "".join(page.get_text() for page in doc)
    finally:
        # Release the file handle even if a page fails to render.
        doc.close()

# Answer a query about the extracted PDF text using embedding search plus a question-answering chain
def answer_query(extracted_text, query):
    """Answer *query* from the content of *extracted_text*.

    The text is split into overlapping chunks, embedded with OpenAI
    embeddings and indexed in an in-memory FAISS store. The chunks most
    similar to the query are then passed to a "stuff" question-answering
    chain backed by the OpenAI LLM.

    Args:
        extracted_text: Full text of the document to search.
        query: The user's question.

    Returns:
        The answer string produced by the QA chain, or a short explanatory
        message when *extracted_text* is empty or whitespace only.

    Raises:
        RuntimeError: If the ``OPENAI_API_KEY`` environment variable is not
            set.
    """
    # With nothing to index there are no chunks to embed, so answer directly
    # instead of failing inside the vector store.
    if not extracted_text or not extracted_text.strip():
        return "The uploaded document does not contain any extractable text."

    # SECURITY: the API key must be supplied through the environment. It was
    # previously hard-coded here and written into os.environ on every call;
    # any key that was committed that way should be treated as leaked and
    # revoked.
    if not os.environ.get("OPENAI_API_KEY"):
        raise RuntimeError(
            "OPENAI_API_KEY environment variable is not set; "
            "export it before starting the application"
        )

    # Heavy third-party imports stay local so the checks above run (and fail
    # fast) without loading them.
    from langchain.chains.question_answering import load_qa_chain
    from langchain.llms import OpenAI
    from langchain.text_splitter import CharacterTextSplitter
    from langchain.vectorstores import FAISS
    from langchain_openai import OpenAIEmbeddings

    embeddings = OpenAIEmbeddings()

    # Split on newlines into chunks of at most ~800 characters (measured with
    # len) with a 200-character overlap, so each chunk stays small enough to
    # embed while neighbouring chunks still share context.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=800,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(extracted_text)
    document_search = FAISS.from_texts(texts, embeddings)

    chain = load_qa_chain(OpenAI(), chain_type="stuff")
    docs = document_search.similarity_search(query)
    return chain.run(input_documents=docs, question=query)
    
    
@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page

@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept a PDF upload, extract its text and remember it in the session.

    Expects a multipart form field named ``file``. The file is saved under
    ``app.config['UPLOAD_FOLDER']``, its text is extracted with
    ``extract_text_from_pdf`` and stored in ``session['extracted_text']``.

    Returns:
        A ``(json, status)`` pair: 200 with a success message, or 400 with an
        ``error`` key when the field is missing, the name is empty or not an
        allowed type, or the file cannot be read as a PDF.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400
    uploaded = request.files['file']
    if uploaded.filename == '' or not allowed_file(uploaded.filename):
        return jsonify({'error': 'No selected file or file type not allowed'}), 400

    # SECURITY: the filename is client-controlled. Keep only its final path
    # component (treating backslashes as separators too) so a name such as
    # "../../app.pdf" cannot escape the upload directory.
    safe_name = os.path.basename(uploaded.filename.replace('\\', '/'))
    if not allowed_file(safe_name):
        return jsonify({'error': 'No selected file or file type not allowed'}), 400

    upload_dir = app.config['UPLOAD_FOLDER']
    # The directory is otherwise only created when the module runs as a
    # script; make sure it exists when the app is served some other way.
    os.makedirs(upload_dir, exist_ok=True)
    saved_path = os.path.join(upload_dir, safe_name)
    uploaded.save(saved_path)

    # Only the extension has been checked so far, so the content may not be a
    # readable PDF. Report that as a client error instead of an unhandled 500.
    try:
        extracted_text = extract_text_from_pdf(saved_path)
    except Exception:
        app.logger.exception('Failed to extract text from %s', saved_path)
        return jsonify({'error': 'Could not read the uploaded PDF'}), 400

    # NOTE(review): with Flask's default cookie-backed session the whole text
    # travels in the session cookie; large documents may exceed browser cookie
    # size limits -- consider server-side storage. TODO confirm session backend.
    session['extracted_text'] = extracted_text
    return jsonify({'message': 'File successfully uploaded'}), 200

@app.route('/query', methods=['POST'])
def handle_query():
    """Answer a question about the document previously uploaded in this session.

    Expects a JSON object body with a string ``query`` field and relies on
    ``session['extracted_text']`` having been set by the upload endpoint.

    Returns:
        A ``(json, status)`` pair: 200 with an ``answer`` key, or 400 with an
        ``error`` key when the body is not a JSON object, the query is missing,
        empty or not a string, or no document has been uploaded.
    """
    # silent=True yields None instead of raising on a missing or malformed JSON
    # body; the isinstance check also covers valid JSON that is not an object
    # (e.g. a list), which has no .get().
    payload = request.get_json(silent=True)
    query = payload.get('query', '') if isinstance(payload, dict) else ''
    if not isinstance(query, str):
        # Non-string values (numbers, lists, ...) are treated as an empty query.
        query = ''

    if not query or 'extracted_text' not in session:
        return jsonify({'error': 'Empty query or no document uploaded'}), 400

    response = answer_query(session['extracted_text'], query)
    return jsonify({'answer': response}), 200

if __name__ == '__main__':
    # exist_ok=True creates the upload directory if needed without the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
    # NOTE(review): debug=True turns on Flask's debug mode (reloader and
    # interactive debugger). Use it for local development only; never expose
    # it on a reachable host.
    app.run(debug=True)