In [1]:
import os
import streamlit as st
import pickle
import time
from dotenv import load_dotenv
import langchain
from langchain import LLMChain
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import PyPDFLoader
from langchain import OpenAI, PromptTemplate
import glob

import google.generativeai as genai
from google.generativeai import GenerativeModel
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

# Upgrade typing_extensions to fix ImportError.
# NOTE(review): this runs after the imports earlier in the same cell, so a kernel
# restart may be needed before the upgraded package takes effect (pip prints the
# same hint). Consider moving it to its own first cell and pinning a version.
%pip install --upgrade typing_extensions

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Pull variables from a local .env file (if one exists) into the environment.
load_dotenv()

# Fail fast when the key is missing. Exporting a placeholder value would only
# postpone the failure to the first API call and encourages pasting a real
# secret into the notebook.
if not os.getenv('GOOGLE_API_KEY'):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Export it in your shell or add it to a .env file."
    )

# No explicit key is passed here, so the client relies on the environment.
genai.configure()

In [3]:
# Model name handed to ChatGoogleGenerativeAI wherever the notebook builds an LLM.
MODEL = "gemini-2.0-flash-lite"

In [4]:
# Chat model instance; the model name comes from the MODEL constant.
llm = ChatGoogleGenerativeAI(
    model=MODEL,
    temperature=0.5,
)

In [5]:
# PDF to process. Set the PAPER_PATH environment variable to point at a
# different file without editing the notebook; when it is unset, the original
# hardcoded location is used.
file_path = os.getenv(
    'PAPER_PATH',
    r'/Users/shardulgore/Documents/Projects/Research-Paper-Summarizer/papers/Envisioning_Medclip_A_Deep_Dive_into_Explainability_for_Medical_Vision-Language_Models.pdf',
)

In [6]:
def summarize(file_path, MODEL):
    """Summarize a PDF using a map-reduce summarization chain.

    Args:
        file_path: Location of the PDF, passed straight to ``PyPDFLoader``.
        MODEL: Model name passed to ``ChatGoogleGenerativeAI``.

    Returns:
        The value returned by the chain's ``invoke`` call on the loaded
        document chunks (presumably a mapping with an ``'output_text'``
        entry, as the commented-out caller in this notebook expects).
    """
    doc_chunks = PyPDFLoader(file_path).load_and_split()
    summarizer = load_summarize_chain(
        ChatGoogleGenerativeAI(model=MODEL, temperature=0.5),
        chain_type='map_reduce',
    )
    return summarizer.invoke(doc_chunks)

In [7]:
# if os.path.exists(file_path):
# 	summ = summarize(file_path, MODEL)
# 	print(summ['output_text'])
# else:
# 	print(f"File path {file_path} is not a valid file or url")

In [8]:
# Task description that the prompt template and question are built from.
CUSTOM_PROMPT = (
    "Write a detailed summary of the methodology used in following paper. "
    "Give output in markdown format."
)

# Directory name used when saving the FAISS index locally.
vectorDB_path = 'faiss_store'

In [9]:
# Load the PDF and split it into document chunks for embedding.
loader = PyPDFLoader(file_path)
docs = loader.load_and_split()

# Prompt text: the task description, then a grounding instruction and a
# {text} placeholder wrapped in <context> tags.
prompt_template = CUSTOM_PROMPT + """
Answer the following question based only on the provided context, do not use any external information.:

<context>
{text}
</context>
"""

PROMPT = PromptTemplate(template=prompt_template, input_variables=['text'])
llm = ChatGoogleGenerativeAI(model=MODEL, temperature=0.5)
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Build the FAISS index from the chunks and persist it under vectorDB_path.
vector_store = FAISS.from_documents(docs, embeddings)
vector_store.save_local(vectorDB_path)

# Reuse the index that was just built instead of reading it straight back from
# disk: the reload was redundant and required allow_dangerous_deserialization=True
# (pickle-based loading). Only load a saved index from a location you trust.
vectorstore = vector_store

In [None]:
# Question-answering-with-sources chain that retrieves from the FAISS index.
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
)

: 

In [None]:
# Ask the actual question. The chain obtains its context through the retriever
# it was built with, so only "question" is supplied. Passing the raw template
# would send an unfilled {text} placeholder to the model, and an extra
# "context" entry is not an input this chain declares.
result = chain.invoke({"question": CUSTOM_PROMPT})

In [None]:
# result = chain.invoke({"question": prompt_template}, return_only_outputs=True)