In [15]:
import langchain
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS, pinecone
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv
from tqdm.autonotebook import tqdm
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load variables from a local .env file (if one exists) into the process
# environment so that os.getenv("GOOGLE_API_KEY") in the next cell can find the key.
load_dotenv()

True

In [21]:
# Chat model for the notebook. The API key comes from the environment so it is
# never written into the notebook itself.
GEMINI_MODEL = "gemini-pro"

llm_gemini = ChatGoogleGenerativeAI(
    google_api_key=os.getenv("GOOGLE_API_KEY"),
    model=GEMINI_MODEL,
)

# Bare expression: lets the notebook display the configured client.
llm_gemini

ChatGoogleGenerativeAI(model='models/gemini-pro', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x729b75b7d6c0>, async_client=<google.ai.generativelanguage_v1beta.services.generative_service.async_client.GenerativeServiceAsyncClient object at 0x729b75b7e9b0>, default_metadata=())

In [None]:
# Load every PDF in the course folder and index the resulting chunks in FAISS.
pdf_pages = []
docs_path = "pdfs/PMB271"

# Hand only PDF files to PyPDFLoader: os.listdir() also returns stray entries
# (hidden files, checkpoint folders, ...) that are not PDFs. Sorting keeps the
# load order, and therefore the index order, stable between runs.
pdf_names = sorted(
    name for name in os.listdir(docs_path) if name.lower().endswith(".pdf")
)

for file in tqdm(pdf_names):
    file_path = os.path.join(docs_path, file)
    pdf_loader = PyPDFLoader(file_path, extract_images=True)
    # load_and_split() returns a list of Document chunks for this file.
    pdf_pages.extend(pdf_loader.load_and_split())

# The embeddings class needs an explicit model name; use the same one as the
# other index built in this notebook so both are embedded consistently.
faiss_index = FAISS.from_documents(
    pdf_pages, GoogleGenerativeAIEmbeddings(model="models/embedding-001")
)

In [None]:
# Sanity check: how many Document chunks were collected from the PDF folder.
print(len(pdf_pages))

In [9]:
# Spot check: load one PDF on its own and look at the first chunk it yields.
pdf_file = "pdfs/PMB271/PMB 271-A Brief History Of Microbiology.pdf"

pdf_loader = PyPDFLoader(
    file_path=pdf_file,
    extract_images=True,
)
pages = pdf_loader.load_and_split()

# Bare expression: display the first Document (text plus metadata).
pages[0]


Document(page_content="HISTORICAL DEVELOPMENT OF MICROBIOLOGY AND THE EFFECTS ON HEALTH  \nA Brief History of Microbiology . Microbiology has had a long, rich history, initially centered in the causes of \ninfectious diseases but now including practical applications of the science. Many individuals have made significant \ncontributions to the development of microbiology.  \nEarly history of microbiology.  Historians are unsure who made the first observations of microorganisms, but the \nmicroscope was available during the mid‐1600s, and an E nglish scientist named  Robert Hooke  made key \nobservations. He is reputed to have observed strands of fungi among the specimens of cells he viewed. In the 1670s \nand the decades thereafter, a Dutch merchant named  Anton van Leeuwenhoek  made careful observatio ns of \nmicroscopic organisms, which he called  animalcules.  Until his death in 1723, van Leeuwenhoek revealed the \nmicroscopic world to scientists of the day and is regarded as one of 

In [13]:
# Splitter configuration: chunks of at most 1000 characters (measured with
# len) with a 20-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)

# Apply the splitter so the chunk_size / chunk_overlap settings above actually
# determine what gets embedded, instead of indexing the loader output as-is.
pdf_chunks = text_splitter.split_documents(pdf_pages)

# Fail with a clear message rather than an opaque error from the vector store
# when nothing was loaded (e.g. the PDF loading cell was skipped).
if not pdf_chunks:
    raise ValueError("No document chunks to index; run the PDF loading cell first.")

# Embed the chunks and build the FAISS index used by the retriever.
faiss_index_db = FAISS.from_documents(
    pdf_chunks, GoogleGenerativeAIEmbeddings(model="models/embedding-001")
)

In [14]:
# Expose the FAISS index through the retriever interface.
retriever = faiss_index_db.as_retriever()
# Backward-compatible alias: other cells refer to the original, misspelled name.
retreiver = retriever

# Smoke-test retrieval with a question the source material should answer.
retriever.invoke("Who is the Dutch merchant?")

[Document(page_content='Martinus Beijerinck (1851 - 1931) developed an elective medium (one which\nuses nutritional requirements to limit what can grow on a plate). Beijerinck\nwanted to isolate the root nodule bacterium Rhizobium, which is capable of\nfixing atmospheric nitrogen. To do this he designed a medium containing no\nnitrogenous  compounds.  This  inhibited  the  growth  of  non-nitrogen  fixing\nmicroorganisms and produced a pure culture of Rhizobium. Beijerinck went on\nto use another elective medium, based on the ability of certain microorganisms\nto use CO2 as a carbon source under anaerobic conditions, to isolate the first\npure culture of sulphur-oxidizing bacterium Thiobacillus denitrificans in 1904. \nAlthough chemicals, such as dyes, had been known to have antimicrobial\neffects since 1885, (when Paul Ehrlich published work on the inhibitory effect\nof  arsenic  compounds  on  syphilis)  they  were  not  incorporated  into  media\nformulations until the first selecti

In [22]:
def format_docs(docs):
    """Join the text of retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string, such as
            the Documents returned by the retriever.

    Returns:
        The page contents separated by blank lines; an empty string when
        ``docs`` is empty.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Prompt that restricts the model to the retrieved context.
prompt = ChatPromptTemplate.from_template(
"""Answer the question based only on the following context:

{context}

Question: {question}
"""
)

# RAG pipeline: the incoming question is sent to the retriever and also passed
# through unchanged. The retrieved Documents are reduced to plain text first,
# so the prompt receives readable context rather than the repr of a list of
# Document objects (metadata, escaped newlines).
chain = (
    {"context": retreiver | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm_gemini
    | StrOutputParser()
)

In [23]:
# Inspection cell: display the class object to see which module provides it.
ChatGoogleGenerativeAI


langchain_google_genai.chat_models.ChatGoogleGenerativeAI

In [26]:
# End-to-end check: the question is used for retrieval and passed to the prompt;
# the chain ends in StrOutputParser, so the answer is displayed as a plain string.
chain.invoke("Who isolated the first pure culture of sulphur-oxidizing bacterium and in what year?")

'Martinus Beijerinck isolated the first pure culture of sulphur-oxidizing bacterium Thiobacillus denitrificans in 1904.'