In [None]:
!pip install -q --upgrade google-generativeai langchain-google-genai python-dotenv chromadb

In [None]:
# https://makersuite.google.com/

!echo -e 'GOOGLE_API_KEY=GOOGLE_API_KEY' > .env

In [None]:
# List the working directory, dotfiles included (e.g. to check that .env is present).
!ls -a

In [None]:
# Read KEY=VALUE pairs from a .env file into the process environment using
# python-dotenv, so that os.environ lookups in later cells can see them.
from dotenv import load_dotenv
load_dotenv()

In [None]:
from IPython.display import display
from IPython.display import Markdown
import textwrap


def to_markdown(text):
  """Wrap `text` in a Markdown block quote for rich display.

  Each '•' character is replaced by '  *', and every line (blank lines
  included) is prefixed with '> '. The result is passed to `Markdown`.
  """
  bulleted = text.replace('•', '  *')
  quoted = textwrap.indent(bulleted, '> ', predicate=lambda line: True)
  return Markdown(quoted)

In [None]:
import google.generativeai as genai

In [None]:
import os

# Pass the value of the GOOGLE_API_KEY environment variable to the Gemini SDK.
# os.getenv returns None when the variable is not set.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

In [None]:
# Create a handle to the "gemini-pro" model through the google.generativeai SDK.
# The bare `model` on the last line makes the notebook display the object's repr.
model = genai.GenerativeModel(model_name = "gemini-pro")
model

In [None]:
# Send a one-element list of text to the model and keep the response object
# so that a later cell can render its text.
prompt = ["Define life"]
response = model.generate_content(prompt)

In [None]:
# Render the text of the model's response as a Markdown block quote.
to_markdown(response.text)

In [None]:
import urllib
import warnings
from pathlib import Path as p
from pprint import pprint

import pandas as pd
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI


In [None]:
# Rebind `model` to the LangChain chat wrapper for "gemini-pro". From this cell on,
# `model` no longer refers to the genai.GenerativeModel created in an earlier cell.
model = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0.3,
)


In [None]:
# `import urllib` on its own does not guarantee that the `urllib.request` submodule
# is loaded; import it explicitly so this cell works on a fresh kernel.
import urllib.request

# Local folder (created on demand) that holds the downloaded PDF.
data_folder = p.cwd() / "data"
data_folder.mkdir(parents=True, exist_ok=True)

pdf_url = "https://services.google.com/fh/files/misc/practitioners_guide_to_mlops_whitepaper.pdf"
# The local file name is the last path component of the URL.
pdf_path = data_folder / pdf_url.split("/")[-1]
pdf_file = str(pdf_path)  # later cells expect a plain string path

# Download only when the file is not already present, so Restart & Run All does not
# fetch it again. The download goes to a temporary ".part" file that is renamed once
# complete, so an interrupted download is never mistaken for a finished one.
if not pdf_path.exists():
    partial_path = pdf_path.with_name(pdf_path.name + ".part")
    urllib.request.urlretrieve(pdf_url, str(partial_path))
    partial_path.replace(pdf_path)

In [None]:
# Prompt template for the "stuff" question-answering chain. The chain is invoked with
# "input_documents" and "question"; the documents are placed into {context} and the
# question into {question}.
# A dedicated name is used on purpose: the notebook-level `prompt` is a plain list of
# strings (used with generate_content) and is not a valid prompt template for
# load_qa_chain.
qa_prompt = PromptTemplate(
    template=(
        "Answer the question as precisely as possible using the provided context. "
        'If the answer is not contained in the context, say "answer not available in context".\n\n'
        "Context:\n{context}\n\n"
        "Question:\n{question}\n\n"
        "Answer:\n"
    ),
    input_variables=["context", "question"],
)

stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=qa_prompt)

In [None]:
# Load the downloaded PDF with LangChain's PyPDFLoader. `pages` is iterated in a
# later cell, where each item's `page_content` is read.
pdf_loader = PyPDFLoader(pdf_file)
pages = pdf_loader.load_and_split()

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [None]:
# Join the text of every loaded page into one string (pages separated by a blank
# line), then split that string with chunk_size=1000 and chunk_overlap=0.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
page_texts = [str(page.page_content) for page in pages]
context = "\n\n".join(page_texts)
texts = text_splitter.split_text(context)

In [None]:
# Embedding client for the "models/embedding-001" model; passed to Chroma in a later cell.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [None]:
# Build a Chroma vector store from the text chunks using the embedding client,
# then expose the store through its retriever interface.
vector_store = Chroma.from_texts(texts, embeddings)
vector_index = vector_store.as_retriever()

In [None]:
# Ask the retriever for the documents it considers relevant to the question.
question = "Describe data management and feature management systems."
docs = vector_index.get_relevant_documents(question)

In [None]:
# Run the QA chain on the retrieved documents and the question, keeping only the
# chain's outputs in the returned mapping.
qa_inputs = {"input_documents": docs, "question": question}
stuff_answer = stuff_chain(qa_inputs, return_only_outputs=True)

In [None]:
# Pretty-print the mapping returned by the QA chain.
pprint(stuff_answer)