In [1]:
import os
import textwrap
import warnings

import chromadb
import google.ai.generativelanguage as glm
import google.generativeai as genai
import numpy as np
import pandas as pd
from chromadb import Documents, EmbeddingFunction, Embeddings
from dotenv import load_dotenv
from langchain_google_genai import GoogleGenerativeAIEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Pull settings from a local .env file (if present) into the environment so
# the API key never has to be hard-coded in the notebook.
load_dotenv()

gemini_api_key = os.getenv("GOOGLE_API_KEY")
if not gemini_api_key:
    # os.getenv returns None when the variable is unset. Flag that here
    # instead of letting it show up later as an opaque authentication error
    # on the first API call. Only a warning, so setups that authenticate by
    # other means keep working.
    warnings.warn(
        "GOOGLE_API_KEY is not set; check your environment or .env file."
    )

genai.configure(api_key=gemini_api_key)

In [6]:
# Smoke test for the Gemini embedding model: embed one short string and
# look at the length of the vector that comes back.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)
vector = embeddings.embed_query("hello, world!")

# Bare last expression so the notebook displays the vector length.
len(vector)

768

Let's work on chunking the PDFs

In [6]:
from langchain_community.document_loaders import PyPDFLoader

In [10]:
# Read the source PDF and split it into the list of Document chunks that the
# later cells index and search.
loader = PyPDFLoader(
    "pdfs/Book on VA Loans_ An Essential Guide to Maximizing Your Home Loan Benefits, The - Chris Birk.pdf"
)
pages = loader.load_and_split()

In [12]:
# Display one element of `pages` (index 14) to inspect the extracted text.
pages[14]

Document(page_content='DCHAPTER  1:\nVA LOANS  IN\nTODAY’S MARKET\nespite  t he  rece nt  economic  freefall , homeownership  remains  a\ncornerstone  of  American  soc iety . Nearly  70 percent  of  citizens  own  a\nhome , w hich  most  see  as  a  path  toward  wealth  creation  and  ﬁnancial\nstability . Rates  for  homeownership  among  veterans  are  even  higher ,\nright  around  82 percent , according  to  the  Mortgage  Bankers\nAssociation .\nThe las t  few  years  have  proved  a  mixed  bag  for  homebuyers  and\nexisting  homeowners . Purchasing  a  home  today  looks  a  b it  dif ferent\nthan  it  did  six  or  seven  years  ago . For  many  people  it’ s  increasingly\ndifﬁcult . Borrowers  who  qualify  have  reaped  the  beneﬁts  of\ngovernment-sponsored  tax  credit  programs  and  record-low  interest\nrates . At  the  same  time , the  subprime  mortgage  meltdown  and\nensuing  ﬁnancial  crisis  created  a  restrictive  credit  environment  and\nmade  it  signiﬁca

In [13]:
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Build a FAISS index over the chunks in `pages`.
# NOTE(review): this uses OpenAI embeddings even though the notebook set up
# Gemini embeddings earlier; presumably it needs separate OpenAI credentials.
# Confirm this is intended.
faiss_index = FAISS.from_documents(
    pages,
    OpenAIEmbeddings(),
)

# Fetch the two closest chunks and print "<page>: <first 300 characters>".
docs = faiss_index.similarity_search(
    "How will the community be engaged?",
    k=2,
)
for doc in docs:
    print(f'{doc.metadata["page"]}:', doc.page_content[:300])

205: Epilogue : Can  You  Help  Us ?
Faced  with  an  inﬁnite  uphill  battle , SoCalV AHomes  maintains  an
intense  commitment  to  expand  and  ﬁnd  creative  ways  to  serve  those
who served  us .
The business  model  of  choosing  to  exclusively  serve  active
military  and  V eterans  is  a  very
18: serving  this  community , certainly  stirred  a  wave  of  patriotic  feelings  in
our souls .
These  feelings  grew  deeper  until  we  were  on  a  mission . W e
executed  several  “fully  custom  renovated  home”  transactions  which
were  very , very  difﬁcult . It  was  both  taxing  and  exhi


In [15]:
# Ask a question about the book's subject and print the five closest chunks
# as "<page>: <first 300 characters>".
docs = faiss_index.similarity_search(
    "What are the limitations of the VA Loan?",
    k=5,
)
for doc in docs:
    print(f'{doc.metadata["page"]}:', doc.page_content[:300])

28: the lack  of  higher  V A  loan  limits  –  they  were  ef fectively  locked  out . As
home  prices  declined  from  the  peak  in  2006 and  V A loan  limits  were
raised , our  market  woke  up ! In  2008, V A  home  buyers  began  coming
out from  hiding ! And  in  2011, several  thousand  had  n
59: property  and  the  loan  limit . As  an  example , let's  just  say  the  loan  limit
is $500,000 and  you  want  to  buy  a  $ 600,000 house . Y ou  could  get  a
$575,000 loan  because  you  only  have  to  cover  25% of  the  dif ference
between  the  $ 600,000 purchase  price  and  the  $ 500,0
32: and others  are  also  eligible . Contact  the  Department  of
V eterans  Af fairs  for  more  information .
Generally  speaking , when  shopping  for  a  mortgage  loan , you  will
ﬁnd four  fundamental  home  loan  categories  that  have  both
similarities  and  also  major  dif ferences : convent
98: Chapter  14: Gold  Mine  or  Coal  Mine ? The  CalVet
Loan
Advantages  and  Disad