In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma

In [2]:
# pip install langchain-google-genai

In [3]:
#for gemini
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai import GoogleGenerativeAI

from langchain.embeddings import HuggingFaceEmbeddings

In [4]:
from dotenv import load_dotenv
load_dotenv()

True

In [5]:
import pandas as pd
books = pd.read_csv("./ds/cleaned_data.csv")

In [6]:
books["tagged_description"].head(10)

0    9780002005883 A NOVEL THAT READERS and critics...
1    9780002261982 A new 'Christie for Christmas' -...
2    9780006178736 A memorable, mesmerizing heroine...
3    9780006280897 Lewis' work on the nature of lov...
4    9780006280934 "In The Problem of Pain, C.S. Le...
5    9780006380832 Until Vasco da Gama discovered t...
6    9780006470229 A new-cover reissue of the fourt...
7    9780006472612 Kate Blackwell is an enigma and ...
8    9780006482079 Tricked once more by his wily ha...
9    9780006483014 An omnibus volume of the author'...
Name: tagged_description, dtype: object

In [17]:
#saving the tagged_description to a text file as textloader only supports a text file
books["tagged_description"].to_csv("./ds/tagged_description.txt", sep="\n", index=False, header=False)

In [18]:
#notice how the above line outputs " before isbn number whenever the description text also has "" which is not what we want. So we do it this way:
with open("./ds/tagged_description.txt", "w", encoding="utf-8") as f:
    for line in books["tagged_description"]:
        f.write(f"{line}\n")


In [19]:
raw_docs = TextLoader("./ds/tagged_description.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=0, chunk_overlap=0, separator="\n")
docs = text_splitter.split_documents(raw_docs)

Created a chunk of size 1168, which is longer than the specified 0
Created a chunk of size 1214, which is longer than the specified 0
Created a chunk of size 373, which is longer than the specified 0
Created a chunk of size 309, which is longer than the specified 0
Created a chunk of size 479, which is longer than the specified 0
Created a chunk of size 482, which is longer than the specified 0
Created a chunk of size 960, which is longer than the specified 0
Created a chunk of size 188, which is longer than the specified 0
Created a chunk of size 843, which is longer than the specified 0
Created a chunk of size 284, which is longer than the specified 0
Created a chunk of size 193, which is longer than the specified 0
Created a chunk of size 877, which is longer than the specified 0
Created a chunk of size 1088, which is longer than the specified 0
Created a chunk of size 1189, which is longer than the specified 0
Created a chunk of size 304, which is longer than the specified 0
Create

In [20]:
docs[5]

Document(metadata={'source': './ds/tagged_description.txt'}, page_content="9780006380832 Until Vasco da Gama discovered the sea-route to the East in 1497-9 almost nothing was known in the West of the exotic cultures and wealth of the Indian Ocean and its peoples. It is this civilization and its destruction at the hands of the West that Richard Hall recreates in this book. Hall's history of the exploration and exploitation by Chinese and Arab travellers, and by the Portuguese, Dutch and British alike is one of brutality, betrayal and colonial ambition.")

In [21]:
# db_books = Chroma.from_documents(docs, embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001"), persist_directory="./db/")
# db_books_hf = Chroma.from_documents(docs, embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"), persist_directory="./db2/")

  db_books_hf = Chroma.from_documents(docs, embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"), persist_directory="./db2/")
  from .autonotebook import tqdm as notebook_tqdm


In [None]:
#if using google api

# db_books = Chroma(
#     persist_directory="./db/", 
#     embedding_function=GoogleGenerativeAIEmbeddings(model="models/embedding-001")
# )
# query = "witchcraft"

In [10]:
db_books = Chroma(embedding_function=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2", model_kwargs={'device': 'cuda'} ), persist_directory="./db2/")
query = "witchcraft"

In [11]:
similars = db_books.similarity_search(query, k=5)
similars

[Document(id='d7ac95d0-5c6b-4483-a7d3-bdb190d062e5', metadata={'source': './ds/tagged_description.txt'}, page_content="9780006514640 Alone and vulnerable, Alys joins a nunnery in an attempt to escape poverty but finds herself thrown back into the outside world when Henry VIII's wreckers destroy her sanctuary. With nothing but her looks, her magic and her own instinctive cunning, Alys has to tread a perilous path between the faith of her childhood and her own female power. When she falls in love with Hugo, the feudal lord and another woman's husband, she dips into witchcraft to defeat her rival and to win her lover, only to find that magic makes a poor servant but a dominant master. Since heresy against the new church means the stake, and witchcraft the rope, Alys's danger is mortal. A woman's powers are not longer safe to use..."),
 Document(id='1911f0bc-e28e-4a65-ae21-340117463e4d', metadata={'source': './ds/tagged_description.txt'}, page_content="9780064410342 In the land of Ingary, 

In [12]:
recommendations = books[ books["isbn13"].isin([int(similar.page_content.split()[0].strip()) for similar in similars ])]
recommendations

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
25,9780006514640,0006514642,The Wise Woman,Philippa Gregory,Great Britain,http://books.google.com/books/content?id=BEr9w...,"Alone and vulnerable, Alys joins a nunnery in ...",2002.0,3.31,640.0,6583.0,The Wise Woman,"9780006514640 Alone and vulnerable, Alys joins..."
162,9780060572976,0060572973,"The Good, the Bad, and the Undead",Kim Harrison,Fiction,http://books.google.com/books/content?id=-hBqy...,"It's a tough life for witch Rachel Morgan, sex...",2005.0,4.26,453.0,53112.0,"The Good, the Bad, and the Undead",9780060572976 It's a tough life for witch Rach...
212,9780060745905,0060745908,Wicked Musical Tie-in Edition,Gregory Maguire,Fiction,http://books.google.com/books/content?id=MLEuj...,When Dorothy triumphed over the Wicked Witch o...,2004.0,3.53,409.0,2747.0,Wicked Musical Tie-in Edition: The Life and Ti...,9780060745905 When Dorothy triumphed over the ...
442,9780064410342,006441034X,Howl's Moving Castle,Diana Wynne Jones,Juvenile Fiction,http://books.google.com/books/content?id=f3RHP...,"In the land of Ingary, such things as spells, ...",2001.0,4.3,329.0,148957.0,Howl's Moving Castle,"9780064410342 In the land of Ingary, such thin..."
3112,9780590032490,0590032496,The witches,Roald Dahl,Juvenile Nonfiction,http://books.google.com/books/content?id=tpQxo...,"A young boy and his Norwegian grandmother, who...",1997.0,4.17,208.0,254867.0,The witches,9780590032490 A young boy and his Norwegian gr...


In [13]:
# get k top recommendations for query
def get_k_recommendations(query: str, k: int=10) -> pd.DataFrame:
    return books[books["isbn13"].isin([int(similar.page_content.split()[0].strip()) for similar in db_books.similarity_search(query,k)])]

In [14]:
recommendations = get_k_recommendations("middle eastern foods", 5)
with pd.option_context('display.max_colwidth', 150):
    print(recommendations["description"])

2038    Covering more than two hundred years of history, a study of American involvement in the Middle East examines the diverse roles of American statesm...
2488    A thorough but accessible guide to Indian cuisine explains spices and seasonings and offers more than sixty-five simple, easy-to-prepare recipes f...
4800    The popular Food Network chef presents a selection of more than 125 sophisticated but accessible dishes, with recipes that range from Crisp Chicke...
5201    Explores the food of India, Pakistan, Bangladesh, Nepal, and Sri Lanka to provide readers with a taste of the Subcontinent through recipes that in...
5619    Collects various dishes from all over India - from the classic Goa Lamb Vindaloo to the exotic Gujarat Mango and Yogurt Curry. This book features:...
Name: description, dtype: object
