In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_community.vectorstores import FAISS
import pandas as pd
from dotenv import load_dotenv

In [2]:
import torch
print(torch.cuda.is_available())  # Should print True
print(torch.cuda.get_device_name(0))  # Should print your GPU name


True
NVIDIA GeForce RTX 3050 Laptop GPU


In [3]:
load_dotenv()

True

In [16]:
books = pd.read_csv('books_cleaned.csv')

In [17]:
books

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
0,9780002005883,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,361.0,Gilead,0002005883 A NOVEL THAT READERS and critics ha...
1,9780002261982,0002261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,5164.0,Spider's Web: A Novel,0002261987 A new 'Christie for Christmas' -- a...
2,9780006178736,0006178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,29532.0,Rage of angels,"0006178731 A memorable, mesmerizing heroine Je..."
3,9780006280897,0006280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,33684.0,The Four Loves,0006280897 Lewis' work on the nature of love d...
4,9780006280934,0006280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,37569.0,The Problem of Pain,"0006280935 ""In The Problem of Pain, C.S. Lewis..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5192,9788172235222,8172235224,Mistaken Identity,Nayantara Sahgal,Indic fiction (English),http://books.google.com/books/content?id=q-tKP...,On A Train Journey Home To North India After L...,2003.0,2.93,324.0,0.0,Mistaken Identity,8172235224 On A Train Journey Home To North In...
5193,9788173031014,8173031010,Journey to the East,Hermann Hesse,Adventure stories,http://books.google.com/books/content?id=rq6JP...,This book tells the tale of a man who goes on ...,2002.0,3.70,175.0,24.0,Journey to the East,8173031010 This book tells the tale of a man w...
5194,9788179921623,817992162X,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,http://books.google.com/books/content?id=c_7mf...,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82,198.0,1568.0,The Monk Who Sold His Ferrari: A Fable About F...,"817992162X Wisdom to Create a Life of Passion,..."
5195,9788185300535,8185300534,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,http://books.google.com/books/content?id=Fv_JP...,This collection of the timeless teachings of o...,1999.0,4.51,531.0,104.0,I Am that: Talks with Sri Nisargadatta Maharaj,8185300534 This collection of the timeless tea...


In [18]:
books['tagged_description'].to_csv('tagged_description.txt',
                                   index=False,
                                   sep='\n',
                                   header=False,
                                   encoding='utf-8')

In [3]:
raw_documents = TextLoader('tagged_description.txt', encoding='utf-8').load()

In [4]:
text_splitter = CharacterTextSplitter(chunk_size=0, chunk_overlap=0, separator="\n")
documents = text_splitter.split_documents(raw_documents)

Created a chunk of size 1165, which is longer than the specified 0
Created a chunk of size 1211, which is longer than the specified 0
Created a chunk of size 370, which is longer than the specified 0
Created a chunk of size 306, which is longer than the specified 0
Created a chunk of size 480, which is longer than the specified 0
Created a chunk of size 479, which is longer than the specified 0
Created a chunk of size 957, which is longer than the specified 0
Created a chunk of size 185, which is longer than the specified 0
Created a chunk of size 840, which is longer than the specified 0
Created a chunk of size 293, which is longer than the specified 0
Created a chunk of size 194, which is longer than the specified 0
Created a chunk of size 878, which is longer than the specified 0
Created a chunk of size 1085, which is longer than the specified 0
Created a chunk of size 1186, which is longer than the specified 0
Created a chunk of size 301, which is longer than the specified 0
Create

In [5]:
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
    model_kwargs={'device': 'cuda'}
)

# embeddings=OpenAIEmbeddings()

In [7]:
db_books = FAISS.from_documents(
    documents,
    embedding=embeddings
)

In [8]:
# You can optionally save the FAISS index to disk
db_books.save_local("faiss_index")

# To load a previously saved index:
# db_books = FAISS.load_local("faiss_index", embeddings)


In [24]:
def retrieve_semantic_recommendations(query: str, top_k: int = 10) -> pd.DataFrame:
    recs = db_books.similarity_search(query, top_k)

    books_list = []

    for i in range(0, len(recs)):
        books_list += [recs[i].page_content.strip('"').split()[0]]

    print(books_list)

    return books[books['isbn10'].isin(books_list)]

In [38]:
query = 'i want to learn to cook vegan recipes'

In [39]:
retrieve_semantic_recommendations(query, 10)

['0446670782', '1400040353', '1400052386', '0767909267', '1401301940', '1904920357', '1400052378', '0534613039', '1400047331', '1933615095']


Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
2297,9780446670784,446670782,Curries Without Worries,Sudha Koul,Cooking,http://books.google.com/books/content?id=G4qUH...,A thorough but accessible guide to Indian cuis...,1996.0,3.98,160.0,35.0,Curries Without Worries,0446670782 A thorough but accessible guide to ...
2621,9780534613037,534613039,Slaughterhouse Blues,Donald D. Stull;Michael J. Broadway,Social Science,http://books.google.com/books/content?id=RQ4pA...,SLAUGHTERHOUSE BLUES: THE MEAT AND POULTRY IND...,2004.0,3.48,172.0,26.0,Slaughterhouse Blues: The Meat and Poultry Ind...,0534613039 SLAUGHTERHOUSE BLUES: THE MEAT AND ...
3678,9780767909266,767909267,The Minimalist Cooks at Home,Mark Bittman,Cooking,http://books.google.com/books/content?id=F0Z6P...,"In a revised cookbook, the author of the award...",2002.0,3.91,240.0,261.0,The Minimalist Cooks at Home: Recipes That Giv...,"0767909267 In a revised cookbook, the author o..."
4341,9781400040353,1400040353,Lidia's Family Table,Lidia Matticchio Bastianich;David Nussbaum,Cooking,http://books.google.com/books/content?id=ZQNwD...,A guide to family cookery features more than t...,2004.0,4.22,448.0,521.0,Lidia's Family Table,1400040353 A guide to family cookery features ...
4355,9781400047338,1400047331,Gale Gand's Short + Sweet,Gale Gand;Julia Moskin,Cooking,http://books.google.com/books/content?id=BloJA...,A collection of dessert recipes that can be cr...,2004.0,3.74,160.0,23.0,Gale Gand's Short + Sweet: Quick Desserts with...,1400047331 A collection of dessert recipes tha...
4362,9781400052370,1400052378,Eat This Book,Tyler Florence,Cooking,http://books.google.com/books/content?id=nF2Pc...,"Collects more than 150 recipes, ranging from a...",2005.0,3.92,287.0,270.0,Eat This Book: Cooking with Global Fresh Flavors,"1400052378 Collects more than 150 recipes, ran..."
4363,9781400052387,1400052386,Tyler's Ultimate,Tyler Florence,Cooking,http://books.google.com/books/content?id=cGxOH...,The popular Food Network chef presents a selec...,2006.0,4.1,256.0,3586.0,Tyler's Ultimate: Brilliant Simple Food to Mak...,1400052386 The popular Food Network chef prese...
4428,9781401301941,1401301940,Jamie's Dinners,Jamie Oliver,Cooking,http://books.google.com/books/content?id=gMDlA...,Cooking sensation Jamie Oliver returns with a ...,2004.0,4.07,336.0,4248.0,Jamie's Dinners: The Essential Family Cookbook,1401301940 Cooking sensation Jamie Oliver retu...
5133,9781904920359,1904920357,50 Great Curries of India 10th Anniversary Ed.,Camellia Panjabi,Cooking,http://books.google.com/books/content?id=h9JfS...,Collects various dishes from all over India - ...,2005.0,4.17,224.0,197.0,50 Great Curries of India 10th Anniversary Ed.,1904920357 Collects various dishes from all ov...
5176,9781933615097,1933615095,The Best of America's Test Kitchen 2007,America's Test Kitchen,Cooking,http://books.google.com/books/content?id=tQ97A...,Presents nearly one thousand recipes--from app...,2006.0,4.34,312.0,185.0,The Best of America's Test Kitchen 2007: The Y...,1933615095 Presents nearly one thousand recipe...
