In [8]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
import pandas as pd

In [14]:
from dotenv import load_dotenv
load_dotenv()

True

In [10]:
books = pd.read_csv("books_cleaned.csv")
books["tagged_description"].to_csv("tagged_description.txt",
                                   sep = "\n",
                                   index = False,
                                   header = False)

In [12]:
# raw_documents = TextLoader("tagged_description.txt").load()
# text_splitter = CharacterTextSplitter(chunk_size=0, chunk_overlap=0, separator="\n")
# documents = text_splitter.split_documents(raw_documents)

Here we use OpenAI to embed the description and create our embedded database

In [15]:
db_books = Chroma.from_documents(
    documents,
    embedding=OpenAIEmbeddings())

Now that we have our database, we can query to find the books according to our needs using the similarity_search method from Chroma. However, this will only return the description of the book. As mentioned earlier in the processing file, we will use the ID that we appended to the description to retrieve which book we are referring to using the embedded database and the similarity search. 

In [17]:
def retrieve_semantic_recommendations(
        query: str,
        top_k: int = 10,) -> pd.DataFrame:
    recs = db_books.similarity_search(query, k = 50)

    books_list = []
    for i in range(0, len(recs)):
        #some description has quotation marks and this might cause the parser to not recognize the ID as integer => remove them
        books_list += [int(recs[i].page_content.strip('"').split()[0])] 

    return books[books["isbn13"].isin(books_list)]

In [18]:
retrieve_semantic_recommendations("A book about love")

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
211,9780060753634,0060753633,Mating in Captivity,Esther Perel,Psychology,http://books.google.com/books/content?id=-HIhM...,A guide for loving couples who are looking to ...,2006.0,4.13,272.0,7699.0,Mating in Captivity: Reconciling the Erotic an...,9780060753634 A guide for loving couples who a...
291,9780060924980,0060924985,The Infinite Plan,Isabel Allende,Fiction,http://books.google.com/books/content?id=80pDR...,"Selling more than 65,000 copies and topping be...",1994.0,3.71,384.0,7102.0,The Infinite Plan: A Novel,"9780060924980 Selling more than 65,000 copies ..."
319,9780060956868,0060956860,Joy in the Morning,Betty Smith,Fiction,http://books.google.com/books/content?id=w2uMG...,The story of a young couple from Brooklyn who ...,2000.0,3.9,296.0,5559.0,Joy in the Morning,9780060956868 The story of a young couple from...
329,9780060975005,0060975008,Gold Bug Variations,Richard Powers,Fiction,http://books.google.com/books/content?id=Zesnc...,"A national bestseller, voted by Time as the #1...",1992.0,4.13,640.0,1383.0,Gold Bug Variations,"9780060975005 A national bestseller, voted by ..."
365,9780061122095,0061122092,By the River Piedra I Sat Down and Wept,Paulo Coelho,Fiction,http://books.google.com/books/content?id=9AHal...,"From Paulo Coelho, author of the international...",2006.0,3.57,208.0,68403.0,By the River Piedra I Sat Down and Wept: A Nov...,"9780061122095 From Paulo Coelho, author of the..."
370,9780061129735,0061129739,The Art of Loving,Erich Fromm,Self-Help,http://books.google.com/books/content?id=TRMED...,The fiftieth Anniversary Edition of the ground...,2006.0,4.03,192.0,35605.0,The Art of Loving,9780061129735 The fiftieth Anniversary Edition...
387,9780061177590,0061177598,Women,Charles Bukowski,Fiction,http://books.google.com/books/content?id=170LA...,Tells the story of an ugly middle-aged man who...,2007.0,3.87,291.0,49265.0,Women,9780061177590 Tells the story of an ugly middl...
424,9780064410724,0064410722,Four Stupid Cupids,Gregory Maguire,Juvenile Fiction,http://books.google.com/books/content?id=471OU...,The students' scheme to find a love match for ...,2001.0,3.52,224.0,110.0,Four Stupid Cupids,9780064410724 The students' scheme to find a l...
592,9780140195538,014019553X,The Beloved,Kahlil Gibran;John Walbridge,Fiction,http://books.google.com/books/content?id=KOEKA...,"For Kahlil Gibran (1883-1931), love was the su...",1997.0,4.19,102.0,320.0,The Beloved: Reflections on the Path of the Heart,"9780140195538 For Kahlil Gibran (1883-1931), l..."
644,9780140363333,0140363335,How Do You Lose Those Ninth-Grade Blues?,Barthe DeClements,Obesity,http://books.google.com/books/content?id=1P-Fc...,"Though no longer a fat girl, Elsie, now fiftee...",1993.0,3.74,144.0,10.0,How Do You Lose Those Ninth-Grade Blues?,"9780140363333 Though no longer a fat girl, Els..."
