In [1]:
import os
os.environ["TQDM_DISABLE"] = "1"                # disable tqdm globally
os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1" # disable HF model/download bars
os.environ["TOKENIZERS_PARALLELISM"] = "false"   # avoids extra worker noise


In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from tqdm import tqdm 

In [3]:
import pandas as pd
books = pd.read_csv('books_cleaned.csv')
books

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
0,9780002005883,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,361.0,Gilead,9780002005883 A NOVEL THAT READERS and critics...
1,9780002261982,0002261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,5164.0,Spider's Web: A Novel,9780002261982 A new 'Christie for Christmas' -...
2,9780006178736,0006178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,29532.0,Rage of angels,"9780006178736 A memorable, mesmerizing heroine..."
3,9780006280897,0006280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,33684.0,The Four Loves,9780006280897 Lewis' work on the nature of lov...
4,9780006280934,0006280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,37569.0,The Problem of Pain,"9780006280934 ""In The Problem of Pain, C.S. Le..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5192,9788172235222,8172235224,Mistaken Identity,Nayantara Sahgal,Indic fiction (English),http://books.google.com/books/content?id=q-tKP...,On A Train Journey Home To North India After L...,2003.0,2.93,324.0,0.0,Mistaken Identity,9788172235222 On A Train Journey Home To North...
5193,9788173031014,8173031010,Journey to the East,Hermann Hesse,Adventure stories,http://books.google.com/books/content?id=rq6JP...,This book tells the tale of a man who goes on ...,2002.0,3.70,175.0,24.0,Journey to the East,9788173031014 This book tells the tale of a ma...
5194,9788179921623,817992162X,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,http://books.google.com/books/content?id=c_7mf...,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82,198.0,1568.0,The Monk Who Sold His Ferrari: A Fable About F...,9788179921623 Wisdom to Create a Life of Passi...
5195,9788185300535,8185300534,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,http://books.google.com/books/content?id=Fv_JP...,This collection of the timeless teachings of o...,1999.0,4.51,531.0,104.0,I Am that: Talks with Sri Nisargadatta Maharaj,9788185300535 This collection of the timeless ...


In [4]:
books['tagged_description']

0       9780002005883 A NOVEL THAT READERS and critics...
1       9780002261982 A new 'Christie for Christmas' -...
2       9780006178736 A memorable, mesmerizing heroine...
3       9780006280897 Lewis' work on the nature of lov...
4       9780006280934 "In The Problem of Pain, C.S. Le...
                              ...                        
5192    9788172235222 On A Train Journey Home To North...
5193    9788173031014 This book tells the tale of a ma...
5194    9788179921623 Wisdom to Create a Life of Passi...
5195    9788185300535 This collection of the timeless ...
5196    9789027712059 Since the three volume edition o...
Name: tagged_description, Length: 5197, dtype: object

In [5]:
# convert tagged_description to a text file since langchain is not good with pandas dataframe
books["tagged_description"].to_csv('tagged_description.txt',
                                   sep = '\n',
                                   index = False,
                                   header = False)

In [6]:
raw_documents = TextLoader('tagged_description.txt').load()
text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=0, separator="\n")
documents = text_splitter.split_documents(raw_documents)

Created a chunk of size 1168, which is longer than the specified 800
Created a chunk of size 1214, which is longer than the specified 800
Created a chunk of size 960, which is longer than the specified 800
Created a chunk of size 843, which is longer than the specified 800
Created a chunk of size 881, which is longer than the specified 800
Created a chunk of size 1088, which is longer than the specified 800
Created a chunk of size 1189, which is longer than the specified 800
Created a chunk of size 1267, which is longer than the specified 800
Created a chunk of size 887, which is longer than the specified 800
Created a chunk of size 2010, which is longer than the specified 800
Created a chunk of size 1225, which is longer than the specified 800
Created a chunk of size 1184, which is longer than the specified 800
Created a chunk of size 1214, which is longer than the specified 800
Created a chunk of size 1191, which is longer than the specified 800
Created a chunk of size 1057, which is

In [7]:
documents[0]

Document(metadata={'source': 'tagged_description.txt'}, page_content='9780002005883 A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gi

In [8]:
emb = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings':True}
    )

db = Chroma.from_documents(
    documents,
    embedding=emb
    )

In [9]:
query = "A book for soccer lover"
docs = db.similarity_search(query, k = 10)
docs

[Document(id='61faea90-c05d-4177-8fde-312cfefcdcaf', metadata={'source': 'tagged_description.txt'}, page_content='9781573226882 An autobiographical memoir by a humorous British author and obsessed soccer fan captures the intensity of a sports fan who measures his life in seasons rather than years\n9781573227162 A debut novel explores the complications of race through the story of two daughters--one light-skinned and the other dark-skinned--of a black father and a white mother, who become torn apart by racial allegiances. Reprint.\n9781573229623 A memoir of sex, drugs, and depression indicts an overmedicated America as it chronicles the fortunes of a Harvard educated child of divorce who lived in the fast lane as a music critic, always fighting her chronic depression.'),
 Document(id='7dbd8a07-1124-499b-943f-5998383e7297', metadata={'source': 'tagged_description.txt'}, page_content="9781857442021 Mikhail Tal, the 'magician from Riga, ' was the greatest attacking World Champion of them a

In [10]:
books[books["isbn13"] == int(docs[0].page_content.split()[0].strip())]

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
4700,9781573226882,1573226882,Fever Pitch,Nick Hornby,Biography & Autobiography,http://books.google.com/books/content?id=v5TO8...,An autobiographical memoir by a humorous Briti...,1998.0,3.73,247.0,27781.0,Fever Pitch,9781573226882 An autobiographical memoir by a ...


In [15]:
def semantic_recommendation_list(query:str,top_k:int = 10) -> pd.DataFrame:
    recs = db.similarity_search(query, k = 10)

    book_list = []
    for i in range(0, len(recs)):
        book_list +=[int(recs[i].page_content.strip('"').split()[0])]

    return books[books["isbn13"].isin(book_list)]

In [17]:
semantic_recommendation_list("a books to teach love")

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
156,9780060574215,60574216,"Men Are from Mars, Women Are from Venus",John Gray,Family & Relationships,http://books.google.com/books/content?id=MUw_d...,Rediscover the most famous relationship book e...,2004.0,3.54,368.0,126108.0,"Men Are from Mars, Women Are from Venus: The C...",9780060574215 Rediscover the most famous relat...
211,9780060753634,60753633,Mating in Captivity,Esther Perel,Psychology,http://books.google.com/books/content?id=-HIhM...,A guide for loving couples who are looking to ...,2006.0,4.13,272.0,7699.0,Mating in Captivity: Reconciling the Erotic an...,9780060753634 A guide for loving couples who a...
370,9780061129735,61129739,The Art of Loving,Erich Fromm,Self-Help,http://books.google.com/books/content?id=TRMED...,The fiftieth Anniversary Edition of the ground...,2006.0,4.03,192.0,35605.0,The Art of Loving,9780061129735 The fiftieth Anniversary Edition...
424,9780064410724,64410722,Four Stupid Cupids,Gregory Maguire,Juvenile Fiction,http://books.google.com/books/content?id=471OU...,The students' scheme to find a love match for ...,2001.0,3.52,224.0,110.0,Four Stupid Cupids,9780064410724 The students' scheme to find a l...
2278,9780446606813,446606812,Message in a Bottle,Nicholas Sparks,Fiction,http://books.google.com/books/content?id=0uvZR...,"In this New York Times bestseller, a single mo...",1999.0,3.96,370.0,2714.0,Message in a Bottle,9780446606813 In this New York Times bestselle...
2825,9780571206926,571206921,Laughable Loves,Milan Kundera,Czech Republic,http://books.google.com/books/content?id=ZpupP...,Laughable loves is a collection of stories tha...,1999.0,3.87,287.0,14380.0,Laughable Loves,9780571206926 Laughable loves is a collection ...
3428,9780743495929,743495926,Getting the Love You Want,Harville Hendrix,Love,http://books.google.com/books/content?id=Vc56P...,"Originally published in 1988, GETTING THE LOVE...",2005.0,4.07,336.0,5513.0,Getting the Love You Want: A Guide for Couples,"9780743495929 Originally published in 1988, GE..."
3534,9780755304851,755304853,Falling for You,Jill Mansell,Fiction,http://books.google.com/books/content?id=K3zCG...,Love is always just around the corner in a Jil...,2004.0,3.79,448.0,4713.0,Falling for You,9780755304851 Love is always just around the c...
3811,9780802130365,802130364,The Malady of Death,Marguerite Duras,Fiction,http://books.google.com/books/content?id=ZDHyw...,A man hires a woman to spend several weeks wit...,1988.0,3.86,60.0,1280.0,The Malady of Death,9780802130365 A man hires a woman to spend sev...
5102,9781890159191,1890159190,The Mistress Manual,"Mistress Lorelei;Lorelei Powers, Mistress",Family & Relationships,http://books.google.com/books/content?id=DjOZv...,Originally published in 1994 in a simple pink ...,2000.0,3.76,220.0,323.0,The Mistress Manual: A Good Girl's Guide to Fe...,9781890159191 Originally published in 1994 in ...
