In [None]:
import os
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
import pandas as pd
import torch


In [None]:
# Copy the key/value pairs from a local .env file into the process
# environment so they can be read like any other environment variable.
from dotenv import load_dotenv

load_dotenv()

# Read the Hugging Face API token from the environment.
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")

# Stop right away when the token is missing or empty.
if HUGGINGFACEHUB_API_TOKEN is None or HUGGINGFACEHUB_API_TOKEN == "":
    raise ValueError("HUGGINGFACEHUB_API_TOKEN not found in .env file")

In [None]:
# Read the cleaned book catalogue into a DataFrame, then display it.
books = pd.read_csv(filepath_or_buffer="/content/books_cleaned.csv")
books

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
0,9780002005883,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,361.0,Gilead,9780002005883 A NOVEL THAT READERS and critics...
1,9780002261982,0002261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,5164.0,Spider's Web: A Novel,9780002261982 A new 'Christie for Christmas' -...
2,9780006178736,0006178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,29532.0,Rage of angels,"9780006178736 A memorable, mesmerizing heroine..."
3,9780006280897,0006280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,33684.0,The Four Loves,9780006280897 Lewis' work on the nature of lov...
4,9780006280934,0006280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,37569.0,The Problem of Pain,"9780006280934 ""In The Problem of Pain, C.S. Le..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5192,9788172235222,8172235224,Mistaken Identity,Nayantara Sahgal,Indic fiction (English),http://books.google.com/books/content?id=q-tKP...,On A Train Journey Home To North India After L...,2003.0,2.93,324.0,0.0,Mistaken Identity,9788172235222 On A Train Journey Home To North...
5193,9788173031014,8173031010,Journey to the East,Hermann Hesse,Adventure stories,http://books.google.com/books/content?id=rq6JP...,This book tells the tale of a man who goes on ...,2002.0,3.70,175.0,24.0,Journey to the East,9788173031014 This book tells the tale of a ma...
5194,9788179921623,817992162X,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,http://books.google.com/books/content?id=c_7mf...,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82,198.0,1568.0,The Monk Who Sold His Ferrari: A Fable About F...,9788179921623 Wisdom to Create a Life of Passi...
5195,9788185300535,8185300534,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,http://books.google.com/books/content?id=Fv_JP...,This collection of the timeless teachings of o...,1999.0,4.51,531.0,104.0,I Am that: Talks with Sri Nisargadatta Maharaj,9788185300535 This collection of the timeless ...


In [None]:
# Preview the "tagged_description" column; in the rows displayed, each value is
# the book's isbn13 followed by its description.
books["tagged_description"]

Unnamed: 0,tagged_description
0,9780002005883 A NOVEL THAT READERS and critics...
1,9780002261982 A new 'Christie for Christmas' -...
2,"9780006178736 A memorable, mesmerizing heroine..."
3,9780006280897 Lewis' work on the nature of lov...
4,"9780006280934 ""In The Problem of Pain, C.S. Le..."
...,...
5192,9788172235222 On A Train Journey Home To North...
5193,9788173031014 This book tells the tale of a ma...
5194,9788179921623 Wisdom to Create a Life of Passi...
5195,9788185300535 This collection of the timeless ...


In [None]:
# Write one tagged description per line as plain UTF-8 text.
#
# The file is written directly rather than through to_csv(sep="\n"): the CSV
# writer wraps every description that contains a double quote in quotes (and
# doubles the inner ones), so the embedded text no longer matches the real
# description. Newer Python releases also validate CSV dialects more strictly
# and may reject a newline used as the field delimiter.
tagged_lines = (
    books["tagged_description"]
    .dropna()
    .astype(str)
    # Collapse embedded line breaks so each book stays on exactly one line.
    .str.replace(r"\s*[\r\n]+\s*", " ", regex=True)
    .str.strip()
)

with open("tagged_description.txt", "w", encoding="utf-8") as description_file:
    description_file.write("\n".join(tagged_lines))
    description_file.write("\n")

**Splitting text into smaller chunks is useful for:**  

Processing large documents in smaller, more manageable pieces.  

Feeding text into machine learning models that have limits on input size.  

Analyzing or working with individual lines of text.  

In [None]:
# Load the text file (tagged_description.txt). The encoding is given explicitly
# so the result does not depend on the platform's default encoding.
raw_documents = TextLoader("tagged_description.txt", encoding="utf-8").load()

# Split the text on new lines so that every line (one book) becomes its own chunk.
# separator="\n": split the text wherever there is a new line.
# chunk_size=1: the splitter only merges neighbouring pieces while the merged
#   text still fits in chunk_size. Two lines plus the separator never fit in a
#   single character, so no lines are merged and each line is kept whole as one
#   chunk (the splitter may log a "longer than the specified" warning per line).
#   A value of 0 is avoided because newer langchain-text-splitters releases
#   require chunk_size to be greater than 0.
# chunk_overlap=0: chunks share no text with each other.
text_splitter = CharacterTextSplitter(chunk_size=1, chunk_overlap=0, separator="\n")

# Store the per-line chunks in a list called documents
documents = text_splitter.split_documents(raw_documents)



In [None]:
# Inspect the first chunk produced by the splitter.
documents[0]

Document(metadata={'source': 'tagged_description.txt'}, page_content='9780002005883 A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gi

In [None]:
# Run on the GPU when PyTorch reports that CUDA is available, otherwise on the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

# Settings for the embedding model
model_name = "all-MiniLM-L6-v2"  # Or another SentenceTransformer model
model_kwargs = dict(device=device)
encode_kwargs = dict(normalize_embeddings=False)

# Build the embedding wrapper from the settings above
hf_embeddings = HuggingFaceEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# Build the Chroma vector database (embedding database): every document is
# embedded with hf_embeddings and stored for similarity search.
db_books = Chroma.from_documents(documents=documents, embedding=hf_embeddings)

In [None]:
# Example search: ask the vector database for the 10 stored descriptions that
# best match a free-text query, then display them.
query = "A book about neuroscience"
docs = db_books.similarity_search(query=query, k=10)
docs

[Document(id='82dc72b7-e27f-40eb-84ca-979a277e7530', metadata={'source': 'tagged_description.txt'}, page_content="9780618057078 At the heart of this classic, seminal book is Julian Jaynes's still-controversial thesis that human consciousness did not begin far back in animal evolution but instead is a learned process that came about only three thousand years ago and is still developing. The implications of this revolutionary scientific paradigm extend into virtually every aspect of our psychology, our history and culture, our religion -- and indeed our future."),
 Document(id='1876c789-0c5f-4a44-bf57-1fd24ac96d5c', metadata={'source': 'tagged_description.txt'}, page_content='9780674644847 This bold and brilliant book asks the ultimate question of life sciences: How did the human mind acquire its incomparable power? Origins of the Modern Mind traces the evolution of human culture and cognition from primitive apes to the era of artificial intelligence, and presents an original theory of h

In [None]:
# Look up the catalogue row of the best search hit. The isbn13 is the first
# whitespace-separated token of the chunk; a surrounding double quote (left by
# CSV-style quoting of the text file) is stripped first so int() does not fail,
# matching what retrieve_semantic_recommendations does.
top_isbn = int(docs[0].page_content.strip('"').split()[0])
books[books["isbn13"] == top_isbn]

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
2893,9780618057078,618057072,The Origin of Consciousness in the Breakdown o...,Julian Jaynes,Philosophy,http://books.google.com/books/content?id=6Q0kS...,"At the heart of this classic, seminal book is ...",2000.0,4.24,491.0,3258.0,The Origin of Consciousness in the Breakdown o...,"9780618057078 At the heart of this classic, se..."


In [None]:
def retrieve_semantic_recommendations(
        query: str,
        top_k: int = 50,) -> pd.DataFrame:
    """Return the rows of ``books`` that best match ``query``, best match first.

    The query is run against the ``db_books`` vector store. The isbn13 is read
    from the first whitespace-separated token of every returned chunk and used
    to select the matching rows of the ``books`` DataFrame.

    Args:
        query: Free-text description of the kind of book wanted.
        top_k: Maximum number of chunks requested from the vector store.

    Returns:
        The matching rows of ``books`` (original index labels kept), ordered
        by the position of their first hit in the search results. Empty when
        nothing matches.
    """
    recs = db_books.similarity_search(query, k=top_k)

    # Record, for every isbn13, the position of its first (best) hit.
    rank_by_isbn = {}
    for rec in recs:
        # Drop a surrounding double quote left by CSV-style quoting.
        tokens = rec.page_content.strip('"').split(maxsplit=1)
        if not tokens:
            continue
        try:
            isbn = int(tokens[0])
        except ValueError:
            # The chunk does not start with an ISBN (for example a continuation
            # line of a multi-line description), so it cannot be mapped to a book.
            continue
        rank_by_isbn.setdefault(isbn, len(rank_by_isbn))

    matches = books[books["isbn13"].isin(list(rank_by_isbn))]

    # isin() alone returns rows in catalogue order; re-order them by search
    # rank so the most similar book comes first.
    return matches.sort_values(
        by="isbn13",
        key=lambda column: column.map(rank_by_isbn),
        kind="stable",
    )

In [None]:
retrieve_semantic_recommendations("A book about neuroscience and happiness")

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
74,9780007195718,0007195710,Discover Your Destiny with the Monk Who Sold H...,Robin Sharma,Conduct of life,http://books.google.com/books/content?id=4hVbN...,A potent pathway to self-awakening that will h...,2004.0,3.9,240.0,1956.0,Discover Your Destiny with the Monk Who Sold H...,9780007195718 A potent pathway to self-awakeni...
101,9780060175641,0060175648,Identity,Milan Kundera,Fiction,http://books.google.com/books/content?id=D30Ex...,Milan Kundera's Identity translated from the F...,1998.0,3.68,176.0,260.0,Identity: A Novel,9780060175641 Milan Kundera's Identity transla...
214,9780060760250,0060760257,Charms for the Easy Life,Kaye Gibbons,Fiction,http://books.google.com/books/content?id=TE2mB...,"A family without men, the Birches live gloriou...",2005.0,4.02,272.0,10983.0,Charms for the Easy Life,"9780060760250 A family without men, the Birche..."
285,9780060916466,006091646X,The Dance of Intimacy,Harriet Lerner,Psychology,http://books.google.com/books/content?id=tTKc8...,"In The Dance of Intimacy, the bestselling auth...",1990.0,4.06,255.0,7128.0,The Dance of Intimacy: A Woman's Guide to Cour...,"9780060916466 In The Dance of Intimacy, the be..."
292,9780060925758,0060925752,Soul Mates,Thomas Moore,Psychology,http://books.google.com/books/content?id=7syEl...,This companion volume to Care of the Soul offe...,1994.0,4.0,288.0,4122.0,Soul Mates,9780060925758 This companion volume to Care of...
301,9780060930318,0060930314,Identity,Milan Kundera,Fiction,http://books.google.com/books/content?id=mXPU2...,There are situations in which we fail for a mo...,1999.0,3.68,168.0,13065.0,Identity: A Novel,9780060930318 There are situations in which we...
370,9780061129735,0061129739,The Art of Loving,Erich Fromm,Self-Help,http://books.google.com/books/content?id=TRMED...,The fiftieth Anniversary Edition of the ground...,2006.0,4.03,192.0,35605.0,The Art of Loving,9780061129735 The fiftieth Anniversary Edition...
562,9780140128468,0140128468,Love's Executioner,Irvin D. Yalom,Existential psychotherapy,http://books.google.com/books/content?id=ZBptQ...,Contains eight case-studies of people the auth...,1991.0,4.2,270.0,216.0,Love's Executioner: And Other Tales of Psychot...,9780140128468 Contains eight case-studies of p...
600,9780140244915,0140244913,How the Mind Works,Steven Pinker,Philosophy,http://books.google.com/books/content?id=O521D...,"""Presented with extraordinary lucidity, cogenc...",1999.0,3.97,660.0,225.0,How the Mind Works,"9780140244915 ""Presented with extraordinary lu..."
991,9780192862099,019286209X,The Origins of Life,John Maynard Smith;Eörs Szathmáry,Science,http://books.google.com/books/content?id=nHDbB...,'I can recommend this book as a thoroughly int...,2000.0,4.11,192.0,41.0,The Origins of Life: From the Birth of Life to...,9780192862099 'I can recommend this book as a ...
