In [1]:
!pip install langchain
!pip install langchain_community
!pip install langchain_text_splitters
!pip install langchain_openai
!pip install langchain_chroma
!pip install dotenv
!pip install python-dotenv
!pip install sentence-transformers



In [2]:
import pandas as pd
import numpy as np

from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings

In [3]:
from dotenv import load_dotenv
load_dotenv(dotenv_path="/Users/phucnguyen/Desktop/Hands-on-project/semantic-book-recommender/.env")

True

In [4]:
books = pd.read_csv('book_cleaned.csv')
books['tagged_description'].to_csv('tagged_description.txt', 
                                   sep='\n',
                                   index=False, 
                                   header=False)

In [5]:
raw_docs = TextLoader('tagged_description.txt', encoding='utf-8').load()
text_splitter = CharacterTextSplitter(chunk_size=1, chunk_overlap=0, separator="\n")
documents = text_splitter.split_documents(raw_docs)

Created a chunk of size 1168, which is longer than the specified 1
Created a chunk of size 1214, which is longer than the specified 1
Created a chunk of size 373, which is longer than the specified 1
Created a chunk of size 309, which is longer than the specified 1
Created a chunk of size 483, which is longer than the specified 1
Created a chunk of size 482, which is longer than the specified 1
Created a chunk of size 960, which is longer than the specified 1
Created a chunk of size 188, which is longer than the specified 1
Created a chunk of size 843, which is longer than the specified 1
Created a chunk of size 296, which is longer than the specified 1
Created a chunk of size 197, which is longer than the specified 1
Created a chunk of size 881, which is longer than the specified 1
Created a chunk of size 1088, which is longer than the specified 1
Created a chunk of size 1189, which is longer than the specified 1
Created a chunk of size 304, which is longer than the specified 1
Create

In [6]:
embedding = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
db_books = Chroma.from_documents(
    documents,
    embedding=embedding)

  embedding = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [7]:
query = "a book to teach children about nature"
docs = db_books.similarity_search(query, k=5)
docs

[Document(id='98f3ea00-bca8-472e-b618-558f43fe5d88', metadata={'source': 'tagged_description.txt'}, page_content='"9780067575208 First published more than three decades ago, this reissue of Rachel Carson\'s award-winning classic brings her unique vision to a new generation of readers. Stunning new photographs by Nick Kelsh beautifully complement Carson\'s intimate account of adventures with her young nephew, Roger, as they enjoy walks along the rocky coast of Maine and through dense forests and open fields, observing wildlife, strange plants, moonlight and storm clouds, and listening to the ""living music"" of insects in the underbrush. ""If a child is to keep alive his inborn sense of wonder."" Writes Carson, ""he needs the companionship of at least one adult who can share it, rediscovering with him the joy, excitement and mystery of the world we live in."" The Sense of Wonder is a refreshing antidote to indifference and a guide to capturing the simple power of discovery that Carson v

In [15]:
books[books["isbn13"] == int(docs[0].page_content.strip('"').split()[0])]

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,description_length_bin,title_and_subtitle,tagged_description
442,9780067575208,006757520X,The Sense of Wonder,Rachel Carson,Nature,http://books.google.com/books/content?id=Zee5S...,"First published more than three decades ago, t...",1998.0,4.39,112.0,1160.0,200+,The Sense of Wonder,9780067575208 First published more than three ...


In [18]:
def retrieve_semantic_recommendations(
        query: str,
        top_k: int = 10,
) -> pd.DataFrame:
    recs = db_books.similarity_search(query, k = 50)

    books_list = []

    for i in range(0, len(recs)):
        books_list += [int(recs[i].page_content.strip('"').split()[0])]

    return books[books["isbn13"].isin(books_list)].head(top_k)
recommendations = retrieve_semantic_recommendations("A book to teach children about nature")
recommendations.to_csv('recommendations.csv', index=False)
recommendations


Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,description_length_bin,title_and_subtitle,tagged_description
31,9780007105045,7105045,Tree and Leaf,John Ronald Reuel Tolkien,Literary Collections,http://books.google.com/books/content?id=aPb_A...,"""The two works 'On fairy-stories' and 'Leaf by...",2001.0,4.09,176.0,2245.0,21-50,Tree and Leaf: The Homecoming of Beorhtnoth : ...,"9780007105045 ""The two works 'On fairy-stories..."
59,9780007151240,7151241,The Family Way,Tony Parsons,Parenthood,http://books.google.com/books/content?id=dJEIx...,It should be the most natural thing in the wor...,2005.0,3.51,400.0,2095.0,21-50,The Family Way,9780007151240 It should be the most natural th...
86,9780060000141,60000147,Poppy's Return,Avi,Juvenile Fiction,http://books.google.com/books/content?id=XbcMJ...,"There's trouble at Gray House, the girlhood ho...",2006.0,3.99,256.0,1086.0,51-100,Poppy's Return,"9780060000141 There's trouble at Gray House, t..."
143,9780060546571,60546573,Three Rotten Eggs,Gregory Maguire,Juvenile Fiction,http://books.google.com/books/content?id=t2pWl...,The students of Miss Earth's class in rural Ve...,2005.0,3.74,240.0,76.0,21-50,Three Rotten Eggs,9780060546571 The students of Miss Earth's cla...
146,9780060556501,60556501,"The Lion, the Witch and the Wardrobe (picture ...",C. S. Lewis,Juvenile Fiction,http://books.google.com/books/content?id=FlSpp...,Narnia: A magical land full of wonder and exci...,2004.0,4.19,48.0,5012.0,101-200,"The Lion, the Witch and the Wardrobe (picture ...",9780060556501 Narnia: A magical land full of w...
324,9780060959036,60959037,Prodigal Summer,Barbara Kingsolver,Fiction,http://books.google.com/books/content?id=06IwG...,Barbara Kingsolver's fifth novel is a hymn to ...,2001.0,4.0,444.0,85440.0,51-100,Prodigal Summer: A Novel,9780060959036 Barbara Kingsolver's fifth novel...
373,9780061131622,61131628,Mandy,Julie Andrews Edwards,Juvenile Fiction,http://books.google.com/books/content?id=5dNj_...,"Mandy, a ten-year-old orphan, dreams of a plac...",2006.0,4.24,320.0,9482.0,101-200,Mandy,"9780061131622 Mandy, a ten-year-old orphan, dr..."
404,9780064402453,64402452,Racso and the Rats of NIMH,Jane Leslie Conly,Juvenile Fiction,http://books.google.com/books/content?id=MgoNv...,"‘Racso, a brash and boastful little rodent, is...",1988.0,3.76,288.0,3231.0,51-100,Racso and the Rats of NIMH,"9780064402453 ‘Racso, a brash and boastful lit..."
407,9780064404419,64404412,The Rainbow People,Laurence Yep,Juvenile Fiction,http://books.google.com/books/content?id=5AHwq...,"""Culled from 69 stories collected in a [1930s]...",1992.0,3.75,208.0,202.0,101-200,The Rainbow People,"9780064404419 ""Culled from 69 stories collecte..."
426,9780064420815,64420817,Christmas Stories,Laura Ingalls Wilder,Juvenile Fiction,http://books.google.com/books/content?id=qJxFQ...,Gentle adaptations of Laura Ingalls Wilder's c...,1998.0,4.22,80.0,63.0,101-200,Christmas Stories,9780064420815 Gentle adaptations of Laura Inga...
