In [103]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.schema import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from oauthlib.uri_validate import query



In [104]:
from dotenv import load_dotenv

load_dotenv()

True

In [105]:
import pandas as pd

books = pd.read_csv("books_cleaned.csv")

In [106]:
books


Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count
0,9.780002e+12,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,361.0
1,9.780002e+12,0002261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,5164.0
2,9.780006e+12,0006178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,29532.0
3,9.780006e+12,0006280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,33684.0
4,9.780006e+12,0006280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,37569.0
...,...,...,...,...,...,...,...,...,...,...,...
5194,9.788180e+12,817992162X,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,http://books.google.com/books/content?id=c_7mf...,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82,198.0,1568.0
5195,9.788185e+12,8185300534,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,http://books.google.com/books/content?id=Fv_JP...,This collection of the timeless teachings of o...,1999.0,4.51,531.0,104.0
5196,9.789028e+12,9027712050,The Berlin Phenomenology,Georg Wilhelm Friedrich Hegel,History,http://books.google.com/books/content?id=Vy7Sk...,Since the three volume edition ofHegel's Philo...,1981.0,0.00,210.0,0.0
5197,,,,,,,,,,,


In [107]:
books["tagged_description"].to_csv("tagged_description.txt",
                                   index = False,
                                   header = False,
                                   lineterminator="\n")

KeyError: 'tagged_description'

In [80]:
with open("tagged_description.txt", "r", encoding="utf-8") as f:
    lines = f.read().splitlines()
documents = [Document(page_content=line) for line in lines]

In [81]:
documents[0]

Document(metadata={}, page_content='"9780002005883.0 A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gilead is a song of celebration a

In [108]:
db_books = Chroma.from_documents(
    documents,
    embedding=OpenAIEmbeddings())

In [109]:
query = "A book to teach children about nature"

docs = db_books.similarity_search(query, k=10)
docs

[Document(id='061a63f0-f5a6-4736-acab-6df5c0207f86', metadata={}, page_content='"9780786808069.0 Children will discover the exciting world of their own backyard in this introduction to familiar animals from cats and dogs to bugs and frogs. The combination of photographs, illustrations, and fun facts make this an accessible and delightful learning experience."'),
 Document(id='752b5cdb-ad07-4f79-b048-0b0e3824ac8d', metadata={}, page_content='"9780786808069.0 Children will discover the exciting world of their own backyard in this introduction to familiar animals from cats and dogs to bugs and frogs. The combination of photographs, illustrations, and fun facts make this an accessible and delightful learning experience."'),
 Document(id='9034e882-2b0e-4f68-9e45-e9935a711bb1', metadata={}, page_content='"9780786808069.0 Children will discover the exciting world of their own backyard in this introduction to familiar animals from cats and dogs to bugs and frogs. The combination of photographs

In [110]:
isbn = docs[0].page_content.split()[0].strip().strip('"')  # remove quotes
isbn = isbn.replace(".0", "")                              # remove .0
books[books["isbn13"] == int(isbn)]


Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count
3747,9780787000000.0,786808063,Baby Einstein: Neighborhood Animals,Marilyn Singer;Julie Aigner-Clark,Juvenile Fiction,http://books.google.com/books/content?id=X9a4P...,Children will discover the exciting world of t...,2001.0,3.89,16.0,180.0


In [111]:
def retrieve_semantic_recommendation(
        query: str,
        top_k: int = 10,
) -> pd.DataFrame:
    recs = db_books.similarity_search(query, k = 50)

    books_list = []

    for i in range(0, len(recs)):
        isbn_str = recs[i].page_content.strip('"').split()[0]
        isbn_int = int(float(isbn_str))
        books_list.append(isbn_int)

    return books[books["isbn13"].isin(books_list)].head(top_k)



In [112]:
retrieve_semantic_recommendation("A book to teach children about nature")

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count
442,9780068000000.0,006757520X,The Sense of Wonder,Rachel Carson,Nature,http://books.google.com/books/content?id=Zee5S...,"First published more than three decades ago, t...",1998.0,4.39,112.0,1160.0
1639,9780374000000.0,0374422087,Everything on a Waffle,Polly Horvath,Juvenile Fiction,http://books.google.com/books/content?id=NimVJ...,This Newbery Honor Book tells the story of 11 ...,2004.0,3.71,150.0,9631.0
3214,9780690000000.0,0689861133,"Moo, Baa, la la La!",Sandra Boynton,Animal sounds,http://books.google.com/books/content?id=Gz40A...,Children will love joining in and imitating th...,2004.0,4.2,14.0,28261.0
3747,9780787000000.0,0786808063,Baby Einstein: Neighborhood Animals,Marilyn Singer;Julie Aigner-Clark,Juvenile Fiction,http://books.google.com/books/content?id=X9a4P...,Children will discover the exciting world of t...,2001.0,3.89,16.0,180.0
3748,9780787000000.0,0786808373,Baby Einstein: Birds,Julie Aigner-Clark,Juvenile Fiction,http://books.google.com/books/content?id=0jxHP...,"Introducing your baby to birds, cats, dogs, an...",2002.0,3.78,20.0,9.0
3749,9780787000000.0,0786808381,Baby Einstein: Babies,Julie Aigner-Clark,Juvenile Fiction,http://books.google.com/books/content?id=jv4NA...,"Introduce your babies to birds, cats, dogs, an...",2002.0,4.03,20.0,29.0
3750,9780787000000.0,078680839X,Baby Einstein: Dogs,Julie Aigner-Clark,Juvenile Fiction,http://books.google.com/books/content?id=qut8t...,"Introduce your baby to birds, cats, dogs, and ...",2002.0,3.81,20.0,26.0
3751,9780787000000.0,0786808713,Baby Einstein: What Does Violet See? Raindrops...,Julie Aigner-Clark,Juvenile Fiction,http://books.google.com/books/content?id=95IIA...,A very special puddle sets Violet the mouse of...,2002.0,3.25,18.0,16.0
3760,9780787000000.0,0786812915,The Big Box,Toni Morrison;Slade Morrison,Juvenile Fiction,http://books.google.com/books/content?id=LyYKA...,"In her first illustrated book for children, th...",2002.0,3.95,48.0,375.0
3797,9780789000000.0,0789458209,Tree,David Burnie,Juvenile Nonfiction,http://books.google.com/books/content?id=Qwsqj...,Photographs and text explore the anatomy and l...,2000.0,4.07,64.0,5.0
