In [2]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from onnxruntime.transformers.large_model_exporter import retrieve_onnx_inputs

None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [3]:
from dotenv import load_dotenv

load_dotenv()

True

In [4]:
import pandas as pd

books = pd.read_csv('books_cleaned.csv')

In [5]:
books

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
0,9780002005883,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,361.0,Gilead,9780002005883 A NOVEL THAT READERS and critics...
1,9780002261982,0002261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,5164.0,Spider's Web: A Novel,9780002261982 A new 'Christie for Christmas' -...
2,9780006178736,0006178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,29532.0,Rage of angels,"9780006178736 A memorable, mesmerizing heroine..."
3,9780006280897,0006280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,33684.0,The Four Loves,9780006280897 Lewis' work on the nature of lov...
4,9780006280934,0006280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,37569.0,The Problem of Pain,"9780006280934 ""In The Problem of Pain, C.S. Le..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5192,9788172235222,8172235224,Mistaken Identity,Nayantara Sahgal,Indic fiction (English),http://books.google.com/books/content?id=q-tKP...,On A Train Journey Home To North India After L...,2003.0,2.93,324.0,0.0,Mistaken Identity,9788172235222 On A Train Journey Home To North...
5193,9788173031014,8173031010,Journey to the East,Hermann Hesse,Adventure stories,http://books.google.com/books/content?id=rq6JP...,This book tells the tale of a man who goes on ...,2002.0,3.70,175.0,24.0,Journey to the East,9788173031014 This book tells the tale of a ma...
5194,9788179921623,817992162X,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,http://books.google.com/books/content?id=c_7mf...,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82,198.0,1568.0,The Monk Who Sold His Ferrari: A Fable About F...,9788179921623 Wisdom to Create a Life of Passi...
5195,9788185300535,8185300534,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,http://books.google.com/books/content?id=Fv_JP...,This collection of the timeless teachings of o...,1999.0,4.51,531.0,104.0,I Am that: Talks with Sri Nisargadatta Maharaj,9788185300535 This collection of the timeless ...


In [6]:
# use tagged description to match recommendations with titles/author
books["tagged_description"]

0       9780002005883 A NOVEL THAT READERS and critics...
1       9780002261982 A new 'Christie for Christmas' -...
2       9780006178736 A memorable, mesmerizing heroine...
3       9780006280897 Lewis' work on the nature of lov...
4       9780006280934 "In The Problem of Pain, C.S. Le...
                              ...                        
5192    9788172235222 On A Train Journey Home To North...
5193    9788173031014 This book tells the tale of a ma...
5194    9788179921623 Wisdom to Create a Life of Passi...
5195    9788185300535 This collection of the timeless ...
5196    9789027712059 Since the three volume edition o...
Name: tagged_description, Length: 5197, dtype: object

In [19]:
# save to .txt for text loader
books["tagged_description"].to_csv("tagged_description.txt",
                                   sep = "\n",
                                   index = False,
                                   header = False)

In [20]:
from langchain_core.documents import Document

raw_documents = TextLoader("tagged_description.txt").load()
text = raw_documents[0].page_content
lines = [ln.strip() for ln in text.split("\n") if ln.strip()]

documents = [Document(page_content=ln, metadata={"source": "tagged_description.txt"}) for ln in lines]

In [21]:
documents[0]

Document(metadata={'source': 'tagged_description.txt'}, page_content='9780002005883 A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gi

In [23]:
# create vector database
db_books = Chroma.from_documents(
    documents,
    embedding = OpenAIEmbeddings()
)

In [24]:
# test query
query = "A book to teach college students about space"
docs = db_books.similarity_search(query, k=10)
docs

[Document(id='c079273b-a9fd-4118-b993-997fa415c669', metadata={'source': 'tagged_description.txt'}, page_content="9780764584657 A beginner's guide to astronomy features information about the solar system as well as star maps and a monthly guide on locating the planets in the sky."),
 Document(id='8ab3628b-a2cd-4bce-b7b4-d79a65f021c3', metadata={'source': 'tagged_description.txt'}, page_content="9780131871656 This introduction to astronomy features an exceptionally clear writing style, an emphasis on critical thinking and visualization, and a leading-edge technology program-including an accompanying full-featured electronic multimedia version of the book and companion Web site. A dynamic art program includes numerous radio, infrared, ultraviolet, X-ray, and gamma-ray images and transparent full-color overlays. The book presents scientific literacy in the context of astronomy, with the aim of teaching studentsto think critically and analytically about the physical world and the developme

In [25]:
# use isbn to filter
books[books["isbn13"] == int(docs[0].page_content.split()[0].strip())]

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
3604,9780764584657,764584650,Astronomy For Dummies,Stephen P. Maran,Science,http://books.google.com/books/content?id=iH4Fn...,A beginner's guide to astronomy features infor...,2005.0,3.83,318.0,271.0,Astronomy For Dummies,9780764584657 A beginner's guide to astronomy ...


In [27]:
# create function to return recs from query
def get_recommendations(query:str, top_k: int = 10):
    # get rec
    recs = db_books.similarity_search(query, k=50)

    # empty list for matching books
    books_list = []

    # populate books list
    for i in range(0, len(recs)):
        books_list += [int(recs[i].page_content.strip('"').split()[0])]

    # return books data frame
    return books[books["isbn13"].isin(books_list)].head(top_k)

In [28]:
get_recommendations("A book about a space explorer")

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
116,9780060510862,60510862,The Forever War,Joe Haldeman,Fiction,http://books.google.com/books/content?id=jc1eC...,Drafted into the ranks of Earth's interstellar...,2003.0,4.15,278.0,103375.0,The Forever War,9780060510862 Drafted into the ranks of Earth'...
816,9780142402931,142402931,The Far Side of Evil,Sylvia Engdahl,Juvenile Fiction,http://books.google.com/books/content?id=7nijj...,A young girl from an advanced civilization is ...,2005.0,3.98,324.0,57.0,The Far Side of Evil,9780142402931 A young girl from an advanced ci...
833,9780142500378,142500372,Enchantress from the Stars,Sylvia Louise Engdahl,Juvenile Fiction,http://books.google.com/books/content?id=rntJA...,When young Elana unexpectedly joins the team l...,2003.0,3.94,304.0,2031.0,Enchantress from the Stars,9780142500378 When young Elana unexpectedly jo...
930,9780156306300,156306301,Fiasco,Stanislaw Lem,Fiction,http://books.google.com/books/content?id=4IZ3P...,When a crew of earthmen--among them a space pi...,1988.0,4.12,322.0,2125.0,Fiasco,9780156306300 When a crew of earthmen--among t...
1271,9780312852535,312852533,The Humanoids,Jack Williamson,Fiction,http://books.google.com/books/content?id=vPSl0...,A classic science fiction novel features human...,1996.0,3.75,299.0,880.0,The Humanoids: A Novel,9780312852535 A classic science fiction novel ...
1276,9780312864743,312864744,Ports of Call,Jack Vance,Fiction,http://books.google.com/books/content?id=_Zu4S...,A romantic tale follows a space swashbuckler a...,1999.0,3.53,300.0,395.0,Ports of Call,9780312864743 A romantic tale follows a space ...
1279,9780312872793,312872798,Lurulu,Jack Vance,Fiction,http://books.google.com/books/content?id=Jm3au...,"Continues the adventures of Myron Tany, a rebe...",2007.0,3.58,204.0,268.0,Lurulu,9780312872793 Continues the adventures of Myro...
1288,9780312890216,312890214,The Starry Rift,James Tiptree,Fiction,,This novel set in the far-future and filled wi...,1994.0,3.82,250.0,220.0,The Starry Rift,9780312890216 This novel set in the far-future...
1387,9780330340328,330340328,The Reality Dysfunction,Peter F. Hamilton,English fiction,http://books.google.com/books/content?id=uOqwr...,In AD 2600 the human race is finally beginning...,1997.0,4.14,1223.0,26269.0,The Reality Dysfunction,9780330340328 In AD 2600 the human race is fin...
1435,9780340837955,340837950,Stranger in a Strange Land,Robert A. Heinlein,Science fiction,http://books.google.com/books/content?id=ZQhiP...,"Epic, entertaining, Stranger in a Strange Land...",2005.0,3.92,672.0,563.0,Stranger in a Strange Land,"9780340837955 Epic, entertaining, Stranger in ..."
