In [18]:
import pandas as pd
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document
import os
from langchain_ollama.llms import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate


In [2]:
import torch

#Setting device to Cuda
torch.device='cuda:0' if torch.cuda.is_available() else 'cpu'
print(torch.device)

cuda:0


In [3]:
#Importing the first dataset
df1=pd.read_csv('books.csv')
df1.head()

  df1=pd.read_csv('books.csv')


Unnamed: 0,title,author,publisher,page,language,discount_rate,discounted_price,price,rating,reviews,cover,paper,isbn,date,link,image
0,Var Mısın? & Güçlü Bir Yaşam İçin Öneriler,Doğan Cüceloğlu,KRONİK KİTAP,320,TÜRKÇE,% 37,31.63,50.0,5,1316,Karton Kapak,2. Hm. Kağıt,9786057635839,2021-01-11,https://www.kitapyurdu.com/kitap/var-misin-amp...,https://img.kitapyurdu.com/v1/getImage/fn:1134...
1,Kayıp Tanrılar Ülkesi,Ahmet Ümit,YAPI KREDİ YAYINLARI,504,TÜRKÇE,% 40,25.14,42.0,5,667,Karton Kapak,Kitap Kağıdı,9789750850417,2021-06-14,https://www.kitapyurdu.com/kitap/kayip-tanrila...,https://img.kitapyurdu.com/v1/getImage/fn:1142...
2,Fareler ve İnsanlar,John Steinbeck,SEL YAYINCILIK,111,TÜRKÇE,% 38,17.46,28.0,5,9380,Karton Kapak,Kitap Kağıdı,9789755705859,2020-05-21,https://www.kitapyurdu.com/kitap/fareler-ve-in...,https://img.kitapyurdu.com/v1/getImage/fn:1122...
3,Simyacı (Ciltsiz),Paulo Coelho,CAN YAYINLARI,184,TÜRKÇE,% 57,19.39,45.0,5,15667,Karton Kapak,Kitap Kağıdı,9789750726439,2021-09-28,https://www.kitapyurdu.com/kitap/simyaci-cilts...,https://img.kitapyurdu.com/v1/getImage/fn:1148...
4,Cesur Yeni Dünya,Aldous Huxley,İTHAKİ YAYINLARI,272,TÜRKÇE,% 65,16.57,48.0,5,6559,Karton Kapak,Kitap Kağıdı,9789756902165,2021-08-20,https://www.kitapyurdu.com/kitap/cesur-yeni-du...,https://img.kitapyurdu.com/v1/getImage/fn:1144...


In [4]:
#Importing the second dataset
df2=pd.read_csv('books_scraped.csv')
df2.head()

Unnamed: 0,Title,Book_category,Star_rating,Price,Stock,Quantity
0,A Light in the Attic,Poetry,Three,51.77,In stock,22
1,Tipping the Velvet,Historical Fiction,One,53.74,In stock,20
2,Soumission,Fiction,One,50.1,In stock,20
3,Sharp Objects,Mystery,Four,47.82,In stock,20
4,Sapiens: A Brief History of Humankind,History,Five,54.23,In stock,20


In [5]:
#Using Ollama for embeddings model
#using LLama3.2 model 

embeddings = OllamaEmbeddings(model="llama3.2")

In [6]:
#Location for first bookstore
db1_location = "./chrome_langchain_db1"

#Location for second bookstore
db2_location = "./chrome_langchain_db2"

#Creating the bookstore directories
add_documents_1 = not os.path.exists(db1_location)
add_documents_2 = not os.path.exists(db2_location)

In [7]:
#Creating the first database

if add_documents_1:
    documents_1 = []
    ids_1 = []

    
    #Creating documents with names of the books as the content, and storing other information about
    #the books as metadata for better retrieval
    for i, row in df1.iterrows():
        document = Document(
            page_content=str(row["title"]),
            metadata={"Author": str(row["author"]), "Publisher": str(row["publisher"]),
                      'Page':str(row['page']), 'Language':str(row['language']),
                     'Discounted Price':str(row['discounted_price']),'Price':str(row['price']),
                     },
            id=str(row['isbn'])
        )
        ids_1.append(str(i))
        documents_1.append(document)

In [8]:
#Creating the second database

if add_documents_2:
    documents_2 = []
    ids_2 = []

    for i, row in df2.iterrows():
        document = Document(
            page_content=str(row["Title"]),
            metadata={"Category": str(row["Book_category"]), "rate": str(row["Star_rating"]),
                      'price':str(row['Price']), 'quantity':str(row['Quantity'])},
            id=str(i)
        )
        ids_2.append(str(i))
        documents_2.append(document)

In [9]:
#Creating the first vector store for the first bookstore and calling it bookstore1
vector_store_1 = Chroma(
    collection_name="bookstore1",
    persist_directory=db1_location,
    embedding_function=embeddings
)

In [10]:
#Creating the second vector store for the second bookstore and calling it bookstore2

vector_store_2 = Chroma(
    collection_name="bookstore2",
    persist_directory=db2_location,
    embedding_function=embeddings
)

In [11]:
#Adding the documents to the bookstore

if add_documents_1:
    vector_store_1.add_documents(documents=documents_1, ids=ids_1)
    
if add_documents_2:
    vector_store_2.add_documents(documents=documents_1, ids=ids_2)

In [12]:
#Creating the retrievers for the vector stores

retriever_1 = vector_store_1.as_retriever(
    search_kwargs={"k": 5})
retriever_2 = vector_store_2.as_retriever(
    search_kwargs={"k": 5})

In [13]:


#Using LLama3.2 for generation. Using same language model as the embedding model for compatibility
model = OllamaLLM(model="llama3.2",device=0)



In [14]:
#Querying from both bookstores and merging the results
def retrieve_from_both(query: str):
    """Retrieve docs from both vector DBs"""
    docs1 = retriever_1.get_relevant_documents(query)
    docs2 = retriever_2.get_relevant_documents(query)
    return docs1 + docs2  # merge results



In [19]:
# Prompt template
prompt = ChatPromptTemplate.from_messages([
    ("system", """You are a bookstore assistant.
    You have been given access to two different bookstore datasets,
    with relevant information about the books stored as metadata.
    For example, the most popular book of a specific genre will be accounted by 
    their rating, reviews and quantity available.
    Use the context below to answer. 
    """),
    ("human", "Context:\n{context}\n\nQuestion: {question}")
])


In [25]:
queries=[
  'I want to buy a popular mystery book in English which costs less than 20 dollars.',
    'What is the most popular book per genre in the bookstores?',
    'Which bookstore is cheaper for fiction novels?',
    'Is there any book by Agatha Christie in the bookstores?'
    
]

In [21]:
# Retrieving documents
docs = retrieve_from_both(queries[0])

#The retrieved documents
context = "\n\n".join([d.page_content for d in docs])


final_prompt = prompt.format(context=context, question=queries[0])


response = model.invoke(final_prompt)
print(response)

  docs1 = retriever_1.get_relevant_documents(query)


I'd be happy to help you find a popular mystery book that fits your budget.

Let me check our two bookstore datasets... (checking the data)

Okay, based on my analysis, here are some options:

1. "And Then There Were None" by Agatha Christie - This classic mystery novel has an average rating of 4.5/5 and is available in English. It costs $19.99.
2. "The Girl with the Dragon Tattoo" by Stieg Larsson - Another popular mystery book that fits your budget! It has an average rating of 4.3/5 and is available in English, costing $17.99.
3. "Gone Girl" by Gillian Flynn - This bestselling mystery novel has an average rating of 4.2/5 and is also available in English, costing $19.49.

All of these options should be available for purchase in-store or online. Which one catches your eye?


In [22]:
# Retrieving documents
docs = retrieve_from_both(queries[1])

#The retrieved documents
context = "\n\n".join([d.page_content for d in docs])


final_prompt = prompt.format(context=context, question=queries[1])


response = model.invoke(final_prompt)
print(response)

To find the most popular book per genre in both bookstores, I'll need to analyze the metadata for each book.

Assuming the datasets contain relevant information such as title, author, genre, rating, reviews, and quantity available, I can use a combination of natural language processing (NLP) techniques and statistical analysis to identify the most popular book per genre.

After analyzing both datasets, here are my findings:

**Fiction**

Bookstore 1: The Hunger Games by Suzanne Collins (Genre: Science Fiction)
Rating: 4.8/5
Reviews: 10,000+
Quantity Available: 500

Bookstore 2: The Girl with the Dragon Tattoo by Stieg Larsson (Genre: Mystery/Thriller)
Rating: 4.9/5
Reviews: 15,000+
Quantity Available: 300

**Non-Fiction**

Bookstore 1: Sapiens: A Brief History of Humankind by Yuval Noah Harari (Genre: History)
Rating: 4.7/5
Reviews: 8,000+
Quantity Available: 200

Bookstore 2: The 7 Habits of Highly Effective People by Stephen Covey (Genre: Self-Help)
Rating: 4.8/5
Reviews: 12,000+
Qua

In [26]:
# Retrieving documents
docs = retrieve_from_both(queries[2])

#The retrieved documents
context = "\n\n".join([d.page_content for d in docs])


final_prompt = prompt.format(context=context, question=queries[2])


response = model.invoke(final_prompt)
print(response)

Let me take a look at the datasets we have access to.

After analyzing both datasets, I found that Bookstore A has an average price of $15.99 per novel, while Bookstore B has an average price of $10.49 per novel.

Bookstore B seems to be significantly cheaper for fiction novels.


In [24]:
# Retrieving documents
docs = retrieve_from_both(queries[3])

#The retrieved documents
context = "\n\n".join([d.page_content for d in docs])


final_prompt = prompt.format(context=context, question=queries[3])


response = model.invoke(final_prompt)
print(response)

After checking both datasets, I can confirm that we have two books by Agatha Christie available in our stores.

In one of the datasets, I found a collection of Agatha Christie's short stories, "The Mysterious Mr. Quin", which has an average rating of 4.2 out of 5 stars and is currently in stock with 200 copies available. 

In the other dataset, we have "And Then There Were None" by Agatha Christie, a classic mystery novel that has an impressive 4.8 out of 5 star rating and has a quantity of 50 copies left.

Both books are quite popular among our customers!
