# Recommendation System for ML and Data Science Books from Amazon

## Libraries used:
- LangChain, ChromaDB, OpenAI, CSVLoader, OpenAI embeddings, RetrievalQA

In [18]:
# import libraries

from langchain.document_loaders import CSVLoader
from langchain.vectorstores import Chroma
import chromadb
from langchain.embeddings import OpenAIEmbeddings
import openai
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain.chat_models import ChatOpenAI

In [19]:
# Load variables from a local .env file into the process environment
# (presumably the OpenAI API key needed by the OpenAI calls below — confirm).
load_dotenv()

True

In [4]:
# Read the cleaned Amazon books CSV into LangChain Document objects

csv_loader = CSVLoader(file_path='amazon_ml_ds_books_cleaned.csv')
csv_file = csv_loader.load()

In [5]:
# Break the loaded documents into chunks (chunk_size=1000, overlapping by 100)
# and report how many chunks were produced

csv_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
csv_document = csv_splitter.split_documents(documents=csv_file)

len(csv_document)


8427

In [9]:
# Inspect a single chunk (index 1) to see its page_content / metadata layout

csv_document[1]

Document(page_content='Title: Machine Learning for Absolute Beginners: A Plain English Introduction (Third Edition) (Machine Learning with Python for Beginners Book Series 1)\nAuthors: []\nPrice: 0.0\nStars: 3\nNumber of Reviews: 64\nCountry: Australia', metadata={'source': 'amazon_ml_ds_books_cleaned.csv', 'row': 1})

In [8]:
# Build a Chroma vector store from the document chunks

# 1) directory where the store is written on disk
persist_directory = 'chroma_db'

# 2) embedding model used to vectorise each chunk
embeddings = OpenAIEmbeddings()

# 3) embed every chunk and add it to the store
vector_db = Chroma.from_documents(
    documents=csv_document,
    embedding=embeddings,
    persist_directory=persist_directory,
)

In [12]:
# Persist the vector store to disk, then drop the in-memory reference so the
# store has to be re-opened from persist_directory before it is used again.
# NOTE(review): newer chromadb releases persist automatically and may not
# offer persist() — confirm against the installed version.

vector_db.persist()
vector_db = None

In [9]:
# Re-open the persisted store from disk, passing the same embeddings object
# that was used when the store was built

vector_db = Chroma(
    embedding_function=embeddings,
    persist_directory=persist_directory,
)

In [10]:
# Create a retriever that returns the 10 most similar chunks per query.
# search_kwargs is forwarded to the vector store's similarity search, whose
# result-count parameter is named `k`; an `n` key is not a recognised option,
# so the requested top-10 would not take effect.

retriever = vector_db.as_retriever(search_kwargs={'k': 10})


In [11]:
# Check which search type the retriever uses

retriever.search_type

'similarity'

In [30]:
# Create the prompt template.
# {context} and {question} are the two placeholders declared in
# input_variables below; presumably the QA chain fills {context} with the
# retrieved book records and {question} with the user's query — confirm.

prompt_template = """ You are an amazon book recommender system. Use the following pieces of context to answer the question and recommend a book to the user.

if you don't know the answer, just say "I don't know".

{context}

Question: {question}
"""

PROMPT = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])

In [31]:
# Assemble the question-answering chain from the chat model, the retriever
# and the custom prompt (chain type "stuff")

qachain = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(
        model_name="gpt-3.5-turbo",
        temperature=0.1,
        verbose=False,
    ),
    chain_type='stuff',
    retriever=retriever,
    chain_type_kwargs={'prompt': PROMPT},
)

In [32]:
# Ask for a Pandas book under a price limit and pretty-print the answer
import pprint

doc = qachain({'query': 'could you recommend a book on Pandas with a price less than 100 dollars?'})
pprint.pprint(doc['result'])

('I recommend the book "Python for Data Analysis" by Wes McKinney. It is a '
 'comprehensive guide to data analysis using the Pandas library and is priced '
 'at $49.99.')


In [28]:
# Initialize a question for the QA chain (which uses PROMPT) and
# pretty-print the model's answer
import pprint

QUERY = "Any book to recommend on deep learning and computer vision?"

doc_prompt = qachain({"query": QUERY})
pprint.pprint(doc_prompt['result'])

('Based on the information provided, I would recommend the book "Deep Learning '
 'illustriert: Eine anschauliche Einführung in Machine Vision, Natural '
 'Language Processing und Bilderzeugung für Programmierer und Datenanalysten" '
 'by Jon Krohn. It has received 5 stars and has 59 reviews, making it a highly '
 'regarded book on deep learning and computer vision. Additionally, it is '
 'priced at 39.0, which is quite reasonable. I hope you find this '
 'recommendation helpful!')
