In [None]:
%pip install langchain
%pip install openai
%pip install pandas
%pip install langchain_community
%pip install pinecone-client
%pip install tqdm
%pip install llama-index
%pip install llama-index-vector-stores-pinecone
%pip install -q cohere llama-index-postprocessor-cohere-rerank

In [31]:
import os

In [1]:
#creation of index
from pinecone import Pinecone, ServerlessSpec
pc = Pinecone(os.environ['PINECONE_API_KEY'])

In [None]:
pc.create_index(
    name="benchmark-book-data",
    dimension=1536,
    metric="cosine",
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    )
)

In [2]:
index = pc.Index("benchmark-book-data")
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 5001}},
 'total_vector_count': 5001}

In [3]:
from langchain.document_loaders import CSVLoader
from openai import OpenAI
openai.api_key = os.environ['OPENAI_API_KEY']
current_directory = os.getcwd()

In [4]:
client = OpenAI()
def get_embedding(text, model="text-embedding-3-small"):
   text = text.replace("\n", " ")
   return client.embeddings.create(input = [text], model=model).data[0].embedding

In [None]:
loader =  CSVLoader(file_path= current_directory + '\\benchmark_book_dataset.csv',
                    metadata_columns=['Title', 'description','authors','publisher','publishedDate','categories','Price', 'profileName', 'review/score', 'review/summary', 'review/text'],
                    source_column="Title",
                    encoding='utf-8',
    csv_args={
    'delimiter': ',',
    'quotechar': '"',
    'fieldnames': ['Title', 'description','authors','publisher','publishedDate','categories','Price', 'profileName', 'review/score', 'review/summary', 'review/text'],
})

In [None]:
data = loader.load()

In [None]:
data[1].metadata

{'source': 'Whispers of the Wicked Saints',
 'row': 1,
 'Title': 'Whispers of the Wicked Saints',
 'description': 'Julia Thomas finds her life spinning out of control after the death of her husband, Richard. Julia turns to her minister for comfort when she finds herself falling for him with a passion that is forbidden by the church. Heath Sparks is a man of God who is busy taking care of his quadriplegic wife who was seriously injured in a sever car accident. In an innocent effort to reach out to a lonely member of his church, Heath finds himself as the man and not the minister as Heath and Julia surrender their bodies to each other and face the wrath of God. Julia finds herself in over her head as she faces a deadly disease, the loss of her home and whispers about her wicked affair. Julia leaves the states offering her body as a living sacrifice in hopes of finding a cure while her heart remains thousands of miles away hoping to one day reunite with the man who holds it hostage.Whispe

In [None]:
# Combine relevant fields to create the page_content
for doc in data:
    metadata = doc.metadata
    page_content = f"Title: {metadata.get('Title', '')}\nDescription: {metadata.get('description', '')}\nAuthors: {metadata.get('authors', '')}\nPublisher: {metadata.get('publisher', '')}\nPublishedDate: {metadata.get('publishedDate', '')}\nCategories: {metadata.get('categories', '')}\nReview Summary: {metadata.get('review/summary', '')}\nReview Text: {metadata.get('review/text', '')}\nPrice: {metadata.get('Price', '')}"
    doc.page_content = page_content

In [None]:
data[3444].metadata

{'source': 'The Jacket',
 'row': 3444,
 'Title': 'The Jacket',
 'description': 'When Greg Eagles discovers his old college jacket in a mysterious warehouse, he is sent back in time to his youth with a chance for a new beginning. But the past holds horrors that Greg and his new friend, Wanda Jagr, must fight to save their lives and their futures. When the two friends meet the malignant professor, Stan Beazel, Fundy University becomes a campus of terror.',
 'authors': "['Charles Elgee']",
 'publisher': 'iUniverse',
 'publishedDate': '2000-09-01',
 'categories': "['Fiction']",
 'Price': '12.95',
 'profileName': 'Beth Robichaud',
 'review/score': '5',
 'review/summary': 'Awesome Plot! A Real Original.',
 'review/text': "I thoroughly enjoyed reading this book! The story was original, and the author's style of writing flows very nicely. It's a Suspense/Thriller told with a bit of humour. I'm looking forward to reading more from this author in the future."}

In [None]:
#storing embeddings and the corresponding metadata for the first 5000 rows
from tqdm import tqdm

for i, doc in tqdm(enumerate(data), total=min(len(data), 5000)):
    index_value = i + start_index
    print(index_value)
    text = data[index_value].page_content
    metadata = data[index_value].metadata
    vector = get_embedding(text)
    print(metadata['Title'])
    index.upsert([
        {
            "id": f"{metadata['row']}",
            "values": vector,
            "metadata": metadata
        }
    ])

In [38]:
#Imports for using various RAG techniques
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.llms.openai import OpenAI as OAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.query_engine import TransformQueryEngine, RetrieverQueryEngine
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core import VectorStoreIndex, PromptTemplate, ServiceContext

In [6]:
vector_store = PineconeVectorStore(pinecone_index=index, text_key="row")
vector_store.index_name = "benchmark-book-data"

In [9]:
def prettify_review(response):
    print("Review Summary:")
    print(response.response)
    print("\nDetails:")

    for node in response.source_nodes:
        metadata = node.node.metadata
        print(f"\nTitle: {metadata['Title']}")
        print(f"Row: {node.node}")
        print(f"Author(s): {', '.join(eval(metadata['authors']))}")
        print(f"Price: ${metadata['Price']}")
        print(f"Published Date: {metadata['publishedDate']}")
        print(f"Publisher: {metadata['publisher']}")
        print(f"Rating: {metadata['review/score']} stars")
        print(f"Review Summary: {metadata['review/summary']}")
        print(f"Review Text: {metadata['review/text']}")
        print(f"Source: {metadata['source']}")



In [7]:
index_vector = VectorStoreIndex.from_vector_store(vector_store=vector_store)
llm = OAI(model="gpt-3.5-turbo", temperature=0.0)
text_qa_template_str = """
You are an expert Q&A system that is trusted around the world for your factual accuracy.
Always answer the query using the provided context information, and not prior knowledge. Ensure your answers are fact-based and accurately reflect the context provided.
Some rules to follow:
1. Never directly reference the given context in your answer.
2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines.
3. Provide responses that are informative and descriptive, such that the response looks as if it was answered by a human.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer:
"""
text_qa_template = PromptTemplate(text_qa_template_str)

In [19]:
#naive rag
query_engine_naive = index_vector.as_query_engine(llm = llm,
                                           text_qa_template=text_qa_template,
                                           similarity_top_k=10,
                                           embed_model=OpenAIEmbedding(model="text-embedding-3-small"))

In [76]:
prettify_review(query_engine_naive.query("What is the review of the book, Whispers of the Wicked Saints"))

Review Summary:
The reviews for "Whispers of the Wicked Saints" vary. Some readers found the book to be captivating and engaging, praising its unique storyline and emotional depth. They appreciated the author's ability to draw them into the lives of the characters and create a compelling narrative. However, there were also criticisms about errors in the book, particularly on the back cover, which some readers found disappointing. Despite this, many readers still enjoyed the book and recommended it to others, highlighting its heartwarming and realistic elements. Overall, opinions on the book were mixed, with some readers loving it and others pointing out flaws.

Details:

Title: Whispers of the Wicked Saints
Author(s): Veronica Haddon
Price: $10.95
Published Date: 2005-02
Publisher: iUniverse
Rating: 5 stars
Review Summary: The truth about WOTWS
Review Text: I have to say that my friends at work all read this book and loved it. It was brought to our attention that there are negative rev

In [33]:
#llm rerank
from llama_index.core.postprocessor import LLMRerank

llm_rerank = LLMRerank(choice_batch_size=15, top_n=10)
query_engine_llm_rerank = index_vector.as_query_engine(
    similarity_top_k=10,
    text_qa_template=text_qa_template,
    node_postprocessors=[llm_rerank],
    embed_model=OpenAIEmbedding(model="text-embedding-3-small"),
    llm=llm
)

In [34]:
prettify_review(query_engine_llm_rerank.query("What are the negative reviews for the book, Whispers of the Wicked Saints"))

Review Summary:
The negative reviews for the book "Whispers of the Wicked Saints" highlight various issues such as poor grammar, typographical errors, weak plot, and lack of character development. Reviewers expressed disappointment in the quality of writing, with some even suggesting that the book may have been written by a middle-schooler. Additionally, there were mentions of misspellings throughout the book, distracting readers from the story. Overall, the negative reviews criticized the book for its lack of substance and quality, leading to a general consensus that it was not worth the money spent on it.

Details:

Title: Whispers of the Wicked Saints
Row: Node ID: 26
Text: 26.0
Author(s): Veronica Haddon
Price: $10.95
Published Date: 2005-02
Publisher: iUniverse
Rating: 1 stars
Review Summary: You've got to hand it to the author...
Review Text: I purchased this book after reading rave reviews of it included within a review of a really good book I'd read. Apparently, the author (or 

In [27]:
from llama_index.postprocessor.cohere_rerank import CohereRerank

# Cohere Rerank
cohere_rerank = CohereRerank(api_key=os.environ['CO_API_KEY'], top_n=5)
query_engine_cohere_rerank = index_vector.as_query_engine(
    similarity_top_k=5,
    text_qa_template=text_qa_template,
    node_postprocessors=[cohere_rerank],
    llm = llm,
    verbose=True,
    embed_model=OpenAIEmbedding(model="text-embedding-3-small")
)

In [16]:
prettify_review(query_engine_cohere_rerank.query("What are the negative reviews for the book, Whispers of the Wicked Saints"))

Review Summary:
The negative reviews for the book "Whispers of the Wicked Saints" criticize the writing style, plot, and character development. Reviewers mention poor grammar, weak plot, and lack of substance in the novel. They highlight instances of unrealistic events and questionable character actions, such as sudden intimate encounters and improbable scenarios. Additionally, there are comments about the book being poorly edited and self-promoted, with suggestions that the positive reviews may not be genuine. Overall, the negative reviews express disappointment in the quality of the book and advise against purchasing it.

Details:

Title: Whispers of the Wicked Saints
Row: Node ID: 25
Text: 25.0
Author(s): Veronica Haddon
Price: $10.95
Published Date: 2005-02
Publisher: iUniverse
Rating: 1 stars
Review Summary: Awful beyond belief!
Review Text: I feel I have to write to keep others from wasting their money. This book seems to have been written by a 7th grader with poor grammatical sk

In [36]:
from llama_index.core.indices.query.query_transform import HyDEQueryTransform

# HyDE
hyde = HyDEQueryTransform(include_original=True)
query_engine_hyde = TransformQueryEngine(query_engine_naive, hyde)

In [40]:
prettify_review(query_engine_hyde.query("What are the negative reviews for the book, Whispers of the Wicked Saints"))

Review Summary:
The negative reviews for the book "Whispers of the Wicked Saints" highlight various issues such as poor writing quality, weak plot development, typographical errors, and lack of editing. Reviewers mentioned instances of misspellings, grammatical errors, and unrealistic plot elements that made the reading experience unpleasant. Some reviewers also expressed disappointment in the book's content and felt that it did not live up to their expectations.

Details:

Title: Whispers of the Wicked Saints
Row: Node ID: 26
Text: 26.0
Author(s): Veronica Haddon
Price: $10.95
Published Date: 2005-02
Publisher: iUniverse
Rating: 1 stars
Review Summary: You've got to hand it to the author...
Review Text: I purchased this book after reading rave reviews of it included within a review of a really good book I'd read. Apparently, the author (or her friends and family) are placing reviews of good books and encouraging people to buy this book if you liked the one they were reviewing. Great m

In [None]:
# Maximal Marginal Relevance (MMR)
query_engine_mmr = index_vector.as_query_engine(vector_store_query_mode="mmr",
                                         similarity_top_k=5,
                                          embed_model=OpenAIEmbedding(model="text-embedding-3-small"),
                                          llm=llm)

In [None]:
prettify_review(query_engine_mmr.query("What are the negative reviews for the book, Whispers of the Wicked Saints"))

In [None]:
# HyDE + LLM Rerank
query_engine_hyde_llm_rerank = TransformQueryEngine(query_engine_llm_rerank, hyde)

In [110]:
prettify_review(query_engine_hyde_llm_rerank.query("Who is the author of the book Whispers of the Wicked Saints?"))

Review Summary:
The author of the book "Whispers of the Wicked Saints" is Veronica Haddon.

Details:

Title: Whispers of the Wicked Saints
Row: Node ID: 21
Text: 21.0
Author(s): Veronica Haddon
Price: $10.95
Published Date: 2005-02
Publisher: iUniverse
Rating: 4 stars
Review Summary: Wonderful, wonderful book !!
Review Text: Just as predicted the first chapter of the book was long. I continued reading and then fell in love with WOTWS. It is heart warming and realistic. I thought it was a little depressing, but I quickly realized the writers point. There is a rainbow at the end of each storm. I did notice the error on the cover, but it only made me love the writer more. This writer is talented and she is not corporate, nor does it appear that she has a staff working for her. Her book was writen from the heart and that is what makes an American literary legend. I would rather read a book with an error then to read hollow perfection any day of the week.
Source: Whispers of the Wicked Sain

In [28]:
# Hyde + Cohere Rerank
query_engine_hyde_cohere_rerank = TransformQueryEngine(query_engine_cohere_rerank, hyde)

In [30]:
prettify_review(query_engine_hyde_cohere_rerank.query("Who are the reviewers who gave a negative review for the book, The Impatient Gardeners Lawn book?"))

Review Summary:
The reviewers who gave negative reviews for the book "The Impatient Gardener's Lawn Book" are Dorothy Muench and Dee Dixon.

Details:

Title: The Impatient Gardener's Lawn Book
Row: Node ID: 75
Text: 75.0
Author(s): Jerry Baker
Price: $12.95
Published Date: 1987
Publisher: Random House Digital, Inc.
Rating: 1 stars
Review Summary: Gardening lawn book
Review Text: Four pages of the book was missing. It just happened to be the formulas that my husband needed. Fortunately he was able to copy our friends book. But I will hesitate to buy another book like this again.
Source: The Impatient Gardener's Lawn Book

Title: The Impatient Gardener's Lawn Book
Row: Node ID: 72
Text: 72.0
Author(s): Jerry Baker
Price: $12.95
Published Date: 1987
Publisher: Random House Digital, Inc.
Rating: 4 stars
Review Summary: Pretty good, although a few flaws
Review Text: This is, all-around, a pretty good lawn book. However, there are a few cases where the advice is a little outdated given the l

In [87]:
# Multi Query
vector_retriever = index_vector.as_retriever(similarity_top_k=10)
retriever_multi_query = QueryFusionRetriever(
    [vector_retriever],
    similarity_top_k=10,
    num_queries=5,
    llm=llm,
    mode="reciprocal_rerank",
    use_async=False,
    verbose=True
)
query_engine_multi_query = RetrieverQueryEngine.from_args(retriever_multi_query,
                                                          verbose=True,
                                                          embed_model=OpenAIEmbedding(model="text-embedding-3-small"),
                                                          llm=llm)

In [88]:
prettify_review(query_engine_multi_query.query("List the books that are published by 'Bloomsbury Publishers'"))

Generated queries:
1. "Best-selling books published by Bloomsbury Publishers"
2. "New releases from Bloomsbury Publishers"
3. "Historical fiction books by Bloomsbury Publishers"
4. "Award-winning books by Bloomsbury Publishers"
Review Summary:
There are no books in the provided context information that are published by 'Bloomsbury Publishers'.

Details:

Title: Sultans of Swing - The Very Best of Dire Straits (Fretted)
Author(s): Dire Straits
Price: $17.9
Published Date: 1999-04
Publisher: Hal Leonard Corporation
Rating: 1 stars
Review Summary: poorly done !
Review Text: Not sure what the other person was smoking when reviewing this book, but must be a beginner to Dire Straits. This was very poorly done. In many places, the tabs are incorrect. It looks like the transcriber just did what they thought or liked rather than validate it. I would not have bought this book if I had known how wrong it was. I suppose on a plus note it provides an alternate method. Also, the print it too small.D