In [3]:
!pip install langchain-openai
!pip install langchain-huggingface 
!pip install faiss-cpu


# :class:`~langchain-huggingface package and should be used instead. To use it run `pip install -U :class:`~langchain-huggingface` and import as `from :class:`~langchain_huggingface import HuggingFaceEmbeddings``



In [None]:
import pandas as pd
import json
from pprint import pprint
import os

import json
from langchain_openai import OpenAI
from langchain.chains import RetrievalQA
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.indexes import VectorstoreIndexCreator
# from langchain.embeddings import OpenAIEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from IPython.display import display, Markdown
from dotenv import load_dotenv, find_dotenv
from langchain.document_loaders import TextLoader, CSVLoader, JSONLoader, PyPDFLoader
from langchain.docstore.document import Document
from langchain.vectorstores import FAISS


In [None]:
filename = "Resources\AmazonHomeKitchenReviews.csv"
# Load dataset (modify path as needed)
df = pd.read_csv(filename)
df.head(1)
print(len(df))
df_renamed = df.rename(columns={'title_y' : 'product_title','title_x':'review_title','text':'review_text'})

754079


In [None]:
#replace with your own env file containing API keys
load_dotenv(find_dotenv('Resources\keys.env'))
huggingfacehubapi = os.getenv('HuggingfaceRead')
# print(huggingfacehubapi)


In [7]:

def load_docs(doc):
    loader = CSVLoader(doc)
    return loader.load()

In [8]:
docs = load_docs(filename)  # Change the filename accordingly
print(docs[0].page_content) 

: 59
rating: 5
title_x: Adorable!
text: These are so sweet. I do wish the stopper part was a little longer in length but they work great.
images: []
asin: B01HBWGU80
parent_asin: B01DR2ACA0
user_id: AGKHLEW2SOWHNMFQIJGBECAF7INQ
timestamp: 2019-07-23 04:29:16.671
helpful_vote: 0
verified_purchase: True
title_y: Little Bird Wine Bottle Stopper, Silicone Stoppers, Reusable, Leak Proof, Cute, Fun, Decorative, Multipack (Assorted Color, Set of 6)
description: []
price: 9.49
Brand: LouisChoice
Material: Silicone
Color: Assorted Color
categories: ['Home & Kitchen', 'Kitchen & Dining', 'Kitchen Utensils & Gadgets', 'Bar & Wine Tools', 'Wine Stoppers & Pourers', 'Wine Stoppers']


In [9]:
# embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")





In [None]:
def load_docs(df, start_index=0):
    """Convert DataFrame into a list of Documents from a specific index onward."""
    docs = []
    for idx, row in df.iloc[start_index:].iterrows():
        # content = f"Title: {row.get('review_title', '')}. Review: {row.get('review_text', '')}"
        content = f"Product: {row.get('product_title', 'Unknown')}. Price: ${row.get('price', 'N/A')}.Rating: {row.get('rating', 'N/A')} stars. Color: {row.get('Color', 'N/A')}. Categories: {row.get('categories', 'N/A')}. ReviewTitle: {row.get('review_title', '')}. Review: {row.get('review_text', '')}"   
        doc = Document(page_content=content, metadata={"rating": row.get("rating", "N/A"),"price": row.get("price", "N/A"),"product_title": row.get("product_title", "N/A"),"parent_asin": row.get("parent_asin", "N/A") ,"index": idx })
        docs.append(doc)
        # print(idx)
    return docs

In [None]:
existing_size = 0    
# Get new documents to embed
docs = load_docs(df_renamed, start_index=existing_size)


In [23]:
print(len(docs))
print((docs[1]))

754079
page_content='Product: Fortune Candy 8-Inch Fry Pan with Lid, 3-ply Skillet, 18/8 Stainless Steel, Dishwasher Safe, Induction Ready, Silver (Mirror Finish). Price: $24.99.Rating: 5 stars. Color: Mirror Finish. Categories: ['Home & Kitchen', 'Kitchen & Dining', 'Cookware', 'Pots & Pans', 'Skillets']. ReviewTitle: Stailess, healthier than coated pans. Review: Great little stainless steel, balanced, good weight, frying pan with lide' metadata={'rating': 5, 'price': 24.99, 'product_title': 'Fortune Candy 8-Inch Fry Pan with Lid, 3-ply Skillet, 18/8 Stainless Steel, Dishwasher Safe, Induction Ready, Silver (Mirror Finish)', 'parent_asin': 'B08C7JYKZH', 'index': 1}


In [None]:
faiss_index_path = "Resources/vector"  

batch_size = 500 
# Check if FAISS index exists
if os.path.exists(faiss_index_path):
    print("Loading existing FAISS index...")
    vector_store = FAISS.load_local(faiss_index_path, embedding_model, allow_dangerous_deserialization=True)
    existing_size = vector_store.index.ntotal  # Number of vectors stored
    print(f"Existing FAISS index has {existing_size} embeddings.")
else:
    print("Creating new FAISS index...")
    # vector_store = None
    vector_store = FAISS.from_documents(docs[:batch_size], embedding_model)
    vector_store.save_local(faiss_index_path)

Creating new FAISS index...


In [None]:
def store_incrementally_in_faiss(docs, faiss_index_path, batch_size=500):
    """Loads existing FAISS index, adds new embeddings in batches, and saves back."""

    # Ensure the directory exists
    if not os.path.exists(os.path.dirname(faiss_index_path)):
        os.makedirs(os.path.dirname(faiss_index_path))

    # Check if FAISS index exists
    if os.path.exists(faiss_index_path):
        print("🔄 Loading existing FAISS index...")
        vector_store = FAISS.load_local(faiss_index_path, embedding_model, allow_dangerous_deserialization=True)
        existing_size = vector_store.index.ntotal  # Number of vectors stored
        print(f"📊 Existing FAISS index contains {existing_size} embeddings.")
        start_index = existing_size//batch_size
    else:
        print("🆕 Creating new FAISS index...")
        vector_store = None
        existing_size = 0

    # Get only new documents
    new_docs = docs[existing_size:]
    
    if not new_docs:
        print("✅ No new documents to embed. FAISS index is up-to-date.")
        return

    # Process remaining documents in batches and save each batch as they are generated
    # Even if this fails , it can start from where it left off 
    for i in range(start_index, len(new_docs), batch_size):
        batch = new_docs[i:i + batch_size]
        print(f"🛠️ Processing batch {i // batch_size + 1} with {len(batch)} documents starting from index {start_index}...")
        vector_store.add_documents(batch)  # Always add to the existing vector store
        vector_store.save_local(faiss_index_path)
        existing_size = vector_store.index.ntotal  # Number of vectors stored
        start_index = existing_size//batch_size

    # Save updated FAISS index after all batches are processed
    #vector_store.save_local(faiss_index_path)
    
    # Check if FAISS index was created
    if os.path.exists(faiss_index_path + ".index"):
        print(f"✅ FAISS index successfully saved at: {faiss_index_path}")
    else:
        print("❌ FAISS index was NOT created! Check for errors.")

In [40]:
def load_faiss_and_chat(faiss_index_path=faiss_index_path):
    """Loads FAISS and creates a chatbot using Hugging Face LLM."""
    
    # Load FAISS vector store
    vector_store = FAISS.load_local(faiss_index_path, embedding_model,allow_dangerous_deserialization=True)

    # Use a Hugging Face model as the LLM
    llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.1"
                         , model_kwargs={"temperature": 0.7}
                         ,huggingfacehub_api_token = huggingfacehubapi)

    # Create a QA chain using retrieval
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vector_store.as_retriever())

    while True:
        query = input("\nAsk a question (or type 'exit' to quit): ")
        if query.lower() == "exit":
            print("Goodbye!")
            break
        response = qa_chain.run(query)
        print(f"\n🤖 Chatbot: {response}")

# Run the chatbot
load_faiss_and_chat()





🤖 Chatbot: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Product: Breville Fresh and Furious Blender, Silver, BBL620SIL. Price: $169.99.Rating: 5 stars. Color: Silver. Categories: ['Home & Kitchen', 'Kitchen & Dining', 'Small Appliances', 'Blenders', 'Countertop Blenders']. ReviewTitle: Powerful Blender.. Review: Powerful Blender.  You can use it to make many different things.  It is easy to take apart and clean.  Crushes ice really well.  Blends even better when making smoothies or Ice cream shakes.  I can't wait to make soups in this thing.  It is a powerful blender.

Product: Breville Fresh and Furious Blender, Silver, BBL620SIL. Price: $169.99.Rating: 5 stars. Color: Silver. Categories: ['Home & Kitchen', 'Kitchen & Dining', 'Small Appliances', 'Blenders', 'Countertop Blenders']. ReviewTitle: a little noisy but powerful. Review: would highly recommend.  I've had ot




🤖 Chatbot: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Product: Mooca 6 Tier Acrylic Eyeglasses Frame Stand, Sunglasses Rack, Sunglasses Stand Acrylic Sunglasses Display, Sunglasses Rack Holder, Acrylic Glass Display, 8”H. Price: $14.99.Rating: 1 stars. Color: 6 Frame Stand. Categories: ['Home & Kitchen', 'Home Décor Products', 'Home Décor Accents', 'Display Stands', 'Risers']. ReviewTitle: Not recommended for sunglasses - too small!. Review: Disappointed! Not enough space to hold sunglasses! This is a better option for small reading glasses 👎🏻

Product: Mooca 6 Tier Acrylic Eyeglasses Frame Stand, Sunglasses Rack, Sunglasses Stand Acrylic Sunglasses Display, Sunglasses Rack Holder, Acrylic Glass Display, 8”H. Price: $14.99.Rating: 1 stars. Color: 6 Frame Stand. Categories: ['Home & Kitchen', 'Home Décor Products', 'Home Décor Accents', 'Display Stands', 'Risers']. R




🤖 Chatbot: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Product: Fortune Candy 8-Inch Fry Pan with Lid, 3-ply Skillet, 18/8 Stainless Steel, Dishwasher Safe, Induction Ready, Silver (Mirror Finish). Price: $24.99.Rating: 5 stars. Color: Mirror Finish. Categories: ['Home & Kitchen', 'Kitchen & Dining', 'Cookware', 'Pots & Pans', 'Skillets']. ReviewTitle: A good pan. Review: The quality is there. I bought some baking soda cleaner to keep the inside looking like new. I’m so used to the coatings on cookware so I’m reprograming myself to use a low heat.<br />I’d recommend.

Product: DELARLO Whole body Tri-Ply Stainless Steel 10inch Frying Pan and 6Quarts Sauté Pans set, Oven safe induction skillet,pots and pans set,Suitable for All Stove. Price: $66.49.Rating: 5 stars. Color: Silver. Categories: ['Home & Kitchen', 'Kitchen & Dining', 'Cookware', 'Pots & Pans', 'Skillets'].




🤖 Chatbot: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Product: TigQiang 40ft LED Rope Lights, Dimmable Indoor Outdoor Rope Lights, Waterproof, 432 Warm White LEDs, Flexible Connectable Cuttable, 110V 3000K, for Bedroom Garden Patio Stairs Balcony Party. Price: $36.99.Rating: 5 stars. Color: Warm White. Categories: ['Home & Kitchen', 'Seasonal Décor', 'Seasonal Lighting', 'Rope Lights']. ReviewTitle: Great for a small bedroom.. Review: I was using this for a while to light up my room. I should have ordered a longer one to give me a more fully lit room. But i'm content with the results.

Product: PMS 17inch 72 LEDs Cherry Blossom Tree Lights Desk Top Bonsai Tree Lamp with Low Voltage Transformer, Ideal for Christmas Wedding Party Bedroom Home Decoration (Red). Price: $32.99.Rating: 5 stars. Color: Red. Categories: ['Home & Kitchen', 'Seasonal Décor', 'Trees']. ReviewT

KeyboardInterrupt: Interrupted by user