# Redis LangChain Llama-2 eCommerce Chatbot

In [1]:
# Install requirements
!pip install -r requirements.txt



In [2]:
# Install llama-cpp-python (Python bindings for llama.cpp) with Metal support.
#   https://github.com/abetlen/llama-cpp-python and https://github.com/ggerganov/llama.cpp
# CMAKE_ARGS="-DLLAMA_METAL=on" and FORCE_CMAKE=1 are passed to the package build;
# --force-reinstall --upgrade --no-cache-dir make pip fetch and reinstall the
# package instead of reusing an already installed or cached copy.
!CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir

Collecting llama-cpp-python
  Downloading llama_cpp_python-0.1.77.tar.gz (1.6 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m MB/s[0m eta [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting typing-extensions>=4.5.0 (from llama-cpp-python)
  Obtaining dependency information for typing-extensions>=4.5.0 from https://files.pythonhosted.org/packages/ec/6b/63cc3df74987c36fe26157ee12e09e8f9db4de771e0f3404263117e75b95/typing_extensions-4.7.1-py3-none-any.whl.metadata
  Downloading typing_extensions-4.7.1-py3-none-any.whl.metadata (3.1 kB)
Collecting numpy>=1.20.0 (from llama-cpp-python)
  Obtaining dependency information for numpy>=1.20.0 from https://files.pythonhosted.org/packages/c3/ea/1d95b399078ecaa7b5d791e1fdbb3aee272077d9fd5fb499593c87dec5ea/numpy-

In [3]:
# Download the product dataset from Google Drive by its file ID.
# The preprocessing section reads the result as product_data.csv from the
# working directory.
# NOTE(review): newer gdown releases reportedly deprecate `--id` — confirm
# against the gdown version pinned in requirements.txt.
!gdown --id 1tHWB6u3yQCuAgOYc-DxtZ8Mru3uV5_lj

Downloading...
From (uriginal): https://drive.google.com/uc?id=1tHWB6u3yQCuAgOYc-DxtZ8Mru3uV5_lj
From (redirected): https://drive.google.com/uc?id=1tHWB6u3yQCuAgOYc-DxtZ8Mru3uV5_lj&confirm=t&uuid=7573f967-07b7-478d-83d3-1588f8815c00
To: /Users/kyousaf/Dev/Development3/github/redis-langchain-chatbot/product_data.csv
100%|████████████████████████████████████████| 225M/225M [00:06<00:00, 32.1MB/s]


## Preprocess dataset

In [4]:
import pandas as pd

# Upper bound on the number of characters kept from each long text field
MAX_TEXT_LENGTH = 512


def auto_truncate(val):
    """Return at most the first MAX_TEXT_LENGTH characters of ``val``.

    Values that are already short enough come back unchanged.
    """
    limit = MAX_TEXT_LENGTH
    return val[:limit]

# Read the product catalogue; the three free-text columns are clipped by
# auto_truncate while the file is being parsed
all_prods_df = pd.read_csv(
    "product_data.csv",
    converters={
        column: auto_truncate
        for column in ('bullet_point', 'item_keywords', 'item_name')
    },
)

In [5]:
# Construct a primary key from item ID and domain name
all_prods_df['primary_key'] = (
    all_prods_df['item_id'] + '-' + all_prods_df['domain_name']
)

# Keep only products that actually have keywords.
# An explicit boolean filter is used instead of
# `df['item_keywords'].replace('', None, inplace=True)`: that form mutates a
# column selection in place (which does not reach the frame under pandas
# Copy-on-Write), and on older pandas `value=None` forward-fills the previous
# row's keywords rather than inserting a null.
has_keywords = (
    all_prods_df['item_keywords'].notna()
    & all_prods_df['item_keywords'].ne('')
)

# Drop the rows without keywords and renumber the index from 0 so that the
# positional keys used later (e.g. product_metadata[0]) are contiguous
all_prods_df = all_prods_df.loc[has_keywords].reset_index(drop=True)

all_prods_df.head()

Unnamed: 0,item_id,marketplace,country,main_image_id,domain_name,bullet_point,item_keywords,material,brand,color,item_name,model_name,model_number,product_type,primary_key
0,B07T6RZ2CM,Amazon,IN,71dZhpsferL,amazon.in,3D Printed Hard Back Case Mobile Cover for Len...,mobile cover back cover mobile case phone case...,,Amazon Brand - Solimo,Others,Amazon Brand - Solimo Designer Couples Sitting...,Lenovo K4 Note,gz8115-SL40423,CELLULAR_PHONE_CASE,B07T6RZ2CM-amazon.in
1,B07T2JY31Y,Amazon,IN,71vX7qIEAIL,amazon.in,3D Printed Hard Back Case Mobile Cover for Son...,mobile cover back cover mobile case phone case...,Wood,Amazon Brand - Solimo,others,Amazon Brand - Solimo Designer Leaf on Wood 3D...,Sony Xperia Z1 L39H,gz8056-SL40528,CELLULAR_PHONE_CASE,B07T2JY31Y-amazon.in
2,B0849YGSCZ,Amazon,AE,A1EZF-2mB5L,amazon.ae,,small de fur rooms navidad woven girls shag pa...,,Stone & Beam,,Stone & Beam Contemporary Doily Wool Farmhouse...,,I59I8044IVYGRYC00-Parent,HOME_FURNITURE_AND_DECOR,B0849YGSCZ-amazon.ae
3,B081K6TCML,Amazon,IN,81o9EyZ-fAL,amazon.in,Solimo Plastic Multipurpose Modular Drawer; sm...,drawer modular drawer 3 rack modular drawer ki...,Plastic,Amazon Brand - Solimo,Multicolor,Amazon Brand - Solimo Plastic Multipurpose Mod...,,sol_cujo_13,HOME,B081K6TCML-amazon.in
4,B0854774X5,Amazon,IN,81xaJCVnl3L,amazon.in,"Snug fit for Nokia 8.1, with perfect cut-outs ...",Back Cover Designer Case Designer Take It Easy...,Silicon,Amazon Brand - Solimo,Multicolor,Amazon Brand - Solimo Designer Take It Easy UV...,Nokia 8.1,UV10714-SL40617,CELLULAR_PHONE_CASE,B0854774X5-amazon.in


In [6]:
# Number of products to use (a subset of the full catalogue)
NUMBER_PRODUCTS = 2500

# Take the first NUMBER_PRODUCTS products (rows without item keywords were
# already dropped) and turn them into a dict keyed by row index, where each
# value maps column name -> field value
product_metadata = all_prods_df.iloc[:NUMBER_PRODUCTS].to_dict(orient='index')

In [7]:
# Check one of the products: display the record stored under row index 0
# to see which metadata fields each product carries
product_metadata[0]

{'item_id': 'B07T6RZ2CM',
 'marketplace': 'Amazon',
 'country': 'IN',
 'main_image_id': '71dZhpsferL',
 'domain_name': 'amazon.in',
 'bullet_point': '3D Printed Hard Back Case Mobile Cover for Lenovo K4 Note Easy to put & take off with perfect cutouts for volume buttons, audio & charging ports. Stylish design and appearance, express your unique personality. Extreme precision design allows easy access to all buttons and ports while featuring raised bezel to life screen and camera off flat surface. Slim Hard Back Cover No Warranty None',
 'item_keywords': 'mobile cover back cover mobile case phone case mobile panel phone panel Lenovo mobile case Lenovo phone cover Lenovo back case hard case 3D printed mobile cover mobile cover back cover mobile case phone case mobile panel phone panel Lenovo mobile case Lenovo phone cover Lenovo back case hard case 3D printed mobile cover mobile cover back cover mobile case phone case mobile panel phone panel Lenovo mobile case Lenovo phone cover Lenovo 

## Set up Redis as a vector db

In [8]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores.redis import Redis as RedisVectorStore

# product metadata that is stored alongside each vector
metadatas = list(product_metadata.values())

# text that gets embedded and converted to vectors: one product name per record
texts = [record['item_name'] for record in metadatas]

# HuggingFace is the embeddings provider
embedding = HuggingFaceEmbeddings()

# name of the Redis search index to create
index_name = "products"

# assumes a Redis Stack server is reachable at this address
redis_url = "redis://127.0.0.1:6379"

# embed the texts and load them, together with their metadata, into Redis
vectorstore = RedisVectorStore.from_texts(
    texts=texts,
    embedding=embedding,
    metadatas=metadatas,
    index_name=index_name,
    redis_url=redis_url,
)

  from .autonotebook import tqdm as notebook_tqdm


## Build the ChatBot with ConversationalRetrievalChain

In [9]:
import langchain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import (
    ConversationalRetrievalChain,
    LLMChain
)
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import LlamaCpp
from langchain.prompts.prompt import PromptTemplate

# Prompt for the question-condensing step: it receives the running
# {chat_history} and the latest {question} and asks the model for a single
# standalone question. The escaped \""" sequences render as literal triple
# quotes that delimit the inserted text inside the prompt.
template = """Given the following chat history and a follow up question, rephrase the follow up input question to be a standalone question.
Or end the conversation if it seems like it's done.

Chat History:\"""
{chat_history}
\"""

Follow Up Input: \"""
{question}
\"""

Standalone question:"""

# PromptTemplate picks up the {chat_history} and {question} placeholders
condense_question_prompt = PromptTemplate.from_template(template)

# Prompt for the answering step: it receives the retrieved product {context}
# and the shopper's {question}. The escaped \""" sequences render as literal
# triple quotes that delimit the inserted text, mirroring the
# question-condensing prompt.
# The Question section must contain the {question} placeholder; without it
# the model only ever sees the context and never the question being asked.
template = """You are a friendly, conversational retail shopping assistant. Use the following context including product names, descriptions, and keywords to show the shopper whats available, help find what they want, and answer any questions.
It's ok if you don't know the answer.

Context:\"""
{context}
\"""

Question:\"""
{question}
\"""

Helpful Answer:"""

# PromptTemplate picks up the {context} and {question} placeholders
qa_prompt = PromptTemplate.from_template(template)

import os

# llama.cpp configuration
# (set `langchain.debug = True` here to trace every chain call)
n_gpu_layers = 1  # Metal set to 1 is enough.
n_ctx = 4096
n_batch = 4096  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.

# Location of the Llama-2 model weights. Set the LLAMA_MODEL_PATH environment
# variable to point at your own copy; when it is unset the original
# author-local path is used.
model_path = os.environ.get(
    "LLAMA_MODEL_PATH",
    "/Users/kyousaf//.cache/lm-studio/models/thebloke/llama-2-13b-chat.ggml/llama-2-13b-chat.ggmlv3.q6_K.bin",
)

# We will just use one streaming llm for both chains
llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    n_ctx=n_ctx,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    # stream generated tokens to stdout as they are produced
    callback_manager=CallbackManager([
        StreamingStdOutCallbackHandler()
    ]),
    verbose=True,
)


# question answering chain: the streaming LLM with the "stuff" chain type
# and the shopping-assistant prompt
doc_chain = load_qa_chain(llm=llm, chain_type="stuff", prompt=qa_prompt)

# question creation chain: the same LLM driven by the condensing prompt
question_generator = LLMChain(llm=llm, prompt=condense_question_prompt)

# the chatbot ties together the vector store retriever and both chains
chatbot = ConversationalRetrievalChain(
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
    retriever=vectorstore.as_retriever(),
)

llama.cpp: loading model from /Users/kyousaf//.cache/lm-studio/models/thebloke/llama-2-13b-chat.ggml/llama-2-13b-chat.ggmlv3.q6_K.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_head_kv  = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 18 (mostly Q6_K)
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size =    0.11 MB
llama_model_load_internal: mem required  = 10848.82 MB (+ 3200.00 MB per st

In [None]:
# create a chat history buffer of (question, answer) pairs
chat_history = []
langchain.debug = False

# inputs that end the conversation instead of being sent to the bot;
# without an exit condition this cell never finishes and blocks "Run All"
EXIT_COMMANDS = {"", "exit", "quit"}

# gather user input for the first question to kick off the bot
question = input("Hi! What are you looking for today?")

# keep the bot running in a loop to simulate a conversation until the
# shopper submits an empty line, "exit" or "quit"
while question.strip().lower() not in EXIT_COMMANDS:
    result = chatbot(
        {"question": question, "chat_history": chat_history}
    )
    print("\n")
    # remember this turn so follow-up questions can refer back to it
    chat_history.append((result["question"], result["answer"]))
    question = input()

Hi! What are you looking for today? shoes


## Customize your chains for even better performance

In [None]:
import json

from langchain.schema import BaseRetriever
from langchain.vectorstores import VectorStore
from langchain.schema import Document
from pydantic import BaseModel


class RedisProductRetriever(BaseRetriever, BaseModel):
    """Retriever whose documents carry a product's name, description and keywords.

    Similarity search is delegated to ``vectorstore``; every hit is re-emitted
    as a Document whose page content is rebuilt from the hit's metadata.
    """

    # vector store that is queried for similar products
    vectorstore: VectorStore

    class Config:
        # allow the non-pydantic VectorStore type as a field
        arbitrary_types_allowed = True

    def combine_metadata(self, doc) -> str:
        """Build one descriptive text from the name, bullet point and keywords in ``doc.metadata``."""
        fields = doc.metadata
        name_part = "Item Name: " + fields["item_name"] + ". "
        description_part = "Item Description: " + fields["bullet_point"] + ". "
        keywords_part = "Item Keywords: " + fields["item_keywords"] + "."
        return name_part + description_part + keywords_part

    def get_relevant_documents(self, query):
        """Return the similarity-search hits for ``query`` with enriched page content."""
        return [
            Document(
                page_content=self.combine_metadata(hit),
                metadata=hit.metadata
            )
            for hit in self.vectorstore.similarity_search(query)
        ]

### Set up the ChatBot with the new retriever

In [None]:
# wrap the vector store in the retriever that builds page content from
# item name, description and keywords
redis_product_retriever = RedisProductRetriever(vectorstore=vectorstore)

# rebuild the chatbot around the new retriever, reusing the existing
# question-generation and question-answering chains
chatbot = ConversationalRetrievalChain(
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
    retriever=redis_product_retriever,
)

### Retry

In [None]:
# create a chat history buffer of (question, answer) pairs
chat_history = []

# inputs that end the conversation instead of being sent to the bot;
# without an exit condition this cell never finishes and blocks "Run All"
EXIT_COMMANDS = {"", "exit", "quit"}

# gather user input for the first question to kick off the bot
question = input("Hi! What are you looking for today?")

# keep the bot running in a loop to simulate a conversation until the
# shopper submits an empty line, "exit" or "quit"
while question.strip().lower() not in EXIT_COMMANDS:
    result = chatbot(
        {"question": question, "chat_history": chat_history}
    )
    print("\n")
    # remember this turn so follow-up questions can refer back to it
    chat_history.append((result["question"], result["answer"]))
    question = input()