## 0. Importing Libraries

In [1]:
import pandas as pd
import openai
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.docstore.document import Document
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.agents import Tool
from langchain.agents import initialize_agent
from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent
from tqdm.auto import tqdm
from uuid import uuid4
import pandas as pd
import os
import torch
from torchtext.data.utils import get_tokenizer
import dill
import re
from transformers import AutoTokenizer, pipeline, AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoModelForQuestionAnswering
from transformers import BitsAndBytesConfig
from langchain import HuggingFacePipeline
import torch
from langchain.chains import LLMChain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
from langchain.memory import ConversationBufferWindowMemory
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Use the default CUDA device when PyTorch reports one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

### Embeddings and vector store

In [3]:
import torch
from langchain_community.embeddings import HuggingFaceInstructEmbeddings

# Checkpoint name handed to the Instructor embedding wrapper below.
model_name = 'hkunlp/instructor-base'

# Embedding function later passed to FAISS.load_local; placed on the notebook-level `device`.
embedding_model = HuggingFaceInstructEmbeddings(
    model_name=model_name,
    model_kwargs=dict(device=device),
)

load INSTRUCTOR_Transformer
max_seq_length  512


In [4]:
# Folder that holds the persisted vector stores; create it the first time the notebook runs.
vector_path = './vector_stores'
if os.path.exists(vector_path):
    pass  # already present, nothing to do
else:
    os.makedirs(vector_path)
    print('create path done')

In [5]:
def predict(text_str):
    """Classify a question and return the index of the highest-scoring class.

    The text is lower-cased, stripped of parenthesised spans, CR/LF, '|' and
    '!' characters, and whitespace-normalised before being tokenised with the
    spaCy `en_core_web_sm` tokenizer and fed to the TorchScript CNN stored
    under `../question_classification/model/5class/`.

    Args:
        text_str (str): Raw question text.

    Returns:
        int: Index of the arg-max class; the notebook uses it to index
        `categories`.

    Raises:
        ValueError: If nothing is left of `text_str` after cleaning, since an
            empty token sequence cannot be classified.
    """
    # Same cleaning regex as before: the character class also removes literal '|'.
    cleaned = re.sub("\\(.+?\\)|[\r\n|\n\r]|!", "", text_str.lower())
    cleaned = " ".join(cleaned.split())
    if not cleaned:
        # Fail early with a clear message instead of an opaque tensor error inside the model.
        raise ValueError("predict() received no classifiable text after cleaning")

    # The classifier always runs on CPU; use a distinct name so the notebook-level
    # `device` is not shadowed.
    cpu_device = torch.device('cpu')

    tokenizer = get_tokenizer('spacy', language='en_core_web_sm')
    # map_location keeps the weights on the same device as the input tensor,
    # whatever device the model was exported from.
    loaded_model = torch.jit.load(
        '../question_classification/model/5class/CNN.pt',
        map_location=cpu_device,
    )
    # NOTE(security): dill.load can execute arbitrary code; only load vocab files you trust.
    with open('../question_classification/model/5class/vocab.pkl', 'rb') as f:
        loaded_vocab = dill.load(f)

    token_ids = torch.tensor(loaded_vocab(tokenizer(cleaned))).to(cpu_device)
    token_ids = token_ids.reshape(1, -1)  # add a leading batch dimension of 1

    with torch.no_grad():
        output = loaded_model(token_ids).squeeze(1)
        predicted = torch.max(output.data, 1)[1]
    return predicted.item()

In [6]:
# Smoke-test the question classifier on a sample question.
text = 'What rice cookers are available?'
predict(text_str=text)

1

In [7]:
# Category labels, looked up by the integer that predict() returns.
categories = [
    'Electronics',
    'Home_and_Kitchen',
    'Sports_and_Outdoors',
    'Tools_and_Home_Improvement',
    'Beauty_and_Personal_Care',
]

In [8]:
# Show the label stored at index 1 of `categories`.
categories[1]

'Home_and_Kitchen'

In [9]:
# Map the classifier's predicted index for `text` to its category label.
categories[predict(text_str=text)]

'Home_and_Kitchen'

In [10]:
def choose_vector_store(text, size, search_kwargs=None, vector_root='./vector_stores'):
    """Return a retriever over the FAISS store matching the question's category.

    The question is classified with `predict`, the resulting index is mapped
    through `categories`, and the FAISS index saved under
    `<vector_root>/<size>/<category>` is loaded with `embedding_model`.

    Args:
        text (str): User question used to pick the category.
        size: Name of the sub-folder under `vector_root` (the notebook passes
            100). NOTE(review): presumably the sample size the store was built
            from; confirm against the indexing notebook.
        search_kwargs (dict | None): Optional search options forwarded to
            `as_retriever`, e.g. `{"k": 1}` to retrieve fewer documents and
            keep the stuffed prompt within the model's context window.
            `None` keeps the library defaults.
        vector_root (str): Root folder containing the saved stores.

    Returns:
        The retriever produced by `vectordb.as_retriever(...)`.

    Raises:
        FileNotFoundError: If the store folder for this size/category is missing.
    """
    # Kept as a local import because FAISS is not imported in the notebook's import cell.
    from langchain.vectorstores import FAISS

    category = categories[predict(text)]
    store_dir = os.path.join(vector_root, f"{size}/{category}")
    if not os.path.isdir(store_dir):
        raise FileNotFoundError(f"vector store folder not found: {store_dir}")

    vectordb = FAISS.load_local(
        folder_path=store_dir,
        embeddings=embedding_model,
        index_name=f'{category}',  # index files are named after the category
    )

    if search_kwargs is None:
        return vectordb.as_retriever()
    return vectordb.as_retriever(search_kwargs=search_kwargs)

### Test model

In [11]:
#locate models
model_path = './models'
if not os.path.exists(model_path):
    os.makedirs(model_path)
    print('create path done')

In [12]:
# %cd ./models
# !git clone https://huggingface.co/anas-awadalla/gpt2-span-head-few-shot-k-16-finetuned-squad-seed-0
# !git clone https://huggingface.co/lmsys/fastchat-t5-3b-v1.0
# !git clone https://huggingface.co/google/flan-t5-base
# !git clone https://huggingface.co/google/flan-t5-large


In [13]:
def gpt2_model(temp = 0, rep = 1.5):
    """Load the local GPT-2 SQuAD checkpoint in 4-bit and wrap it for LangChain.

    Args:
        temp: Value placed under "temperature" in the pipeline's `model_kwargs`.
        rep: Value placed under "repetition_penalty" in the pipeline's `model_kwargs`.

    Returns:
        HuggingFacePipeline: LangChain LLM wrapper around a text-generation
        pipeline that produces at most 100 new tokens.
    """
    model_id = 'models/gpt2-span-head-few-shot-k-16-finetuned-squad-seed-0/'

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Reuse the EOS token id as the padding id.
    tokenizer.pad_token_id = tokenizer.eos_token_id

    # 4-bit NF4 quantisation with float16 compute and double quantisation.
    bitsandbyte_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True
    )

    # Quantisation is set only through `quantization_config`. The former extra
    # `load_in_8bit=True` contradicted the 4-bit config, and recent transformers
    # releases reject passing both.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=bitsandbyte_config,
        device_map='cuda:0'
    )

    # NOTE(review): `model` is already instantiated here; confirm that
    # `model_kwargs` still reaches generation, otherwise temp/rep have no effect.
    pipe = pipeline(
        model=model,
        tokenizer=tokenizer,
        task="text-generation",
        max_new_tokens=100,
        model_kwargs={
            "temperature": temp,
            "repetition_penalty": rep
        }
    )

    llm = HuggingFacePipeline(pipeline=pipe)

    return llm

In [14]:
def t5_model(temp = 0, rep = 1.5):
    """Load the local fastchat-t5-3b checkpoint in 4-bit and wrap it for LangChain.

    Args:
        temp: Value placed under "temperature" in the pipeline's `model_kwargs`.
        rep: Value placed under "repetition_penalty" in the pipeline's `model_kwargs`.

    Returns:
        HuggingFacePipeline: LangChain LLM wrapper around a text2text-generation
        pipeline that produces at most 100 new tokens.
    """
    model_id = './models/fastchat-t5-3b-v1.0/'

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Reuse the EOS token id as the padding id.
    tokenizer.pad_token_id = tokenizer.eos_token_id

    # 4-bit NF4 quantisation with float16 compute and double quantisation.
    bitsandbyte_config = BitsAndBytesConfig(
        load_in_4bit = True,
        bnb_4bit_quant_type = "nf4",
        bnb_4bit_compute_dtype = torch.float16,
        bnb_4bit_use_double_quant = True
    )

    # Quantisation is set only through `quantization_config`. The former extra
    # `load_in_8bit = True` contradicted the 4-bit config, and recent transformers
    # releases reject passing both.
    # Original note: "caution Nvidia" (presumably bitsandbytes needs an NVIDIA GPU; confirm).
    model = AutoModelForSeq2SeqLM.from_pretrained(
        model_id,
        quantization_config = bitsandbyte_config,
        device_map = 'auto'
    )

    # NOTE(review): `model` is already instantiated here; confirm that
    # `model_kwargs` still reaches generation, otherwise temp/rep have no effect.
    pipe = pipeline(
        task="text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens = 100,
        model_kwargs = {
            "temperature" : temp,
            "repetition_penalty": rep
        }
    )

    llm = HuggingFacePipeline(pipeline = pipe)

    return llm

In [15]:
def flan_t5_model(temp = 0, rep = 1.5):
    """Load the local flan-t5-base checkpoint in 4-bit and wrap it for LangChain.

    Args:
        temp: Value placed under "temperature" in the pipeline's `model_kwargs`.
        rep: Value placed under "repetition_penalty" in the pipeline's `model_kwargs`.

    Returns:
        HuggingFacePipeline: LangChain LLM wrapper around a text2text-generation
        pipeline that produces at most 100 new tokens.
    """
    model_id = './models/flan-t5-base/'

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Reuse the EOS token id as the padding id.
    tokenizer.pad_token_id = tokenizer.eos_token_id

    # 4-bit NF4 quantisation with float16 compute and double quantisation.
    bitsandbyte_config = BitsAndBytesConfig(
        load_in_4bit = True,
        bnb_4bit_quant_type = "nf4",
        bnb_4bit_compute_dtype = torch.float16,
        bnb_4bit_use_double_quant = True
    )

    # Quantisation is set only through `quantization_config`. The former extra
    # `load_in_8bit = True` contradicted the 4-bit config, and recent transformers
    # releases reject passing both.
    # Original note: "caution Nvidia" (presumably bitsandbytes needs an NVIDIA GPU; confirm).
    model = AutoModelForSeq2SeqLM.from_pretrained(
        model_id,
        quantization_config = bitsandbyte_config,
        device_map = 'auto'
    )

    # NOTE(review): `model` is already instantiated here; confirm that
    # `model_kwargs` still reaches generation, otherwise temp/rep have no effect.
    pipe = pipeline(
        task="text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens = 100,
        model_kwargs = {
            "temperature" : temp,
            "repetition_penalty": rep
        }
    )

    llm = HuggingFacePipeline(pipeline = pipe)

    return llm

In [16]:
def flan_t5_large_model(temp = 0, rep = 1.5):
    """Load the local flan-t5-large checkpoint in 4-bit and wrap it for LangChain.

    Args:
        temp: Value placed under "temperature" in the pipeline's `model_kwargs`.
        rep: Value placed under "repetition_penalty" in the pipeline's `model_kwargs`.

    Returns:
        HuggingFacePipeline: LangChain LLM wrapper around a text2text-generation
        pipeline that produces at most 100 new tokens.
    """
    model_id = './models/flan-t5-large/'

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Reuse the EOS token id as the padding id.
    tokenizer.pad_token_id = tokenizer.eos_token_id

    # 4-bit NF4 quantisation with float16 compute and double quantisation.
    bitsandbyte_config = BitsAndBytesConfig(
        load_in_4bit = True,
        bnb_4bit_quant_type = "nf4",
        bnb_4bit_compute_dtype = torch.float16,
        bnb_4bit_use_double_quant = True
    )

    # Quantisation is set only through `quantization_config`. The former extra
    # `load_in_8bit = True` contradicted the 4-bit config, and recent transformers
    # releases reject passing both.
    # Original note: "caution Nvidia" (presumably bitsandbytes needs an NVIDIA GPU; confirm).
    model = AutoModelForSeq2SeqLM.from_pretrained(
        model_id,
        quantization_config = bitsandbyte_config,
        device_map = 'auto'
    )

    # NOTE(review): `model` is already instantiated here; confirm that
    # `model_kwargs` still reaches generation, otherwise temp/rep have no effect.
    pipe = pipeline(
        task="text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens = 100,
        model_kwargs = {
            "temperature" : temp,
            "repetition_penalty": rep
        }
    )

    llm = HuggingFacePipeline(pipeline = pipe)

    return llm

In [17]:
# Display the imported condense-question prompt that create_chain passes to its question generator.
CONDENSE_QUESTION_PROMPT

PromptTemplate(input_variables=['chat_history', 'question'], template='Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.\n\nChat History:\n{chat_history}\nFollow Up Input: {question}\nStandalone question:')

In [18]:
def create_chain(llm, retriever):
    """Assemble a ConversationalRetrievalChain from an LLM and a retriever.

    The chain rewrites follow-up questions with CONDENSE_QUESTION_PROMPT,
    stuffs the retrieved documents into a fixed answer prompt, and keeps the
    last exchange in a window memory under the key "chat_history".

    Args:
        llm: Language model used both for question condensing and answering.
        retriever: Retriever that supplies the context documents.

    Returns:
        ConversationalRetrievalChain: Verbose chain that also returns its
        source documents.
    """

    def _passthrough_history(history):
        # The memory's messages are handed to the chain unchanged.
        return history

    # Answer prompt; {context} and {question} are str.format-style placeholders.
    prompt_template = """
        Test prompt for NLP Amazon sales chatbot.
        {context}
        Question: {question}
        Answer:
        """.strip()
    answer_prompt = PromptTemplate.from_template(template=prompt_template)

    # Sub-chain that turns a follow-up into a standalone question.
    condense_chain = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT, verbose=True)

    # Sub-chain that answers from the retrieved documents ("stuff" strategy).
    answer_chain = load_qa_chain(llm=llm, chain_type='stuff', prompt=answer_prompt, verbose=True)

    # Keep only the most recent exchange (k=1); store the 'answer' output.
    window_memory = ConversationBufferWindowMemory(
        k=1,
        memory_key="chat_history",
        return_messages=True,
        output_key='answer',
    )

    return ConversationalRetrievalChain(
        retriever=retriever,
        question_generator=condense_chain,
        combine_docs_chain=answer_chain,
        return_source_documents=True,
        memory=window_memory,
        verbose=True,
        get_chat_history=_passthrough_history,
    )

In [19]:
def chat_answer(prompt_question, llm, size=100):
    """Answer one question with a freshly built retrieval chain.

    Args:
        prompt_question (str): The user's question; also used to choose the
            category-specific vector store.
        llm: LangChain LLM, as returned by the `*_model()` loaders.
        size: Vector-store size folder passed to `choose_vector_store`
            (defaults to 100, the value previously hard-coded).

    Returns:
        dict: Chain output; the recorded runs show the keys 'question',
        'chat_history', 'answer' and 'source_documents'.
    """
    # Release cached GPU blocks before running another model.
    torch.cuda.empty_cache()

    retriever = choose_vector_store(prompt_question, size)
    chain = create_chain(llm, retriever)

    # `invoke` replaces the deprecated direct call `chain({...})`.
    answer = chain.invoke({"question": prompt_question})

    return answer


In [None]:
# Ask the GPT-2 model a question.
prompt_question = 'Can you tell me what crossbows are available?'
llm = gpt2_model()
answer = chat_answer(llm=llm, prompt_question=prompt_question)
answer

In [None]:
# Ask the fastchat-t5 model a question.
prompt_question = 'What is the best eye liner?'
llm = t5_model()
answer = chat_answer(llm=llm, prompt_question=prompt_question)
answer

In [20]:
# Ask the flan-t5-base model the eye-liner question.
prompt_question = 'What is the best eye liner?'
llm = flan_t5_model()
answer = chat_answer(llm=llm, prompt_question=prompt_question)
answer

  warn_deprecated(




[1m> Entering new ConversationalRetrievalChain chain...[0m


Token indices sequence length is longer than the specified maximum sequence length for this model (4144 > 512). Running this sequence through the model will result in indexing errors




[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mTest prompt for NLP Amazon sales chatbot.
        {"title": "Gateway 15.6\" FHD Ultra Slim Budget Notebook, Intel Pentium Processor(4-Core, up to 3.1 GHz), 4GB RAM, 128GB Storage, Precision Touchpad, HDMI, Type-C Windows 10S, Microsoft 365 1 Year, Charcoal Black", "average_rating": 4.1, "rating_number": 15, "features": ["\u3010Processor\u30114 Core, 4 Threads, 4MB Cache, up to 3.1 GHz. The perfect combination of performance, power consumption, and value helps your device handle multitasking smoothly and reliably with four processing cores to divide up the work.", "\u3010Display\u301115.6'' FHD (1920 x 1080) IPS Display, with up to 178 degree viewing angles, Non-touch Narrow Border Display, enjoy your photos, movies, and games on a crystal-clear FHD screen.", "\u3010RAM and Storage\u3011RAM is 4 GB high-bandwidth RAM to smoothly run multiple applications

{'question': 'What is the best eye liner?',
 'chat_history': [],
 'answer': 'a smudge',
 'source_documents': [Document(page_content='{"title": "Gateway 15.6\\" FHD Ultra Slim Budget Notebook, Intel Pentium Processor(4-Core, up to 3.1 GHz), 4GB RAM, 128GB Storage, Precision Touchpad, HDMI, Type-C Windows 10S, Microsoft 365 1 Year, Charcoal Black", "average_rating": 4.1, "rating_number": 15, "features": ["\\u3010Processor\\u30114 Core, 4 Threads, 4MB Cache, up to 3.1 GHz. The perfect combination of performance, power consumption, and value helps your device handle multitasking smoothly and reliably with four processing cores to divide up the work.", "\\u3010Display\\u301115.6\'\' FHD (1920 x 1080) IPS Display, with up to 178 degree viewing angles, Non-touch Narrow Border Display, enjoy your photos, movies, and games on a crystal-clear FHD screen.", "\\u3010RAM and Storage\\u3011RAM is 4 GB high-bandwidth RAM to smoothly run multiple applications and browser tabs all at once; storage is i

In [21]:
# Ask the flan-t5-large model the eye-liner question.
prompt_question = 'What is the best eye liner?'
llm = flan_t5_large_model()
answer = chat_answer(llm=llm, prompt_question=prompt_question)
answer

Token indices sequence length is longer than the specified maximum sequence length for this model (4144 > 512). Running this sequence through the model will result in indexing errors




[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mTest prompt for NLP Amazon sales chatbot.
        {"title": "Gateway 15.6\" FHD Ultra Slim Budget Notebook, Intel Pentium Processor(4-Core, up to 3.1 GHz), 4GB RAM, 128GB Storage, Precision Touchpad, HDMI, Type-C Windows 10S, Microsoft 365 1 Year, Charcoal Black", "average_rating": 4.1, "rating_number": 15, "features": ["\u3010Processor\u30114 Core, 4 Threads, 4MB Cache, up to 3.1 GHz. The perfect combination of performance, power consumption, and value helps your device handle multitasking smoothly and reliably with four processing cores to divide up the work.", "\u3010Display\u301115.6'' FHD (1920 x 1080) IPS Display, with up to 178 degree viewing angles, Non-touch Narrow Border Display, enjoy your photos, movies, and games on a crystal-clear FHD screen.", "\u3010RAM and Storage\u3011RAM i

{'question': 'What is the best eye liner?',
 'chat_history': [],
 'answer': '["Wenlaty Case Compatible with iPad 9th/8th/7th Generation Case(2021/2020/2019), Full Body Protective with Pencil Holder, Designed for iPad 10.2 Inch, Auto Sleep/Wake Cover, Sky Blue"]',
 'source_documents': [Document(page_content='{"title": "Gateway 15.6\\" FHD Ultra Slim Budget Notebook, Intel Pentium Processor(4-Core, up to 3.1 GHz), 4GB RAM, 128GB Storage, Precision Touchpad, HDMI, Type-C Windows 10S, Microsoft 365 1 Year, Charcoal Black", "average_rating": 4.1, "rating_number": 15, "features": ["\\u3010Processor\\u30114 Core, 4 Threads, 4MB Cache, up to 3.1 GHz. The perfect combination of performance, power consumption, and value helps your device handle multitasking smoothly and reliably with four processing cores to divide up the work.", "\\u3010Display\\u301115.6\'\' FHD (1920 x 1080) IPS Display, with up to 178 degree viewing angles, Non-touch Narrow Border Display, enjoy your photos, movies, and game