### Initialize Libraries and Keys

In [None]:
# Install libraries into kernel (if not already installed)
# NOTE(review): the import cell below also uses pandas, openai and langchain;
# install those too if the kernel does not already have them.
# NOTE(review): the code below calls pinecone.init(...), which looks like the
# pinecone-client 2.x API -- consider pinning versions; confirm before running.
# %pip install pinecone-client
# %pip install tqdm

In [None]:
# Base Python data handling environment imports 
import os
import time

import pandas as pd
from tqdm.auto import tqdm

# Pinecone is a cloud-based Vector Database we'll use 
# to store embeddings
import pinecone

# OpenAI is used for the embedding LLM and GenAI model 
# used to generate responses
import openai

# Langchain is middleware that ties together the components 
# of the embedding and retrieval pipelines 

# The embedding chain creates searchable vectors of our data
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone

# A link in the chain to operate a chat session
from langchain.chat_models import ChatOpenAI

# We'll maintain some memory of the chat so follow-up questions
# will be context-sensitive
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.chains import ConversationalRetrievalChain, RetrievalQA

### Get Environment Variables

When using VS Code, install the dotenv extension and create a `.env` file with these contents:

OPENAI_KEY=YOUR_OPENAI_API_KEY

PINECONE_KEY=YOUR_PINECONE_KEY

In [None]:
# --- OpenAI settings ---------------------------------------------------
# The API key is read from the environment (None when the variable is unset)
# and also registered on the openai module itself.
OPENAI_KEY = os.environ.get("OPENAI_KEY")
openai.api_key = OPENAI_KEY
EMBEDDING_MODEL = "text-embedding-ada-002"   # model used to embed documents and queries
GENAI_MODEL = "gpt-3.5-turbo"                # chat model used to generate answers

# --- Pinecone settings -------------------------------------------------
PINECONE_KEY = os.environ.get("PINECONE_KEY")
PINECONE_ENV = "gcp-starter"
PINECONE_INDEX_NAME = "default"  # this will be created below

### Read Input Data

In [None]:
# Location of the tab-separated source file
URL = "https://rhkdemo.blob.core.windows.net/demodata/squad-content.tsv"

# Load the whole file into a DataFrame, splitting columns on tabs
df = pd.read_csv(
    URL,
    sep="\t",
)

# Preview the first few rows
df.head()


In [None]:
# Display the (rows, columns) size of the full, unfiltered dataset
df.shape

In [None]:
# Keep only the context passages whose subject is 'Dell'
# (add more subject names to the list to ground the bot on other topics)
filtered_df = df.loc[df['subject'].isin(['Dell'])]
# Show how many passages were kept per subject, then preview them
print(filtered_df['subject'].value_counts())
filtered_df.head()

### Create the Pinecone Vector Database if it does not exist

In [None]:
# Connect the Pinecone client to the project/environment configured above
pinecone.init(api_key = PINECONE_KEY, environment = PINECONE_ENV)

# Create the index only when one with *this name* is missing. Checking for an
# empty list instead would skip creation whenever some other index exists,
# and describe_index() below would then fail on the missing name.
index_list = pinecone.list_indexes()
if PINECONE_INDEX_NAME not in index_list:
    print("Creating index...")
    # dimension must match the embedding model's vector size
    # (1536 for text-embedding-ada-002)
    pinecone.create_index(PINECONE_INDEX_NAME, dimension=1536, metric='dotproduct')

# Print the index description as a sanity check and open a handle
# that the later cells use for upserts and queries
print(pinecone.describe_index(PINECONE_INDEX_NAME))
index = pinecone.Index(PINECONE_INDEX_NAME)

### Generate Embedding Index

In [None]:
# Embedding client backed by the OpenAI model named in EMBEDDING_MODEL.
# The same client is used twice: once to index the grounding-knowledge
# passages, and again at query time to embed the search text so matching
# passages can be pulled into the LLM prompt (RAG).
embed = OpenAIEmbeddings(model=EMBEDDING_MODEL, openai_api_key=OPENAI_KEY)

In [None]:
# Embed every passage in filtered_df and store the vectors, together with the
# original article text, in the vector database.
# Shout-out to Dr. KM Moshin for this code snippet from his Excellent Udemy course on Pinecone!
batch_size = 20 

for i in tqdm(range(0, len(filtered_df), batch_size)):
    # OpenAI has rate limits, and we use batches to slow the pace of embedding requests
    i_end = min(i+batch_size, len(filtered_df))
    batch = filtered_df.iloc[i:i_end]
    
    # When querying the Vector DB for nearest vectors, the metadata 
    # is what is returned and added to the LLM Prompt (the "Grounding Knowledge").
    # One {"subject": ..., "context": ...} dict per row, in row order.
    meta_data = batch[['subject', 'context']].to_dict('records')
    
    # Get a list of documents to submit to OpenAI for embedding  
    docs = batch['context'].tolist() 
    emb_vectors = embed.embed_documents(docs) 

    # The original ID keys are used as the PK in the Vector DB.
    # Pinecone vector IDs must be strings, so coerce in case the column is numeric.
    ids = batch['id'].astype(str).tolist()
    
    # Add embeddings, associated metadata, and the keys to the vector DB.
    # Materialise the tuples so the client receives a concrete list.
    to_upsert = list(zip(ids, emb_vectors, meta_data))
    index.upsert(vectors=to_upsert)
    
    # Pause for 10 seconds between batches to avoid rate limits
    # (no need to wait after the final batch)
    if i_end < len(filtered_df):
        time.sleep(10) 

### Submit a simple query to the Vector Index to ensure it works!

In [None]:
# Wrap the Pinecone index as a LangChain vector store. "context" names the
# metadata field that holds each passage's text.
vectorstore = Pinecone(index, embed, "context")

# Smoke test: ask something the indexed content should be able to answer
# and show the three closest passages.
query = "Who founded Dell?"
vectorstore.similarity_search(query=query, k=3)


### Create a GPT 3.5 Turbo Chatbot with a 5 response memory

In [None]:
# Create a reference to the OpenAI chat model.
# temperature=0.0 keeps the answers as deterministic as the model allows.
llm = ChatOpenAI(openai_api_key = OPENAI_KEY,
                model_name = GENAI_MODEL,
                temperature = 0.0)

# Keep a sliding window of the last 5 question/answer exchanges.
# The memory key must be 'chat_history', which is the input name the
# conversational retrieval chain reads prior turns from.
conv_mem = ConversationBufferWindowMemory(
    memory_key = 'chat_history',
    k = 5,
    return_messages =True)

# Create the chain to manage the chat session.
# A plain RetrievalQA chain never sees the memory object, so follow-up
# questions ("is it expensive to live there?") would lose their context.
# ConversationalRetrievalChain first rewrites each question using the chat
# history, then retrieves passages and answers (default chain type: "stuff").
# With the memory attached, qa.run("question") still takes a single string.
qa = ConversationalRetrievalChain.from_llm(
    llm = llm,
    retriever = vectorstore.as_retriever(),
    memory = conv_mem)

### Now have a conversation about the documents that were added to the grounding data vector database

In [None]:
# NOTE(review): the ingest step filters to subject 'Dell', so London passages
# may not be in the index -- confirm whether this question is meant to show
# how the bot responds when the grounding data lacks an answer.
qa.run("What do people like about london?")

In [None]:
# Follow-up question: "there" has no referent within this text alone, so a
# sensible answer depends on the chain carrying the previous turn.
# NOTE(review): verify the chain built above actually receives chat history.
qa.run("is it expensive to live there?")

In [None]:
# A question about Dell that the indexed passages presumably do not cover --
# useful for seeing how the bot handles it; confirm the intent.
qa.run("Does dell make surfboards?")

In [None]:
# Follow-up question: "they" only resolves to Dell if the chain carries the
# previous turn.
# NOTE(review): verify the chain built above actually receives chat history.
qa.run("Do they make laptops?")

In [None]:
# Self-contained question about the 'Dell' subject that was indexed above
qa.run("Who founded Dell computer?")