In [5]:
#!pip install -r './requirements.txt'

In [6]:
#!pip install python_dotenv

In [7]:
# To download the data files, sign up for an account on Kaggle if you haven't already, then download the dataset here
# https://www.kaggle.com/datasets/ashishsinhaiitr/lord-of-the-rings-text?resource=download

In [8]:
import os
from dotenv import load_dotenv

# Load variables from the local .env file before the lookups below.
load_dotenv('.env')

# Required settings: each lookup raises KeyError if the variable is not set.
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
PINECONE_API_KEY = os.environ['PINECONE_API_KEY']
PINECONE_API_ENV = os.environ['PINECONE_API_ENV']
PINECONE_INDEX_NAME = os.environ['PINECONE_INDEX_NAME']

In [9]:
PINECONE_INDEX_NAME

'lotr-fellowship'

### Prepare embeddings from 500-character chunks with a 100-character overlap (dense OpenAI vectors + sparse SPLADE vectors)
#### SPLADE

In [10]:
from transformers import AutoTokenizer, AutoModelForMaskedLM
import torch

class SPLADE:
    """Callable that encodes a text into a sparse ``{'indices', 'values'}`` vector.

    The constructor loads a tokenizer and a masked-language-model checkpoint;
    calling the instance runs the model on one text and keeps only the
    positive weights, strongest first.
    """

    def __init__(self, model):
        """Load the tokenizer and model identified by ``model``."""
        # Use CUDA when torch reports it as available, otherwise the CPU.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.tokenizer = AutoTokenizer.from_pretrained(model)
        self.model = AutoModelForMaskedLM.from_pretrained(model)
        # Place the model on the selected device.
        self.model.to(self.device)

    def __call__(self, text: str):
        """Return the non-zero weights for ``text`` sorted in descending order.

        Returns:
            A dict with two parallel lists: 'indices' (positions along the
            last logits axis) and 'values' (their weights).
        """
        encoded = self.tokenizer(text, return_tensors="pt").to(self.device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = self.model(**encoded).logits

        # log(1 + relu(x)) applied to the first item of the batch.
        activations = torch.log1p(torch.relu(logits[0]))
        # Maximum (not sum) over the first axis: one weight per column.
        weights = activations.max(dim=0).values
        kept = torch.where(weights > 0)[0]

        # torch.sort returns both the sorted weights and the permutation.
        sorted_weights, rank = torch.sort(weights[kept], descending=True)
        return {
            'indices': kept[rank].cpu().numpy().tolist(),
            'values': sorted_weights.cpu().numpy().tolist()
        }

#### Instantiate splade

In [11]:
# Build the encoder once; the `splade` instance is reused by the chunk-embedding and query code below.
splade = SPLADE("naver/splade-cocondenser-ensembledistil")

#### Splade (sparse vector example)

In [12]:
# Encode a sample sentence and print the resulting {'indices', 'values'} dict.
doc = "what is the capital of france?"
sparse_vector = splade(doc)
print(sparse_vector)

{'indices': [3007, 2605, 2413, 2885, 9424, 2103, 10505, 3000, 8709, 5288, 2762, 2237, 2563, 2406, 3317, 3295, 5706, 2313, 4079, 3235, 5578, 1997, 3226, 4075, 3835, 12686, 2688, 3434, 3304, 4574, 7213, 3655, 4511, 4199, 2798, 8891, 1996, 1040], 'values': [3.099395275115967, 2.8517937660217285, 2.3875114917755127, 1.7378655672073364, 1.7361198663711548, 1.3342801332473755, 0.8021481037139893, 0.7074628472328186, 0.6788463592529297, 0.5341130495071411, 0.45094141364097595, 0.4456666111946106, 0.3082226514816284, 0.2772521674633026, 0.27454623579978943, 0.25291863083839417, 0.2430085390806198, 0.23374886810779572, 0.19339250028133392, 0.1928836852312088, 0.19277732074260712, 0.18605473637580872, 0.16889144480228424, 0.16056573390960693, 0.14718939363956451, 0.1397988349199295, 0.13846363127231598, 0.10066269338130951, 0.09511645883321762, 0.08622918277978897, 0.06183822080492973, 0.05464307591319084, 0.05087859556078911, 0.04185910150408745, 0.03533969074487686, 0.018414614722132683, 0.015

#### Set up OpenAI Embedding process

In [13]:
import openai
from tenacity import retry, wait_random_exponential, stop_after_attempt, retry_if_not_exception_type
from typing import List
from uuid import uuid4
import textwrap
# Configure the openai client with the key read from the environment.
openai.api_key = OPENAI_API_KEY
# Model used for the dense embeddings (default of get_embedding, also used by retrieve).
EMBEDDING_MODEL = 'text-embedding-ada-002'
# NOTE(review): the two constants below are not referenced by any visible cell —
# presumably the model's context length and tokenizer encoding; confirm before relying on them.
EMBEDDING_CTX_LENGTH = 8191
EMBEDDING_ENCODING = 'cl100k_base'

# Retry failed calls with a randomized exponential wait (min=1, max=20) for at
# most six attempts; an invalid request is never retried.
@retry(
    wait=wait_random_exponential(min=1, max=20),
    stop=stop_after_attempt(6),
    retry=retry_if_not_exception_type(openai.InvalidRequestError),
)
def get_embedding(text_or_tokens, model=EMBEDDING_MODEL):
    """Request an embedding for ``text_or_tokens`` and return the raw API response."""
    response = openai.Embedding.create(input=text_or_tokens, model=model)
    return response

def chunk_text(text: str, max_chunk_size: int, overlap_size: int) -> List[str]:
    """Split ``text`` into consecutive chunks that overlap by ``overlap_size`` characters.

    Sizes are measured in characters (the text is sliced directly), not tokens.

    Args:
        text: The string to split. An empty string yields an empty list.
        max_chunk_size: Maximum length of each chunk; must be positive.
        overlap_size: Number of characters shared by neighbouring chunks;
            must satisfy ``0 <= overlap_size < max_chunk_size``.

    Returns:
        The chunks in order. Every chunk except possibly the last has exactly
        ``max_chunk_size`` characters, and the last chunk ends at the end of
        ``text``.

    Raises:
        ValueError: If the sizes would make the window stand still or move
            backwards (which previously looped forever) or skip text.
    """
    if max_chunk_size <= 0:
        raise ValueError("max_chunk_size must be a positive integer")
    if not 0 <= overlap_size < max_chunk_size:
        raise ValueError("overlap_size must satisfy 0 <= overlap_size < max_chunk_size")

    step = max_chunk_size - overlap_size
    text_length = len(text)
    chunks = []
    start = 0
    while start < text_length:
        end = min(start + max_chunk_size, text_length)
        chunks.append(text[start:end])
        if end == text_length:
            # The text is fully covered; another window would only repeat the
            # tail of the chunk that was just emitted.
            break
        start += step
    return chunks

def transform_record(record: str, max_chunk_size: int = 500, overlap_size: int = 100) -> List[dict]:
    """Chunk a text and attach a dense and a sparse vector to every chunk.

    Args:
        record: The full text to process; it is passed to ``chunk_text``,
            which slices it as a string.
        max_chunk_size: Maximum chunk length handed to ``chunk_text``.
        overlap_size: Overlap between neighbouring chunks handed to
            ``chunk_text``.

    Returns:
        One dict per chunk with the keys 'chunk_id', 'chunk_parent_id',
        'chunk_text', 'vector' (embedding taken from ``get_embedding``) and
        'sparse_values' (result of ``splade``). All chunks share one randomly
        generated parent id; chunk ids are ``"<parent id>-<n>"`` with ``n``
        starting at 1.
    """
    chunks = chunk_text(record, max_chunk_size, overlap_size)
    # A single random UUID ties all chunks of this text together.
    record_id = str(uuid4())
    transformed_records = []
    for position, chunk in enumerate(chunks, start=1):
        transformed_records.append({
            'chunk_id': f"{record_id}-{position}",
            'chunk_parent_id': record_id,
            'chunk_text': chunk,
            # One embedding request per chunk; the vector is the first item of 'data'.
            'vector': get_embedding(chunk).get('data')[0]['embedding'],
            'sparse_values': splade(chunk),
        })
    return transformed_records

#### Generate Pinecone Index

In [14]:
import pinecone

# Name of the Pinecone index, taken from the environment configuration.
index_name = PINECONE_INDEX_NAME

# initialize connection to pinecone (get API key at app.pinecone.io)
# This must run before any other pinecone call in this cell.
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_API_ENV  # may be different, check at app.pinecone.io
)

# check if index already exists (it shouldn't if this is first time)
if index_name not in pinecone.list_indexes():
    # if does not exist, create index
    # NOTE(review): dimension=1536 presumably matches the output size of EMBEDDING_MODEL,
    # and 'dotproduct' is presumably required for sparse-dense queries — confirm in the Pinecone docs.
    # NOTE(review): metadata_config indexes only a field named 'unused'; this looks like a way to
    # keep the real metadata fields out of the metadata index — confirm.
    pinecone.create_index(
        index_name,
        dimension=1536,
        metric='dotproduct',
        metadata_config={'indexed': ['unused']},
        pod_type='p1.x1'
    )
# connect to index; `index` is used by the upsert and query cells below
index = pinecone.Index(index_name)
# view index stats (last expression of the cell, so its value is displayed)
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 2553}},
 'total_vector_count': 2553}

#### Prepare and load data (LOTR Fellowship of the Ring)

In [None]:
# Read the whole book into one string (`file`), decoding it as ISO-8859-1.
with open('./LOTR/01 - The Fellowship Of The Ring.txt', 'r', encoding='ISO-8859-1') as f:
    file = f.read()

#### Generate the embeddings, then pickle the results so the OpenAI embedding calls do not have to be paid for again

In [None]:
# Start from an empty list, embed every chunk of the book, then collect the records.
chunked_data = []
chunk_array = transform_record(file)
chunked_data.extend(chunk_array)

### Save data and vectors offline

In [None]:
import pickle
# Pickle the list of chunk records to a local file so the next cell can
# reload them without regenerating the embeddings.
with open('LOTR_vector_data.pickle_500_100_sparse_dense', 'wb') as f:
    pickle.dump(chunked_data, f)

### Load data from local to upsert to Pinecone

In [None]:
import pickle
# Reload the chunk records written by the pickling cell above.
# NOTE: pickle.load can execute arbitrary code — only load a file you created yourself.
with open('./LOTR_vector_data.pickle_500_100_sparse_dense', 'rb') as f:
    vector_data = pickle.load(f)

#### Format data to load to Pinecone

In [None]:
def prepare_entries_for_pinecone(entries):
    """
    Build the upsert payload for Pinecone from a sequence of chunk records.

    Each entry must contain 'vector' (an iterable of floats) and
    'sparse_values'. 'chunk_id', 'chunk_parent_id' and 'chunk_text' are
    optional and default to ''. An explicit 'metadata' value, when present,
    is used instead of the metadata assembled from those three fields.

    Returns a dict with the converted records under 'vectors' and an empty
    'namespace'.
    """
    def _to_pinecone_record(entry):
        # Copy the dense vector into a plain list.
        dense = list(entry['vector'])
        chunk_id = entry.get('chunk_id', '')
        fallback_metadata = {
            'chunk_id': chunk_id,
            'parent_id': entry.get('chunk_parent_id', ''),
            'chunk_text': entry.get('chunk_text', ''),
        }
        return {
            'id': chunk_id,
            'metadata': entry.get('metadata', fallback_metadata),
            'values': dense,
            'sparse_values': entry['sparse_values'],
        }

    return {
        'vectors': [_to_pinecone_record(entry) for entry in entries],
        'namespace': '',
    }


In [None]:
# Convert the reloaded chunk records into the {'vectors': [...], 'namespace': ''} payload.
vectors = prepare_entries_for_pinecone(vector_data)

#### Upsert vectors (sparse and dense) and metadata to Pinecone

In [None]:
from tqdm.auto import tqdm  # this is our progress bar

batch_size = 32  # number of records sent with each upsert call
records = vectors['vectors']
for i in tqdm(range(0, len(records), batch_size)):
    batch = records[i:i+batch_size]
    # build one dictionary per record with the fields uploaded to the pinecone index
    upserts = [
        {
            'id': record['id'],
            'sparse_values': record['sparse_values'],
            'values': record['values'],
            'metadata': record['metadata']
        }
        for record in batch
    ]
    # upload this batch of documents to the hybrid index
    index.upsert(upserts)


#### Query Pinecone and OpenAI

In [15]:
# Upper bound (in characters) on the joined context text placed in a prompt.
limit = 8000

def _select_contexts(contexts, max_chars, separator="\n\n---\n\n"):
    """Return the longest prefix of ``contexts`` whose joined length stays below ``max_chars``."""
    selected = []
    total = 0
    for context in contexts:
        # A separator is only inserted between two contexts, never before the first.
        added = len(context) + (len(separator) if selected else 0)
        if total + added >= max_chars:
            break
        selected.append(context)
        total += added
    return selected

def retrieve(query):
    """Build a completion prompt for ``query`` from the best-matching indexed chunks.

    The query is embedded with the OpenAI embedding model (dense) and with
    ``splade`` (sparse); both vectors are sent to the Pinecone ``index`` and
    the 'chunk_text' metadata of the five best matches is used as context.

    Contexts are added in ranking order for as long as their joined length
    stays below the module-level ``limit``. A prompt is always returned, even
    when the index yields zero or one match (previously that case raised
    UnboundLocalError) and the limit is enforced for the last context as well.

    Args:
        query: The user's question.

    Returns:
        The prompt string: instructions, the selected contexts separated by
        ``---`` lines, then the question.
    """
    res = openai.Embedding.create(
        input=[query],
        engine=EMBEDDING_MODEL
    )

    # dense and sparse representations of the query
    xq = res['data'][0]['embedding']
    sq = splade(query)

    # get relevant contexts from Pinecone
    res = index.query(xq, top_k=5, include_metadata=True, sparse_vector=sq)
    contexts = [
        x['metadata']['chunk_text'] for x in res['matches']
    ]

    # build our prompt with the retrieved contexts included
    prompt_start = (
        "Answer the question based on the context below. If you cannot answer based on the context or general knowledge about J.R.R. Tolkien's Lord of the Rings The Fellowship of the Ring, truthfully answer that you don't know.\n\n"+
        "Context:\n"
    )
    prompt_end = (
        f"\n\nQuestion: {query}\nAnswer:"
    )
    # keep contexts, in ranking order, until the character limit would be reached
    selected = _select_contexts(contexts, limit)
    return prompt_start + "\n\n---\n\n".join(selected) + prompt_end

def complete(prompt):
    """Send ``prompt`` to the text-davinci-003 completion engine.

    Returns the text of the first returned choice with surrounding
    whitespace stripped.
    """
    # fixed request settings for the completion call
    request = dict(
        engine='text-davinci-003',
        prompt=prompt,
        temperature=0,
        max_tokens=512,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )
    response = openai.Completion.create(**request)
    first_choice = response['choices'][0]
    return first_choice['text'].strip()

#### LangChain memory for chat-style conversations

In [16]:
from langchain import OpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationSummaryBufferMemory

# Completion model handed to both the conversation chain and its memory.
llm = OpenAI(
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
    model_name="text-davinci-003",
)
# We set a very low max_token_limit for the purposes of testing.
summary_memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=650)
conversation_with_summary = ConversationChain(llm=llm, memory=summary_memory)
#conversation_with_summary.predict(input="Hi, what's up?")

#### Sample query to Pinecone and OpenAI

In [17]:
query = "Who is Bilbo?"
# first we retrieve relevant items from Pinecone and build the prompt
query_with_contexts = retrieve(query)
# then we send the prompt through the conversation chain and print the wrapped answer
answer = str(conversation_with_summary.predict(input=query_with_contexts))
print(textwrap.fill(answer))

 Bilbo is a hobbit from J.R.R. Tolkien's Lord of the Rings The
Fellowship of the Ring. He is a wealthy and peculiar character who is
known for his remarkable disappearance and unexpected return. He is
also known for his book-learning and writing poetry. He is a friend of
the Gaffer and Frodo, and is often consulted on the growing of
vegetables.


#### Clear conversation memory if desired

In [None]:
#conversation_with_summary.memory.clear()

#### Loop to ask multiple questions and get answers

In [None]:
# Ask questions interactively until the user types 'quit'.
while True:
    # Prompt user for input; leave the loop cleanly if the input stream is
    # closed or the cell is interrupted instead of ending with a traceback.
    try:
        user_input = input("Enter your input (type 'quit' to exit): ")
    except (EOFError, KeyboardInterrupt):
        print("Exiting program...")
        break

    # Surrounding whitespace carries no meaning for the query or the quit command.
    query = user_input.strip()

    # Check if user wants to quit
    if query.lower() == "quit":
        print("Exiting program...")
        break

    # A blank line would only trigger paid embedding/completion calls for nothing.
    if not query:
        continue

    # first we retrieve relevant items from Pinecone
    query_with_contexts = retrieve(query)

    # then we send the context and the query to OpenAI
    print(textwrap.fill(str(conversation_with_summary.predict(input=query_with_contexts))) + '\n')

