In [None]:
# to load the environment variables
from dotenv import load_dotenv
import os

# to tokenize, chunk, and embed the text
from openai import OpenAI
import nltk
import tiktoken
from typing import List

# to distill the results
from langchain_openai import ChatOpenAI
from langchain.docstore.document import Document
from langchain.chains.question_answering import load_qa_chain

# Fetch the sentence-tokenizer data that nltk.sent_tokenize needs.
# Older NLTK releases load the "punkt" models, while newer ones (3.9+) look
# for "punkt_tab" instead, so request both to work with either version.
for nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(nltk_resource)

In [None]:
# Reset and reload the environment variables used by this notebook.
# The listed variables are removed from the process environment first,
# presumably so that stale values from the shell or a previous run do not
# shadow the ones load_dotenv() reads from the .env file.
variables_to_clear = [
    'OPENAI_API_KEY',
    'LANGCHAIN_TRACING_V2',
    'LANGCHAIN_ENDPOINT',
    'LANGCHAIN_API_KEY',
    'LANGCHAIN_PROJECT',
]
for var in variables_to_clear:
    # pop() with a default is a no-op when the variable is not set.
    os.environ.pop(var, None)

load_dotenv()

In [None]:
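# Sample corpus to chunk and embed: mostly repeated filler text with a few
# distinct facts mixed in, which the questions asked later in this notebook
# try to retrieve.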
text = """Hello, how are you today? This is a story of a lovely lady who was bringing up three very
lovely girls, all of them had hair of gold, like their mother, the youngest one in curls. Here's a story, of a 
man named Brady, who was busy with three young boys of his own. They were four men, living all together, yet they 
were all alone. Till the one day when the lady met this fellow, and they knew that it was much more than a hunch,
that this group must somehow form a family, that's the way they all became the Brady Bunch. The Brady Bunch, the
Brady Bunch, that's the way they became the Brady Bunch. Hello, how are you today? This is a story of a lovely lady who was bringing up three very lovely girls, all of them had hair of gold, like their mother, the youngest one in curls. Here's a story, of a 
man named Brady, who was busy with three young boys of his own. The love boat is a television show about an early eighties cruise ship that told the story of captain stubing and his crew. Each episode was a story of one voyage. They were four men, living all together, yet they were all alone. Till the one day when the lady met this fellow, and they knew that it was much more than a hunch,
that this group must somehow form a family, that's the way they all became the Brady Bunch. The Brady Bunch, the
Brady Bunch, that's the way they became the Brady Bunch. Hello, how are you today? This is a story of a lovely lady who was bringing up three very lovely girls, all of them had hair of gold, like their mother, the youngest one in curls. Here's a story, of a 
man named Brady, who was busy with three young boys of his own. They were four men, living all together, yet they 
were all alone. Till the one day when the lady met this fellow, and they knew that it was much more than a hunch,
that this group must somehow form a family, that's the way they all became the Brady Bunch. The Brady Bunch, the
Brady Bunch, that's the way they became the Brady Bunch. Hello, how are you today? This is a story of a lovely lady who was bringing up three very lovely girls, all of them had hair of gold, like their mother, the youngest one in curls. Here's a story, of a 
man named Brady, who was busy with three young boys of his own. They were four men, living all together, yet they 
were all alone. Till the one day when the lady met this fellow, and they knew that it was much more than a hunch,
that this group must somehow form a family, that's the way they all became the Brady Bunch. The Brady Bunch, the
Brady Bunch, that's the way they became the Brady Bunch. Hello, how are you today? This is a story of a lovely lady who was bringing up three very lovely girls, all of them had hair of gold, like their mother, the youngest one in curls. Here's a story, of a 
man named Brady, who was busy with three young boys of his own. They were four men, living all together, yet they 
were all alone. Till the one day when the lady met this fellow, and they knew that it was much more than a hunch,
that this group must somehow form a family, that's the way they all became the Brady Bunch. The Brady Bunch, the
Brady Bunch, that's the way they became the Brady Bunch. Hello, how are you today? This is a story of a lovely lady who was bringing up three very lovely girls, all of them had hair of gold, like their mother, the youngest one in curls. Here's a story, of a 
man named Brady, who was busy with three young boys of his own. They were four men, living all together, yet they 
were all alone. Till the one day when the lady met this fellow, and they knew that it was much more than a hunch,
that this group must somehow form a family, that's the way they all became the Brady Bunch. The Brady Bunch, the
Brady Bunch, that's the way they became the Brady Bunch. Hello, how are you today? This is a story of a lovely lady who was bringing up three very lovely girls, all of them had hair of gold, like their mother, the youngest one in curls. Here's a story, of a 
man named Brady, who was busy with three young boys of his own. They were four men, living all together, yet they 
were all alone. Till the one day when the lady met this fellow, and they knew that it was much more than a hunch,
that this group must somehow form a family, that's the way they all became the Brady Bunch. The Brady Bunch, the
Brady Bunch, that's the way they became the Brady Bunch. Hello, how are you today? This is a story of a lovely lady who was bringing up three very lovely girls, all of them had hair of gold, like their mother, the youngest one in curls. Here's a story, of a 
man named Brady, who was busy with three young boys of his own. There are several symptoms of antibiotic overuse. These include the development of antibiotic-resistant bacteria, which can cause serious infections that are difficult to treat. Antibiotic overuse can also lead to side effects such as diarrhea, nausea, and vomiting. In some cases, it can even cause allergic reactions. To prevent antibiotic overuse, it's important to only take antibiotics when they are necessary and to always follow your doctor's instructions. If you have any questions about antibiotic use, be sure to talk to your healthcare provider They were four men, living all together, yet they 
were all alone. Till the one day when the lady met this fellow, and they knew that it was much more than a hunch,
that this group must somehow form a family, that's the way they all became the Brady Bunch. The Brady Bunch, the
Brady Bunch, that's the way they became the Brady Bunch. The necromancer raised eighteen skeletons from the grave on a cold winters day in January. Hello, how are you today? This is a story of a lovely lady who was bringing up three very lovely girls, all of them had hair of gold, like their mother, the youngest one in curls. Here's a story, of a 
man named Brady, who was busy with three young boys of his own. They were four men, living all together, yet they 
were all alone. Till the one day when the lady met this fellow, and they knew that it was much more than a hunch,
that this group must somehow form a family, that's the way they all became the Brady Bunch. The Brady Bunch, the
Brady Bunch, that's the way they became the Brady Bunch.Hello, how are you today? This is a story of a lovely lady who was bringing up three very lovely girls, all of them had hair of gold, like their mother, the youngest one in curls. 
The a-team were a band of swashbuckling mercenaries who were always on the run from the law. Their adventures were legendary and always involved a lot of explosions. Here's a story, of a 
man named Brady, who was busy with three young boys of his own. They were four men, living all together, yet they 
were all alone. Till the one day when the lady met this fellow, and they knew that it was much more than a hunch,
that this group must somehow form a family, that's the way they all became the Brady Bunch. The Brady Bunch, the
Brady Bunch, that's the way they became the Brady Bunch. Hello, how are you today? This is a story of a lovely lady who was bringing up three very lovely girls, all of them had hair of gold, like their mother, the youngest one in curls. Here's a story, of a 
man named Brady, who was busy with three young boys of his own. They were four men, living all together, yet they 
were all alone. Till the one day when the lady met this fellow, and they knew that it was much more than a hunch,
that this group must somehow form a family, that's the way they all became the Brady Bunch. The Brady Bunch, the
Brady Bunch, that's the way they became the Brady Bunch. Hello, how are you today? This is a story of a lovely lady who was bringing up three very lovely girls, all of them had hair of gold, like their mother, the youngest one in curls. Here's a story, of a 
man named Brady, who was busy with three young boys of his own. They were four men, living all together, yet they 
were all alone. Till the one day when the lady met this fellow, and they knew that it was much more than a hunch,
that this group must somehow form a family, that's the way they all became the Brady Bunch. The Brady Bunch, the
Brady Bunch, that's the way they became the Brady Bunch. Hello, my name is Inigo Montoya. You killed my father. Prepare to die.
"""

In [None]:
# --- Model and client configuration ---

# Chat model used to answer questions and embedding model used for vectors.
OPENAI_MODEL = "gpt-4-turbo-preview"
EMBED_MODEL = "text-embedding-3-small"

# Token budget for a single chunk of text before a new chunk is started.
MAX_TOKENS = 512

# The API key is read from the environment and shared by the OpenAI client
# created here and the chat model created later in the notebook.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=OPENAI_API_KEY)

def prep(text: str):
    """Return ``text`` with each newline, carriage return and tab turned into a space.

    Every such character is replaced one-for-one, so the length of the
    string is unchanged and runs of whitespace are not collapsed.
    """
    to_space = str.maketrans({"\n": " ", "\r": " ", "\t": " "})
    return text.translate(to_space)

def tokenize(text: str) -> List[int]:
    """Encode ``text`` into token ids using the tokenizer of ``EMBED_MODEL``.

    Args:
        text: A single string to encode (callers pass one sentence at a time).

    Returns:
        The token ids produced by tiktoken's encoding for ``EMBED_MODEL``.
    """
    encoding = tiktoken.encoding_for_model(EMBED_MODEL)
    return encoding.encode(text)

def embed(tokens: List[int]):
    """Request an embedding for ``tokens`` from the OpenAI embeddings endpoint.

    The token ids are sent as a single input to ``EMBED_MODEL`` through the
    module-level ``client``; the embedding of the first (only) returned item
    is handed back.
    """
    response = client.embeddings.create(model=EMBED_MODEL, input=tokens)
    first_result = response.data[0]
    return first_result.embedding

def chunk_text(text: str):
    """Split ``text`` into sentence-aligned chunks and embed each chunk.

    Sentences (from ``nltk.sent_tokenize``) are packed greedily into a chunk
    until adding the next sentence would push the chunk past ``MAX_TOKENS``
    tokens; the chunk is then embedded and a new one is started. Sentences
    are never split, so a single sentence longer than ``MAX_TOKENS`` becomes
    a chunk of its own that exceeds the budget.

    Args:
        text: The text to chunk.

    Returns:
        A tuple ``(paras, chunks, chunks_of_tokens)`` of three parallel lists:
        the text of each chunk (each sentence is prefixed with a space, so
        every paragraph starts with one), the embedding of each chunk, and
        the token ids of each chunk. All three are empty for empty input.
    """
    paras = []
    chunks = []
    chunks_of_tokens = []

    current_para = ""
    current_chunk = []

    def flush_current_chunk():
        # Record the chunk being built: its text, its tokens and its embedding.
        paras.append(current_para)
        chunks_of_tokens.append(current_chunk)
        chunks.append(embed(current_chunk))

    for sentence in nltk.sent_tokenize(text):
        sentence_tokens = tokenize(sentence)

        # Start a new chunk when this sentence would overflow the budget.
        # The emptiness check matters when a sentence on its own is longer
        # than MAX_TOKENS: without it an empty chunk would be recorded and
        # sent to the embeddings endpoint.
        if current_chunk and len(current_chunk) + len(sentence_tokens) > MAX_TOKENS:
            flush_current_chunk()
            current_para = ""
            current_chunk = []

        current_para += " " + sentence
        current_chunk.extend(sentence_tokens)

    # Whatever is left over forms the final chunk.
    if current_chunk:
        flush_current_chunk()

    return paras, chunks, chunks_of_tokens

def create_embeddings(filename: str):
    """Read a text file and return its chunks, embeddings and tokens.

    Args:
        filename: Path of the file to read. It is decoded as UTF-8 so the
            result does not depend on the platform's default encoding.

    Returns:
        The ``(paras, chunks, chunks_of_tokens)`` tuple produced by
        ``chunk_text`` for the file's whitespace-normalized contents.
    """
    with open(filename, "r", encoding="utf-8") as file:
        text = file.read()
    text = prep(text)
    return chunk_text(text)
    
def create_embeddings_prompt(prompt:str):
    """Chunk and embed an in-memory string.

    The string goes through the same ``prep`` normalization and
    ``chunk_text`` pipeline as file contents do, and the resulting
    ``(paras, chunks, chunks_of_tokens)`` tuple is returned.
    """
    return chunk_text(prep(prompt))

def vectorize_chunks(paras: List, chunks: List, **kwargs):
    """Build one vector record per chunk, ready to be upserted into the index.

    Args:
        paras: The text of each chunk; ``paras[i]`` belongs to ``chunks[i]``.
        chunks: The embedding of each chunk.
        **kwargs: Optional ``filename``; when given, it is stored in each
            record's metadata under ``"file"``.

    Returns:
        A list of dicts with keys ``"id"`` (the chunk's position as a
        string), ``"values"`` (the embedding) and ``"metadata"`` (the chunk
        text under ``"para"`` plus the optional ``"file"``).

    Note:
        Ids restart at ``"0"`` on every call, so records from separate calls
        share ids; callers that combine several results must make the ids
        unique themselves.
    """
    vectors = []
    for i, values in enumerate(chunks):
        if "filename" in kwargs:
            # Use the argument that was passed in, not a global of the same name.
            metadata = {"file": kwargs["filename"], "para": f"{paras[i]}"}
        else:
            metadata = {"para": f"{paras[i]}"}
        vectors.append({"id": f"{i}", "values": values, "metadata": metadata})

    return vectors


In [None]:
# Chunk and embed the sample text, then wrap every embedding in a vector
# record (no filename is passed, so the metadata holds only the paragraph).
paras, chunks, chunks_of_tokens = create_embeddings_prompt(text)
vectors = vectorize_chunks(paras=paras, chunks=chunks)

In [None]:
# Sanity check: number of items in each list and the size of its first item.
size_summary = (
    len(paras), len(paras[0]),
    len(chunks), len(chunks[0]),
    len(chunks_of_tokens), len(chunks_of_tokens[0]),
    len(vectors),
)
print(*size_summary)

# Show the first paragraph, its embedding, its tokens and its vector record.
for first_item in (paras[0], chunks[0], chunks_of_tokens[0], vectors[0]):
    print(first_item)

In [None]:
from pinecone import Pinecone

# Connect to Pinecone with the key from the environment and open the
# "ai-class1" index that the rest of the notebook writes to and queries.
pinecone_api_key = os.getenv("PINECONE_API_KEY")
pc = Pinecone(api_key=pinecone_api_key)
index = pc.Index("ai-class1")

In [None]:
# Files whose contents are chunked, embedded and stored next to the sample text.
SOURCE_FILES = ["resources/README.md", "resources/README copy.md"]

# Drop file vectors left over from an earlier run of this cell, so running it
# again does not append the same records a second time. Records built from
# the sample text carry no "file" entry in their metadata and are kept.
vectors = [vector for vector in vectors if "file" not in vector["metadata"]]

for filename in SOURCE_FILES:
    paras, chunks, chunks_of_tokens = create_embeddings(filename)
    vectors_from_file = vectorize_chunks(paras, chunks, filename=filename)

    # vectorize_chunks numbers its ids from "0" on every call. Without a
    # per-file prefix, records from different sources would share ids and
    # overwrite one another when upserted. Spaces are replaced only to keep
    # the ids simple.
    id_prefix = filename.replace(" ", "_")
    for vector in vectors_from_file:
        vector["id"] = f"{id_prefix}#{vector['id']}"

    vectors.extend(vectors_from_file)

index.upsert(
    vectors=vectors
)

In [None]:
# Number of values in the first vector, i.e. the embedding dimensionality.
first_vector_values = vectors[0]["values"]
print(len(first_vector_values))

In [None]:
# Raw index responses, one per question asked, in the order they were asked.
query_responses=[]

def ask_a_question(prompt):
    """Answer ``prompt`` from the best-matching paragraph stored in the index.

    The prompt is embedded, the index is searched for the nearest vectors,
    and the paragraph stored in the top match's metadata is handed to the
    chat model together with the question.

    Side effects: prints progress information and appends the raw query
    response to the module-level ``query_responses`` list.

    Args:
        prompt: The question to ask. Only its first chunk is used for the
            search if it is long enough to be split into several.

    Returns:
        The chain's ``"output_text"`` for the question.

    Raises:
        ValueError: If ``prompt`` contains no text to embed.
        LookupError: If the index returns no matches.
    """
    # Convert the prompt to chunks of embeddings.
    paras, chunks, chunks_of_tokens = create_embeddings_prompt(prompt)
    if not chunks:
        raise ValueError("prompt must contain some text to embed")
    # Print a short preview; the full embedding would flood the cell output.
    print(f"Embeddings: {len(chunks[0])} values, starting with {chunks[0][:5]}")

    # Wrap the embeddings in vector records.
    prompt_vectors = vectorize_chunks(paras, chunks)
    query_vector = prompt_vectors[0]
    print(f"Vectorized: id={query_vector['id']}, metadata={query_vector['metadata']}")

    # Search the index for the best match using semantic search. Asking for
    # the metadata here avoids a second request just to read the paragraph.
    query_response = index.query(
        top_k=2,
        vector=query_vector["values"],
        include_metadata=True
    )
    query_responses.append(query_response)
    print(f"Query response: {query_response}")

    matches = query_response["matches"]
    if not matches:
        raise LookupError("the index returned no matches for the prompt")

    # The matches are ordered best first.
    best_match = matches[0]
    best_id = best_match["id"]
    print(f"Best ID: {best_id}")

    # Get the paragraph of interest from the match metadata.
    para_of_interest = best_match["metadata"]["para"]
    print(f"Para of interest: {para_of_interest}")

    # Initialize the langchain chat model.
    llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model_name=OPENAI_MODEL, temperature=0.0)
    # Turn the para_of_interest into a Document.
    document = Document(page_content=para_of_interest)
    # Create the QA chain using the LLM.
    chain = load_qa_chain(llm)
    # Pass the para_of_interest and the prompt to the chain and return its answer.
    question = "If you can't find the answer in the provided document, say, I just don't know the answer to that, otherwise, answer the question. " + prompt
    result = chain.invoke({"input_documents": [document], "question": question})
    return result["output_text"]

In [None]:
# Start from an empty response log so it lines up one-to-one with `questions`.
query_responses=[]

# Questions to ask; they are answered in this order.
questions = [
    "what does inigo montoya say?",
    "what is the love boat television show about?",
    "who is the captain of the love boat?",
    "what are the symptoms of antibiotic overuse?",
    "what is covid-19?",
    "what is the a-team?",
    "who are the members of the a-team?",
    "How many skeletons did the necromancer raise?",
    "What will langsmith help us do?",
]

# One answer per question, in the same order.
answers = [ask_a_question(question) for question in questions]

In [None]:
# Report the top match score, the question and the answer for every query.
# The three lists are parallel, so the position in query_responses selects
# the matching question and answer.
for ix, query_response in enumerate(query_responses):
    top_match = query_response['matches'][0]
    print(f"Match Score: {top_match['score']}")
    print(f"Question: {questions[ix]}")
    print(f"Answer:   {answers[ix]}\n\n")