# Retrieval Augmented Generation (RAG) and Vector Databases

In [3]:
#%pip install getenv openai==1.12.0
%pip install datetime

Defaulting to user installation because normal site-packages is not writeable
Collecting datetime
  Downloading DateTime-5.5-py3-none-any.whl.metadata (33 kB)
Collecting zope.interface (from datetime)
  Downloading zope.interface-6.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
Downloading DateTime-5.5-py3-none-any.whl (52 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.6/52.6 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading zope.interface-6.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (247 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m247.3/247.3 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: zope.interface, datetime
Successfully installed datetime-5.5

In [1]:
import os
import pandas as pd
import numpy as np
from ollama import Client
from dotenv import load_dotenv
load_dotenv()

client = Client(
    host = os.environ['OLLAMA_ENDPOINT']
    )
model = "mistral" # Change the model accordingly

## Creating our Knowledge base

Creating a Azure Cosmos DB database


In [3]:
pip install azure-cosmos

Note: you may need to restart the kernel to use updated packages.


In [4]:
## create your cosmoss db on Azure CLI using the following commands
## az login
## az group create -n <resource-group-name> -l <location>
## az cosmosdb create -n <cosmos-db-name> -r <resource-group-name>
## az cosmosdb list-keys -n <cosmos-db-name> -g <resource-group-name>

## Once done navigate to data explorer and create a new database and a new container


In [2]:
#from azure.cosmos import CosmosClient

# Initialize Cosmos Client
#url = os.getenv('COSMOS_DB_ENDPOINT')
#key = os.getenv('COSMOS_DB_KEY')
#client = CosmosClient(url, credential=key)

# Select database
#database_name = 'rag-cosmos-db'
#database = client.get_database_client(database_name)

# Select container
#container_name = 'data'
#container = database.get_container_client(container_name)

import psycopg2

database = os.environ["POSTGRES_SERVICE_URL"]

# Connect to the PostgreSQL database
conn = psycopg2.connect(
    host='localhost',
    database='vectordb',
    user='postgres',
    password='password'
)

# Create a cursor object
cur = conn.cursor()

# Create a table with pgvector type
cur.execute('CREATE TABLE my_table (id SERIAL PRIMARY KEY,name TEXT,feature_vector VECTOR(128));')

# Commit changes and close the cursor
conn.commit()
cur.close()

DuplicateTable: relation "my_table" already exists


In [3]:
import pandas as pd

# Initialize an empty DataFrame
df = pd.DataFrame(columns=['path', 'text'])


# splitting our data into chunks
data_paths= ["data/frameworks.md", "data/own_framework.md", "data/perceptron.md"]

for path in data_paths:
    with open(path, 'r', encoding='utf-8') as file:
        file_content = file.read()

    # Append the file path and text to the DataFrame
    df = df._append({'path': path, 'text': file_content}, ignore_index=True)

df.head()

Unnamed: 0,path,text
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...
1,data/own_framework.md,# Introduction to Neural Networks. Multi-Layer...
2,data/perceptron.md,# Introduction to Neural Networks: Perceptron\...


In [4]:
def split_text(text, max_length, min_length):
    words = text.split()
    chunks = []
    current_chunk = []

    for word in words:
        current_chunk.append(word)
        if len(' '.join(current_chunk)) < max_length and len(' '.join(current_chunk)) > min_length:
            chunks.append(' '.join(current_chunk))
            current_chunk = []

    # If the last chunk didn't reach the minimum length, add it anyway
    if current_chunk:
        chunks.append(' '.join(current_chunk))

    return chunks

# Assuming analyzed_df is a pandas DataFrame and 'output_content' is a column in that DataFrame
splitted_df = df.copy()
splitted_df['chunks'] = splitted_df['text'].apply(lambda x: split_text(x, 400, 300))

splitted_df

Unnamed: 0,path,text,chunks
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,[# Neural Network Frameworks As we have learne...
1,data/own_framework.md,# Introduction to Neural Networks. Multi-Layer...,[# Introduction to Neural Networks. Multi-Laye...
2,data/perceptron.md,# Introduction to Neural Networks: Perceptron\...,[# Introduction to Neural Networks: Perceptron...


In [5]:
# Assuming 'chunks' is a column of lists in the DataFrame splitted_df, we will split the chunks into different rows
flattened_df = splitted_df.explode('chunks')

flattened_df.head()

Unnamed: 0,path,text,chunks
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,# Neural Network Frameworks As we have learned...
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,descent optimization While the `numpy` library...
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,should give us the opportunity to compute grad...
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,those computations on GPUs is very important. ...
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,"API, there is also higher-level API, called Ke..."


## Converting our text to embeddings

Converting out text  to embeddings, and storing them in our database in chunks

In [12]:
openai.api_type = "azure"
openai.api_key = os.getenv("AZURE_OPENAI_KEY") 
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT") 
openai.api_version = "2023-07-01-preview"



In [13]:
from openai import OpenAI
client = OpenAI(api_key=os.getenv("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"))

In [6]:
def create_embeddings(text, model=model):
    # Create embeddings for each document chunk
    embeddings = client.embeddings(model=model, prompt = text)
    return embeddings['embedding']

#embeddings for the first chunk
create_embeddings(flattened_df['chunks'].iloc[0])

[-0.8898268938064575,
 8.514480590820312,
 -3.0923688411712646,
 -2.913651943206787,
 10.449320793151855,
 -5.910948753356934,
 -5.838630199432373,
 4.984593868255615,
 2.1653363704681396,
 -0.7047545313835144,
 -1.601465106010437,
 8.821558952331543,
 -4.915244102478027,
 0.35889774560928345,
 3.7074546813964844,
 -9.940191268920898,
 6.028759956359863,
 -1.2542020082473755,
 7.444120407104492,
 6.305961608886719,
 3.1477510929107666,
 -2.9192259311676025,
 -5.526939868927002,
 5.308563709259033,
 -5.788812160491943,
 -1.776244878768921,
 2.242366313934326,
 -1.0810582637786865,
 0.21795184910297394,
 3.228680372238159,
 5.004166603088379,
 -1.5707414150238037,
 -2.8133203983306885,
 -0.3849608600139618,
 -1.6427679061889648,
 3.471346616744995,
 -9.53742504119873,
 -1.688289999961853,
 -7.961732387542725,
 10.172992706298828,
 2.917032241821289,
 -0.13143183290958405,
 -0.9951291680335999,
 0.9372531771659851,
 -2.947518825531006,
 2.086405038833618,
 -0.5033316612243652,
 -0.9980894

In [7]:
cat = create_embeddings("cat")
cat

[-2.7674975395202637,
 -1.042526125907898,
 -3.529096841812134,
 -7.024846076965332,
 -5.414438724517822,
 -3.034714937210083,
 -0.8459560871124268,
 -1.1699055433273315,
 5.624352931976318,
 0.2722442150115967,
 -3.33093523979187,
 -4.2394208908081055,
 -6.358948230743408,
 1.6854208707809448,
 8.140242576599121,
 3.898611307144165,
 2.0591187477111816,
 -1.089949607849121,
 1.723602294921875,
 -7.953774929046631,
 -4.990561485290527,
 -6.97404146194458,
 -1.417122721672058,
 16.414140701293945,
 0.504503071308136,
 -0.9377787113189697,
 -1.290452003479004,
 -2.461642265319824,
 8.580032348632812,
 6.0604963302612305,
 -4.126593589782715,
 0.656954288482666,
 -2.0201797485351562,
 0.5028010606765747,
 -0.054260071367025375,
 -1.9044362306594849,
 -7.62571907043457,
 1.1046295166015625,
 -6.240285873413086,
 10.480592727661133,
 3.7577452659606934,
 0.9428154230117798,
 0.11911182850599289,
 -4.3662285804748535,
 2.7129714488983154,
 -6.217422008514404,
 -0.7376575469970703,
 3.1688997

In [8]:
# create embeddings for the whole data chunks and store them in a list

embeddings = []
for chunk in flattened_df['chunks']:
    embeddings.append(create_embeddings(chunk))

# store the embeddings in the dataframe
flattened_df['embeddings'] = embeddings

flattened_df.head()

Unnamed: 0,path,text,chunks,embeddings
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,# Neural Network Frameworks As we have learned...,"[-0.8898268938064575, 8.514480590820312, -3.09..."
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,descent optimization While the `numpy` library...,"[0.2570628523826599, 6.677108287811279, -0.602..."
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,should give us the opportunity to compute grad...,"[-0.11792352050542831, 8.048935890197754, 0.90..."
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,those computations on GPUs is very important. ...,"[2.8700883388519287, 5.1669769287109375, -0.58..."
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,"API, there is also higher-level API, called Ke...","[-2.2969982624053955, -5.406444072723389, -5.5..."


# Retrieval

Vector search and similiarity between our prompt and the database

### Creating an search index and reranking

In [17]:
from sklearn.neighbors import NearestNeighbors

embeddings = flattened_df['embeddings'].to_list()

# Create the search index
nbrs = NearestNeighbors(n_neighbors=5, algorithm='ball_tree').fit(embeddings)

# To query the index, you can use the kneighbors method
distances, indices = nbrs.kneighbors(embeddings)

# Store the indices and distances in the DataFrame
flattened_df['indices'] = indices.tolist()
flattened_df['distances'] = distances.tolist()

flattened_df.head()

Unnamed: 0,path,text,chunks,embeddings,indices,distances
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,# Neural Network Frameworks As we have learned...,"[-0.016977494582533836, 0.0028917337767779827,...","[0, 2, 11, 3, 1]","[0.0, 0.5220072028343841, 0.5281003720111753, ..."
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,descent optimization While the `numpy` library...,"[-0.014787919819355011, 0.0016925617819651961,...","[1, 0, 32, 2, 50]","[0.0, 0.5689486562368801, 0.5917805129945245, ..."
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,should give us the opportunity to compute grad...,"[-0.03673850744962692, -0.02062208764255047, 0...","[2, 3, 0, 5, 1]","[0.0, 0.5052294707599493, 0.5220072028343841, ..."
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,those computations on GPUs is very important. ...,"[-0.03166744112968445, -0.011117876507341862, ...","[3, 2, 0, 10, 11]","[0.0, 0.5052294707599493, 0.5456879720601056, ..."
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,"API, there is also higher-level API, called Ke...","[-0.007904806174337864, -0.03335562348365784, ...","[4, 12, 10, 9, 8]","[0.0, 0.5192304344185765, 0.5523440479637329, ..."


In [18]:
# Your text question
question = "what is a perceptron?"

# Convert the question to a query vector
query_vector = create_embeddings(question)  # You need to define this function

# Find the most similar documents
distances, indices = nbrs.kneighbors([query_vector])

index = []
# Print the most similar documents
for i in range(3):
    index = indices[0][i]
    for index in indices[0]:
        print(flattened_df['chunks'].iloc[index])
        print(flattened_df['path'].iloc[index])
        print(flattened_df['distances'].iloc[index])
    else:
        print(f"Index {index} not found in DataFrame")

in our model, in which case the input vector would be a vector of size N. A perceptron is a **binary classification** model, i.e. it can distinguish between two classes of input data. We will assume that for each input vector x the output of our perceptron would be either +1 or -1, depending on the class.
data/perceptron.md
[0.0, 0.5349479188905069, 0.5355415711920977, 0.5439405604626569, 0.5535213920359319]
# Introduction to Neural Networks: Perceptron One of the first attempts to implement something similar to a modern neural network was done by Frank Rosenblatt from Cornell Aeronautical Laboratory in 1957. It was a hardware implementation called "Mark-1", designed to recognize primitive geometric figures,
data/perceptron.md
[0.0, 0.4573465617700431, 0.5237117623258072, 0.5634745620918584, 0.5671484849463262]
user to adjust the resistance of a circuit. > The New York Times wrote about perceptron at that time: *the embryo of an electronic computer that [the Navy] expects will be able 

## Putting it all together to answer a question

In [22]:
import os
import openai

openai.api_type = "azure"
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT")
openai.api_version = "2023-07-01-preview"
openai.api_key = os.getenv("AZURE_OPENAI_KEY")

In [24]:
user_input = "what is a perceptron?"

def chatbot(user_input):
    # Convert the question to a query vector
    query_vector = create_embeddings(user_input)

    # Find the most similar documents
    distances, indices = nbrs.kneighbors([query_vector])

    # add documents to query  to provide context
    history = []
    for index in indices[0]:
        history.append(flattened_df['chunks'].iloc[index])

    # combine the history and the user input
    history.append(user_input)

    # create a message object
    messages=[
        {"role": "system", "content": "You are an AI assiatant that helps with AI questions."},
        {"role": "user", "content": history[-1]}
    ]

    # use chat completion to generate a response
    response = openai.chat.completions.create(
        model="gpt-35-turbo-1106",
        temperature=0.7,
        max_tokens=800,
        messages=messages
    )

    return response.choices[0].message

chatbot(user_input)

ChatCompletionMessage(content='A perceptron is a type of artificial neural network model, which is a fundamental unit of a neural network. It is a simple algorithm used for binary classification tasks. The perceptron takes multiple input values, applies weights to these inputs, and produces a single output value. The output is determined by applying a step function to the weighted sum of the inputs. Perceptrons are often used as building blocks for more complex neural network architectures.', role='assistant', function_call=None, tool_calls=None)

## Testing and evaluation

A basic example of how you can use Mean Average Precision (MAP) to evaluate the responses of your model based on their relevance.

In [25]:
from sklearn.metrics import average_precision_score

# Define your test cases
test_cases = [
    {
        "query": "What is a perceptron?",
        "relevant_responses": ["A perceptron is a type of artificial neuron.", "It's a binary classifier used in machine learning."],
        "irrelevant_responses": ["A perceptron is a type of fruit.", "It's a type of car."]
    },
    {
        "query": "What is machine learning?",
        "relevant_responses": ["Machine learning is a method of data analysis that automates analytical model building.", "It's a branch of artificial intelligence based on the idea that systems can learn from data, identify patterns and make decisions with minimal human intervention."],
        "irrelevant_responses": ["Machine learning is a type of fruit.", "It's a type of car."]
    },
    {
        "query": "What is deep learning?",
        "relevant_responses": ["Deep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured or unlabeled.", "It's a type of machine learning."],
        "irrelevant_responses": ["Deep learning is a type of fruit.", "It's a type of car."]
    },
    {
        "query": "What is a neural network?",
        "relevant_responses": ["A neural network is a series of algorithms that endeavors to recognize underlying relationships in a set of data through a process that mimics the way the human brain operates.", "It's a type of machine learning."],
        "irrelevant_responses": ["A neural network is a type of fruit.", "It's a type of car."]
    }
]

# Initialize the total average precision
total_average_precision = 0

# Test the RAG application
for test_case in test_cases:
    query = test_case["query"]
    relevant_responses = test_case["relevant_responses"]
    irrelevant_responses = test_case["irrelevant_responses"]

    # Generate a response using your RAG application
    response = chatbot(query) 

    # Create a list of all responses and a list of true binary labels
    all_responses = relevant_responses + irrelevant_responses
    true_labels = [1] * len(relevant_responses) + [0] * len(irrelevant_responses)

    # Create a list of predicted scores based on whether the response is the generated response
    predicted_scores = [1 if resp == response else 0 for resp in all_responses]

    # Calculate the average precision for this query
    average_precision = average_precision_score(true_labels, predicted_scores)

    # Add the average precision to the total average precision
    total_average_precision += average_precision

# Calculate the mean average precision
mean_average_precision = total_average_precision / len(test_cases)

In [26]:
mean_average_precision

0.5