# Simple RAG using Open AI tools.

### Ask user to enter the key (this keeps it hidden)

In [24]:

from getpass import getpass
os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

Enter your OpenAI API key: ··········


### Importing Libaraies

In [60]:
import os
import pandas as pd
import numpy as np
from openai import OpenAI
client = OpenAI()

### Uploading the file from Local

In [21]:
from google.colab import files
uploaded = files.upload()

Saving contents .csv to contents .csv


### Simple Pass Through prompt

In [131]:
Question  = "How is esethu related to phillemon"

In [132]:
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{
        "role": "user",
        "content":Question
    }]
)


In [133]:
response.choices[0].message.content

'It is not possible to determine the relationship between Esethu and Phillemon without further context. They could be siblings, partners, friends, colleagues, or have any other type of relationship. More information would be needed to accurately determine how they are related.'

## Now adding some more instruction to the promt

In [134]:
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "system", "content":"Answer in 3 lines as if you a poet"},
        {"role": "user", "content":Question}
              ]
)


In [135]:
print(response.choices[0].message.content)

In the tapestry of kin,
Esethu and Phillemon reside within,
Bound by blood, their souls twined thin.


## Now adding RAG

In [136]:
df_contents = pd.read_csv("contents .csv")
df_contents.head()

Unnamed: 0,Content
0,I like hanging around with esethu and Osego an...
1,speaks Xhosa and osego speaks Tswana.
2,Esethu is dating Omphile and Osego I am not su...
3,Esethu stays with Chippa and works with Hendri...
4,Esethu stays at Hassendal and Osego at UWC.


### performing word embedding

In [137]:
def get_embeddings(text, model="text-embedding-ada-002"):
    text = text.replace("\n", " ")
    return client.embeddings.create(input = [text], model=model).data[0].embedding

In [138]:
get_embeddings(df_contents["Content "].iloc[0])[:5]

[0.003178471466526389,
 -0.015129921026527882,
 -0.01564481295645237,
 -0.0401616208255291,
 -0.02603508159518242]

### Adding Embeddings on our File

In [139]:
df_contents['Embeddings'] =df_contents['Content '].apply(lambda x: get_embeddings(x))

In [140]:
df_contents.head()

Unnamed: 0,Content,Embeddings
0,I like hanging around with esethu and Osego an...,"[0.003178471466526389, -0.015129921026527882, ..."
1,speaks Xhosa and osego speaks Tswana.,"[-0.0023357756435871124, -0.019795255735516548..."
2,Esethu is dating Omphile and Osego I am not su...,"[-0.0055847084149718285, -0.03825771063566208,..."
3,Esethu stays with Chippa and works with Hendri...,"[-0.0019158576615154743, -0.020272763445973396..."
4,Esethu stays at Hassendal and Osego at UWC.,"[0.015143132768571377, -0.031157029792666435, ..."


In [141]:
question_embedding = get_embeddings(Question)
Question, question_embedding[:5]

('How is esethu related to phillemon',
 [0.002285033231601119,
  -0.025569049641489983,
  -0.01766800880432129,
  -0.0459776408970356,
  -0.036473069339990616])

### Using dot product to find the similarities between my qustio and the file info

In [142]:
def fn_dot_pro(embeddings_):
  return np.dot(embeddings_, question_embedding)

df_contents['distance'] = df_contents['Embeddings'].apply(fn_dot_pro)

In [143]:
df_contents.shape

(26, 3)

In [144]:
df_contents.head(6)

Unnamed: 0,Content,Embeddings,distance
0,I like hanging around with esethu and Osego an...,"[0.003178471466526389, -0.015129921026527882, ...",0.814467
1,speaks Xhosa and osego speaks Tswana.,"[-0.0023357756435871124, -0.019795255735516548...",0.759053
2,Esethu is dating Omphile and Osego I am not su...,"[-0.0055847084149718285, -0.03825771063566208,...",0.830958
3,Esethu stays with Chippa and works with Hendri...,"[-0.0019158576615154743, -0.020272763445973396...",0.823801
4,Esethu stays at Hassendal and Osego at UWC.,"[0.015143132768571377, -0.031157029792666435, ...",0.787673
5,Super works at the mine and is from venda clos...,"[-0.015245880000293255, -0.006726320832967758,...",0.725794


### Sort to get the file with the top similarity between my questions and embeddings

In [145]:
df_contents.sort_values('distance', ascending=False, inplace=True)
df_contents.head()

Unnamed: 0,Content,Embeddings,distance
19,"Phillemon is the son of Mokgadi and Simon, he ...","[0.005232660099864006, -0.0034421999007463455,...",0.836352
2,Esethu is dating Omphile and Osego I am not su...,"[-0.0055847084149718285, -0.03825771063566208,...",0.830958
3,Esethu stays with Chippa and works with Hendri...,"[-0.0019158576615154743, -0.020272763445973396...",0.823801
0,I like hanging around with esethu and Osego an...,"[0.003178471466526389, -0.015129921026527882, ...",0.814467
4,Esethu stays at Hassendal and Osego at UWC.,"[0.015143132768571377, -0.031157029792666435, ...",0.787673


### getting the top 3 docs from the table that has the highest similarity

In [146]:
context = df_contents.iloc[0]['Content '] + "\n" + df_contents.iloc[1]['Content '] + "\n" + df_contents.iloc[2]['Content ']+ "\n" + df_contents.iloc[3]['Content ']
print(context)
#

Phillemon is the son of Mokgadi and Simon, he was born in Limpopo at dikgale clinic 
Esethu is dating Omphile and Osego I am not sure. 
Esethu stays with Chippa and works with Hendriettal. 
I like hanging around with esethu and Osego and osego is from North west and Esethu 


In [147]:
context

'Phillemon is the son of Mokgadi and Simon, he was born in Limpopo at dikgale clinic \nEsethu is dating Omphile and Osego I am not sure. \nEsethu stays with Chippa and works with Hendriettal. \nI like hanging around with esethu and Osego and osego is from North west and Esethu '

### using an LLM - gpt to answer the questions based on the informatiom with high similarity

In [148]:
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "system", "content":"Answer in oxford english"},
        {"role": "user", "content": Question},
        {"role":"assistant","content": f'use the information in {context} to answer my question'}
    ])

In [149]:
print(response.choices[0].message.content)

Based on the information provided, it seems that Esethu and Phillemon are not directly related. They may be friends who know each other through mutual acquaintances, such as Osego and Omphile.
