## Retrieval

### Vector Creation

In [None]:
import gradio as gr

from openai import OpenAI

from dotenv import load_dotenv
load_dotenv()

client = OpenAI()

### Without context, no text from story with query

In [15]:
history_openai_format = []
message = f"query: who welcomed Elara?"
    
history_openai_format.append({"role": "user", "content": message})
    
response = client.chat.completions.create(model='gpt-4o-mini',
messages= history_openai_format,
temperature=0.2,
seed=1000,
stream=False)

In [16]:
response.choices[0].message.content

"It seems like you're asking about a specific event or context involving someone named Elara. However, without additional details, it's difficult to provide a precise answer. Could you please provide more context or specify which Elara you are referring to?"

### With context, first two chapters from the story

In [1]:
context = '''### Title: **The Quantum Navigator**

#### Chapter 1: The Discovery

In the year 2075, the world was a tapestry of advanced technologies and interconnected systems. Cities floated above the clouds, and artificial intelligence governed almost every aspect of human life. Amidst this technological utopia, in a modest laboratory in the heart of New Tokyo, Dr. Elara Hoshino was on the verge of a groundbreaking discovery.

Dr. Hoshino was a quantum physicist, a genius whose work had already revolutionized the field of quantum mechanics. Her current project was a mysterious device she called the "Quantum Navigator." It was designed to manipulate the fabric of spacetime, allowing instantaneous travel between any two points in the universe. The concept was theoretical, but Elara had spent years perfecting the mathematics and technology behind it.

On a foggy evening, as Elara meticulously calibrated the Navigator, a soft hum filled the room. The air around the device shimmered, and a swirling vortex of light appeared. Her heart raced as she realized the Navigator was operational. She cautiously approached the vortex, a mixture of fear and exhilaration coursing through her veins. With a deep breath, she stepped into the light.

#### Chapter 2: The Other Side

Elara emerged on the other side, not in a distant part of Earth but on a completely different planet. She found herself in a lush, alien jungle, with towering trees that glowed faintly in the twilight. The air was thick with the scent of unknown flora, and the sky was a deep indigo, dotted with unfamiliar constellations.

As she marveled at the beauty of this alien world, she noticed movement in the underbrush. A group of tall, slender beings with translucent skin and luminescent eyes emerged. They communicated through a series of melodic tones, which Elara's universal translator quickly deciphered. The beings were the Lumarians, an ancient race with a deep understanding of the cosmos.

The Lumarians welcomed Elara, intrigued by her arrival. They revealed that they had been aware of Earth's existence for millennia but had chosen to observe from afar. The Lumarians were peaceful and possessed advanced knowledge of quantum travel. They had long ago developed their own version of the Quantum Navigator, which they used to explore the universe and gather knowledge.
'''

In [18]:
history_openai_format = []
message = f"context: {context}, query: who welcomed Elara?"
    
history_openai_format.append({"role": "user", "content": message})
    
response = client.chat.completions.create(model='gpt-4o-mini',
messages= history_openai_format,
temperature=0.2,
seed=1000,
stream=False)

In [19]:
response.choices[0].message.content

'The Lumarians welcomed Elara when she arrived on their planet.'

## Create Embeddings

In [40]:
all_context = []

In [41]:
with open('story.txt', 'r') as file:
    for f in file.readlines():
        if f != '\n' and f != '\r':
            all_s = f.split('. ')
            all_context.extend(all_s)

In [42]:
len(all_context)

73

In [43]:
all_context[11]

'The air around the device shimmered, and a swirling vortex of light appeared'

In [44]:
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

#Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)


# Sentences we want sentence embeddings for
# sentences = ['This is an example sentence', 'Each sentence is converted', context, context]
sentences = all_context
sentences.append("who welcomed Elara?")
# Load model from HuggingFace Hub
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2').to('cuda')

# Tokenize sentences
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')

# Compute token embeddings
with torch.no_grad():
    model_output = model(**encoded_input.to('cuda'))

# Perform pooling
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

# Normalize embeddings
sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)

print("Sentence embeddings:")
print(sentence_embeddings)


Sentence embeddings:
tensor([[-0.0473,  0.0227,  0.0392,  ...,  0.0192, -0.0031,  0.0206],
        [-0.0824,  0.0418,  0.0520,  ..., -0.0407, -0.0298, -0.0191],
        [-0.0679,  0.0118, -0.0084,  ..., -0.0755, -0.0538,  0.0421],
        ...,
        [-0.0440,  0.0123,  0.0182,  ..., -0.0137, -0.0519, -0.0059],
        [ 0.0016, -0.1037,  0.0279,  ...,  0.0429, -0.0274,  0.0180],
        [ 0.0263, -0.0158, -0.0072,  ...,  0.0951, -0.0298,  0.0023]],
       device='cuda:0')


In [45]:
sentence_embeddings.shape

torch.Size([74, 384])

In [46]:
import numpy as np

def cosine_similarity(vector_a, vector_b):
    # Calculate the dot product of the vectors
    dot_product = np.dot(vector_a, vector_b)
    
    # Calculate the norm (magnitude) of each vector
    norm_a = np.linalg.norm(vector_a)
    norm_b = np.linalg.norm(vector_b)
    
    # Compute the cosine similarity
    similarity = dot_product / (norm_a * norm_b)
    
    return similarity

In [50]:
sentences[58]

'Elara confronted him, a fierce determination in her eyes'

In [54]:
index_scores_map = {}
for i in range(len(sentences)):
    score = cosine_similarity(sentence_embeddings[-1].cpu().detach().numpy().reshape(1,-1), sentence_embeddings[i].cpu().detach().numpy())
    index_scores_map[i] = float(score[0])
    # print(f'index: {i}, score: {score}')

In [55]:
  index_scores_map

{0: 0.01538220513612032,
 1: 0.139244943857193,
 2: 0.03870378062129021,
 3: -0.009805893525481224,
 4: 0.011766266077756882,
 5: 0.3860347867012024,
 6: 0.026740487664937973,
 7: -0.005041817668825388,
 8: 0.09352340549230576,
 9: 0.47478488087654114,
 10: 0.30354538559913635,
 11: 0.041404251009225845,
 12: 0.24035179615020752,
 13: 0.13692905008792877,
 14: 0.19120171666145325,
 15: 0.1336517632007599,
 16: 0.5801576375961304,
 17: 0.1877894252538681,
 18: 0.19359081983566284,
 19: 0.11199590563774109,
 20: 0.126316636800766,
 21: 0.4495638906955719,
 22: 0.23690766096115112,
 23: 0.7066745758056641,
 24: 0.22464434802532196,
 25: 0.26562198996543884,
 26: 0.15418359637260437,
 27: 0.2914455831050873,
 28: 0.45764461159706116,
 29: 0.2436717450618744,
 30: 0.17598803341388702,
 31: 0.5322508215904236,
 32: 0.29709339141845703,
 33: 0.0661139190196991,
 34: 0.5166687369346619,
 35: 0.5700124502182007,
 36: 0.3175065815448761,
 37: 0.16544167697429657,
 38: 0.5682152509689331,
 39: 0.

In [63]:
# Sort by values in descending order
sorted_dict_desc = dict(sorted(index_scores_map.items(), key=lambda item: item[1], reverse=True))


In [71]:
relevant_info = []
for top in list(sorted_dict_desc.keys())[:5]:
    relevant_info.append(sentences[top])
    

In [72]:
relevant_info

['who welcomed Elara?',
 'The Lumarians welcomed Elara, intrigued by her arrival',
 'Elara confronted him, a fierce determination in her eyes',
 'Elara decided to remain on Luminaria, continuing her research and learning from the Lumarians',
 'Yet, with each obstacle, Elara grew more determined']

In [8]:
cosine_similarity(sentence_embeddings[0].cpu().detach().numpy(), sentence_embeddings[1].cpu().detach().numpy())

0.4045593

In [9]:
cosine_similarity(sentence_embeddings[0].cpu().detach().numpy(), sentence_embeddings[2].cpu().detach().numpy())

0.008613202

In [13]:
cosine_similarity(sentence_embeddings[1].cpu().detach().numpy(), sentence_embeddings[2].cpu().detach().numpy())

0.0075974776