In [1]:
from dotenv import load_dotenv
import anthropic
import pandas as pd
import os
from sentence_transformers import SentenceTransformer


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
load_dotenv()

ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")

In [3]:
# load pickle file
embedding_df = pd.read_pickle("embeddings/embedding.pkl")
embedding_df.sample(5)

Unnamed: 0,Sentence,Embedding
32,Wallbreakers successfully ruin the strategies ...,"[-0.014169091, 0.040919688, -0.004156394, -0.0..."
19,from destroying records of their communication...,"[-0.09805878, -0.04712853, -0.04607563, -0.027..."
67,of Trisolaris's three suns is struck by a rela...,"[-0.021488613, -0.047500413, -0.010815905, 0.0..."
33,"including announcing his resignation, are inte...","[0.0068574557, 0.024951369, 0.046441276, 0.006..."
8,a private army and build small nuclear weapons...,"[-0.08663975, 0.032416612, -0.019097388, -0.00..."


In [4]:
question = "What did Wang Miao do?"

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

question_embedding = model.encode(question)
print(question_embedding)

[-6.77938461e-02  1.00651063e-01 -7.66228512e-03  8.76871124e-02
  2.61270311e-02  1.78627037e-02  1.08779937e-01  1.51813086e-02
 -7.37962723e-02  2.73137428e-02  6.47440255e-02  4.73283324e-03
 -3.61168967e-03  3.56935486e-02  3.55686359e-02  8.83444920e-02
  2.48198826e-02  4.26023751e-02 -5.83152957e-02 -3.11054420e-02
 -6.53457362e-03 -7.37486035e-02 -1.12962294e-02  3.86004634e-02
 -4.12410088e-02 -4.05967459e-02  2.70972457e-02  3.60601209e-02
 -3.02755684e-02 -3.08797881e-02 -6.17343783e-02  5.86370826e-02
  2.36011446e-02  2.21631993e-02 -1.81382261e-02  7.81486854e-02
  1.04214430e-01 -1.07985665e-03  1.34266643e-02 -2.02773437e-02
  6.14257390e-03 -2.10700072e-02  9.46372971e-02 -7.77823925e-02
  6.06550090e-02 -6.25379235e-02 -1.75790880e-02 -1.85542181e-02
  7.21151680e-02 -4.58323024e-02 -7.55148828e-02  2.75352448e-02
 -6.59098774e-02 -1.28208296e-02  9.67031403e-04  1.43743074e-02
  5.18845692e-02  7.53357122e-03 -4.23526904e-03 -5.36064655e-02
 -6.72690347e-02 -2.55814

In [5]:
# perform cosine similarity and get k closest neighbour

from sklearn.metrics.pairwise import cosine_similarity


def find_k_nearest_neighbors(df, query_embedding, k=5):
    # Compute cosine similarity between query embedding and all embeddings in the DataFrame
    similarities = cosine_similarity([query_embedding], list(df['Embedding'].values))[0]

    # Find indices of the top K most similar embeddings
    top_indices = similarities.argsort()[-k:][::-1]

    # Extract the sentences corresponding to the top indices
    k_nearest_neighbors = df.iloc[top_indices]['Sentence'].tolist()

    return k_nearest_neighbors

k_nearest_neighbors = find_k_nearest_neighbors(embedding_df, question_embedding, k=5)
print("K Nearest Neighbors:")
for neighbor in k_nearest_neighbors:
    print(neighbor)

K Nearest Neighbors:
Miao, a nanotechnology professor, is asked to work with Shi Qiang, a cunning detective, to investigate the mysterious suicides of several scientists, including Ye's daughter Yang Dong. The two of them notice that the world's governments are communicating closely with each other and have put aside their traditional rivalries to prepare for war. Over the next few days, Wang experiences strange hallucinations and meets with Ye. Wang sees people playing a sophisticated virtual reality video game called Three-Body (which is later revealed to have been created by the ETO as a recruitment tool) and begins to play. The game portrays
by Shen Yufei, seeking to help the Trisolarans to find a computational solution to the three-body problem, which plagues their planet. A third, smaller faction, the Survivors, intend to help the Trisolarans in exchange for their own descendants' lives while the rest of humanity dies. In the present day, Wang Miao, a nanotechnology professor, is

In [6]:
query_for_RAG = "Use the query context to answer this question:\n"
query_for_RAG += question
query_for_RAG += "\n"

query_for_RAG += "Query context: \n"
for neighbor in k_nearest_neighbors:
    query_for_RAG += neighbor
    query_for_RAG += "\n"

print(query_for_RAG)

Use the query context to answer this question:
What did Wang Miao do?
Query context: 
Miao, a nanotechnology professor, is asked to work with Shi Qiang, a cunning detective, to investigate the mysterious suicides of several scientists, including Ye's daughter Yang Dong. The two of them notice that the world's governments are communicating closely with each other and have put aside their traditional rivalries to prepare for war. Over the next few days, Wang experiences strange hallucinations and meets with Ye. Wang sees people playing a sophisticated virtual reality video game called Three-Body (which is later revealed to have been created by the ETO as a recruitment tool) and begins to play. The game portrays
by Shen Yufei, seeking to help the Trisolarans to find a computational solution to the three-body problem, which plagues their planet. A third, smaller faction, the Survivors, intend to help the Trisolarans in exchange for their own descendants' lives while the rest of humanity di

In [7]:
response = anthropic.Anthropic().messages.create(
    model="claude-3-haiku-20240307",
    max_tokens=1024,
    messages=[
        {"role": "user", "content": query_for_RAG}
    ]
)
print(response)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Message(id='msg_01C2UUd7U8NWxs5qCNz2tXv2', content=[TextBlock(text="Based on the query context provided, it seems that Wang Miao, a nanotechnology professor, was asked to work with Shi Qiang, a cunning detective, to investigate the mysterious suicides of several scientists, including Ye's daughter Yang Dong. The two of them noticed that the world's governments were communicating closely with each other and had put aside their traditional rivalries, which suggested they were preparing for war against the Trisolarans. Additionally, Wang Miao began experiencing strange hallucinations and started playing a sophisticated virtual reality video game called Three-Body, which was later revealed to have been created by the ETO as a recruitment tool.", type='text')], model='claude-3-haiku-20240307', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(input_tokens=717, output_tokens=143))


In [8]:
from IPython.display import display, Markdown


display(Markdown(response.content[0].text))



Based on the query context provided, it seems that Wang Miao, a nanotechnology professor, was asked to work with Shi Qiang, a cunning detective, to investigate the mysterious suicides of several scientists, including Ye's daughter Yang Dong. The two of them noticed that the world's governments were communicating closely with each other and had put aside their traditional rivalries, which suggested they were preparing for war against the Trisolarans. Additionally, Wang Miao began experiencing strange hallucinations and started playing a sophisticated virtual reality video game called Three-Body, which was later revealed to have been created by the ETO as a recruitment tool.