# Setup

In [18]:
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import torch

# Load Dataset

In [4]:
df_avatar = pd.read_csv("../data/avatar-dataset.csv")
df_avatar.head()

Unnamed: 0,Character,script,ep_number,Book,total_number
0,,"As the title card fades, the scene opens onto ...",1,1,1
1,Sokka,It's not getting away from me this time. [Clos...,1,1,1
2,,"The shot pans quickly from Sokka to Katara, wh...",1,1,1
3,Katara,"[Happily surprised.] Sokka, look!",1,1,1
4,Sokka,"[Close-up of Sokka; whispering.] Sshh! Katara,...",1,1,1


In [5]:
sentences = ["This is the first example", "This is the second example"]

In [6]:
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
embeddings = model.encode(sentences)

caused by: ["[Errno 2] The file to load file system plugin from does not exist.: '/Users/mmenendezg/Developer/Platzi/.venv/lib/python3.11/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so'"]
caused by: ["dlopen(/Users/mmenendezg/Developer/Platzi/.venv/lib/python3.11/site-packages/tensorflow_io/python/ops/libtensorflow_io.so, 0x0006): tried: '/Users/mmenendezg/Developer/Platzi/.venv/lib/python3.11/site-packages/tensorflow_io/python/ops/libtensorflow_io.so' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Users/mmenendezg/Developer/Platzi/.venv/lib/python3.11/site-packages/tensorflow_io/python/ops/libtensorflow_io.so' (no such file), '/Users/mmenendezg/Developer/Platzi/.venv/lib/python3.11/site-packages/tensorflow_io/python/ops/libtensorflow_io.so' (no such file)"]


In [12]:
embeddings[0][383]

0.04896088

In [13]:
embeddings = model.encode(df_avatar["script"], batch_size=64, show_progress_bar=True)

Batches:   0%|          | 0/209 [00:00<?, ?it/s]

In [14]:
embeddings = model.encode(df_avatar["script"], batch_size=64, show_progress_bar=True, device="cpu")

Batches:   0%|          | 0/209 [00:00<?, ?it/s]

In [15]:
df_avatar["embeddngs"] = embeddings.tolist()

In [17]:
df_avatar.head()

Unnamed: 0,Character,script,ep_number,Book,total_number,embeddngs
0,,"As the title card fades, the scene opens onto ...",1,1,1,"[-0.07994642853736877, 0.08622035384178162, 0...."
1,Sokka,It's not getting away from me this time. [Clos...,1,1,1,"[0.01779155246913433, 0.05984475463628769, -0...."
2,,"The shot pans quickly from Sokka to Katara, wh...",1,1,1,"[-0.022184552624821663, 0.05684060975909233, -..."
3,Katara,"[Happily surprised.] Sokka, look!",1,1,1,"[-0.01815211772918701, 0.11454905569553375, 0...."
4,Sokka,"[Close-up of Sokka; whispering.] Sshh! Katara,...",1,1,1,"[-0.01388164795935154, 0.0842692106962204, -0...."


# Searching

In [19]:
query_embedding = model.encode(["Show me your strength"])

df_avatar["similarity"] = df_avatar.embeddngs.apply(lambda x: util.cos_sim(x, query_embedding[0]))

In [20]:
df_avatar.head()

Unnamed: 0,Character,script,ep_number,Book,total_number,embeddngs,similarity
0,,"As the title card fades, the scene opens onto ...",1,1,1,"[-0.07994642853736877, 0.08622035384178162, 0....",[[tensor(0.0736)]]
1,Sokka,It's not getting away from me this time. [Clos...,1,1,1,"[0.01779155246913433, 0.05984475463628769, -0....",[[tensor(0.1297)]]
2,,"The shot pans quickly from Sokka to Katara, wh...",1,1,1,"[-0.022184552624821663, 0.05684060975909233, -...",[[tensor(0.0923)]]
3,Katara,"[Happily surprised.] Sokka, look!",1,1,1,"[-0.01815211772918701, 0.11454905569553375, 0....",[[tensor(0.0916)]]
4,Sokka,"[Close-up of Sokka; whispering.] Sshh! Katara,...",1,1,1,"[-0.01388164795935154, 0.0842692106962204, -0....",[[tensor(0.1331)]]


In [21]:
df_avatar.sort_values("similarity", ascending=False).head(10)

Unnamed: 0,Character,script,ep_number,Book,total_number,embeddngs,similarity
12367,Katara,"Oh, don't you worry about my strength. I have ...",16,3,56,"[0.054552704095840454, 0.033963628113269806, -...",[[tensor(0.5567)]]
3671,Roku,You think I am weak?,16,1,16,"[0.05899662524461746, -0.02860417403280735, -0...",[[tensor(0.5538)]]
9961,Azula,Your arms look so strong.,5,3,45,"[-0.005173529032617807, 0.004467979539185762, ...",[[tensor(0.5153)]]
8801,Sokka,"You don't know how much this means to me, Dad....",19,2,39,"[0.024889236316084862, 0.061347633600234985, -...",[[tensor(0.4714)]]
13305,Ozai,"Even with all the power in the world, you are ...",21,3,61,"[0.06141240522265434, 0.01065730582922697, 0.0...",[[tensor(0.4669)]]
4389,Yue,Our strength comes from the spirit of the moon...,19,1,19,"[-0.010262452997267246, 0.08041387796401978, 0...",[[tensor(0.4547)]]
6456,Azula,Do you really want to fight me?,8,2,28,"[0.007166208233684301, 0.040309466421604156, 0...",[[tensor(0.4106)]]
1812,Roku,[Close-up.] Yes. Stronger than you could even ...,8,1,8,"[0.010861261747777462, 0.07865262776613235, -0...",[[tensor(0.4094)]]
6442,Toph,People see me and think I'm weak. They want to...,8,2,28,"[0.021143291145563126, 0.015663184225559235, 0...",[[tensor(0.4093)]]
902,Suki,[Peeks her head around the door to look at the...,4,1,4,"[0.08349473029375076, 0.0786832720041275, -0.0...",[[tensor(0.3976)]]
