## Word Embedding

In [3]:
import os
import numpy as np
from docx import Document
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

def get_embedding(text, model="text-embedding-3-small"):
   text = text.replace("\n", " ")
   return client.embeddings.create(input = [text], model=model).data[0].embedding

In [4]:
def pad_embeddings(a, b):
    target_length = max(len(a), len(b))
    a = np.pad(a, (0, target_length - len(a)), 'constant')
    b = np.pad(b, (0, target_length - len(b)), 'constant')
    return a, b

def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

In [None]:
baseline_text = """
In the picture, a busy scene in a kitchen is depicted. Here are the details:

1. **People**:
   - There is a woman, likely the mother, standing by the sink washing dishes. She is holding a plate and a dish towel.
   - A young boy is standing on a stool, reaching into an upper cabinet where there is a jar labeled "COOKIE JAR." He has one hand in the jar and appears to be retrieving a cookie.
   - A young girl stands next to the boy, reaching up as if to help or receive a cookie from him.

2. **Actions**:
   - The woman is washing dishes, but the sink is overflowing with water, which is spilling onto the floor.
   - The boy is taking cookies from the jar.
   - The girl is either reaching to help the boy or to receive a cookie from him.

3. **Objects**:
   - **Cookie jar**: Clearly labeled and located in an upper cabinet.
   - **Stool**: The boy is standing on it to reach the cookie jar.
   - **Sink**: Overflowing with water, indicating a possible plumbing issue or neglect due to distraction.
   - **Dishes**: The woman is holding one, and there are a few other dishes visible on the counter.

4. **Environment**:
   - **Kitchen**: The setting is a typical kitchen with cabinets, a counter, a sink, and a window.
   - **Window**: Through the window, a scene of a tree or bush is visible, suggesting it might be a backyard or garden view.
   - **Curtains**: The window has curtains that are pulled back.

The overall scene shows a mix of normal daily activity (dishwashing) with a bit of mischief (children reaching for cookies) and a potential mishap (overflowing sink).
"""

In [None]:
def read_docx(file_path):
    doc = Document(file_path)
    full_text = []
    for para in doc.paragraphs:
        full_text.append(para.text)
    return full_text

In [None]:
full_text_list = read_docx("./data/P16_S1_check.docx")

In [None]:
full_text_list


In [None]:
t1_start_index = 0
t1_end_index = 0
for paragraph in full_text_list:
    if "storytelling" in paragraph.lower():
        break
    t1_start_index += 1
for paragraph in full_text_list:
    if "t2" in paragraph.lower():
        break
    t1_end_index += 1

t2_start_index = 0
t2_end_index = len(full_text_list) - 1
for paragraph in full_text_list:
    if "different game" in paragraph.lower():
        break
    t2_start_index += 1

In [None]:
print(t1_start_index, t1_end_index, t2_start_index, t2_end_index)

print(full_text_list[t1_start_index])
print(full_text_list[t1_end_index])

print(full_text_list[t2_start_index])
print(full_text_list[t2_end_index])

In [None]:
T1 = (full_text_list[t1_start_index:t1_end_index])

In [None]:
T2 = (full_text_list[t2_start_index:t2_end_index])

In [None]:
T1_processed = list(
    filter(lambda x: not ((x == "") or ("speaker 0" in x.lower()) or (x is None) or ("first prompt" in x.lower())),
           T1))
T2_processed = list(
    filter(lambda x: not ((x == "") or ("speaker 0" in x.lower()) or (x is None) or ("first prompt" in x.lower())),
           T2))

In [None]:
for i in range(0, len(T1_processed)):
    T1_processed[i] = T1_processed[i].replace("Speaker 1: ", "")
for i in range(0, len(T2_processed)):
    T2_processed[i] = T2_processed[i].replace("Speaker 1: ", "")

In [None]:
T1_processed

In [None]:
"".join(T1_processed)

In [None]:
"".join(T2_processed)

In [None]:
test_text = "".join(T1_processed)

In [None]:
baseline_text_emb = get_embedding(baseline_text)
test_text_emb = get_embedding(test_text)

In [None]:
baseline_text_emb, test_text_emb = pad_embeddings(baseline_text_emb, test_text_emb)

In [None]:
cosine_similarity(baseline_text_emb, test_text_emb)

## Small model

In [11]:
es_text = "ganadora"
en_text = "winner"
es_emb = get_embedding(es_text)
en_emb = get_embedding(en_text)
# es_emb, en_emb = pad_embeddings(es_emb, en_emb)
print(cosine_similarity(es_emb, en_emb))

0.5882354876342293


In [12]:
es_text = "Me han regalado seis copas de cristal."
en_text = "I’ve been given six crystal glasses."
es_emb = get_embedding(es_text)
en_emb = get_embedding(en_text)
# es_emb, en_emb = pad_embeddings(es_emb, en_emb)
print(cosine_similarity(es_emb, en_emb))

0.7415913042988763


In [13]:
es_text = "Quedamos finalistas en la Copa del Mundo."
en_text = "We reached the World Cup final."
es_emb = get_embedding(es_text)
en_emb = get_embedding(en_text)
# es_emb, en_emb = pad_embeddings(es_emb, en_emb)
print(cosine_similarity(es_emb, en_emb))

0.6796877706385048


In [15]:
es_text = """
En el centro de la escena, hay una mujer parada en el fregadero lavando los platos. Tiene el cabello corto y lleva un vestido sin mangas. Curiosamente, el agua se está desbordando del fregadero y derramándose en el piso, pero ella parece bastante tranquila y ajena al desorden. Tal vez está perdida en sus pensamientos o simplemente muy concentrada en su tarea.
A la izquierda, hay un niño parado en un taburete, alcanzando un frasco de galletas en el armario. Parece muy decidido y concentrado en conseguir esas galletas. Debajo de él, una niña está extendiendo la mano, esperando recibir una galleta para ella misma. Está parada en el suelo, luciendo emocionada y tal vez un poco impaciente.
A través de la ventana encima del fregadero, hay una vista de un paisaje tranquilo con árboles y una cerca. Esto añade un contraste agradable y sereno a la escena algo caótica de la cocina.
El mostrador al lado del fregadero tiene varios platos y tazones.
La atmósfera general de la imagen se siente un poco caótica pero aún hogareña. La mujer en el fregadero parece estar en su propio mundo, posiblemente relajada o tal vez soñando despierta. Por otro lado, el niño y la niña están llenos de energía y emoción, impulsados por su deseo de alcanzar las galletas.
Parece que los niños se sienten traviesos y aventureros, mientras que la mujer está muy calmada o completamente inconsciente de la situación a su alrededor. Este contraste crea una escena humorística y animada.
"""

en_text = """
In the center of the scene, there's a woman standing at the sink doing the dishes. She has short hair and is wearing a sleeveless dress. Interestingly, water is overflowing from the sink and spilling onto the floor, but she seems quite calm and oblivious to the mess. Maybe she's lost in thought or just really focused on her task.
To the left, there's a boy standing on a stool, reaching into a cupboard to grab some cookies from a jar. He seems quite determined and focused on getting those cookies. Below him, a girl is eagerly reaching up, hoping to get a cookie for herself. She's standing on the floor, looking excited and maybe a bit impatient.
Through the window above the sink, there is a view of a peaceful landscape with trees and a fence. This adds a nice, tranquil contrast to the somewhat chaotic kitchen scene.
The counter next to the sink has several plates and bowls.
The overall atmosphere of the picture feels a bit chaotic but still homely. The woman at the sink seems to be in her own world, possibly relaxed or maybe daydreaming. The boy and girl, on the other hand, are full of energy and excitement, driven by their desire to get to the cookies.
It seems like the kids are feeling mischievous and adventurous, while the woman is either very calm or completely unaware of the situation around her. This contrast creates a humorous and lively scene.
"""

es_emb = get_embedding(es_text.replace('\n', ''))
en_emb = get_embedding(en_text.replace('\n', ''))
# es_emb, en_emb = pad_embeddings(es_emb, en_emb)
print(cosine_similarity(es_emb, en_emb))


0.7913564745129511


## Large model

In [10]:
def get_large_embedding(text):
    return get_embedding(text, "text-embedding-3-large")

In [16]:
es_text = "ganadora"
en_text = "winner"
es_emb = get_large_embedding(es_text)
en_emb = get_large_embedding(en_text)
# es_emb, en_emb = pad_embeddings(es_emb, en_emb)
print(cosine_similarity(es_emb, en_emb))

0.5894377216156999


In [17]:
es_text = "Me han regalado seis copas de cristal."
en_text = "I’ve been given six crystal glasses."
es_emb = get_large_embedding(es_text)
en_emb = get_large_embedding(en_text)
# es_emb, en_emb = pad_embeddings(es_emb, en_emb)
print(cosine_similarity(es_emb, en_emb))

0.740733668001387


In [18]:
es_text = "Quedamos finalistas en la Copa del Mundo."
en_text = "We reached the World Cup final."
es_emb = get_large_embedding(es_text)
en_emb = get_large_embedding(en_text)
# es_emb, en_emb = pad_embeddings(es_emb, en_emb)
print(cosine_similarity(es_emb, en_emb))

0.6724375980289032


In [19]:
es_text = """
En el centro de la escena, hay una mujer parada en el fregadero lavando los platos. Tiene el cabello corto y lleva un vestido sin mangas. Curiosamente, el agua se está desbordando del fregadero y derramándose en el piso, pero ella parece bastante tranquila y ajena al desorden. Tal vez está perdida en sus pensamientos o simplemente muy concentrada en su tarea.
A la izquierda, hay un niño parado en un taburete, alcanzando un frasco de galletas en el armario. Parece muy decidido y concentrado en conseguir esas galletas. Debajo de él, una niña está extendiendo la mano, esperando recibir una galleta para ella misma. Está parada en el suelo, luciendo emocionada y tal vez un poco impaciente.
A través de la ventana encima del fregadero, hay una vista de un paisaje tranquilo con árboles y una cerca. Esto añade un contraste agradable y sereno a la escena algo caótica de la cocina.
El mostrador al lado del fregadero tiene varios platos y tazones.
La atmósfera general de la imagen se siente un poco caótica pero aún hogareña. La mujer en el fregadero parece estar en su propio mundo, posiblemente relajada o tal vez soñando despierta. Por otro lado, el niño y la niña están llenos de energía y emoción, impulsados por su deseo de alcanzar las galletas.
Parece que los niños se sienten traviesos y aventureros, mientras que la mujer está muy calmada o completamente inconsciente de la situación a su alrededor. Este contraste crea una escena humorística y animada.
"""

en_text = """
In the center of the scene, there's a woman standing at the sink doing the dishes. She has short hair and is wearing a sleeveless dress. Interestingly, water is overflowing from the sink and spilling onto the floor, but she seems quite calm and oblivious to the mess. Maybe she's lost in thought or just really focused on her task.
To the left, there's a boy standing on a stool, reaching into a cupboard to grab some cookies from a jar. He seems quite determined and focused on getting those cookies. Below him, a girl is eagerly reaching up, hoping to get a cookie for herself. She's standing on the floor, looking excited and maybe a bit impatient.
Through the window above the sink, there is a view of a peaceful landscape with trees and a fence. This adds a nice, tranquil contrast to the somewhat chaotic kitchen scene.
The counter next to the sink has several plates and bowls.
The overall atmosphere of the picture feels a bit chaotic but still homely. The woman at the sink seems to be in her own world, possibly relaxed or maybe daydreaming. The boy and girl, on the other hand, are full of energy and excitement, driven by their desire to get to the cookies.
It seems like the kids are feeling mischievous and adventurous, while the woman is either very calm or completely unaware of the situation around her. This contrast creates a humorous and lively scene.
"""

es_emb = get_large_embedding(es_text.replace('\n', ''))
en_emb = get_large_embedding(en_text.replace('\n', ''))
# es_emb, en_emb = pad_embeddings(es_emb, en_emb)
print(cosine_similarity(es_emb, en_emb))


0.7945417377385514


## Baseline vs Response

In [20]:
en_text = """
In the center of the scene, there's a woman standing at the sink doing the dishes. She has short hair and is wearing a sleeveless dress. Interestingly, water is overflowing from the sink and spilling onto the floor, but she seems quite calm and oblivious to the mess. Maybe she's lost in thought or just really focused on her task. To the left, there's a boy standing on a stool, reaching into a cupboard to grab some cookies from a jar. He seems quite determined and focused on getting those cookies. Below him, a girl is eagerly reaching up, hoping to get a cookie for herself. She's standing on the floor, looking excited and maybe a bit impatient. Through the window above the sink, there is a view of a peaceful landscape with trees and a fence. This adds a nice, tranquil contrast to the somewhat chaotic kitchen scene. The counter next to the sink has several plates and bowls. The overall atmosphere of the picture feels a bit chaotic but still homely. The woman at the sink seems to be in her own world, possibly relaxed or maybe daydreaming. The boy and girl, on the other hand, are full of energy and excitement, driven by their desire to get to the cookies. It seems like the kids are feeling mischievous and adventurous, while the woman is either very calm or completely unaware of the situation around her. This contrast creates a humorous and lively scene."""

en_baseline = """
In the center of the scene, there's a woman standing at the sink doing the dishes. She has short hair and is wearing a sleeveless dress. Interestingly, water is overflowing from the sink and spilling onto the floor, but she seems quite calm and oblivious to the mess. Maybe she's lost in thought or just really focused on her task.
To the left, there's a boy standing on a stool, reaching into a cupboard to grab some cookies from a jar. He seems quite determined and focused on getting those cookies. Below him, a girl is eagerly reaching up, hoping to get a cookie for herself. She's standing on the floor, looking excited and maybe a bit impatient.
Through the window above the sink, there is a view of a peaceful landscape with trees and a fence. This adds a nice, tranquil contrast to the somewhat chaotic kitchen scene.
The counter next to the sink has several plates and bowls.
The overall atmosphere of the picture feels a bit chaotic but still homely. The woman at the sink seems to be in her own world, possibly relaxed or maybe daydreaming. The boy and girl, on the other hand, are full of energy and excitement, driven by their desire to get to the cookies.
It seems like the kids are feeling mischievous and adventurous, while the woman is either very calm or completely unaware of the situation around her. This contrast creates a humorous and lively scene.
"""

es_emb = get_embedding(es_text.replace('\n', ''))
en_baseline_emb = get_embedding(en_baseline.replace('\n', ''))
# es_emb, en_emb = pad_embeddings(es_emb, en_emb)
print(cosine_similarity(en_baseline_emb, en_emb))


ValueError: shapes (1536,) and (3072,) not aligned: 1536 (dim 0) != 3072 (dim 0)

In [None]:
es_text = """
En el centro de la escena, hay una mujer parada en el fregadero lavando los platos. Tiene el cabello corto y lleva un vestido sin mangas. Curiosamente, el agua se está desbordando del fregadero y derramándose en el piso, pero ella parece bastante tranquila y ajena al desorden. Tal vez está perdida en sus pensamientos o simplemente muy concentrada en su tarea.
A la izquierda, hay un niño parado en un taburete, alcanzando un frasco de galletas en el armario. Parece muy decidido y concentrado en conseguir esas galletas. Debajo de él, una niña está extendiendo la mano, esperando recibir una galleta para ella misma. Está parada en el suelo, luciendo emocionada y tal vez un poco impaciente.
A través de la ventana encima del fregadero, hay una vista de un paisaje tranquilo con árboles y una cerca. Esto añade un contraste agradable y sereno a la escena algo caótica de la cocina.
El mostrador al lado del fregadero tiene varios platos y tazones.
La atmósfera general de la imagen se siente un poco caótica pero aún hogareña. La mujer en el fregadero parece estar en su propio mundo, posiblemente relajada o tal vez soñando despierta. Por otro lado, el niño y la niña están llenos de energía y emoción, impulsados por su deseo de alcanzar las galletas.
Parece que los niños se sienten traviesos y aventureros, mientras que la mujer está muy calmada o completamente inconsciente de la situación a su alrededor. Este contraste crea una escena humorística y animada.
"""

es_baseline = """
En el centro de la escena, hay una mujer parada en el fregadero lavando los platos. Tiene el cabello corto y lleva un vestido sin mangas. Curiosamente, el agua se está desbordando del fregadero y derramándose en el piso, pero ella parece bastante tranquila y ajena al desorden. Tal vez está perdida en sus pensamientos o simplemente muy concentrada en su tarea.
A la izquierda, hay un niño parado en un taburete, alcanzando un frasco de galletas en el armario. Parece muy decidido y concentrado en conseguir esas galletas. Debajo de él, una niña está extendiendo la mano, esperando recibir una galleta para ella misma. Está parada en el suelo, luciendo emocionada y tal vez un poco impaciente.
A través de la ventana encima del fregadero, hay una vista de un paisaje tranquilo con árboles y una cerca. Esto añade un contraste agradable y sereno a la escena algo caótica de la cocina.
El mostrador al lado del fregadero tiene varios platos y tazones.
La atmósfera general de la imagen se siente un poco caótica pero aún hogareña. La mujer en el fregadero parece estar en su propio mundo, posiblemente relajada o tal vez soñando despierta. Por otro lado, el niño y la niña están llenos de energía y emoción, impulsados por su deseo de alcanzar las galletas.
Parece que los niños se sienten traviesos y aventureros, mientras que la mujer está muy calmada o completamente inconsciente de la situación a su alrededor. Este contraste crea una escena humorística y animada."""

es_emb = get_embedding(es_text.replace('\n', ''))
en_emb = get_embedding(en_text.replace('\n', ''))
# es_emb, en_emb = pad_embeddings(es_emb, en_emb)
print(cosine_similarity(es_emb, en_emb))
