### **0. Set-up**

In [1]:
# Import libraries and utils
%run '../../utils.ipynb'

In [2]:
# Populate the environment via load_dotenv(), then read the OpenAI key from it
load_dotenv()
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

# Build the API client used for the direct chat-completion call further down.
# NOTE(review): OPENAI_API_KEY is not passed to OpenAI() here; the client is
# constructed without arguments — presumably it picks the key up from the
# environment itself, confirm.
client = OpenAI()

In [4]:
# Read the cleaned SimLex word-pair table.
# NOTE(review): the file name contains 'nl' while the variable name and the
# output paths used later in this notebook say 'en' — confirm this is the
# intended dataset.
en_simlex = pd.read_csv("../../../data/dataset/cleaned-nl-simlex-999.csv")

# Build a list of (word1, word2) tuples, one per row
tuples_list = list(en_simlex[['word1', 'word2']].itertuples(index=False, name=None))

In [4]:
# Show results: displays the complete list of (word1, word2) tuples
tuples_list

[('old', 'new'),
 ('smart', 'intelligent'),
 ('hard', 'difficult'),
 ('happy', 'cheerful'),
 ('hard', 'easy'),
 ('fast', 'rapid'),
 ('happy', 'glad'),
 ('short', 'long'),
 ('stupid', 'dumb'),
 ('weird', 'strange'),
 ('wide', 'narrow'),
 ('bad', 'awful'),
 ('easy', 'difficult'),
 ('bad', 'terrible'),
 ('hard', 'simple'),
 ('smart', 'dumb'),
 ('insane', 'crazy'),
 ('happy', 'mad'),
 ('large', 'huge'),
 ('hard', 'tough'),
 ('new', 'fresh'),
 ('sharp', 'dull'),
 ('quick', 'rapid'),
 ('dumb', 'foolish'),
 ('wonderful', 'terrific'),
 ('strange', 'odd'),
 ('happy', 'angry'),
 ('narrow', 'broad'),
 ('simple', 'easy'),
 ('old', 'fresh'),
 ('apparent', 'obvious'),
 ('inexpensive', 'cheap'),
 ('nice', 'generous'),
 ('weird', 'normal'),
 ('weird', 'odd'),
 ('bad', 'immoral'),
 ('sad', 'funny'),
 ('wonderful', 'great'),
 ('guilty', 'ashamed'),
 ('beautiful', 'wonderful'),
 ('confident', 'sure'),
 ('dumb', 'dense'),
 ('large', 'big'),
 ('nice', 'cruel'),
 ('impatient', 'anxious'),
 ('big', 'broad'),

### **1. Define and Evaluate Parameters**

In [6]:
# Define prompt
# Instruction text used for every request: asks for a 0-10 similarity score
# with two decimals per word pair, returned strictly as a list of
# ('word1', 'word2', <score>) tuples without any extra explanation.
prompt = ("Rate the semantic similarity of each word pair on a scale from 0 to 10, "
          "where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. "
          "Use two decimals. The response should strictly adhere to the structure: "
          "[('word1', 'word2', <score>), ('word3', 'word4', <score>), ...]. "
          "Do not provide additional explanations or context.")

In [7]:
# Define model (used for the refill request later in this notebook and passed
# to get_responses in the disabled bulk-request cell)
model = "gpt-3.5-turbo-0125"

# Set sample size: compared below against the number of scores collected per
# word pair, i.e. the expected number of scores for each pair
sample_size = 15

# Delay between individual API calls
# (presumably in seconds — confirm against get_responses in utils.ipynb)
delay = 15.0

# Define number of sublists the word-pair list is split into
n_sublists = 20

In [8]:
# Split the word-pair list into n_sublists chunks (helper from utils.ipynb)
chunks = split_into_n_lists(tuples_list, n_sublists)

# Count number of lists as a sanity check (expected to equal n_sublists)
print(len(chunks))

20


In [9]:
# Print prompts for each chunk so the exact text sent to the model can be inspected
print_prompts(chunks, prompt)

Rate the semantic similarity of each word pair on a scale from 0 to 10, where 0 represents no semantic similarity, and 10 represents perfect semantic similarity. Use two decimals. The response should strictly adhere to the structure: [('word1', 'word2', <score>), ('word3', 'word4', <score>), ...]. Do not provide additional explanations or context. --- ["('old', 'new'), ('smart', 'intelligent'), ('hard', 'difficult'), ('happy', 'cheerful'), ('hard', 'easy'), ('fast', 'rapid'), ('happy', 'glad'), ('short', 'long'), ('stupid', 'dumb'), ('weird', 'strange'), ('wide', 'narrow'), ('bad', 'awful'), ('easy', 'difficult'), ('bad', 'terrible'), ('hard', 'simple'), ('smart', 'dumb'), ('insane', 'crazy'), ('happy', 'mad'), ('large', 'huge'), ('hard', 'tough'), ('new', 'fresh'), ('sharp', 'dull'), ('quick', 'rapid'), ('dumb', 'foolish'), ('wonderful', 'terrific'), ('strange', 'odd'), ('happy', 'angry'), ('narrow', 'broad'), ('simple', 'easy'), ('old', 'fresh'), ('apparent', 'obvious'), ('inexpensiv

In [10]:
# Load encoding
# NOTE(review): `encoding` is not referenced in any cell visible here;
# presumably count_tokens_with_tiktoken reads it as a global — confirm in utils.ipynb.
encoding = tiktoken.get_encoding("cl100k_base")

# Count tokens per chunk (one count per formatted prompt, see printed output)
token_counts = count_tokens_with_tiktoken(chunks, prompt)

# Show results
print("Token counts for each formatted prompt:", token_counts)

Token counts for each formatted prompt: [439, 446, 427, 430, 421, 419, 434, 441, 440, 426, 426, 433, 436, 423, 429, 425, 416, 431, 437, 407]


### **2. Extract and Process Data**

In [46]:
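# Get results from API
# NOTE: the call below is commented out (the recorded run took about two hours),
# so this cell does not define `response`; uncomment it to query the API again.
# response = get_responses(chunks, prompt, model, sample_size, delay)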

Processing: 100%|██████████| 300/300 [1:58:50<00:00, 23.77s/chunk]

Total time taken: 7130.21 seconds





In [47]:
# Define filepath of the raw-response cache
file_path = '../../../data/response/en/gpt-3.5-turbo-0125/f1.json'

# Save the raw responses once, or reuse the cached copy.
# `response` only exists in memory if the API cell was actually executed, so on
# a fresh kernel it is restored from the cache file instead of failing with a
# NameError.
if os.path.exists(file_path):
    if 'response' in globals():
        print("File already exists. JSON was not saved to prevent overwriting.")
    else:
        # assumes the cached JSON round-trips to the same structure that
        # get_responses returned — TODO confirm
        with open(file_path, 'r') as f:
            response = json.load(f)
        print("File already exists. Responses were loaded from the saved JSON.")
else:
    if 'response' not in globals():
        # Fail before touching the disk: opening the file first would leave an
        # empty file behind that blocks every later save attempt.
        raise NameError("'response' is not defined: run the API cell before saving, "
                        "or provide the cached JSON file at " + file_path)

    # Serialise first so a serialisation error cannot leave a truncated file
    serialized_response = json.dumps(response)
    with open(file_path, 'w') as f:
        f.write(serialized_response)
    print("File saved successfully.")

File saved successfully.


In [61]:
# Parse the raw responses into a dictionary keyed by word pair
data_dict = process_responses(response)

# Collect the pairs whose number of scores differs from the sample size
higher_lower_samples = {
    pair: scores
    for pair, scores in data_dict.items()
    if len(scores) != sample_size
}

# Show results
print(higher_lower_samples)

{('winter', 'season'): [4.0, 7.5, 6.0, 7.0, 6.0, 6.5, 6.5, 6.0, 7.0, 7.0, 7.5, 6.5, 6.5, 7.5], ('winter', 'seaso'): [8.0], ('fee', 'payment'): [0.7, 0.06, 0.31, 0.55, 0.5, 0.18, 0.54, 4.0, 0.55, 0.66, 0.32, 0.34, 0.4, 0.0], ('bible', 'hymn'): [0.5, 0.03, 0.2, 0.03, 0.1, 0.07, 0.37, 2.0, 0.7, 0.38, 0.33, 0.27, 0.27, 0.0], ('exit', 'doorway'): [0.8, 0.1, 0.28, 0.69, 0.1, 0.29, 0.37, 2.0, 0.45, 0.51, 0.37, 0.34, 0.22, 0.0], ('man', 'sentry'): [0.4, 0.05, 0.17, 0.22, 0.3, 0.04, 0.44, 2.0, 0.45, 0.44, 0.27, 0.11, 0.31, 0.0], ('aisle', 'hall'): [0.3, 0.11, 0.26, 0.44, 0.1, 0.39, 0.51, 2.0, 0.3, 0.41, 0.18, 0.24, 0.38, 0.0], ('whiskey', 'gin'): [0.7, 0.16, 0.51, 0.8, 0.4, 0.48, 0.69, 5.0, 0.8, 0.73, 0.58, 0.55, 0.6, 0.0], ('blood', 'marrow'): [0.8, 0.13, 0.51, 0.53, 0.4, 0.35, 0.4, 2.0, 0.4, 0.66, 0.35, 0.57, 0.41, 0.0], ('oil', 'mink'): [0.2, 0.03, 0.16, 0.0, 0.0, 0.02, 0.29, 1.0, 0.15, 0.07, 0.0, 0.06, 0.08, 0.0], ('floor', 'deck'): [0.6, 0.21, 0.45, 0.58, 0.4, 0.76, 0.56, 3.0, 0.35, 0.6, 0

In [62]:
# Print duplicate word pairs (helper from utils.ipynb; receives both the source
# dataset and the parsed responses)
print_duplicate_word_pairs(en_simlex, data_dict)

Empty DataFrame
Columns: [Combined_Columns]
Index: []
Empty DataFrame
Columns: [Combined_Columns]
Index: []


In [75]:
# Convert dict to Pandas DataFrame: one row per word pair, one
# similarity_score_<k> column per collected score (see displayed output)
# NOTE(review): the displayed scores appear to mix a 0-1 and a 0-10 scale
# (e.g. 0.95 next to 9.5 for the same pair) — confirm whether this is
# normalised downstream.
df = create_dataframe(data_dict)

# Show results
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,old,new,0.05,0.10,1.00,1.0,0.20,0.10,0.15,1.5,0.10,0.15,3.50,0.01,0.20,1.00,0.10
1,smart,intelligent,0.90,0.95,1.00,1.0,0.96,0.95,0.95,9.5,0.95,0.90,9.50,0.93,0.95,0.94,1.00
2,hard,difficult,0.90,0.80,0.71,1.0,0.92,0.90,0.90,8.0,0.85,0.80,7.00,0.85,0.85,0.97,0.85
3,happy,cheerful,0.80,0.80,0.90,0.0,0.70,0.75,0.80,8.5,0.85,0.80,8.00,0.81,0.70,0.86,0.90
4,hard,easy,0.10,0.10,0.10,0.0,0.16,0.05,0.20,2.0,0.20,0.20,2.00,0.15,0.10,0.22,0.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,join,acquire,2.00,4.00,3.50,0.3,0.60,0.75,3.00,2.5,4.50,0.50,0.45,0.70,5.00,0.50,0.31
996,send,attend,1.00,1.00,1.00,0.2,0.30,0.00,2.00,2.0,3.50,0.00,0.21,0.40,3.00,0.20,0.29
997,gather,attend,2.00,2.00,2.00,0.3,0.45,0.50,2.00,3.0,3.00,0.00,0.32,0.70,4.00,0.30,0.37
998,absorb,withdraw,1.00,1.00,1.50,0.1,0.20,0.00,1.00,1.5,1.50,0.00,0.20,0.30,2.00,0.10,0.16


In [76]:
# Tally the missing entries in every column
count_null_values = df.isna().sum()

# Show results
print("Null value counts per column:", count_null_values)

Null value counts per column: word1                   0
word2                   0
similarity_score_1      0
similarity_score_2      1
similarity_score_3      1
similarity_score_4      1
similarity_score_5      1
similarity_score_6      1
similarity_score_7      1
similarity_score_8      1
similarity_score_9      1
similarity_score_10     1
similarity_score_11     1
similarity_score_12     1
similarity_score_13     1
similarity_score_14     1
similarity_score_15    17
dtype: int64


In [77]:
# Select every row that is missing at least one value
rows_with_null = df.loc[df.isna().any(axis='columns')]

# Show results
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
196,winter,season,4.0,7.5,6.0,7.0,6.0,6.5,6.5,6.0,7.0,7.0,7.5,6.5,6.5,7.5,
200,winter,seaso,8.0,,,,,,,,,,,,,,
486,fee,payment,0.7,0.06,0.31,0.55,0.5,0.18,0.54,4.0,0.55,0.66,0.32,0.34,0.4,0.0,
487,bible,hymn,0.5,0.03,0.2,0.03,0.1,0.07,0.37,2.0,0.7,0.38,0.33,0.27,0.27,0.0,
488,exit,doorway,0.8,0.1,0.28,0.69,0.1,0.29,0.37,2.0,0.45,0.51,0.37,0.34,0.22,0.0,
489,man,sentry,0.4,0.05,0.17,0.22,0.3,0.04,0.44,2.0,0.45,0.44,0.27,0.11,0.31,0.0,
490,aisle,hall,0.3,0.11,0.26,0.44,0.1,0.39,0.51,2.0,0.3,0.41,0.18,0.24,0.38,0.0,
491,whiskey,gin,0.7,0.16,0.51,0.8,0.4,0.48,0.69,5.0,0.8,0.73,0.58,0.55,0.6,0.0,
492,blood,marrow,0.8,0.13,0.51,0.53,0.4,0.35,0.4,2.0,0.4,0.66,0.35,0.57,0.41,0.0,
493,oil,mink,0.2,0.03,0.16,0.0,0.0,0.02,0.29,1.0,0.15,0.07,0.0,0.06,0.08,0.0,


In [78]:
# Manually fix inconsistencies: the ('winter', 'season') row lacks its 15th
# score, while a single score of 8.0 was recorded under the misspelled
# ('winter', 'seaso') key — write that value into the missing slot.
is_winter_season = df['word1'].eq('winter') & df['word2'].eq('season')
df.loc[is_winter_season, 'similarity_score_15'] = 8.00

# Show results
df.loc[is_winter_season]

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
196,winter,season,4.0,7.5,6.0,7.0,6.0,6.5,6.5,6.0,7.0,7.0,7.5,6.5,6.5,7.5,8.0


In [79]:
# Remove the row created by the misspelled ('winter', 'seaso') pair
is_misspelled_pair = df['word1'].eq('winter') & df['word2'].eq('seaso')
df = df.loc[~is_misspelled_pair]

# Show results
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,old,new,0.05,0.10,1.00,1.0,0.20,0.10,0.15,1.5,0.10,0.15,3.50,0.01,0.20,1.00,0.10
1,smart,intelligent,0.90,0.95,1.00,1.0,0.96,0.95,0.95,9.5,0.95,0.90,9.50,0.93,0.95,0.94,1.00
2,hard,difficult,0.90,0.80,0.71,1.0,0.92,0.90,0.90,8.0,0.85,0.80,7.00,0.85,0.85,0.97,0.85
3,happy,cheerful,0.80,0.80,0.90,0.0,0.70,0.75,0.80,8.5,0.85,0.80,8.00,0.81,0.70,0.86,0.90
4,hard,easy,0.10,0.10,0.10,0.0,0.16,0.05,0.20,2.0,0.20,0.20,2.00,0.15,0.10,0.22,0.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,join,acquire,2.00,4.00,3.50,0.3,0.60,0.75,3.00,2.5,4.50,0.50,0.45,0.70,5.00,0.50,0.31
996,send,attend,1.00,1.00,1.00,0.2,0.30,0.00,2.00,2.0,3.50,0.00,0.21,0.40,3.00,0.20,0.29
997,gather,attend,2.00,2.00,2.00,0.3,0.45,0.50,2.00,3.0,3.00,0.00,0.32,0.70,4.00,0.30,0.37
998,absorb,withdraw,1.00,1.00,1.50,0.1,0.20,0.00,1.00,1.5,1.50,0.00,0.20,0.30,2.00,0.10,0.16


In [80]:
# Re-select the rows that are still missing at least one value
rows_with_null = df.loc[df.isna().any(axis='columns')]

# Show results
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
486,fee,payment,0.7,0.06,0.31,0.55,0.5,0.18,0.54,4.0,0.55,0.66,0.32,0.34,0.4,0.0,
487,bible,hymn,0.5,0.03,0.2,0.03,0.1,0.07,0.37,2.0,0.7,0.38,0.33,0.27,0.27,0.0,
488,exit,doorway,0.8,0.1,0.28,0.69,0.1,0.29,0.37,2.0,0.45,0.51,0.37,0.34,0.22,0.0,
489,man,sentry,0.4,0.05,0.17,0.22,0.3,0.04,0.44,2.0,0.45,0.44,0.27,0.11,0.31,0.0,
490,aisle,hall,0.3,0.11,0.26,0.44,0.1,0.39,0.51,2.0,0.3,0.41,0.18,0.24,0.38,0.0,
491,whiskey,gin,0.7,0.16,0.51,0.8,0.4,0.48,0.69,5.0,0.8,0.73,0.58,0.55,0.6,0.0,
492,blood,marrow,0.8,0.13,0.51,0.53,0.4,0.35,0.4,2.0,0.4,0.66,0.35,0.57,0.41,0.0,
493,oil,mink,0.2,0.03,0.16,0.0,0.0,0.02,0.29,1.0,0.15,0.07,0.0,0.06,0.08,0.0,
494,floor,deck,0.6,0.21,0.45,0.58,0.4,0.76,0.56,3.0,0.35,0.6,0.44,0.6,0.22,0.0,
495,roof,floor,0.6,0.15,0.44,0.47,0.4,0.45,0.5,3.0,0.6,0.36,0.39,0.39,0.27,0.0,


In [81]:
# Collect the (word1, word2) pairs of the incomplete rows as a list of tuples
missing_word_pair_list = list(
    rows_with_null[['word1', 'word2']].itertuples(index=False, name=None)
)

# Show results
missing_word_pair_list

[('fee', 'payment'),
 ('bible', 'hymn'),
 ('exit', 'doorway'),
 ('man', 'sentry'),
 ('aisle', 'hall'),
 ('whiskey', 'gin'),
 ('blood', 'marrow'),
 ('oil', 'mink'),
 ('floor', 'deck'),
 ('roof', 'floor'),
 ('door', 'floor'),
 ('shoulder', 'head'),
 ('wagon', 'carriage'),
 ('car', 'carriage'),
 ('elbow', 'ankle')]

In [82]:
# Format message: one user message containing the prompt plus the word pairs
# that still lack a score
formatted_prompt = format_prompt(missing_word_pair_list, prompt)
messages = [{"role": "user", "content": formatted_prompt}]

# Make API call, but only when no completion is held in memory.
# Previously the call was commented out, so `completion` was undefined on a
# fresh kernel and the line below raised a NameError. An existing `completion`
# is reused unchanged, which avoids re-querying the API on a cell re-run.
# NOTE(review): a reused `completion` is not checked against the current
# `messages`; delete it (`del completion`) to force a new request.
if 'completion' not in globals():
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        n=1)

# Store response content in a one-element list, the form handed to
# process_responses in the next cell
missing_word_pairs_response = [completion.choices[0].message.content]

In [83]:
# Process data into dictionary: maps each re-requested word pair to a list
# holding its single new score (see displayed output)
missing_word_pairs_dict = process_responses(missing_word_pairs_response)

# Show results
missing_word_pairs_dict

{('fee', 'payment'): [9.0],
 ('bible', 'hymn'): [2.0],
 ('exit', 'doorway'): [1.0],
 ('man', 'sentry'): [1.0],
 ('aisle', 'hall'): [5.0],
 ('whiskey', 'gin'): [4.0],
 ('blood', 'marrow'): [1.0],
 ('oil', 'mink'): [1.0],
 ('floor', 'deck'): [2.0],
 ('roof', 'floor'): [1.0],
 ('door', 'floor'): [1.0],
 ('shoulder', 'head'): [2.0],
 ('wagon', 'carriage'): [8.0],
 ('car', 'carriage'): [8.0],
 ('elbow', 'ankle'): [1.0]}

In [84]:
# Fill the empty 15th score of each incomplete row with the re-requested value
target_col = 'similarity_score_15'

# Snapshot which rows need a value before any cell is modified
needs_fill = df[target_col].isna()
pending_rows = zip(
    df.index[needs_fill],
    df.loc[needs_fill, 'word1'],
    df.loc[needs_fill, 'word2'],
)

for row_label, first_word, second_word in pending_rows:
    refill_scores = missing_word_pairs_dict.get((first_word, second_word))

    # Pairs without a re-requested score are left untouched
    if refill_scores is not None:
        # The dictionary stores a list per pair; its first element is the score
        df.at[row_label, target_col] = refill_scores[0]

# Check if any NaN values left
print(df[df[target_col].isna()])

Empty DataFrame
Columns: [word1, word2, similarity_score_1, similarity_score_2, similarity_score_3, similarity_score_4, similarity_score_5, similarity_score_6, similarity_score_7, similarity_score_8, similarity_score_9, similarity_score_10, similarity_score_11, similarity_score_12, similarity_score_13, similarity_score_14, similarity_score_15]
Index: []


In [85]:
# Mark the rows whose (word1, word2) pair was part of the refill request
was_refilled = pd.Series(
    [pair in missing_word_pair_list for pair in zip(df['word1'], df['word2'])],
    index=df.index,
    dtype=bool,
)
filtered_df = df[was_refilled]

# Keep only the pair and the refilled score column
result = filtered_df[['word1', 'word2', 'similarity_score_15']]

# Show results
print(result)

        word1     word2  similarity_score_15
486       fee   payment                  9.0
487     bible      hymn                  2.0
488      exit   doorway                  1.0
489       man    sentry                  1.0
490     aisle      hall                  5.0
491   whiskey       gin                  4.0
492     blood    marrow                  1.0
493       oil      mink                  1.0
494     floor      deck                  2.0
495      roof     floor                  1.0
496      door     floor                  1.0
497  shoulder      head                  2.0
498     wagon  carriage                  8.0
499       car  carriage                  8.0
500     elbow     ankle                  1.0


In [86]:
# Final check: select any row that is still missing at least one value
rows_with_null = df.loc[df.isna().any(axis='columns')]

# Show results
rows_with_null

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15


In [87]:
# Show results: the completed DataFrame that is written to CSV in the next cell
df

Unnamed: 0,word1,word2,similarity_score_1,similarity_score_2,similarity_score_3,similarity_score_4,similarity_score_5,similarity_score_6,similarity_score_7,similarity_score_8,similarity_score_9,similarity_score_10,similarity_score_11,similarity_score_12,similarity_score_13,similarity_score_14,similarity_score_15
0,old,new,0.05,0.10,1.00,1.0,0.20,0.10,0.15,1.5,0.10,0.15,3.50,0.01,0.20,1.00,0.10
1,smart,intelligent,0.90,0.95,1.00,1.0,0.96,0.95,0.95,9.5,0.95,0.90,9.50,0.93,0.95,0.94,1.00
2,hard,difficult,0.90,0.80,0.71,1.0,0.92,0.90,0.90,8.0,0.85,0.80,7.00,0.85,0.85,0.97,0.85
3,happy,cheerful,0.80,0.80,0.90,0.0,0.70,0.75,0.80,8.5,0.85,0.80,8.00,0.81,0.70,0.86,0.90
4,hard,easy,0.10,0.10,0.10,0.0,0.16,0.05,0.20,2.0,0.20,0.20,2.00,0.15,0.10,0.22,0.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,join,acquire,2.00,4.00,3.50,0.3,0.60,0.75,3.00,2.5,4.50,0.50,0.45,0.70,5.00,0.50,0.31
996,send,attend,1.00,1.00,1.00,0.2,0.30,0.00,2.00,2.0,3.50,0.00,0.21,0.40,3.00,0.20,0.29
997,gather,attend,2.00,2.00,2.00,0.3,0.45,0.50,2.00,3.0,3.00,0.00,0.32,0.70,4.00,0.30,0.37
998,absorb,withdraw,1.00,1.00,1.50,0.1,0.20,0.00,1.00,1.5,1.50,0.00,0.20,0.30,2.00,0.10,0.16


In [88]:
# Destination of the cleaned score table
file_path = '../../../data/prompt/en/gpt-3.5-turbo-0125/f1.csv'

# Write the table only when no file is present, so earlier results are never overwritten
if os.path.exists(file_path):
    print("File already exists. Dataframe was not saved to prevent overwriting.")
else:
    df.to_csv(file_path, index=False)
    print("File saved successfully.")

File saved successfully.
