### Extract embedding axis

In [None]:
# !pip install --upgrade accelerate transformers

In [1]:
import pandas as pd

In [2]:
# Read the stored embeddings table from disk and preview its first rows
embeddings_csv = 'training_embeddings.csv'
df = pd.read_csv(embeddings_csv)
df.head()

Unnamed: 0,word,0,1,2,3,4,5,6,7,8,...,758,759,760,761,762,763,764,765,766,767
0,companion,0.128,-0.082,0.109,-0.012,0.317,0.262,-0.016,0.128,0.357,...,0.395,-0.107,-0.026,-0.061,0.358,-0.138,-0.067,0.236,0.385,-0.115
1,toast,0.209,0.411,0.026,-0.0,-0.243,-0.325,0.148,0.238,-0.12,...,0.373,-0.174,0.119,-0.036,0.445,-0.029,0.145,0.169,0.364,-0.097
2,lounge,0.759,-0.116,0.116,0.113,0.46,-0.212,0.115,-0.032,0.137,...,0.259,0.07,-0.059,-0.114,0.441,-0.096,-0.074,0.196,0.215,-0.254
3,watch,0.401,-0.003,-0.061,-0.406,0.933,-0.14,-0.186,0.286,-0.17,...,0.459,-0.067,-0.266,-0.318,0.173,-0.109,0.219,-0.01,0.404,-0.161
4,haul,0.58,0.031,0.311,0.048,-0.102,0.081,0.047,0.423,-0.187,...,0.505,0.096,0.095,-0.009,0.453,0.003,0.337,0.259,0.064,0.092


In [6]:
# Every column after the first one holds embedding values;
# report the smallest and largest value found anywhere in that slice
value_columns = df.iloc[:, 1:]
value_columns.min().min(), value_columns.max().max()

(-5.21, 2.042)

In [15]:
# Saturate every embedding value into the closed interval [-1, 1].
# NOTE(review): clipping only leaves the data unchanged for values already inside [-1, 1];
# the range check recorded earlier in this notebook shows values outside it (down to -5.21),
# and those are flattened to the bounds. This assumes the bulk of the values lies within
# [-1, 1] — TODO confirm.
df.iloc[:, 1:] = df.iloc[:, 1:].clip(lower=-1, upper=1)

# Re-check the overall range after clipping
clipped_columns = df.iloc[:, 1:]
clipped_columns.min().min(), clipped_columns.max().max()

(-1.0, 1.0)

In [16]:
# Preview the first rows of the dataframe again after the clipping step
df.head()

Unnamed: 0,word,0,1,2,3,4,5,6,7,8,...,758,759,760,761,762,763,764,765,766,767
0,companion,0.128,-0.082,0.109,-0.012,0.317,0.262,-0.016,0.128,0.357,...,0.395,-0.107,-0.026,-0.061,0.358,-0.138,-0.067,0.236,0.385,-0.115
1,toast,0.209,0.411,0.026,-0.0,-0.243,-0.325,0.148,0.238,-0.12,...,0.373,-0.174,0.119,-0.036,0.445,-0.029,0.145,0.169,0.364,-0.097
2,lounge,0.759,-0.116,0.116,0.113,0.46,-0.212,0.115,-0.032,0.137,...,0.259,0.07,-0.059,-0.114,0.441,-0.096,-0.074,0.196,0.215,-0.254
3,watch,0.401,-0.003,-0.061,-0.406,0.933,-0.14,-0.186,0.286,-0.17,...,0.459,-0.067,-0.266,-0.318,0.173,-0.109,0.219,-0.01,0.404,-0.161
4,haul,0.58,0.031,0.311,0.048,-0.102,0.081,0.047,0.423,-0.187,...,0.505,0.096,0.095,-0.009,0.453,0.003,0.337,0.259,0.064,0.092


In [17]:
def get_x_embedding(df, x):
  """
  Pull the words and the values of a single embedding axis out of the dataframe.

  Args:
    df: dataframe with a 'word' column and one column per embedding axis,
        where axis columns are addressed by the string form of the axis id.
    x: axis identifier; it is converted with str() before the column lookup.

  Returns:
    A two-element list [words, values]: the first element is the list of words,
    the second is the list of values of axis x, in the same row order.
  """
  axis_column = str(x)
  return [df['word'].tolist(), df[axis_column].tolist()]

# Quick sanity check on axis 0: outer length, both inner lengths, then the first five entries of each list
data = get_x_embedding(df, 0)
word_list, value_list = data[0], data[1]
print(len(data), len(word_list), len(value_list))
print(word_list[:5])
print(value_list[:5])

2 383 383
['companion', 'toast', 'lounge', 'watch', 'haul']
[0.128, 0.209, 0.759, 0.401, 0.58]


### Get axis interpretation from OpenAI

In [None]:
# install OpenAI api
# !pip install --upgrade openai

In [18]:
# load api key from secrets.json
import openai
import json

# Read the OpenAI key from the local secrets file (kept out of the notebook itself).
# A missing file is reported with a message instead of a traceback; the key stays unset in that case.
try:
    with open("secrets.json") as secrets_file:
        secrets = json.load(secrets_file)
except FileNotFoundError:
    print("Secrets file not found. YOU NEED THEM TO RUN THIS.")
else:
    my_api_key = secrets["openai"]
    print("API key loaded.")
    openai.api_key = my_api_key

API key loaded.


In [19]:
# Words and values for embedding axis 0, in the [words, values] form returned by get_x_embedding
axis_emb = get_x_embedding(df, 0)

In [21]:
# Alternative models that can be swapped in:
# model_num = "gpt-3.5-turbo-1106"
# model_num = "gpt-4-1106-preview"
model_num = "gpt-4"

# The prompts are built first so the request itself stays readable.
# The user prompt ends with the word list followed by the axis values for those words.
labeller_system_prompt = "You are an expert transformer embeddings labeller."
labeller_user_prompt = f"Below are two lists. The first list contains words that have been put into DistilBERT. DistilBERT creates an embedding with 768 dimensions or axes. The second list contains the embedding value from DistilBERT for one axis across the words. By carefully comparing and considering the embedding values for each word, please interpret the likely linguistic binary feature that this embedding axis encodes. This binary interpretation must be consistent across all the words and must be expressed as 'x vs y', where 'x' relates to words with positive embedding values and 'y' relates to words with negative embedding values. Words that relate to neither 'x' nor 'y' will have embedding values close to 0. \n\n  The output must be a Python dictionary with three items, the key is a string containing the possible binary interpretation of the axis. The value is a float, representing the confidence score from 0 to 1. \n\n The output must be the dictionary only, which can be eval-ed into code. Here is the output format: {{<first x vs first y>:<first interpretation confidence score>, <second x vs second y>:<second interpretation confidence score>, <third x vs third y>:<third interpretation confidence score>}} \n\n {axis_emb[0]}\n\n {axis_emb[1]}"

# NOTE(review): openai.ChatCompletion looks like the pre-1.0 interface of the openai package —
# confirm the installed version still provides it before running.
completion = openai.ChatCompletion.create(
  model=model_num,
  messages=[
    {"role": "system", "content": labeller_system_prompt},
    {"role": "user", "content": labeller_user_prompt},
  ],
)

print(completion.choices[0].message)

# Keep a running log: the stringified response is appended to the end of output.txt
with open("output.txt", "a") as f:
  f.write(str(completion))

{
  "role": "assistant",
  "content": "{\"action vs state\": 0.6, \"objects vs abstract concepts\": 0.65, \"positive sentiment vs negative sentiment\": 0.4}"
}


In [25]:
import ast  # imported here so this cell works on its own; used for safe literal parsing

# Convert the model reply into a dictionary.
# ast.literal_eval only accepts Python literals (dicts, strings, numbers, ...), so text
# produced by the model is parsed without ever being executed as code.
interp_dict = ast.literal_eval(completion.choices[0].message.content)
if not isinstance(interp_dict, dict) or not interp_dict:
  raise ValueError("Expected a non-empty dictionary of interpretations, got: " + repr(interp_dict))
print(interp_dict)

# Interpretations (keys) and their confidence scores (values), in matching order
interp_keys = list(interp_dict.keys())
interp_values = list(interp_dict.values())

# Position of the highest confidence score (the first one wins on ties)
max_interp_index = interp_values.index(max(interp_values))
print(max_interp_index)

# Interpretation with the highest confidence score
max_interp_key = interp_keys[max_interp_index]
print(max_interp_key)

{'action vs state': 0.6, 'objects vs abstract concepts': 0.65, 'positive sentiment vs negative sentiment': 0.4}
1
objects vs abstract concepts


In [26]:
# Split the chosen interpretation on " vs " into its positive part (index 0)
# and its negative part (index 1); both are inserted into the scoring prompt later.
pos_neg = max_interp_key.split(" vs ")
if len(pos_neg) < 2:
  # Fail here with a clear message rather than with an IndexError when pos_neg[1] is read later
  raise ValueError("Interpretation is not in 'x vs y' form: " + repr(max_interp_key))
print(pos_neg)


['objects', 'abstract concepts']


### Evaluate interpretations using OpenAI

In [28]:
import time

In [31]:
import ast  # imported here so this cell works on its own; used for safe literal parsing

# Alternative models that can be swapped in:
# model_num = "gpt-3.5-turbo-1106"
# model_num = "gpt-4-1106-preview"
model_num = "gpt-4"

score_list = []
step = 30         # number of words sent to the model per request
max_attempts = 5  # give up on a chunk after this many unusable replies

# Ceiling division: a word count that is an exact multiple of `step` must not
# produce a trailing empty chunk (and a wasted request for it).
num_steps = (len(axis_emb[0]) + step - 1) // step

for i in range(num_steps):
  trunc_word_list = axis_emb[0][i*step:(i+1)*step]

  # Ask again until the reply is a list with one score per word, up to max_attempts times
  for attempt in range(max_attempts):
    print("\n\nStep ", i + 1, " of ", num_steps)
    print(i+1, " Input length: ", len(trunc_word_list))

    completion = openai.ChatCompletion.create(
      model=model_num,
      messages=[
        {"role": "system", "content": "You are an expert word sense scorer."},
        {"role": "user", "content": f"For the list of words below, please assign it a score according to how much it relates to the following criteria: '{max_interp_key}' \n\n  The output must be a Python list of scores for each corresponding word in the provided list. The output must therefore have {len(trunc_word_list)} items, the same length as the provided list. The score is a float that ranges from -1 to 1. Positive scores suggest a strong relationship with the positive criterion, '{pos_neg[0]}', while negative scores suggest a strong relationship with the negative criterion, '{pos_neg[1]}'. Scores close to 0 suggest that the word is not related to both the positive and negative criteria. \n\n Here is an output sample: [<score for first word>, <score for second word>, ... , <score for second-last word>, <score for last word>] \n\n {trunc_word_list}"}
      ]
    )

    # log the stringified output into a txt file by appending it to the end of the file
    with open("eval_output.txt", "a") as f:
      f.write(str(completion))

    # Convert the reply into a list. ast.literal_eval parses literals only, so the
    # model's text is never executed; only parsing errors are caught, which keeps
    # Ctrl-C (KeyboardInterrupt) working.
    reply = completion.choices[0].message.content
    try:
      scores = ast.literal_eval(reply)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
      print(i+1, "Error: ", reply)
      print("Trying again...")
      continue

    # A reply that parses but is not a sequence (e.g. a single number) cannot be length-checked
    if not isinstance(scores, (list, tuple)):
      print(i+1, "Error: ", reply)
      print("Trying again...")
      continue
    print(scores)

    # check if the length is correct
    print(i+1, "Output length: ", len(scores))
    print(i+1, "Are input output lengths the same? " ,len(scores) == len(trunc_word_list))

    if len(scores) != len(trunc_word_list):
      print("Input output lengths are not the same. Trying again...")
      continue

    # concatenate scores with score_list
    score_list += scores

    # giving it more time – does it lead to better results?
    time.sleep(10)
    break
  else:
    # Stopping is safer than skipping: a missing chunk would misalign scores and words
    raise RuntimeError(f"Step {i + 1}: no usable reply after {max_attempts} attempts")

with open("eval_output.txt", "a") as f:
  f.write("\nscore_list: " + str(score_list))
print("Length of embedding list: ", len(axis_emb[0]))
print("Length of score list: ", len(score_list))



Step  1  of  13
1  Input length:  30
[0.1, 0.9, 0.4, 0.9, 0.4, -0.2, -0.7, 0.3, 0.8, 0.8, -0.1, 0.3, 0.8, 0.2, -0.9, 0.6, 0.8, -0.5, 0.6, 0.8, -0.3, 0.7, 0.9, 0.3, 0.4, 0.7, -0.5, 0.1, 0.9, 0.4]
1 Output length:  30
1 Are input output lengths the same?  True


Step  2  of  13
2  Input length:  30
[0.5, -0.5, -0.9, 0.3, 0.5, -0.7, 0.8, 0.2, 0, 0.5, 0.8, -0.8, 0.9, 0.3, 0.4, 0.8, -0.7, 0, 0.9, 0.8, 0.9, -0.9, 0.9, -0.2, 0, -0.2, -0.1, 0, 0.6, 0.4]
2 Output length:  30
2 Are input output lengths the same?  True


Step  3  of  13
3  Input length:  30
[-0.9, -0.6, 0.1, 0.3, -0.8, -0.9, -0.95, -0.9, -0.95, 0.1, -0.85, -0.1, -0.6, 0.1, -0.8, 0.2, 0.1, 0.7, 0.2, 0.9, 0.1, 0.3, 0.1, 0.2, -0.85, -0.85, 0.3, 0.2, 0.05, 0.1]
3 Output length:  30
3 Are input output lengths the same?  True


Step  4  of  13
4  Input length:  30
[0.0, 0.0, 0.0, 0.0, -0.2, -0.2, 0.0, -0.8, -0.2, 0.0, 0.0, -1.0, 0.8, -0.2, -1.0, 0.0, 0.0, -1.0, 0.0, 0.0, -0.8, 0.0, -0.8, -1.0, -0.8, -0.2, -0.2, 0.9, -0.5, -1.0]
4 Out

In [32]:
# Signed gap between each model score and the embedding value at the same position
raw_diff_list = [score - axis_emb[1][idx] for idx, score in enumerate(score_list)]
print(raw_diff_list[:5])

# Round each gap to 3 decimal places, then take its absolute value
diff_list = [abs(round(gap, 3)) for gap in raw_diff_list]

# Total of the absolute gaps
sum_diff = sum(diff_list)
print("Sum of diff: ", sum_diff)

# Average absolute gap per word
mean_diff = sum_diff / len(diff_list)
print("Mean of diff: ", mean_diff)

# Append the per-word gaps and both summary numbers to the evaluation log
with open("eval_output.txt", "a") as f:
  f.write(f"\ndiff_list: {diff_list}\nsum of diff: {sum_diff}\nmean of diff: {mean_diff}\n\n")

[-0.027999999999999997, 0.6910000000000001, -0.359, 0.499, -0.17999999999999994]
Sum of diff:  195.76600000000005
Mean of diff:  0.5111383812010445


Matte's notes:
- The mean difference of 0.51 is an improvement; it was previously 0.74.

Maybe we can set a threshold to decide if the interpretation is good. E.g. above 0.25 mean difference is bad and below 0.25 is good?

We could also make it loop through all 3 outputs if the first one doesn't meet the threshold.

### Comparison between two runs + evaluating the approach

In [33]:
# compare between two runs

first_run = [0.2, 1.0, 1.0, 1.0, 1.0, -0.4, -0.8, -0.2, 0.8, 0.8, -0.2, 0.4, 1.0, -0.1, -1.0, 0.6, 0.7, -0.4, -0.3, 1.0, -0.7, 0.4, 1.0, 0.5, 0.6, -0.2, -0.2, -0.3, 1.0, 0.3, 0.5, -0.4, -0.9, 0.2, 0.3, -0.8, 0.6, 0.4, 0.2, 0.2, 0.7, -0.7, 0.8, 0.1, 0.1, 0.8, -0.9, -0.1, 0.7, 0.6, 0.3, -0.9, 0.8, -0.1, -0.4, 0.4, -0.1, -0.1, 0.6, 0.5, -0.8, -0.7, 0.6, 0.7, -0.7, -0.6, -0.9, -0.7, -0.9, 0.5, -0.5, -0.2, -0.2, 0.6, -0.8, 0.5, 0.5, 0.9, 0.7, 0.9, -0.2, 0.6, 0.7, 0.8, -0.3, -0.5, 0.7, 0.6, 0.5, 0.4, 0.1, 0.2, 0.4, 0.3, -0.4, -0.2, 0.2, -0.8, 0.1, 0.2, 0.2, -0.9, 0.8, -0.1, -0.9, 0.2, 0.5, -0.7, 0.3, 0.4, -0.6, 0.4, -0.3, -0.8, -0.6, 0.3, 0.6, 0.9, 0.1, -0.9, 0.1, 0.6, 0.3, 0.5, -0.5, -0.4, 0.1, -0.3, 0.2, 0.3, 0.2, 0.2, -0.5, 0.1, -0.3, 0.2, 0.6, 0.2, 0.3, 0.6, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.5, -0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.2, 0, -0.1, -0.1, 0, 0, 0, -0.4, -0.4, -0.3, -0.3, -0.4, -0.4, 0.2, -0.4, -0.4, -0.5, 0, 0, 0, 0.9, 0.9, -0.9, -0.9, -0.9, -0.9, -0.9, 0.9, 0.9, 0.2, 0.1, 0.2, 0.7, 0.3, 0.1, 0.9, 0.1, 0.9, 0.9, 0.9, 0.9, 0.9, 0.2, 0.1, 0.2, 0.9, -0.2, -1, -0.8, 0.9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -0.5, -0.5, -0.5, -1, -0.8, 0, -0.2, -0.5, -1, -1, -1, -0.8, -1, -1, -1, -0.7, -1, -1, -0.7, -0.5, -0.7, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
second_run = [0.1, 0.9, 0.4, 0.9, 0.4, -0.2, -0.7, 0.3, 0.8, 0.8, -0.1, 0.3, 0.8, 0.2, -0.9, 0.6, 0.8, -0.5, 0.6, 0.8, -0.3, 0.7, 0.9, 0.3, 0.4, 0.7, -0.5, 0.1, 0.9, 0.4, 0.5, -0.5, -0.9, 0.3, 0.5, -0.7, 0.8, 0.2, 0, 0.5, 0.8, -0.8, 0.9, 0.3, 0.4, 0.8, -0.7, 0, 0.9, 0.8, 0.9, -0.9, 0.9, -0.2, 0, -0.2, -0.1, 0, 0.6, 0.4, -0.9, -0.6, 0.1, 0.3, -0.8, -0.9, -0.95, -0.9, -0.95, 0.1, -0.85, -0.1, -0.6, 0.1, -0.8, 0.2, 0.1, 0.7, 0.2, 0.9, 0.1, 0.3, 0.1, 0.2, -0.85, -0.85, 0.3, 0.2, 0.05, 0.1, 0.0, 0.0, 0.0, 0.0, -0.2, -0.2, 0.0, -0.8, -0.2, 0.0, 0.0, -1.0, 0.8, -0.2, -1.0, 0.0, 0.0, -1.0, 0.0, 0.0, -0.8, 0.0, -0.8, -1.0, -0.8, -0.2, -0.2, 0.9, -0.5, -1.0, 0, -0.2, 0, -0.5, -0.7, -0.5, 0.1, -0.8, 0, 0, -0.1, 0.1, -0.9, -0.2, -0.8, 0, 0.5, 0.3, 0.2, 0.7, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.5, -0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, -0.6, -0.6, -0.6, -0.6, -0.6, -0.9, 0.0, 0.0, 0.0, 0.9, 0.9, -0.7, -0.7, -0.7, -0.7, -0.7, 0.9, 0.1, 0.4, 0.2, 0.3, 0.5, 0.7, 0.4, 0.6, 0.4, 0.9, 0.8, 0.9, 0.9, 0.6, 0.4, 0.6, 0.7, 0.9, 0.5, -0.7, -0.7, 0.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.2, -0.2, 0.8, 0.4, 0, -0.2, -0.2, -0.2, -0.2, -0.4, -0.4, -0.4, -0.4, -0.4, -0.2, -0.6, 0, -0.6, -0.6, -0.2, -0.2, 0, -0.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

print("Length of first run: ", len(first_run))
print("Length of second run: ", len(second_run))

# Per-word gap between the two runs: rounded to 3 decimal places, then made absolute
diff_run_list = [abs(round(first - second_run[idx], 3)) for idx, first in enumerate(first_run)]
print(diff_run_list)

# Total and average gap between the two runs
sum_diff_run = sum(diff_run_list)
print("Sum of diff between two runs: ", sum_diff_run)
mean_diff_run = sum_diff_run / len(diff_run_list)
print("Mean of diff between two runs: ", mean_diff_run)

Length of first run:  383
Length of second run:  383
[0.1, 0.1, 0.6, 0.1, 0.6, 0.2, 0.1, 0.5, 0.0, 0.0, 0.1, 0.1, 0.2, 0.3, 0.1, 0.0, 0.1, 0.1, 0.9, 0.2, 0.4, 0.3, 0.1, 0.2, 0.2, 0.9, 0.3, 0.4, 0.1, 0.1, 0.0, 0.1, 0.0, 0.1, 0.2, 0.1, 0.2, 0.2, 0.2, 0.3, 0.1, 0.1, 0.1, 0.2, 0.3, 0.0, 0.2, 0.1, 0.2, 0.2, 0.6, 0.0, 0.1, 0.1, 0.4, 0.6, 0.0, 0.1, 0.0, 0.1, 0.1, 0.1, 0.5, 0.4, 0.1, 0.3, 0.05, 0.2, 0.05, 0.4, 0.35, 0.1, 0.4, 0.5, 0.0, 0.3, 0.4, 0.2, 0.5, 0.0, 0.3, 0.3, 0.6, 0.6, 0.55, 0.35, 0.4, 0.4, 0.45, 0.3, 0.1, 0.2, 0.4, 0.3, 0.2, 0.0, 0.2, 0.0, 0.3, 0.2, 0.2, 0.1, 0.0, 0.1, 0.1, 0.2, 0.5, 0.3, 0.3, 0.4, 0.2, 0.4, 0.5, 0.2, 0.2, 0.5, 0.8, 0.0, 0.6, 0.1, 0.1, 0.8, 0.3, 1.0, 0.2, 0.1, 0.0, 0.5, 0.2, 0.3, 0.3, 0.1, 0.4, 0.3, 0.5, 0.2, 0.1, 0.1, 0.1, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 

Matte's notes:
- A mean difference of 0.17 between the 2 runs seems pretty good.

In [34]:
from pathlib import Path  # imported here so this cell works on its own

# Table of words, their embedding values on the chosen axis, and the first run's scores.
# NOTE(review): this rebinds `df`, which earlier held the table loaded from
# training_embeddings.csv; calling get_x_embedding(df, ...) after this cell will no longer
# find the axis columns. Reload the CSV before re-running earlier cells.
df = pd.DataFrame({'word': axis_emb[0], 'embedding': axis_emb[1], 'score': first_run})
print(df.head())

# DataFrame.to_csv does not create missing folders, so make sure the target folder exists
Path('llm-outputs').mkdir(parents=True, exist_ok=True)

# save dataframe as csv (without the index column)
df.to_csv('llm-outputs/first_run.csv', index=False)

        word  embedding  score
0  companion      0.128    0.2
1      toast      0.209    1.0
2     lounge      0.759    1.0
3      watch      0.401    1.0
4       haul      0.580    1.0


In [35]:
from pathlib import Path  # imported here so this cell works on its own

# Table of words, their embedding values on the chosen axis, and the second run's scores.
# NOTE(review): this rebinds `df`, which earlier held the table loaded from
# training_embeddings.csv; calling get_x_embedding(df, ...) after this cell will no longer
# find the axis columns. Reload the CSV before re-running earlier cells.
df = pd.DataFrame({'word': axis_emb[0], 'embedding': axis_emb[1], 'score': second_run})
print(df.head())

# DataFrame.to_csv does not create missing folders, so make sure the target folder exists
Path('llm-outputs').mkdir(parents=True, exist_ok=True)

# save dataframe as csv (without the index column)
df.to_csv('llm-outputs/second_run.csv', index=False)

        word  embedding  score
0  companion      0.128    0.1
1      toast      0.209    0.9
2     lounge      0.759    0.4
3      watch      0.401    0.9
4       haul      0.580    0.4


Matte's notes:
- it seems like the results are somewhat repeatable

In [37]:
# Surface the words whose score differs between the two runs by more than the threshold.
# The threshold is named once so the filter and the printed summary cannot disagree
# (the message used to say 0.5 while the filter tested 0.75).
not_confident_threshold = 0.75

len_a = 0
for i, run_gap in enumerate(diff_run_list):
  if run_gap > not_confident_threshold:
    print(axis_emb[0][i], "|(", first_run[i], ")", "-", "(", second_run[i], ")|", "=", run_gap)
    len_a += 1

print(f"Number of words with a difference of more than {not_confident_threshold} (not confident): ", len_a, "of", len(diff_run_list))
print("Percentage: ", round(len_a/len(diff_run_list) * 100), "%")

surf |( -0.3 ) - ( 0.6 )| = 0.9
cooling |( -0.2 ) - ( 0.7 )| = 0.9
trumpet |( 0.6 ) - ( -0.2 )| = 0.8
sanitation |( 0.6 ) - ( -0.2 )| = 0.8
dry |( 0.5 ) - ( -0.5 )| = 1.0
largest |( 0.2 ) - ( -0.6 )| = 0.8
company |( 0.9 ) - ( 0.1 )| = 0.8
& |( -1 ) - ( 0 )| = 1
to |( -1 ) - ( 0 )| = 1
To |( -1 ) - ( 0 )| = 1
TO |( -1 ) - ( 0 )| = 1
be |( -1 ) - ( -0.2 )| = 0.8
buy |( 0 ) - ( 0.8 )| = 0.8
was |( -1 ) - ( -0.2 )| = 0.8
were |( -1 ) - ( -0.2 )| = 0.8
had |( -1 ) - ( -0.2 )| = 0.8
been |( -1 ) - ( -0.2 )| = 0.8
are |( -1 ) - ( -0.2 )| = 0.8
have |( -1 ) - ( -0.2 )| = 0.8
say |( -1 ) - ( 0 )| = 1
do |( -1 ) - ( -0.2 )| = 0.8
Number of words with a difference of more than 0.5 (not confident):  21 of 383
Percentage:  5 %


Matte's notes:
- (From the previous run:) My hunch is that GPT struggles to differentiate between opposite (-1) and irrelevant (0). Maybe this can be simplified or strengthened?
- my hunch seems to be confirmed. This run is far better than the previous one – 5% compared to 23% not confident.

In [38]:
# List every word whose two runs disagree by less than 0.25, and count them
len_b = 0
for i, run_gap in enumerate(diff_run_list):
  if not run_gap < 0.25:
    continue
  print(axis_emb[0][i], "|(", first_run[i], ")", "-", "(", second_run[i], ")|", "=", run_gap)
  len_b += 1

# Summary: absolute count and share of all compared words
total_words = len(diff_run_list)
print("Number of words with a difference of less than 0.25 (very confident): ", len_b, "of", total_words)
print("Percentage: ", round(len_b/total_words * 100), "%")

companion |( 0.2 ) - ( 0.1 )| = 0.1
toast |( 1.0 ) - ( 0.9 )| = 0.1
watch |( 1.0 ) - ( 0.9 )| = 0.1
combination |( -0.4 ) - ( -0.2 )| = 0.2
majesty |( -0.8 ) - ( -0.7 )| = 0.1
nutrient |( 0.8 ) - ( 0.8 )| = 0.0
bark |( 0.8 ) - ( 0.8 )| = 0.0
rest |( -0.2 ) - ( -0.1 )| = 0.1
hold |( 0.4 ) - ( 0.3 )| = 0.1
drainage |( 1.0 ) - ( 0.8 )| = 0.2
time |( -1.0 ) - ( -0.9 )| = 0.1
drink |( 0.6 ) - ( 0.6 )| = 0.0
eat |( 0.7 ) - ( 0.8 )| = 0.1
journey |( -0.4 ) - ( -0.5 )| = 0.1
shipment |( 1.0 ) - ( 0.8 )| = 0.2
cutlery |( 1.0 ) - ( 0.9 )| = 0.1
catch |( 0.5 ) - ( 0.3 )| = 0.2
drive |( 0.6 ) - ( 0.4 )| = 0.2
moo |( 1.0 ) - ( 0.9 )| = 0.1
descend |( 0.3 ) - ( 0.4 )| = 0.1
commute |( 0.5 ) - ( 0.5 )| = 0.0
score |( -0.4 ) - ( -0.5 )| = 0.1
indulgence |( -0.9 ) - ( -0.9 )| = 0.0
cutting |( 0.2 ) - ( 0.3 )| = 0.1
containment |( 0.3 ) - ( 0.5 )| = 0.2
communication |( -0.8 ) - ( -0.7 )| = 0.1
transportation |( 0.6 ) - ( 0.8 )| = 0.2
glide |( 0.4 ) - ( 0.2 )| = 0.2
slice |( 0.2 ) - ( 0 )| = 0.2
tote |(

Matte's notes:
- This run is better than the previous one – 74% compared to 57% confident.