## setup

In [None]:
# install OpenAI api
# !pip install --upgrade openai

In [88]:
import pandas as pd
import time
import openai
import json
import re
import numpy as np
import os

In [10]:
# load api key from secrets.json
try:
    with open("secrets.json") as f:
        secrets = json.load(f)
    my_api_key = secrets["openai"]
    print("API key loaded.")
    openai.api_key = my_api_key
except FileNotFoundError:
    print("Secrets file not found. YOU NEED THEM TO RUN THIS.")

API key loaded.


## data

In [141]:
client = openai.OpenAI(api_key = openai.api_key)
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# CUSTOMIZE HERE
data_gen_model = "gpt-3.5-turbo"
interpreter_model = "gpt-4"
num_words = 120
category = "music"
num_axes_to_sample = 10
num_interpretations_per_axis = 3
bucket_size = 20
num_synth_embeddings = 2

In [19]:
# generate data
response = client.chat.completions.create(
      model=data_gen_model,
      messages=[
        # {"role": "system", "content": "You are an expert transformer embeddings labeller."},
        {"role": "user", "content": f"Please list {num_words} words related to {category}. Please format your response as a python list. Do not assign the list to a variable name."}
      ]
    )

# print(response)

In [38]:
words_str = response.choices[0].message.content
# print(words_str)

# convert string into list
cleaned_string = re.sub(r'([\n\'])', '', words_str)
words = cleaned_string.strip("][").split(', ')
print(words)
print(len(words))

['acoustic', 'album', 'artist', 'bass', 'beat', 'billboard', 'bpm', 'chart', 'chord', 'classical', 'composer', 'concert', 'country', 'dance', 'dj', 'drum', 'electric', 'genre', 'guitar', 'harmony', 'hip-hop', 'instrument', 'jazz', 'lyrics', 'melody', 'metal', 'musician', 'notes', 'opera', 'orchestra', 'piano', 'pop', 'rap', 'record', 'reggae', 'rhythm', 'rock', 'singer', 'song', 'sound', 'songwriter', 'studio', 'symphony', 'tempo', 'verse', 'vocal', 'vocals', 'acapella', 'arrangement', 'audience', 'ballad,band', 'baritone', 'bassoon', 'beatbox', 'brass', 'cadence', 'cajun', 'cello', 'chorus', 'clarinet', 'composer', 'concert', 'conductor', 'crescendo', 'decrescendo', 'disco', 'duet', 'dynamics', 'ensemble', 'falsetto', 'flute', 'folk', 'funk', 'gig', 'glissando', 'groove', 'harmonica', 'headphones', 'horn', 'improvisation', 'interval', 'jingle', 'live', 'lyrics', 'mandolin', 'marimba', 'metronome', 'microphone', 'mixer', 'modulation', 'movement', 'mp3', 'music', 'musical', 'note', 'oct

In [42]:
# create train and test sets by randomly sampling 80-20 split

split_point = round(0.8 * len(words))
# print(split_point)

train_words = words[:split_point]
test_words = words[split_point:]

print(len(train_words))
print(len(test_words))

111
28


## get embeddings

In [43]:
from transformers import AutoTokenizer, DistilBertModel
import torch

# https://huggingface.co/distilbert-base-uncased
# https://huggingface.co/docs/transformers/v4.35.0/en/model_doc/distilbert
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = DistilBertModel.from_pretrained("distilbert-base-uncased")

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.weight', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_layer_norm.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [47]:
# create embeddings

def get_embeddings(tokenizer, model, words):
    # get embedding for each class
    # ❗️ note: I am averaging the embeddings for each word in the class
    # ❓ question: are we interested in the final contextual embedding for each class? currently, we're looking at the final hidden state.
    embeddings = []
    for i in range(len(words)):
        input_ids = torch.tensor(tokenizer.encode(words[i])).unsqueeze(0)
        outputs = model(input_ids)
        last_hidden_states = outputs[0]
        # skip the first token, which is the [CLS] token, and skip the last token, which is the [SEP] token
        # average the rest of the tokens
        embeddings.append(last_hidden_states[0][1:-1].mean(dim=0).tolist())
    return embeddings

In [48]:
# get embeddings for train and test set

train_embeddings = get_embeddings(tokenizer, model, train_words)
test_embeddings = get_embeddings(tokenizer, model, test_words)

In [49]:
# sanity check
print(len(train_embeddings))
print(len(train_embeddings[0]))

print(len(test_embeddings))
print(len(test_embeddings[0]))

111
768
28
768


## dataframe

In [53]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(-1,1))

In [56]:
# convert embeddings to df and save as csv
def process_embeddings(scaler, words, embeddings, prefix):
    # round to 3 decimal places
    embeddings = [list(np.around(np.array(e),3)) for e in embeddings]

    # convert embeddings to pandas dataframe
    df = pd.DataFrame(embeddings)
    df.insert(0, 'word', words)

    # normalize each column to be between -1 and 1
    df.iloc[:,1:] = scaler.fit_transform(df.iloc[:,1:])

    # save to csv
    df.to_csv(f"{prefix}_output.csv", index=False)

    return embeddings, df


In [58]:
# process train/test embeddings
train_embeddings, train_df = process_embeddings(scaler, train_words, train_embeddings, "train")
test_embeddings, test_df = process_embeddings(scaler, test_words, test_embeddings, "test")

In [59]:
# sanity check
train_df.head()

Unnamed: 0,word,0,1,2,3,4,5,6,7,8,...,758,759,760,761,762,763,764,765,766,767
0,acoustic,-0.144756,-0.170021,0.2583,0.433566,-0.134048,0.152404,-0.079646,-0.294946,0.183733,...,0.706708,-0.372419,0.505285,-0.190319,-0.043559,-0.59772,-0.163916,-0.161987,-0.494118,-0.236585
1,album,0.024205,-0.278279,-0.221053,-0.331002,0.226542,-0.240424,0.122124,-0.667306,0.212781,...,0.843994,-0.679604,0.473573,-0.084708,-0.367933,-0.167752,0.247525,0.241901,0.762745,-0.403659
2,artist,-0.111533,0.548924,-0.392713,-0.426573,0.048257,0.075795,-0.2,-0.669866,0.514887,...,0.74415,-0.853014,0.198732,0.229923,-0.149212,-0.026059,-0.381738,0.051836,0.37451,-0.307317
3,bass,0.353583,-0.374046,-0.308502,-0.198135,-0.617962,-0.075795,-0.212389,-0.15675,-0.435004,...,0.932917,-0.060281,-0.061311,-0.282728,0.208526,-0.114007,-0.526953,0.090713,0.364706,-0.565854
4,beat,0.161841,-0.283831,0.331174,-0.310023,0.328418,0.126324,0.576991,-0.43698,0.23602,...,0.25897,0.298101,-0.196617,0.544554,0.805375,0.495114,-0.680968,0.049676,-0.498039,-0.512195


In [60]:
# sanity check
test_df.head()

Unnamed: 0,word,0,1,2,3,4,5,6,7,8,...,758,759,760,761,762,763,764,765,766,767
0,score,0.225852,-0.34293,0.02583,0.074561,0.495812,-0.412883,0.356256,-0.604858,0.674419,...,0.605825,-0.512322,0.788372,0.648725,0.479821,0.03537,0.401947,-0.478705,0.727156,0.72
1,sheet,0.862216,-0.251482,-0.060886,0.412281,-0.090452,-0.273495,-0.226361,-0.125506,0.096601,...,0.596117,0.258106,-0.465116,0.05949,0.374439,0.016077,-0.883171,-0.059625,-0.222651,0.587429
2,sitar,0.568182,-1.0,-0.856089,-0.901316,0.633166,-0.831045,0.352436,-0.183806,-0.232558,...,-1.0,0.509728,0.067442,0.382436,-0.067265,0.262594,-0.952712,0.223169,-0.214929,-0.714286
3,solo,0.106534,-0.055038,-0.252768,0.048246,0.085427,0.066526,-0.554919,0.05749,-0.348837,...,1.0,-0.045396,0.672093,-0.331445,0.284753,0.095391,-0.426982,0.083475,-0.619048,-0.337143
4,soprano,-0.215909,0.126164,-0.182657,-1.0,-0.78057,0.296727,0.232092,-0.196761,1.0,...,0.475728,-0.255512,-0.151163,-0.436261,-0.55157,-1.0,-0.452017,-0.226576,0.503218,-1.0


## interpreting axes with LLM

In [103]:
# prompt helpers
system_instructions = """
You are an expert word embedding axis interpreter who only outputs results in the form: {<interpretation>:<interpretation confidence score>}. 

You will be given a dataframe as input, where the first column is a list of words, 
and the second column is a list of numbers between -1 and 1. The numbers represent the embedding value of the word along a particular axis.

By carefully comparing and considering the embedding values for each word, please interpret the likely linguistic feature that this 
embedding axis encodes. This interpretation must be consistent across all the words and correspond to their respective positive, zero, 
or negative embedding values.  You might consider analyzing the top 10 words with values close to -1, the top 10 words with values 
close to 0 (median), and the top 10 words with values close to 1 to generate your interpretation. 

Please phrase your interpretation like: "negative sentiment vs positive sentiment", "small, blue objects vs large, red objects", 
etc (some contrast with "vs" should be present). Only include one interpretation per axis. Please use descriptive, 
contrastive phrases in your interpretations.

For each axis, also include a confidence score of how confident you are in your interpretation of that axis. 

For each axis, the output should look like this exactly: {<interpretation>:<interpretation confidence score>} (e.g., 
{"positive sentiment vs. negative sentiment": 0.6}) Remember each <interpretation> should include 1 instance of "vs".

Remember to format your output for each axis as requested above as a python dictionary. DO NOT EXPLAIN YOUR ANSWER OR OUTPUT ANYTHING 
EXCEPT THE FINAL DICTIONARY. 
"""

helper_function = """
Use the code below to help with your interpretations.

# Function to analyze an axis and derive interpretation
def analyze_axis(axis_index):
    axis_name = df.columns[axis_index]

    # Find top 10 words close to -1, 0, and 1 for this axis
    top_neg = df.nsmallest(10, axis_name)[['word', axis_name]]
    top_zero = df.iloc[(df[axis_name]-0).abs().argsort()[:10]][['word', axis_name]]
    top_pos = df.nlargest(10, axis_name)[['word', axis_name]]

    return top_neg, top_zero, top_pos
"""

system_prompt = system_instructions + helper_function

In [106]:
# create array of dicts to store interpretations of each axis
all_axes = []

# initialize empty dict for each axis
for i in range(num_axes_to_sample):
    all_axes.append({})
    
# ask model to analyze specified number of axes 
# repeat to generate multiple interpretations for each axis
for n in range(num_interpretations_per_axis):
  print(f"Round {n+1}:")
  for i in range(num_axes_to_sample):
      # get corresponding columns from dataframe
      df_subset = train_df[["word", i]]
      
      response = client.chat.completions.create(
            model=interpreter_model,
            messages=[
              {"role": "system", "content": system_prompt},
              {"role": "user", "content": f"Here is the dataframe for axis {i}:\n{df_subset.to_string(index=False)}\n\n"},
            ]
          )

      interpretation = response.choices[0].message.content
      cleaned_string = re.sub(r'([\"])', '', interpretation)
      res = cleaned_string.strip("{}").split(': ')

      # add interpretation + confidence score to axis dict
      all_axes[i][res[0]] = float(res[1])
      print("Axis", i, "done.")
      # break
  print()
  # break

Round 1:
Axis 0 done.
Axis 1 done.
Axis 2 done.
Axis 3 done.
Axis 4 done.
Axis 5 done.
Axis 6 done.
Axis 7 done.
Axis 8 done.
Axis 9 done.

Round 2:
Axis 0 done.
Axis 1 done.
Axis 2 done.
Axis 3 done.
Axis 4 done.
Axis 5 done.
Axis 6 done.
Axis 7 done.
Axis 8 done.
Axis 9 done.

Round 3:
Axis 0 done.
Axis 1 done.
Axis 2 done.
Axis 3 done.
Axis 4 done.
Axis 5 done.
Axis 6 done.
Axis 7 done.
Axis 8 done.
Axis 9 done.



In [109]:
with open("interpretations.txt", "w") as f:
    for i in range(len(all_axes)):
        output_str = f"Axis {i}: {all_axes[i]}"
        print(output_str)
        # also save to file
        f.write(output_str + "\n")

Axis 0: {'Traditional, acoustic music vs. Modern, electronic music': 0.8, 'complex, classical musical terms vs simple, modern music terms': 0.8, 'Traditional or acoustic music vs. modern or electronic music': 0.8}
Axis 1: {'professional classical musicians vs. popular music performers': 0.8, 'individual performers vs ensemble performance': 0.8, "performer's perspective vs audience's perspective": 0.85}
Axis 2: {'electronic music production vs. traditional music performance': 0.75, 'modern, electronic music vs traditional, classical music': 0.85, 'music production/technical terms vs. music genres and roles': 0.8}
Axis 3: {'traditional musical elements vs. modern musical elements': 0.85, 'traditional vs. modern music elements': 0.8, 'non-electronic/digital music elements vs. electronic/digital music elements': 0.75}
Axis 4: {'instrumental music vs. vocal music': 0.7, 'physical aspects of sound production vs performative and popular music aspects': 0.85, 'live performance vs. studio recor

In [110]:
# compute descriptive stats for each axis' confidence scores
# store in dict where key is axis number and value is dict of descriptive stats
import statistics

axes_stats = {}
for i in range(len(all_axes)):
    axis = all_axes[i]
    mean = statistics.mean(axis.values())
    median = statistics.median(axis.values())
    stdev = statistics.stdev(axis.values())

    # round each val to 3 decimal places
    mean = round(mean, 3)
    median = round(median, 3)
    stdev = round(stdev, 3)

    # store in dict
    axes_stats[i] = {"mean": mean, "median": median, "stdev": stdev}

print(axes_stats)

{0: {'mean': 0.8, 'median': 0.8, 'stdev': 0.0}, 1: {'mean': 0.817, 'median': 0.8, 'stdev': 0.029}, 2: {'mean': 0.8, 'median': 0.8, 'stdev': 0.05}, 3: {'mean': 0.8, 'median': 0.8, 'stdev': 0.05}, 4: {'mean': 0.8, 'median': 0.85, 'stdev': 0.087}, 5: {'mean': 0.817, 'median': 0.8, 'stdev': 0.029}, 6: {'mean': 0.833, 'median': 0.85, 'stdev': 0.029}, 7: {'mean': 0.85, 'median': 0.85, 'stdev': 0.0}, 8: {'mean': 0.833, 'median': 0.85, 'stdev': 0.029}, 9: {'mean': 0.783, 'median': 0.8, 'stdev': 0.029}}


In [111]:
# overall mean, median, and stdev of all axes' confidence scores
mean = statistics.mean([statistics.mean(axis.values()) for axis in all_axes])
median = statistics.median([statistics.median(axis.values()) for axis in all_axes])
stdev = statistics.stdev([statistics.stdev(axis.values()) for axis in all_axes])

# print overall mean, median, and stdev
print(f"mean: {mean}")
print(f"median: {median}")
print(f"stdev: {stdev}")

mean: 0.8133333333333334
median: 0.8
stdev: 0.025198172728761126


Notes:
- mean confidence increased!!
- but slightly higher stdev (old version: 0.017), still pretty good though

## analyzing interpretations

In [117]:
# system prompt helpers

system_prompt = """
You are an expert word embedding axis interpreter.

Below I will provide interpretations and confidence scores for one axis in high dimensional word embeddings. 
Each axis has 3 potential interpretations + corresponding confidence scores. 
For each axis, summarize the three interpretations into a single interpretation per axis 
by considering the confidence scores, similarities between the interpretations, the most common interpretations, etc. 
This summary interpretation does not have to be one of the original three interpretations word for word, but it can be. 
Keep the contrasting phrases in the same relative order, as the first phrase in the interpretation represents what 
negative embedding values stand for in the axis, while the second  phrase after vs. represents what positive embedding values stand for.

As before, each interpretation should consist of two descriptive, contrastive phrases separated by "vs". 
DO NOT EXPLAIN YOUR ANSWER OR OUTPUT ANYTHING EXCEPT THE FINAL SUMMARY INTERPRETATION.
"""

system_prompt_2 = """
You are an expert word embedding axis interpreter who only answers all prompts with a single number between 1-10.

Below I will provide interpretations and confidence scores for one axis in high dimensional word embeddings. 
Each axis has 3 potential interpretations + corresponding confidence scores. 
For each axis, please assign a qualitative similarity rating from 1-10 to how similar the interpretations are 
(1: not at all similar, 10: identical).

DO NOT EXPLAIN YOUR ANSWER OR OUTPUT ANYTHING EXCEPT THE FINAL SIMILARITY RATING.
"""

In [119]:
# store final interpretations in list
final_axes = []

# initialize empty dict for each axis
for i in range(num_axes_to_sample):
    final_axes.append({})

# summarize interpretations into one per axis
for i in range(num_axes_to_sample):
      # get summary interpretation
      
      response = client.chat.completions.create(
            model=interpreter_model,
            messages=[
              {"role": "system", "content": system_prompt},
              {"role": "user", "content": f"Here are the three interpretations for axis {i}:\n{all_axes[i]}\n\n"},
            ]
          )

      interpretation = response.choices[0].message.content
      # print(interpretation)

      rating_list = []
      for t in range(num_interpretations_per_axis):
        response = client.chat.completions.create(
              model=data_gen_model,
              messages=[
                {"role": "system", "content": system_prompt_2},
                {"role": "user", "content": f"Here are the three interpretations for axis {i}:\n{all_axes[i]}\n\n"},
              ]
            )

        rating = float(response.choices[0].message.content)
        rating_list.append(rating)

      # add summary interpretation + ratings to final axes list
      final_axes[i][interpretation] = rating_list
      print("Axis", i, "done.")

Axis 0 done.
Axis 1 done.
Axis 2 done.
Axis 3 done.
Axis 4 done.
Axis 5 done.
Axis 6 done.
Axis 7 done.
Axis 8 done.
Axis 9 done.


In [125]:
# clean keys in final_axes
for dict in final_axes:
    key = list(dict.keys())[0]
    cleaned_string = re.sub(r'([\n\'])', '', key)
    dict[cleaned_string] = dict.pop(key)

print(final_axes)

[{'Traditional, acoustic music vs. Modern, electronic music': [8.0, 7.0, 10.0]}, {'professional classical musicians vs. popular ensemble music performance from an audiences perspective': [9.5, 10.0, 8.2]}, {'electronic music production vs. traditional music performance': [6.5, 7.7, 9.5]}, {'traditional/non-electronic music elements vs. modern/electronic music elements': [9.5, 10.0, 10.0]}, {'instrumental music vs. vocal and popular performance aspects': [7.8, 8.0, 8.5]}, {'Vocal and harmonious terms vs. genre, recording, and instrument-related terms': [8.3, 8.3, 10.0]}, {'slow, calm, non-rhythmic, traditional/instrumental music features vs. fast, energetic, rhythmic, modern/vocal music features': [8.5, 7.6, 10.0]}, {'traditional, acoustic music vs electronic, modern music': [10.0, 9.0, 7.0]}, {'classical, non-electronic aspects of music vs. modern, electronic aspects of music': [10.0, 10.0, 7.5]}, {'non-instrumental and acoustic aspects of music vs instrumental, rhythmic and electronic

In [126]:
# compute descriptive stats for each axis' confidence scores
# store in dict where key is axis number and value is dict of descriptive stats
axes_stats = {}
for i in range(len(final_axes)):
    axis = final_axes[i]
    # value is a list of confidence scores
    mean = statistics.mean(list(axis.values())[0])
    median = statistics.median(list(axis.values())[0])
    stdev = statistics.stdev(list(axis.values())[0])

    # round each val to 3 decimal places
    mean = round(mean, 3)
    median = round(median, 3)
    stdev = round(stdev, 3)

    # store in dict
    axes_stats[i] = {"mean": mean, "median": median, "stdev": stdev}

print(axes_stats)

# overall mean, median, and stdev of all axes' confidence scores
mean = statistics.mean([statistics.mean(list(axis.values())[0]) for axis in final_axes])
median = statistics.median([statistics.median(list(axis.values())[0]) for axis in final_axes])
stdev = statistics.stdev([statistics.stdev(list(axis.values())[0]) for axis in final_axes])

# print overall mean, median, and stdev
print(f"mean: {mean}")
print(f"median: {median}")
print(f"stdev: {stdev}")

{0: {'mean': 8.333, 'median': 8.0, 'stdev': 1.528}, 1: {'mean': 9.233, 'median': 9.5, 'stdev': 0.929}, 2: {'mean': 7.9, 'median': 7.7, 'stdev': 1.51}, 3: {'mean': 9.833, 'median': 10.0, 'stdev': 0.289}, 4: {'mean': 8.1, 'median': 8.0, 'stdev': 0.361}, 5: {'mean': 8.867, 'median': 8.3, 'stdev': 0.981}, 6: {'mean': 8.7, 'median': 8.5, 'stdev': 1.212}, 7: {'mean': 8.667, 'median': 9.0, 'stdev': 1.528}, 8: {'mean': 9.167, 'median': 10.0, 'stdev': 1.443}, 9: {'mean': 7.65, 'median': 7.75, 'stdev': 0.904}}
mean: 8.645
median: 8.4
stdev: 0.46438227656945974


Notes:
- slightly higher mean similarity rating (8.65 vs. 7.6 from before), seems pretty good
- stdev is a bit higher than before, but not too bad (0.46 vs 0.3)

## evaluation

In [127]:
# extract interpretations from axes
interpretations = []
for axis in final_axes:
    interpretations.append(list(axis.keys())[0])

# sanity check
print(interpretations)

['Traditional, acoustic music vs. Modern, electronic music', 'professional classical musicians vs. popular ensemble music performance from an audiences perspective', 'electronic music production vs. traditional music performance', 'traditional/non-electronic music elements vs. modern/electronic music elements', 'instrumental music vs. vocal and popular performance aspects', 'Vocal and harmonious terms vs. genre, recording, and instrument-related terms', 'slow, calm, non-rhythmic, traditional/instrumental music features vs. fast, energetic, rhythmic, modern/vocal music features', 'traditional, acoustic music vs electronic, modern music', 'classical, non-electronic aspects of music vs. modern, electronic aspects of music', 'non-instrumental and acoustic aspects of music vs instrumental, rhythmic and electronic aspects of music']


Then I asked chatgpt to assign scores for each axis, and repeated this 3 times.

```
For each criteria in this list: {list}

assign a score to each word in this list:
# insert here

(should be a float) between -1 and 1 based on the current criteria. Note that each criteria consists of two contrastive phrases, x and y, formatted like: {x vs. y}. Scores closer to -1 indicate the word is more correlated to {x}, while scores closer to 1 indicate the word is more correlated to {y}. Scores closer to 0 indicate that the word is more neutral with respect to the criteria, falls in between the extremes, or isn't really related to either {x} or {y}. You don't need to justify your scores, just provide the numbers.

For each criteria, your output should be a python list of scores (length = 100, because there are 100 words). Print each criteria on a line followed by its corresponding list like this on another line: 

{criteria}:
[score 1, score 2, ...]. 

You should not need code to perform this task, just assign scores qualitatively. Don't print anything else except for the list of scores, and don't format anything as code.

Please start with the first criteria: {criteria}. Only do one criteria at a time.
```

In [189]:
# system prompt helpers

system_prompt = f"""
You are an expert word sense scorer who answers all prompts by outputting a single python list of {len(train_words)} scores that looks
exactly like this: [score 1, score 2, ...].

For the given criteria and list, please assign a score to each word in the list. Each score should be a float between -1 and 1 
based on the current criteria. Note that each criteria consists of two contrastive phrases, x and y, 
formatted like: x vs. y. Scores closer to -1 indicate the word is more correlated to x, while scores closer to 1 indicate the word
is more correlated to y. Scores closer to 0 indicate that the word is more neutral with respect to the criteria, 
falls in between the extremes, or isn't really related to either x or y. You don't need to justify your scores, just provide the numbers.

Your output should be a python list of scores (length = {len(train_words)}, because there are {len(train_words)} words). 
It should look like this: [score 1, score 2, ...].

You should not need code to perform this task, just assign scores qualitatively. 
DO NOT EXPLAIN YOUR ANSWER OR OUTPUT ANYTHING EXCEPT THE FINAL LIST OF SCORES.
"""

In [191]:
# get synethic embeddings
def assign_values(words, interpretations):
    # store final interpretations in list
  value_list = []

  # initialize empty list for each axis
  for i in range(num_axes_to_sample):
      # get list for this axis

      # store results in list
      axis_list = []

      # iterate through words by bucket size 
      for j in range(0, len(words), bucket_size):
        # get bucket of words
        bucket = words[j:j+bucket_size]

        response = client.chat.completions.create(
              model=data_gen_model,
              messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Here is the criteria for axis {i}:\n{interpretations[i]}\n\n and here is the list of words:\n{bucket}\n\n"},
              ]
            )

        values = response.choices[0].message.content

        # convert string list representation to list
        cleaned_string = re.sub(r'([\[\]\n])', '', values)
        new_values = cleaned_string.split(", ")

        # convert each value to float
        final_values = [float(val) for val in new_values]

        # add to axis list
        axis_list.extend(final_values)

      # crop to length of train_words
      axis_list = axis_list[:len(train_words)]
      # print(axis_list)
      # print(len(axis_list))

      # append to larger list
      value_list.append(axis_list)
      print("Axis", i, "done.")
      # break
  return value_list

In [143]:
# get specified number of synthetic embeddings
all_synth_embeddings = []

for i in range(num_synth_embeddings):
    print(f"running round {i+1}")
    val_list = assign_values(train_words, interpretations)
    all_synth_embeddings.append(val_list)
    print()


running round 1
Axis 0 done.
Axis 1 done.
Axis 2 done.
Axis 3 done.
Axis 4 done.
Axis 5 done.
Axis 6 done.
Axis 7 done.
Axis 8 done.
Axis 9 done.

running round 2
Axis 0 done.
Axis 1 done.
Axis 2 done.
Axis 3 done.
Axis 4 done.
Axis 5 done.
Axis 6 done.
Axis 7 done.
Axis 8 done.
Axis 9 done.



In [196]:
def get_syn_stats(embeddings, words):
    # for every group of k axes, calculate the mean and standard deviation for each value in the list
    # store in dict
    axis_dict = {}

    # initialize the dict for each axis 
    for i in range(num_axes_to_sample):
        axis_dict[f"axis{i}"] = {"mean": [], "std": []}

        # each group of k axes represents the k interpretations of one axis

        # group interpretations by word
        list_of_vals = []
        for w in range(len(words)):
            # get all interpretations for one word
            word_interps = []
            for j in range(num_synth_embeddings):
                word_interps.append(embeddings[j][i][w])

            # add to list of vals
            list_of_vals.append(word_interps)

        # get k interpretations of one axis
        axis_dict[f"axis{i}"]["mean"] = [statistics.mean(list_of_vals[j]) for j in range(len(words))]
        axis_dict[f"axis{i}"]["std"] = [statistics.stdev(list_of_vals[j]) for j in range(len(words))]
        
    return axis_dict

In [171]:
axis_dict = get_syn_stats(all_synth_embeddings, train_words)

print(len(axis_dict["axis0"]["std"]))

111


In [173]:
# round each value in mean lists to 3 decimal places
# and only save mean std values
for axis in axis_dict:
    axis_dict[axis]["mean"] = [round(val, 3) for val in axis_dict[axis]["mean"]]
    axis_dict[axis]["std"] = statistics.mean(axis_dict[axis]["std"])

In [174]:
# print overall mean std
print(statistics.mean([axis_dict[axis]["std"] for axis in axis_dict]))

0.3161276938412831


Note: still some variability (all values fall between -1 and 1)
- mean sd did decrease a bit (before: 0.41)

## comparing old and generated embeddings for first 10 axes

In [176]:
# create dataframe with columns 0-9
df_llm = pd.DataFrame(columns=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# use mean values to populate dataframe; column numbers are the axis numbers
for axis in range(len(axis_dict)):
    axis_title = f"axis{axis}"
    df_llm[axis] = axis_dict[axis_title]["mean"]

# add words as first column
df_llm.insert(0, "word", train_words)

df_llm.head()

Unnamed: 0,word,0,1,2,3,4,5,6,7,8,9
0,acoustic,-0.1,0.3,0.35,0.0,-0.05,0.25,-0.7,0.45,-0.25,0.05
1,album,-0.05,-0.1,0.0,0.0,0.0,-0.25,-0.1,0.05,0.0,-0.2
2,artist,-0.2,0.05,0.15,-0.05,0.15,0.0,0.0,0.25,-0.1,0.1
3,bass,0.05,-0.15,-0.15,-0.1,-0.2,-0.5,-0.3,-0.35,0.0,-0.55
4,beat,-0.1,-0.15,-0.25,-0.55,-0.15,-0.5,-0.15,-0.45,0.0,-0.6


In [177]:
# compare values in each column (axis) of df_llm to original dataframe df
# store mean difference and std for each axis

# initialize dict to store mean difference and std for each axis
axis_diff_dict = {}

# initialize dict for each axis
for i in range(10):
    axis_diff_dict[f"axis{i}"] = {"mean_diff": [], "std_diff": []}

# for each axis, calculate mean difference and std
for axis in range(len(axis_diff_dict)):
    diffs = [abs(df_llm[axis][i] - train_df[axis][i]) for i in range(len(df_llm))]
    mean_diff = statistics.mean(diffs)
    std_diff = statistics.stdev(diffs)

    axis_diff_dict[f"axis{axis}"]["mean_diff"] = mean_diff
    axis_diff_dict[f"axis{axis}"]["std_diff"] = std_diff

# print results
print(axis_diff_dict)

{'axis0': {'mean_diff': 0.3834353100133831, 'std_diff': 0.3068747087577499}, 'axis1': {'mean_diff': 0.3784155772705391, 'std_diff': 0.29937707658114926}, 'axis2': {'mean_diff': 0.36003027318816794, 'std_diff': 0.2982734560627241}, 'axis3': {'mean_diff': 0.48865473865473863, 'std_diff': 0.36044274952704647}, 'axis4': {'mean_diff': 0.3307272419872956, 'std_diff': 0.2567127672336318}, 'axis5': {'mean_diff': 0.37780531142389334, 'std_diff': 0.2883311285010431}, 'axis6': {'mean_diff': 0.4754444710196923, 'std_diff': 0.3459609463588012}, 'axis7': {'mean_diff': 0.4103156899702005, 'std_diff': 0.33156065647709504}, 'axis8': {'mean_diff': 0.48841259560213807, 'std_diff': 0.3706576337002312}, 'axis9': {'mean_diff': 0.4594126450509429, 'std_diff': 0.31977482812308317}}


In [178]:
# print overall mean difference
overall_mean = statistics.mean([axis_diff_dict[axis]["mean_diff"] for axis in axis_diff_dict])
mean_uncertainty = overall_mean / 2
print(f"mean diff: {overall_mean} (uncertainty: {mean_uncertainty:.2f})")

# print overall mean std
overall_std = statistics.mean([axis_diff_dict[axis]["std_diff"] for axis in axis_diff_dict])
std_uncertainty = overall_std / 2
print(f"std of diffs: {overall_std} (uncertainty: {std_uncertainty:.2f})")

mean diff: 0.41526538541809915 (uncertainty: 0.21)
std of diffs: 0.3177965951322555 (uncertainty: 0.16)


Here mean + std of diffs decreased a bit (before: mean = 0.51, std = 0.36)

## cosine similarity

In [179]:
# import cosine similarity function
from sklearn.metrics.pairwise import cosine_similarity

In [180]:
# compare cosine similarity of original and synthesized embeddings
# ❓ question: should we be comparing by axis or word?

# try columns first (by axis)
# compute cosine similarity for each axis in df_llm and df
# store in list
axis_cos_sim_list = []

# also try by word (row)
# compute cosine similarity for each word in df_llm and df
# store in list 
word_cos_sim_list = []

for axis in range(len(axis_dict)):
    axis_title = f"axis{axis}"
    axis_cos_sim = cosine_similarity([df_llm[axis]], [train_df[axis]])
    axis_cos_sim = axis_cos_sim[0][0]
    axis_cos_sim_list.append(axis_cos_sim)

for word in range(len(df_llm)):
    # crop rows in df to 10 dimensions
    word_cos_sim = cosine_similarity([df_llm.iloc[word][1:]], [train_df.iloc[word][1:11]])
    word_cos_sim = word_cos_sim[0][0]
    word_cos_sim_list.append(word_cos_sim)

In [181]:
# see results
print(axis_cos_sim_list)
print(word_cos_sim_list)

[-0.02975583067635356, 0.08594999254554996, 0.13766498382046113, -0.15096478516055253, 0.31379792499593867, 0.4543792572602663, 0.09704635401583447, 0.17915383212662028, -0.2022047594947824, -0.11502251659196361]
[-0.004400368579769479, 0.2501986181068107, -0.4664805658857885, -0.04312731849018198, 0.26724210926520536, 0.29536922277703126, 0.3839853418020965, -0.2719131214541242, 0.21490925314836437, -0.27541500256841384, -0.09636583294926382, 0.4626836007276771, 0.23860263429396172, 0.15535655672268664, -0.07074930393380285, 0.352182558207205, -0.3948802911317597, 0.1511842843732408, -0.15568275912390778, 0.7326600986249735, 0.10242485630918342, 0.39877759227068343, 0.08484800421425578, -0.01829869863838149, -0.5143439173357303, 0.3524210759294392, 0.5991440102648053, -0.12643413279692509, 0.5522818662839164, 0.1204758239680385, 0.0594343015946515, -0.5838579953012957, 0.10465224254190522, 0.5746353935384705, -0.15656761472906325, 0.04064768465046662, 0.10593704348457518, -0.151933240

In [182]:
# take average of each list
axis_cos_sim_avg = statistics.mean(axis_cos_sim_list)
word_cos_sim_avg = statistics.mean(word_cos_sim_list)

# print results
print(f"axis cos sim avg: {axis_cos_sim_avg}")
print(f"word cos sim avg: {word_cos_sim_avg}")

axis cos sim avg: 0.07700444528410187
word cos sim avg: 0.09488401609987736


In [183]:
# also print max, min, and std
print(f"axis cos sim max: {max(axis_cos_sim_list)}")
print(f"axis cos sim min: {min(axis_cos_sim_list)}")
print(f"axis cos sim std: {statistics.stdev(axis_cos_sim_list)}")

print(f"word cos sim max: {max(word_cos_sim_list)}")
print(f"word cos sim min: {min(word_cos_sim_list)}")
print(f"word cos sim std: {statistics.stdev(word_cos_sim_list)}")

axis cos sim max: 0.4543792572602663
axis cos sim min: -0.2022047594947824
axis cos sim std: 0.20850756994268055
word cos sim max: 0.8045420272892483
word cos sim min: -0.6720800080421537
word cos sim std: 0.304059082329833


Notes: oof, still not very good 😅 

## test accuracy

In [192]:
# values assigned by chatgpt
# generated vals for each axis

# get specified number of synthetic embeddings
test_synth_embeddings = []

for i in range(num_synth_embeddings):
    print(f"running round {i+1}")
    val_list = assign_values(test_words, interpretations)
    test_synth_embeddings.append(val_list)
    print()


running round 1
Axis 0 done.
Axis 1 done.
Axis 2 done.
Axis 3 done.
Axis 4 done.
Axis 5 done.
Axis 6 done.
Axis 7 done.
Axis 8 done.
Axis 9 done.

running round 2
Axis 0 done.
Axis 1 done.
Axis 2 done.
Axis 3 done.
Axis 4 done.
Axis 5 done.
Axis 6 done.
Axis 7 done.
Axis 8 done.
Axis 9 done.



In [197]:
axis_dict = get_syn_stats(test_synth_embeddings, test_words)

print(len(axis_dict["axis0"]["std"]))

28


In [198]:
# round each value in mean lists to 3 decimal places
# and only save mean std values
for axis in axis_dict:
    axis_dict[axis]["mean"] = [round(val, 3) for val in axis_dict[axis]["mean"]]
    axis_dict[axis]["std"] = statistics.mean(axis_dict[axis]["std"])

# print overall mean std
print(statistics.mean([axis_dict[axis]["std"] for axis in axis_dict]))

0.3868884245634967


slightly larger std than training data (0.32)

In [199]:
# compare original test embeddings to the generated embeddings
# create dataframe with columns 0-9
df_test_llm = pd.DataFrame(columns=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# use mean values to populate dataframe; column numbers are the axis numbers
for axis in range(len(axis_dict)):
    axis_title = f"axis{axis}"
    df_test_llm[axis] = axis_dict[axis_title]["mean"]

# add words as first column
df_test_llm.insert(0, "word", test_words)

df_test_llm.head()

Unnamed: 0,word,0,1,2,3,4,5,6,7,8,9
0,score,0.1,-0.75,0.1,0.15,-0.25,-0.5,-0.4,-0.65,-0.45,0.75
1,sheet,0.15,-0.7,0.15,0.2,-0.4,-0.35,-0.35,-0.55,-0.4,0.05
2,sitar,-0.85,-0.65,-0.45,-0.7,0.7,-0.65,-0.4,-0.45,-0.45,0.2
3,solo,-0.25,-0.6,0.25,0.1,0.1,-0.4,-0.55,-0.45,-0.2,0.6
4,soprano,-0.3,-0.55,0.3,0.05,0.1,0.35,-0.75,-0.5,-0.3,-0.3


In [200]:
# compare values in each column (axis) of df_llm to original dataframe df
# store mean difference and std for each axis

# initialize dict to store mean difference and std for each axis
axis_diff_dict = {}

# initialize dict for each axis
for i in range(10):
    axis_diff_dict[f"axis{i}"] = {"mean_diff": [], "std_diff": []}

# for each axis, calculate mean difference and std
for axis in range(len(axis_diff_dict)):
    diffs = [abs(df_test_llm[axis][i] - test_df[axis][i]) for i in range(len(df_test_llm))]
    mean_diff = statistics.mean(diffs)
    std_diff = statistics.stdev(diffs)

    axis_diff_dict[f"axis{axis}"]["mean_diff"] = mean_diff
    axis_diff_dict[f"axis{axis}"]["std_diff"] = std_diff

# print results
print(axis_diff_dict)

{'axis0': {'mean_diff': 0.44193384740259745, 'std_diff': 0.3327981109254907}, 'axis1': {'mean_diff': 0.5870630216523527, 'std_diff': 0.4526205713541023}, 'axis2': {'mean_diff': 0.3681931997891407, 'std_diff': 0.30379226058366227}, 'axis3': {'mean_diff': 0.3804354636591479, 'std_diff': 0.2646289116551178}, 'axis4': {'mean_diff': 0.5968951902368987, 'std_diff': 0.46006520127885964}, 'axis5': {'mean_diff': 0.4280189319656057, 'std_diff': 0.36696967321111135}, 'axis6': {'mean_diff': 0.47674307545367717, 'std_diff': 0.3401465996655138}, 'axis7': {'mean_diff': 0.44510555234239446, 'std_diff': 0.36893521591314427}, 'axis8': {'mean_diff': 0.5182820086889854, 'std_diff': 0.3574508303125172}, 'axis9': {'mean_diff': 0.45993772105182223, 'std_diff': 0.3477532782865477}}


In [201]:
# print overall mean difference
overall_mean = statistics.mean([axis_diff_dict[axis]["mean_diff"] for axis in axis_diff_dict])
mean_uncertainty = overall_mean / 2
print(f"mean diff: {overall_mean} (uncertainty: {mean_uncertainty:.2f})")

# print overall mean std
overall_std = statistics.mean([axis_diff_dict[axis]["std_diff"] for axis in axis_diff_dict])
std_uncertainty = overall_std / 2
print(f"std of diffs: {overall_std} (uncertainty: {std_uncertainty:.2f})")

mean diff: 0.4702608012242622 (uncertainty: 0.24)
std of diffs: 0.3595160653186067 (uncertainty: 0.18)


Somewhat higher mean and std oops

In [202]:
# try cosine similarity too
# compare cosine similarity of original and synthesized embeddings
# ❓ question: should we be comparing by axis or word?

# try columns first (by axis)
# compute cosine similarity for each axis in df_llm and df
# store in list
axis_cos_sim_list = []

# also try by word (row)
# compute cosine similarity for each word in df_llm and df
# store in list 
word_cos_sim_list = []

for axis in range(len(axis_dict)):
    axis_title = f"axis{axis}"
    axis_cos_sim = cosine_similarity([df_test_llm[axis]], [test_df[axis]])
    axis_cos_sim = axis_cos_sim[0][0]
    axis_cos_sim_list.append(axis_cos_sim)

for word in range(len(df_test_llm)):
    # crop rows in df to 10 dimensions
    word_cos_sim = cosine_similarity([df_test_llm.iloc[word][1:]], [test_df.iloc[word][1:11]])
    word_cos_sim = word_cos_sim[0][0]
    word_cos_sim_list.append(word_cos_sim)

In [203]:
# take average of each list
axis_cos_sim_avg = statistics.mean(axis_cos_sim_list)
word_cos_sim_avg = statistics.mean(word_cos_sim_list)

# print results
print(f"axis cos sim avg: {axis_cos_sim_avg}")
print(f"word cos sim avg: {word_cos_sim_avg}")

# also print max, min, and std
print(f"axis cos sim max: {max(axis_cos_sim_list)}")
print(f"axis cos sim min: {min(axis_cos_sim_list)}")
print(f"axis cos sim std: {statistics.stdev(axis_cos_sim_list)}")

print(f"word cos sim max: {max(word_cos_sim_list)}")
print(f"word cos sim min: {min(word_cos_sim_list)}")
print(f"word cos sim std: {statistics.stdev(word_cos_sim_list)}")

axis cos sim avg: 0.12703245501373045
word cos sim avg: 0.16730261575463778
axis cos sim max: 0.48311087013454246
axis cos sim min: -0.11996968397324612
axis cos sim std: 0.21334053719117219
word cos sim max: 0.7346214693984381
word cos sim min: -0.41310226979474285
word cos sim std: 0.30696717689788283


hmm... yeah the cosine similarity is not great