## data

In [20]:
# list of art related words (generated by chatgpt)
words = ['abstract',
 'aesthetic',
 'acrylic',
 'artistry',
 'animation',
 'brushwork',
 'canvas',
 'ceramics',
 'collage',
 'color',
 'composition',
 'creativity',
 'culture',
 'design',
 'drawing',
 'easel',
 'expression',
 'fresco',
 'gallery',
 'graffiti',
 'hue',
 'illustration',
 'impressionism',
 'ink',
 'installation',
 'landscape',
 'masterpiece',
 'medium',
 'mural',
 'museum',
 'oil',
 'palette',
 'pastel',
 'perspective',
 'photography',
 'pigment',
 'portrait',
 'realism',
 'sculpture',
 'sketch',
 'still life',
 'surrealism',
 'texture',
 'tone',
 'watercolor',
 'abstract expressionism',
 'art deco',
 'baroque',
 'byzantine',
 'carving',
 'chiaroscuro',
 'cubism',
 'dadaism',
 'etching',
 'expressionism',
 'fauvism',
 'genre',
 'gouache',
 'harmony',
 'impression',
 'juxtaposition',
 'kinetic',
 'line',
 'minimalism',
 'modernism',
 'neoclassicism',
 'ornament',
 'perspective',
 'pop art',
 'post-impressionism',
 'realism',
 'renaissance',
 'rococo',
 'romanticism',
 'satire',
 'shade',
 'silhouette',
 'symmetry',
 'tapestry',
 'tempera',
 "trompe l'oeil",
 'urban art',
 'vanguard',
 'veneer',
 'vignette',
 'whimsical',
 'xenography',
 'yield',
 'zenith',
 'zest',
 'fresco',
 'impasto',
 'montage',
 'opus',
 'palette knife',
 'quattrocento',
 'relief',
 'stipple',
 'underpainting',
 'varnish']

In [21]:
# sanity check
len(words)

100

## get embeddings

In [22]:
from transformers import AutoTokenizer, DistilBertModel
import torch

# https://huggingface.co/distilbert-base-uncased
# https://huggingface.co/docs/transformers/v4.35.0/en/model_doc/distilbert
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = DistilBertModel.from_pretrained("distilbert-base-uncased")

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [23]:
# get embedding for each class
# ❗️ note: I am averaging the embeddings for each word in the class
# ❓ question: are we interested in the final contextual embedding for each class? currently, we're looking at the final hidden state.
embeddings = []
for i in range(len(words)):
    input_ids = torch.tensor(tokenizer.encode(words[i])).unsqueeze(0)
    outputs = model(input_ids)
    last_hidden_states = outputs[0]
    # skip the first token, which is the [CLS] token, and skip the last token, which is the [SEP] token
    # average the rest of the tokens
    embeddings.append(last_hidden_states[0][1:-1].mean(dim=0).tolist())

In [24]:
# sanity check
print(len(embeddings))
print(len(embeddings[0]))

100
768


In [25]:
import numpy as np
# round each val in embedding to 3 decimal places
embeddings = [list(np.around(np.array(e),3)) for e in embeddings]

# create string of all classes and their embeddings & save to text file
# ❗️ note: only taking first 10 axes for now due to context window length
with open("output.txt", "w") as text_file:
    for i in range(len(words)):
        class_str = f"{words[i]}: {embeddings[i][:10]}\n"
        text_file.write(class_str)

## dataframe

In [18]:
import pandas as pd

In [26]:
# convert embeddings to pandas dataframe
df = pd.DataFrame(embeddings)
df.insert(0, 'word', words)

# sanity check
df.head()

Unnamed: 0,word,0,1,2,3,4,5,6,7,8,...,758,759,760,761,762,763,764,765,766,767
0,abstract,0.286,0.395,-0.382,-0.242,0.407,0.01,-0.199,0.06,0.203,...,0.734,-0.119,0.476,0.058,0.239,-0.068,0.101,0.026,0.26,-0.127
1,aesthetic,0.249,0.566,-0.123,-0.117,0.271,0.083,0.036,-0.069,0.083,...,0.516,0.024,0.273,-0.004,0.114,-0.206,0.08,0.043,0.187,0.201
2,acrylic,0.217,0.234,-0.019,0.087,0.777,-0.107,-0.655,0.548,-0.061,...,0.254,-0.524,0.176,0.345,0.337,-0.254,-0.499,-0.021,0.162,0.142
3,artistry,0.147,0.263,-0.044,-0.078,0.66,0.147,-0.043,-0.021,-0.12,...,0.639,-0.331,0.187,0.041,-0.143,-0.029,0.11,-0.205,0.363,-0.308
4,animation,-0.006,0.449,-0.484,0.105,0.504,0.228,0.095,0.199,-0.532,...,0.55,-0.206,0.477,0.005,0.14,-0.114,0.244,-0.032,0.513,0.498


In [27]:
# normalize each column to be between -1 and 1
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1,1))
df.iloc[:,1:] = scaler.fit_transform(df.iloc[:,1:])

# sanity check
df.head()

Unnamed: 0,word,0,1,2,3,4,5,6,7,8,...,758,759,760,761,762,763,764,765,766,767
0,abstract,0.223359,0.486683,-0.490754,-0.152763,-0.189744,0.078056,-0.045004,-0.449067,0.400238,...,0.338893,0.285846,0.473819,0.261603,0.072235,0.110902,0.214545,0.294118,0.164329,-0.366919
1,aesthetic,0.178808,0.693705,-0.122333,0.118093,-0.364103,0.185567,0.313501,-0.634146,0.25772,...,0.00834,0.550416,-0.0447,0.130802,-0.209932,-0.148496,0.176364,0.329205,0.018036,0.183879
2,acrylic,0.140277,0.291768,0.025605,0.56013,0.284615,-0.094256,-0.740656,0.251076,0.086698,...,-0.388931,-0.46346,-0.292465,0.867089,0.293454,-0.238722,-0.876364,0.19711,-0.032064,0.084803
3,artistry,0.05599,0.326877,-0.009957,0.2026,0.134615,0.279823,0.192982,-0.56528,0.016627,...,0.194845,-0.106383,-0.264368,0.225738,-0.790068,0.184211,0.230909,-0.182663,0.370741,-0.670865
4,animation,-0.128236,0.552058,-0.635846,0.599133,-0.065385,0.399116,0.403509,-0.249641,-0.472684,...,0.059894,0.124884,0.476373,0.149789,-0.151242,0.024436,0.474545,0.174407,0.671343,0.68262


In [18]:
# save to csv
df.to_csv("output.csv", index=False)

## interpreting axes with chatgpt

Goal: try more detailed interpretations this time, instead of just 3 words (x vs. y)

I uploaded the csv generated above and used the following prompt:
```
This CSV contains a list of words and their embeddings (each column after the word represents an axis in the embedding. 

By carefully comparing and considering the embedding values for each word, please interpret the likely linguistic feature that each embedding axis encodes. This interpretation must be consistent across all the words and correspond to their respective positive, zero, or negative embedding values.  You might consider analyzing the top 10 words with values close to -1, the top 10 words with values close to 0 (median), and the top 10 words with values close to 1 to generate your interpretation. 

Please phrase your interpretation like: "negative sentiment vs positive sentiment", "small, blue objects vs large, red objects", etc (some contrast with "vs" should be present). You should only have one interpretation per axis. Please use descriptive, contrastive phrases in your interpretations. Try to have unique interpretations for each axis too.

For each axis, also include a confidence score of how confident you are in your interpretation of each axis. This does not need to be the same for each axis.

For each axis, the output should look like this: {<interpretation>:<interpretation confidence score>} (e.g., {"positive sentiment vs. negative sentiment": 0.6}) Remember each <interpretation> should include "vs".

Start with the first axis, then repeat the same process for the remaining 9 axes, one at a time. Remember to format your output for each axis as requested above as a python dictionary. Each axis should yield 1 dictionary with a interpretation + confidence score, for a total of 10 dictionaries. DO NOT OUTPUT ANYTHING EXCEPT THE FINAL 10 DICTIONARIES.

Use the code below to help with your interpretations.

# Function to analyze an axis and derive interpretation
def analyze_axis(axis_index):
    axis_name = df.columns[axis_index]

    # Find top 10 words close to -1, 0, and 1 for this axis
    top_neg = df.nsmallest(10, axis_name)[['word', axis_name]]
    top_zero = df.iloc[(df[axis_name]-0).abs().argsort()[:10]][['word', axis_name]]
    top_pos = df.nlargest(10, axis_name)[['word', axis_name]]

    return top_neg, top_zero, top_pos
```

❗️ note: seems chatgpt's limit is 9 axes at a time (if using csv at least haha)


In [1]:
# interpretations of first 10 axes:  
# each dict item is formatted as {<interpretation>: <confidence score>}
axis_0 = {"traditional art mediums vs modern urban art forms": 0.7, "traditional, classical art forms vs modern, contemporary art practices": 0.6, "traditional or classical art forms and techniques vs. modern or physical art mediums and spaces": 0.8}
axis_1 = {"minimalist and abstract art forms vs detailed and realistic art forms": 0.65, "basic artistic elements, traditional styles vs specific art movements, modern mediums": 0.7, "linear, detailed, and classic artistic styles vs. bold, expressive, and modern art movements": 0.75}
axis_2 = {"avant-garde and abstract art vs classical and traditional art": 0.75, "unconventional, abstract, ancient styles vs classical, well-defined, precise art elements": 0.8, "abstract, unconventional art forms vs. classical, technique-focused art styles": 0.8}
axis_3 = {"whimsical and creative art vs structured and formal art": 0.7, "traditional, structured, historical art styles vs creative, innovative, modern art techniques": 0.7, "structured, formal artistic elements vs. imaginative, creative artistic expressions": 0.7}
axis_4 = {"romantic and expressive art vs satirical and kinetic art": 0.65, "grounded, realistic art concepts vs abstract, expressive art styles": 0.8, "realistic, serious art forms vs. vibrant, expressive art styles": 0.75}
axis_5 = {"abstract and cubist art vs detailed and tonal art": 0.7, "abstract, emotionally expressive art vs fundamental, technical aspects of art": 0.75, "abstract, modernist art movements vs. concrete, traditional art elements": 0.7}
axis_6 = {"photographic and surreal art vs traditional painting mediums": 0.65, "traditional, fundamental art tools and techniques vs modern, innovative artistic methods": 0.8, "physical art mediums and tools vs. conceptual and abstract art elements": 0.65}
axis_7 = {"classic and colorful art vs modern and monochromatic art": 0.6, "modern, innovative art forms vs classical, traditional art techniques and materials": 0.75, "modern art techniques and spaces vs. traditional artistic styles and techniques": 0.7}
axis_8 = {"expressive and vibrant art vs structured and subdued art": 0.65, "established, mainstream art concepts vs artistic creation process, individual expression": 0.7, "contemporary, urban art styles vs. classic, time-honored art forms": 0.75}
axis_9 = {"modern and institutional art vs classic and personal art": 0.6, "innovative, experimental art movements vs traditional, historical, institutional art concepts": 0.7, "modernist and abstract art movements vs. physical art spaces and classic art": 0.65}

# concatenate all axes into one list
all_axes = [axis_0, axis_1, axis_2, axis_3, axis_4, axis_5, axis_6, axis_7, axis_8, axis_9]

In [2]:
# compute descriptive stats for each axis' confidence scores
# store in dict where key is axis number and value is dict of descriptive stats
import statistics

axes_stats = {}
for i in range(len(all_axes)):
    axis = all_axes[i]
    mean = statistics.mean(axis.values())
    median = statistics.median(axis.values())
    stdev = statistics.stdev(axis.values())

    # round each val to 3 decimal places
    mean = round(mean, 3)
    median = round(median, 3)
    stdev = round(stdev, 3)

    # store in dict
    axes_stats[i] = {"mean": mean, "median": median, "stdev": stdev}

print(axes_stats)

{0: {'mean': 0.7, 'median': 0.7, 'stdev': 0.1}, 1: {'mean': 0.7, 'median': 0.7, 'stdev': 0.05}, 2: {'mean': 0.783, 'median': 0.8, 'stdev': 0.029}, 3: {'mean': 0.7, 'median': 0.7, 'stdev': 0.0}, 4: {'mean': 0.733, 'median': 0.75, 'stdev': 0.076}, 5: {'mean': 0.717, 'median': 0.7, 'stdev': 0.029}, 6: {'mean': 0.7, 'median': 0.65, 'stdev': 0.087}, 7: {'mean': 0.683, 'median': 0.7, 'stdev': 0.076}, 8: {'mean': 0.7, 'median': 0.7, 'stdev': 0.05}, 9: {'mean': 0.65, 'median': 0.65, 'stdev': 0.05}}


In [3]:
# overall mean, median, and stdev of all axes' confidence scores
mean = statistics.mean([statistics.mean(axis.values()) for axis in all_axes])
median = statistics.median([statistics.median(axis.values()) for axis in all_axes])
stdev = statistics.stdev([statistics.stdev(axis.values()) for axis in all_axes])

# print overall mean, median, and stdev
print(f"mean: {mean}")
print(f"median: {median}")
print(f"stdev: {stdev}")

mean: 0.7066666666666667
median: 0.7
stdev: 0.03055518982121737


Notes:
- mean confidence increased
- but slightly higher stdev (old version: 0.017), still pretty good though

## analyzing interpretations

I then asked chatgpt to assess how similar the 3 interpretations were for each axis and create a summary interpretation. Again, I asked it to do this 3 times to see how reliable the results were.

```
Below I will provide interpretations and confidence scores for axes in high dimensional word embeddings. Each axis has 3 potential interpretations + corresponding confidence scores. For each axis, please assign a qualitative similarity rating from 1-10 to how similar the interpretations are (1: not at all similar, 10: identical). Then summarize the three interpretations into a single interpretation per axis by considering the confidence scores, similarities between the interpretations, the most common interpretations, etc. This summary interpretation does not have to be one of the original three interpretations word for word, but it can be. Keep the contrasting phrases in the same relative order, as the first phrase in the interpretation represents what negative embedding values stand for in the axis, while the second  phrase after vs. represents what positive embedding values stand for.

Your final answer for each axis should be a python dict formatted as follows: {<interpretation>: <similarity rating>}. As before, each interpretation should consist of two descriptive, contrastive phrases separated by "vs". And the similarity rating should just be the number from 1-10.

Here are the axis interpretations:
# insert here

Remember to format your answer for each axis as a python dict as requested above. You should not need to write code do this, please just do this qualitatively. Also make sure there's no duplicate interpretations.
```

In [4]:
# final axes dict
# ❗️ note: it seems axes are not necessarily unique
axis_0 = {"traditional or classical art forms and techniques vs modern or contemporary art mediums and practices": [9,8,8]}
axis_1 = {"basic, linear, and detailed artistic elements vs bold, expressive, and modern art styles and movements": [8,7,7]}
axis_2 = {"abstract, unconventional, avant-garde art vs classical, traditional, technique-focused art styles": [9,9,8]}
axis_3 = {"structured, traditional, historical art styles vs imaginative, creative, innovative art expressions": [9,8,8]}
axis_4 = {"realistic, serious, romantic art forms vs vibrant, expressive, satirical art styles": [7,7,7]}
axis_5 = {"abstract, modernist, emotionally expressive art vs concrete, traditional, technical art elements": [8,8,7]}
axis_6 = {"traditional, physical art mediums and tools vs modern, innovative, conceptual art methods": [7,7,7]}
axis_7 = {"modern, innovative, colorful art techniques vs traditional, classic, monochromatic art styles": [8,7,7]}
axis_8 = {"contemporary, expressive, vibrant art vs structured, subdued, classic art forms": [7,7,7]}
axis_9 = {"modern, innovative, institutional art movements vs classic, personal, traditional art concepts": [7,7,7]}

# concatenate all axes into one list
all_axes = [axis_0, axis_1, axis_2, axis_3, axis_4, axis_5, axis_6, axis_7, axis_8, axis_9]

In [5]:
# compute descriptive stats for each axis' confidence scores
# store in dict where key is axis number and value is dict of descriptive stats
axes_stats = {}
for i in range(len(all_axes)):
    axis = all_axes[i]
    # value is a list of confidence scores
    mean = statistics.mean(list(axis.values())[0])
    median = statistics.median(list(axis.values())[0])
    stdev = statistics.stdev(list(axis.values())[0])

    # round each val to 3 decimal places
    mean = round(mean, 3)
    median = round(median, 3)
    stdev = round(stdev, 3)

    # store in dict
    axes_stats[i] = {"mean": mean, "median": median, "stdev": stdev}

print(axes_stats)

# overall mean, median, and stdev of all axes' confidence scores
mean = statistics.mean([statistics.mean(list(axis.values())[0]) for axis in all_axes])
median = statistics.median([statistics.median(list(axis.values())[0]) for axis in all_axes])
stdev = statistics.stdev([statistics.stdev(list(axis.values())[0]) for axis in all_axes])

# print overall mean, median, and stdev
print(f"mean: {mean}")
print(f"median: {median}")
print(f"stdev: {stdev}")

{0: {'mean': 8.333, 'median': 8, 'stdev': 0.577}, 1: {'mean': 7.333, 'median': 7, 'stdev': 0.577}, 2: {'mean': 8.667, 'median': 9, 'stdev': 0.577}, 3: {'mean': 8.333, 'median': 8, 'stdev': 0.577}, 4: {'mean': 7, 'median': 7, 'stdev': 0.0}, 5: {'mean': 7.667, 'median': 8, 'stdev': 0.577}, 6: {'mean': 7, 'median': 7, 'stdev': 0.0}, 7: {'mean': 7.333, 'median': 7, 'stdev': 0.577}, 8: {'mean': 7, 'median': 7, 'stdev': 0.0}, 9: {'mean': 7, 'median': 7, 'stdev': 0.0}}
mean: 7.566666666666666
median: 7.0
stdev: 0.2981423969999719


Notes:
- slightly higher mean similarity rating (7.6 vs. 7.0 from before), seems pretty good
- stdev is also lower than before (0.3 vs 0.58)

## evaluation

In [6]:
# extract interpretations from axes
interpretations = []
for axis in all_axes:
    interpretations.append(list(axis.keys())[0])

# sanity check
print(interpretations)

['traditional or classical art forms and techniques vs modern or contemporary art mediums and practices', 'basic, linear, and detailed artistic elements vs bold, expressive, and modern art styles and movements', 'abstract, unconventional, avant-garde art vs classical, traditional, technique-focused art styles', 'structured, traditional, historical art styles vs imaginative, creative, innovative art expressions', 'realistic, serious, romantic art forms vs vibrant, expressive, satirical art styles', 'abstract, modernist, emotionally expressive art vs concrete, traditional, technical art elements', 'traditional, physical art mediums and tools vs modern, innovative, conceptual art methods', 'modern, innovative, colorful art techniques vs traditional, classic, monochromatic art styles', 'contemporary, expressive, vibrant art vs structured, subdued, classic art forms', 'modern, innovative, institutional art movements vs classic, personal, traditional art concepts']


In [57]:
# create template csv file
# each row is a word from list 'words', each column will store the interpretation of that word for each axis

# create header row
# first column is 'word', rest of columns are interpretations; have 3 columns per interpretation
header = ['word'] + interpretations

# create dataframe
df = pd.DataFrame(columns=header)
df['word'] = words

# replace NaN with 0
df = df.fillna(0)

# sanity check
df.head()

# save dataframe to csv
df.to_csv('llm_scores.csv', index=False)


Then I asked chatgpt to assign scores for each axis, and repeated this 3 times.

```
For each criteria in this list: {list}

assign a score to each word in this list:
# insert here

(should be a float) between -1 and 1 based on the current criteria. Note that each criteria consists of two contrastive phrases, x and y, formatted like: {x vs. y}. Scores closer to -1 indicate the word is more correlated to {x}, while scores closer to 1 indicate the word is more correlated to {y}. Scores closer to 0 indicate that the word is more neutral with respect to the criteria, falls in between the extremes, or isn't really related to either {x} or {y}. You don't need to justify your scores, just provide the numbers.

For each criteria, your output should be a python list of scores (length = 100, because there are 100 words). Print each criteria on a line followed by its corresponding list like this on another line: 

{criteria}:
[score 1, score 2, ...]. 

You should not need code to perform this task, just assign scores qualitatively. Don't print anything else except for the list of scores, and don't format anything as code.

Please start with the first criteria: {criteria}. Only do one criteria at a time.
```

In [12]:
# generated vals for each axis
# ❗️ note: these may not be the right length -- need to check/fix later
axis0_1 = [-1, -0.5, -0.8, -0.2, 1, -0.7, -0.8, -0.7, 0.5, 0, 0, 0, 0, 0, -0.5, -0.8, 0, -0.8, -0.5, 1, 0, 0, 0, -0.8, 1, -0.6, -0.5, 0, 0.5, -0.5, -0.8, -0.8, -0.7, 0, 1, 0, 0, -0.9, 0.5, -0.5, -0.8, 0, 0, 0, -0.8, 1, 0, 0.5, -0.8, 0.5, 0, 0.5, -1, 0.5, -1, 0.5, -0.7, 1, 0.8, 0, 0, 0, 0.5, 1, -0.5, 0.5, 0, 0, 1, 0, 0, 0.5, 1, -0.5, -0.8, 0, -0.5, 0.5, -0.8, 1, -0.8, 0.5, 1, -0.5, -1, 0.5, 0, -0.8, 0.5, 0, 0, 0, -0.8, -0.8, 0.5, -0.7, -0.8, -0.8, 0, 0, -0.8, 0.5, -0.8, -0.8, 0.5, -0.8, 0, 1, 0.5, 0, 0, 0.5]
axis0_2 = [-1.0, -0.5, -0.8, -0.5, 1.0, -0.8, -0.8, -0.7, 0.6, 0.0, -0.2, 0.3, 0.0, 0.0, -0.8, -0.8, 0.5, -0.9, -0.3, 1.0, 0.0, 0.3, -1.0, -0.7, 1.0, -0.8, -0.7, -0.3, 1.0, -0.7, -0.9, -0.8, -0.7, -0.6, 1.0, -0.5, -0.6, -0.9, -0.6, -0.8, -0.8, -0.9, 0.8, 0.0, -0.6, 1.0, -0.8, -0.8, -0.8, -0.8, -0.7, -0.8, 1.0, -0.8, 0.8, -0.9, -0.8, -0.3, -0.7, -0.2, 0.0, -0.7, 0.0, -0.8, -0.8, 0.7, -0.8, 0.0, 0.7, 0.0, 0.0, 0.0, -0.8, -0.7, -0.8, 1.0, 0.5, -0.8, -0.8, -0.8, -0.9, -0.8, -0.8, -0.9, -0.7, -0.8, 1.0, -0.8, -0.7, 0.0, -0.7, 0.7, -0.8, -0.7, -0.8, -0.9, -0.8, -0.7, -0.9, -0.7, -0.8, -0.8, -0.8, -0.8, -0.8, -0.8, -0.8, -0.8]
axis0_3 = [-0.8, -0.3, -0.6, -0.1, 1.0, -0.7, -0.6, -0.5, 0.4, 0.1, -0.2, 0.3, -0.1, 0.2, -0.5, -0.6, 0.5, -0.7, -0.2, 0.8, -0.1, 0.2, -0.8, -0.4, 0.9, -0.3, -0.5, -0.1, 0.7, -0.4, -0.6, -0.5, -0.4, -0.3, 0.6, -0.2, -0.5, -0.8, -0.4, -0.5, -0.3, 0.0, 0.1, -0.2, -0.6, -0.9, -0.8, -0.8, -0.7, -0.8, -0.9, -0.7, -0.8, -0.8, -0.6, -0.8, -0.7, -0.6, 0.1, -0.2, -0.3, -0.5, 0.0, -0.5, -0.7, -0.4, -0.6, -0.2, 0.0, -0.8, -0.5, -0.5, -0.7, -0.6, -0.3, 0.7, -0.4, -0.7, -0.8, -0.5, -0.5, 0.3, 0.2, 0.5, -0.6, -0.5, -0.4, 0.8, -0.5, 0.3, -0.7, -0.8, -0.7, -0.6, -0.7, -0.5, -0.4, -0.7, -0.3, -0.4, -0.6, -0.7, -0.8, -0.6, -0.7, -0.7, -0.7, -0.6, -0.7, -0.7, -0.5, -0.6, -0.7, -0.5, -0.7, -0.7, -0.6]
axis1_1 = [-0.5, -0.2, -0.5, -0.2, 0.5, -0.5, -0.5, -0.2, 0.2, 0.5, -0.5, 0.5, 0.2, 0, -0.5, -0.5, 0.5, -0.8, -0.2, 1, 0.3, -0.2, -0.8, -0.5, 1, -0.2, -0.5, -0.3, 0.8, -0.2, -0.5, -0.5, -0.2, -0.5, 0.5, 0.3, -0.2, -0.8, 0.5, -0.5, -0.5, 0, 0.5, 0.3, -0.5, 1, -0.5, 0.5, -0.8, 0.5, 0.2, 0.5, -1, 0.8, -1, 0.5, -0.5, 1, 0.8, 0, 0, -0.2, 0.5, 1, -0.5, 0.8, 0, 0, 1, 0.2, 0, 0.5, 1, -0.5, -0.5, 0, -0.5, 0.8, -0.5, 1, -0.5, 0.8, 1, -0.2, -1, 0.5, 0, -0.5, 0.8, 0, 0, 0, -0.5, -0.5, 0.8, -0.2, -0.5, -0.5, 0, 0, -0.5, 0.5, -0.5, -0.5, 0.8, -0.5, 0, 1, 0.5, 0, 0, 0.8]
axis1_2 = [-0.5, -0.2, -0.3, -0.2, 0.7, -0.6, -0.5, -0.4, 0.4, 0.3, -0.1, 0.5, 0.0, 0.1, -0.6, -0.5, 0.8, -0.4, -0.1, 0.9, 0.2, 0.1, -0.6, -0.5, 0.8, -0.5, -0.3, 0.0, 0.7, -0.2, -0.6, -0.5, -0.4, -0.3, 0.7, -0.2, -0.3, -0.5, -0.4, -0.6, -0.5, -0.6, 0.8, 0.1, -0.5, 0.8, -0.6, -0.6, -0.6, -0.6, -0.5, -0.6, 0.8, -0.6, 0.8, -0.7, -0.6, -0.1, -0.4, 0.0, 0.1, -0.4, 0.1, -0.6, -0.6, 0.6, -0.6, 0.1, 0.7, 0.2, 0.1, 0.1, -0.6, -0.5, -0.6, 0.9, 0.6, -0.6, -0.6, -0.6, -0.7, -0.6, -0.6, -0.7, -0.5, -0.6, 0.9, -0.6, -0.5, 0.1, -0.5, 0.7, -0.6, -0.5, -0.6, -0.7, -0.6, -0.5, -0.7, -0.5, -0.6, -0.6, -0.6, -0.6, -0.6, -0.6, -0.6]
axis1_3 = [-0.4, -0.1, -0.3, -0.2, 0.5, 0.1, -0.3, -0.2, 0.3, 0.2, -0.1, 0.4, 0.0, 0.1, -0.2, -0.3, 0.6, -0.4, 0.1, 0.7, 0.0, 0.2, -0.5, -0.3, 0.8, -0.1, -0.3, 0.0, 0.6, 0.1, -0.2, -0.3, -0.2, -0.1, 0.4, -0.1, -0.2, -0.5, -0.3, -0.2, -0.1, 0.3, 0.2, -0.1, -0.2, -0.7, -0.6, -0.5, -0.6, -0.6, -0.7, -0.6, -0.6, -0.5, -0.4, -0.6, -0.5, -0.4, 0.2, -0.1, 0.0, -0.3, 0.1, -0.3, -0.5, -0.2, -0.4, 0.1, 0.2, -0.6, -0.3, -0.3, -0.5, -0.4, -0.1, 0.7, -0.2, -0.5, -0.6, -0.3, -0.3, 0.5, 0.4, 0.6, -0.4, -0.3, -0.2, 0.8, -0.3, 0.5, -0.5, -0.6, -0.5, -0.4, -0.5, -0.3, -0.2, -0.5, -0.1, -0.2, -0.4, -0.5, -0.6, -0.4, -0.5, -0.5, -0.5, -0.4, -0.5, -0.5, -0.3, -0.4, -0.5, -0.3, -0.5, -0.5, -0.4]
axis2_1 = [1, 0, -0.5, 0.2, 0.8, -0.7, -0.8, -0.3, 1, 0.5, -0.2, 0.8, 0.5, 0.3, -0.7, -0.8, 0.8, -1, -0.2, 1, 0.5, 0.3, -1, -0.5, 1, -0.5, -0.8, 0, 0.7, -0.3, -0.8, -0.8, -0.5, -0.2, 0.7, 0.5, 0.2, -1, 0.6, -0.7, -0.8, 1, 0.5, 0.5, -0.8, 1, 0.3, 0.8, -1, 0.8, 0.5, 0.8, -1, 1, -1, 0.8, -0.7, 1, 1, 0.3, 0, 0.2, 0.8, 1, -0.7, 1, 0, 0, 1, 0.5, 0, 0.8, 1, -0.7, -0.8, 0.3, -0.7, 1, -0.8, 1, -0.8, 1, 1, -0.7, -1, 1, 0, -0.7, 1, 0, 0, 0, -0.8, -0.8, 1, -0.5, -0.8, -0.8, 0.3, 0, -0.8, 1, -0.8, -0.8, 1, -0.8, 0, 1, 1, 0, 0, 1]
axis2_2 = [0.9, 0.3, -0.6, 0.2, 0.7, -0.5, -0.7, -0.2, 0.8, 0.4, 0.1, 0.6, 0.0, 0.3, -0.7, -0.6, 0.7, -0.8, -0.2, 0.9, 0.3, 0.5, -0.9, -0.6, 0.9, -0.7, -0.4, 0.0, 0.8, -0.3, -0.8, -0.6, -0.5, -0.4, 0.7, -0.3, -0.4, -0.9, -0.5, -0.7, -0.7, -0.9, 0.9, 0.2, -0.6, 0.9, -0.7, -0.7, -0.7, -0.7, -0.6, -0.7, 0.9, -0.7, 0.8, -0.8, -0.7, 0.0, -0.5, 0.1, 0.3, -0.5, 0.2, -0.7, -0.7, 0.8, -0.7, 0.2, 0.8, 0.3, 0.2, 0.2, -0.7, -0.6, -0.7, 0.9, 0.7, -0.7, -0.7, -0.7, -0.8, -0.7, -0.7, -0.8, -0.6, -0.7, 0.9, -0.7, -0.6, 0.2, -0.6, 0.8, -0.7, -0.6, -0.7, -0.8, -0.7, -0.6, -0.8, -0.6, -0.7, -0.7, -0.7, -0.7, -0.7, -0.7, -0.7]
axis2_3 = [0.7, 0.2, 0.1, 0.3, 0.9, 0.0, -0.2, 0.1, 0.8, 0.4, 0.3, 0.6, 0.5, 0.4, 0.1, -0.1, 0.7, -0.3, 0.5, 0.8, 0.3, 0.6, -0.7, 0.1, 0.9, 0.2, -0.1, 0.0, 0.7, 0.4, -0.2, -0.1, 0.0, 0.2, 0.8, 0.1, 0.1, -0.7, 0.0, 0.1, 0.2, 0.9, 0.3, 0.2, 0.0, 0.8, 0.7, 0.6, 0.7, 0.7, 0.8, 0.7, 0.7, 0.6, 0.5, 0.7, 0.6, 0.5, 0.9, 0.4, 0.3, 0.1, 0.6, 0.1, 0.6, 0.3, 0.2, 0.6, 0.4, 0.7, 0.4, 0.4, 0.6, 0.5, 0.2, 1.0, 0.3, 0.6, 0.7, 0.0, 0.0, 0.8, 0.7, 0.9, 0.1, 0.0, 0.1, 0.9, 0.0, 0.8, 0.6, 0.7, 0.6, 0.5, 0.6, 0.0, 0.1, 0.6, 0.2, 0.1, 0.0, 0.6, 0.5, 0.6, 0.6, 0.5, 0.5, 0.5, 0.3, 0.0, 0.5, 0.2, 0.6, 0.6, 0.5]
axis3_1 = [-0.7, 0.2, -0.8, 0.3, 0.7, -0.6, -0.8, -0.5, 0.5, 0.3, -0.5, 0.8, -0.2, 0.1, -0.7, -0.8, 0.6, -0.9, -0.4, 0.8, 0.2, -0.2, -0.8, -0.8, 0.9, -0.5, -0.6, -0.3, 0.7, -0.5, -0.8, -0.7, -0.5, -0.5, 0.7, 0.2, -0.1, -0.9, 0.6, -0.7, -0.8, 0.4, 0.3, 0.2, -0.8, 0.8, -0.7, 0.5, -0.9, 0.6, 0.2, 0.6, -1, 0.8, -1, 0.7, -0.6, 0.9, 0.8, 0.2, -0.1, -0.2, 0.8, 0.9, -0.7, 0.8, 0, 0, 0.9, 0.3, 0, 0.7, 0.9, -0.7, -0.8, 0.2, -0.7, 0.7, -0.8, 0.9, -0.8, 0.8, 0.9, -0.6, -1, 0.7, 0, -0.7, 0.7, 0, 0, 0, -0.8, -0.8, 0.7, -0.5, -0.8, -0.8, 0.2, 0, -0.8, 0.8, -0.8, -0.8, 0.8, -0.8, 0, 0.9, 0.7, 0, 0, 0.8]
axis3_2 = [-0.6, -0.2, -0.5, 0.0, 0.8, -0.7, -0.6, -0.3, 0.5, 0.2, -0.1, 0.8, 0.0, 0.3, -0.6, -0.5, 0.7, -0.7, -0.3, 0.9, 0.1, 0.4, -0.8, -0.5, 0.9, -0.6, -0.4, 0.0, 0.8, -0.4, -0.7, -0.5, -0.4, -0.3, 0.8, -0.2, -0.3, -0.8, -0.4, -0.6, -0.5, -0.8, 0.9, 0.0, -0.5, 0.9, -0.6, -0.6, -0.6, -0.6, -0.5, -0.6, 0.9, -0.6, 0.8, -0.8, -0.6, 0.1, -0.4, 0.0, 0.2, -0.4, 0.3, -0.6, -0.6, 0.8, -0.6, 0.1, 0.8, 0.2, 0.2, 0.2, -0.6, -0.5, -0.6, 0.9, 0.7, -0.6, -0.6, -0.6, -0.7, -0.6, -0.6, -0.7, -0.5, -0.6, 0.9, -0.6, -0.5, 0.3, -0.5, 0.8, -0.6, -0.5, -0.6, -0.7, -0.6, -0.5, -0.7, -0.5, -0.6, -0.6, -0.6, -0.6, -0.6, -0.6, -0.6]
axis3_3 = [-0.5, -0.1, -0.4, -0.2, 0.8, -0.3, -0.5, -0.4, 0.6, 0.2, -0.2, 0.7, -0.1, 0.3, -0.4, -0.5, 0.5, -0.6, -0.1, 0.9, 0.0, 0.4, -0.7, -0.3, 0.9, -0.2, -0.4, 0.0, 0.8, -0.2, -0.4, -0.4, -0.3, -0.2, 0.7, -0.1, -0.3, -0.7, -0.4, -0.4, -0.2, 0.6, 0.3, 0.0, -0.3, -0.8, -0.7, -0.6, -0.7, -0.7, -0.8, -0.7, -0.7, -0.6, -0.5, -0.7, -0.6, -0.5, 0.8, 0.3, 0.2, -0.2, 0.5, -0.2, -0.6, -0.3, -0.4, 0.4, 0.3, -0.7, -0.3, -0.3, -0.6, -0.5, -0.1, 0.9, -0.3, -0.6, -0.7, -0.4, -0.3, 0.7, 0.6, 0.8, -0.4, -0.3, -0.2, 1.0, -0.3, 0.7, -0.6, -0.7, -0.6, -0.5, -0.6, -0.3, -0.2, -0.6, -0.1, -0.3, -0.4, -0.5, -0.6, -0.4, -0.5, -0.5, -0.5, -0.4, -0.5, -0.5, -0.3, -0.4, -0.5, -0.3, -0.5, -0.5, -0.4]
axis4_1 = [-0.4, -0.2, -0.5, -0.3, 0.6, -0.7, -0.5, -0.4, 0.2, 0.7, -0.6, 0.5, -0.2, 0.1, -0.6, -0.5, 0.7, -0.8, -0.3, 0.9, 0.6, -0.3, -0.8, -0.5, 0.8, -0.4, -0.6, -0.2, 0.6, -0.3, -0.5, -0.5, -0.4, -0.6, 0.6, 0.6, -0.2, -0.9, 0.7, -0.5, -0.5, 0.3, 0.6, 0.6, -0.5, 0.9, -0.3, 0.6, -0.8, 0.7, 0.4, 0.7, -0.9, 0.9, -0.9, 0.7, -0.6, 0.9, 0.9, 0.4, 0, -0.2, 0.7, 0.8, -0.6, 0.9, 0, 0, 0.8, 0.5, 0, 0.6, 0.8, -0.6, -0.5, 0.3, -0.6, 0.7, -0.5, 0.9, -0.5, 0.8, 0.9, -0.4, -0.9, 0.7, 0, -0.5, 0.7, 0, 0, 0, -0.5, -0.5, 0.7, -0.4, -0.5, -0.5, 0.3, 0, -0.5, 0.9, -0.5, -0.5, 0.8, -0.5, 0, 0.9, 0.7, 0, 0, 0.8]
axis4_2 = [-0.3, -0.1, -0.4, -0.2, 0.6, -0.5, -0.4, -0.3, 0.4, 0.5, -0.2, 0.6, 0.0, 0.2, -0.5, -0.4, 0.7, -0.6, -0.2, 0.8, 0.3, 0.3, -0.7, -0.4, 0.7, -0.4, -0.3, 0.0, 0.6, -0.3, -0.5, -0.4, -0.3, -0.2, 0.6, -0.1, -0.2, -0.7, -0.3, -0.5, -0.4, -0.7, 0.7, 0.1, -0.4, 0.7, -0.5, -0.5, -0.5, -0.5, -0.4, -0.5, 0.7, -0.5, 0.7, -0.6, -0.5, 0.1, -0.3, 0.0, 0.2, -0.3, 0.3, -0.5, -0.5, 0.6, -0.5, 0.1, 0.7, 0.2, 0.2, 0.2, -0.5, -0.4, -0.5, 0.8, 0.6, -0.5, -0.5, -0.5, -0.6, -0.5, -0.5, -0.6, -0.4, -0.5, 0.8, -0.5, -0.4, 0.2, -0.4, 0.7, -0.5, -0.4, -0.5, -0.6, -0.5, -0.4, -0.6, -0.4, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5]
axis4_3 = [-0.3, 0.1, -0.2, -0.1, 0.6, -0.2, -0.3, -0.2, 0.4, 0.3, -0.1, 0.5, 0.2, 0.3, -0.3, -0.3, 0.6, -0.4, 0.0, 0.8, 0.1, 0.3, -0.6, -0.2, 0.7, -0.1, -0.3, 0.1, 0.7, 0.2, -0.2, -0.2, -0.1, 0.0, 0.6, -0.1, -0.2, -0.6, -0.2, -0.2, -0.1, 0.5, 0.4, 0.1, -0.2, -0.7, -0.6, -0.5, -0.6, -0.6, -0.7, -0.6, -0.6, -0.5, -0.4, -0.6, -0.5, -0.4, 0.8, 0.4, 0.3, -0.2, 0.6, -0.2, -0.5, -0.2, -0.3, 0.5, 0.4, -0.6, -0.2, -0.2, -0.5, -0.4, 0.0, 0.8, -0.2, -0.5, -0.6, -0.2, -0.2, 0.6, 0.5, 0.7, -0.3, -0.2, -0.1, 0.9, -0.2, 0.6, -0.5, -0.6, -0.5, -0.4, -0.5, -0.2, -0.1, -0.5, 0.0, -0.2, -0.3, -0.4, -0.5, -0.3, -0.4, -0.4, -0.4, -0.3, -0.4, -0.4, -0.2, -0.3, -0.4, -0.2, -0.4, -0.4, -0.3]
axis5_1 = [1, 0.5, -0.5, 0.3, 0.8, -0.6, -0.7, -0.2, 0.9, 0.6, -0.3, 0.9, 0.4, 0.2, -0.6, -0.7, 0.9, -0.9, -0.1, 1, 0.6, 0.1, -0.9, -0.5, 1, -0.3, -0.6, 0, 0.8, -0.2, -0.7, -0.6, -0.4, -0.3, 0.8, 0.6, 0, -0.9, 0.7, -0.6, -0.7, 0.5, 0.6, 0.6, -0.7, 1, 0, 0.8, -0.9, 0.8, 0.6, 0.9, -1, 1, -1, 0.9, -0.6, 1, 1, 0.4, -0.2, -0.1, 0.9, 1, -0.6, 1, 0, 0, 1, 0.6, 0, 0.9, 1, -0.6, -0.7, 0.2, -0.6, 0.9, -0.7, 1, -0.7, 1, 1, -0.5, -1, 0.9, 0, -0.6, 0.9, 0, 0, 0, -0.7, -0.7, 1, -0.4, -0.7, -0.7, 0.4, 0, -0.7, 1, -0.7, -0.7, 1, -0.7, 0, 1, 1, 0, 0, 1]
axis5_2 = [0.8, 0.2, -0.5, 0.1, 0.6, -0.4, -0.6, -0.1, 0.7, 0.3, 0.0, 0.5, 0.0, 0.2, -0.6, -0.5, 0.6, -0.7, -0.1, 0.8, 0.2, 0.4, -0.8, -0.5, 0.8, -0.6, -0.3, -0.1, 0.7, -0.2, -0.7, -0.5, -0.4, -0.3, 0.6, -0.2, -0.3, -0.8, -0.4, -0.6, -0.5, -0.8, 0.8, 0.1, -0.5, 0.8, -0.6, -0.6, -0.6, -0.6, -0.5, -0.6, 0.8, -0.6, 0.7, -0.7, -0.6, 0.0, -0.4, 0.0, 0.1, -0.4, 0.1, -0.6, -0.6, 0.7, -0.6, 0.1, 0.7, 0.2, 0.1, 0.1, -0.6, -0.4, -0.6, 0.8, 0.6, -0.6, -0.6, -0.6, -0.7, -0.6, -0.6, -0.7, -0.4, -0.6, 0.8, -0.6, -0.4, 0.1, -0.4, 0.7, -0.6, -0.4, -0.6, -0.7, -0.6, -0.4, -0.7, -0.4, -0.6, -0.6, -0.6, -0.6, -0.6, -0.6, -0.6]
axis5_3 = [0.8, 0.3, 0.2, 0.4, 0.9, 0.1, -0.2, 0.1, 0.7, 0.5, 0.4, 0.7, 0.6, 0.5, 0.2, -0.1, 0.8, -0.3, 0.6, 0.8, 0.4, 0.7, -0.8, 0.2, 0.9, 0.3, -0.2, 0.1, 0.8, 0.5, -0.1, -0.1, 0.0, 0.3, 0.9, 0.2, 0.2, -0.8, 0.1, 0.2, 0.3, 1.0, 0.4, 0.3, 0.1, 0.9, 0.8, 0.7, 0.8, 0.8, 0.9, 0.8, 0.8, 0.7, 0.6, 0.8, 0.7, 0.6, 1.0, 0.5, 0.4, 0.2, 0.7, 0.2, 0.7, 0.4, 0.3, 0.7, 0.5, 0.8, 0.5, 0.5, 0.7, 0.6, 0.3, 1.0, 0.4, 0.7, 0.8, 0.1, 0.1, 0.9, 0.8, 1.0, 0.2, 0.1, 0.2, 1.0, 0.1, 0.9, 0.7, 0.8, 0.7, 0.6, 0.7, 0.1, 0.2, 0.7, 0.3, 0.2, 0.1, 0.7, 0.6, 0.7, 0.7, 0.6, 0.6, 0.6, 0.4, 0.1, 0.6, 0.3, 0.7, 0.7, 0.6]
axis6_1 = [-0.8, -0.2, -0.9, -0.3, 1, -0.8, -0.9, -0.7, 0.6, 0.1, -0.5, 0.7, 0, 0.2, -0.8, -0.9, 0.5, -1, -0.4, 1, 0.2, -0.4, -1, -0.9, 1, -0.6, -0.8, -0.5, 0.8, -0.5, -0.9, -0.8, -0.7, -0.5, 1, 0.2, -0.4, -1, 0.7, -0.8, -0.9, 0.4, 0.5, 0.3, -0.9, 1, -0.6, 0.7, -1, 0.8, 0.3, 0.8, -1, 1, -1, 0.8, -0.7, 1, 1, 0.5, -0.3, -0.4, 0.8, 1, -0.8, 1, 0, 0, 1, 0.6, 0, 0.8, 1, -0.8, -0.9, 0.2, -0.7, 0.9, -0.9, 1, -0.9, 1, 1, -0.7, -1, 0.9, 0, -0.7, 0.9, 0, 0, 0, -0.9, -0.9, 1, -0.6, -0.9, -0.9, 0.4, 0, -0.9, 1, -0.9, -0.9, 1, -0.9, 0, 1, 1, 0, 0, 1]
axis6_2 = [-0.8, -0.3, -0.9, -0.2, 0.8, -0.8, -0.9, -0.7, 0.6, 0.1, -0.1, 0.5, 0.0, 0.2, -0.8, -0.8, 0.5, -0.9, -0.4, 0.9, 0.0, 0.3, -0.9, -0.8, 0.9, -0.8, -0.6, -0.2, 0.8, -0.6, -0.9, -0.8, -0.7, -0.5, 0.8, -0.4, -0.5, -0.9, -0.6, -0.8, -0.8, -0.9, 0.8, 0.0, -0.7, 0.9, -0.8, -0.8, -0.8, -0.8, -0.7, -0.8, 0.9, -0.8, 0.8, -0.9, -0.8, -0.1, -0.6, 0.0, 0.2, -0.6, 0.2, -0.8, -0.8, 0.8, -0.8, 0.0, 0.8, 0.1, 0.1, 0.1, -0.8, -0.7, -0.8, 0.9, 0.7, -0.8, -0.8, -0.8, -0.9, -0.8, -0.8, -0.9, -0.7, -0.8, 0.9, -0.8, -0.7, 0.0, -0.7, 0.8, -0.8, -0.7, -0.8, -0.9, -0.8, -0.7, -0.9, -0.7, -0.8, -0.8, -0.8, -0.8, -0.8, -0.8, -0.8]
axis6_3 = [-0.6, -0.2, -0.7, -0.1, 0.9, -0.5, -0.7, -0.6, 0.5, 0.1, -0.1, 0.3, -0.1, 0.2, -0.4, -0.7, 0.4, -0.7, -0.1, 0.8, 0.0, 0.3, -0.8, -0.5, 0.9, -0.3, -0.4, -0.2, 0.7, -0.2, -0.6, -0.6, -0.4, -0.3, 0.7, -0.2, -0.4, -0.8, -0.5, -0.4, -0.3, 0.1, 0.2, 0.0, -0.5, -0.9, -0.8, -0.7, -0.8, -0.8, -0.9, -0.8, -0.8, -0.7, -0.6, -0.8, -0.7, -0.6, 0.8, -0.2, 0.0, -0.3, 0.6, -0.3, -0.7, -0.4, -0.5, 0.6, 0.3, -0.8, -0.4, -0.4, -0.7, -0.6, -0.1, 0.8, -0.3, -0.7, -0.8, -0.5, -0.4, 0.5, 0.4, 0.8, -0.5, -0.4, -0.3, 0.9, -0.4, 0.6, -0.8, -0.8, -0.7, -0.6, -0.7, -0.4, -0.3, -0.7, -0.1, -0.3, -0.4, -0.5, -0.6, -0.4, -0.5, -0.5, -0.5, -0.4, -0.5, -0.5, -0.3, -0.4, -0.5, -0.3, -0.5, -0.5, -0.4]
axis7_1 = [0.6, 0.2, -0.4, 0.4, 0.9, -0.5, -0.6, -0.2, 0.8, 1, -0.3, 0.8, 0.3, 0.4, -0.5, -0.6, 0.7, -0.8, -0.1, 1, 1, 0.1, -0.8, -0.5, 0.9, -0.3, -0.5, 0, 0.9, -0.2, -0.6, -0.5, -0.2, -0.3, 0.9, 1, 0, -0.8, 0.8, -0.5, -0.6, 0.6, 0.7, 1, -0.6, 1, 0, 0.9, -0.8, 0.9, 0.5, 0.9, -0.9, 1, -0.9, 0.9, -0.5, 1, 1, 0.6, 0, 0.1, 0.8, 1, -0.5, 1, 0, 0, 0.9, 0.7, 0, 0.8, 1, -0.5, -0.6, 0.3, -0.5, 0.9, -0.6, 1, -0.6, 1, 1, -0.4, -0.9, 0.9, 0, -0.5, 0.9, 0, 0, 0, -0.6, -0.6, 1, -0.2, -0.6, -0.6, 0.5, 0, -0.6, 1, -0.6, -0.6, 1, -0.6, 0, 1, 1, 0, 0, 1]
axis7_2 = [0.8, 0.3, -0.6, 0.2, 0.9, -0.5, -0.7, -0.2, 0.7, 0.9, 0.1, 0.7, 0.0, 0.4, -0.7, -0.6, 0.8, -0.8, -0.1, 1.0, 0.9, 0.6, -0.9, -0.6, 0.9, -0.7, -0.4, 0.0, 0.9, -0.2, -0.8, -0.6, -0.5, -0.4, 0.9, -0.3, -0.4, -0.9, -0.5, -0.7, -0.7, -0.9, 0.9, 0.9, -0.6, 0.9, -0.7, -0.7, -0.7, -0.7, -0.6, -0.7, 0.9, -0.7, 0.9, -0.8, -0.7, 0.1, -0.5, 0.1, 0.9, -0.5, 0.9, -0.7, -0.7, 0.9, -0.7, 0.3, 0.9, 0.9, 0.9, 0.9, -0.7, -0.6, -0.7, 1.0, 0.9, -0.7, -0.7, -0.7, -0.8, -0.7, -0.7, -0.8, -0.6, -0.7, 1.0, -0.7, -0.6, 0.9, -0.6, 0.9, -0.7, -0.6, -0.7, -0.8, -0.7, -0.6, -0.8, -0.6, -0.7, -0.7, -0.7, -0.7, -0.7, -0.7, -0.7]
axis7_3 = [0.8, 0.4, 0.3, 0.5, 0.9, 0.2, -0.1, 0.2, 0.8, 1.0, 0.5, 0.7, 0.6, 0.6, 0.3, -0.2, 0.9, -0.4, 0.6, 0.9, 0.8, 0.7, -0.9, 0.3, 1.0, 0.4, -0.3, 0.2, 0.9, 0.6, -0.1, -0.1, 0.1, 0.4, 0.9, 0.3, 0.3, -0.9, 0.2, 0.3, 0.4, 1.0, 0.5, 0.4, 0.2, 1.0, 0.9, 0.8, 0.9, 0.9, 1.0, 0.9, 0.9, 0.8, 0.7, 0.9, 0.8, 0.7, 1.0, 0.6, 0.5, 0.3, 0.8, 0.3, 0.8, 0.5, 0.4, 0.8, 0.6, 0.9, 0.6, 0.6, 0.8, 0.7, 0.4, 1.0, 0.5, 0.8, 0.9, 0.2, 0.2, 1.0, 0.9, 1.0, 0.3, 0.2, 0.3, 1.0, 0.2, 0.9, 0.8, 0.9, 0.8, 0.7, 0.8, 0.2, 0.3, 0.8, 0.4, 0.3, 0.2, 0.8, 0.7, 0.8, 0.8, 0.7, 0.7, 0.7, 0.5, 0.2, 0.7, 0.4, 0.8, 0.8, 0.7]
axis8_1 = [0.7, 0.4, -0.4, 0.5, 1, -0.4, -0.5, -0.2, 0.8, 1, -0.2, 0.9, 0.4, 0.5, -0.4, -0.5, 0.8, -0.8, 0, 1, 1, 0.2, -0.8, -0.5, 1, -0.3, -0.5, 0.1, 0.9, -0.2, -0.5, -0.4, -0.2, -0.2, 1, 1, 0.1, -0.9, 0.8, -0.4, -0.5, 0.6, 0.8, 1, -0.5, 1, 0.1, 0.8, -0.9, 0.9, 0.6, 1, -1, 1, -1, 1, -0.4, 1, 1, 0.7, 0.1, 0.2, 0.9, 1, -0.4, 1, 0, 0, 1, 0.8, 0, 0.9, 1, -0.4, -0.5, 0.3, -0.4, 1, -0.5, 1, -0.5, 1, 1, -0.3, -0.9, 1, 0.1, -0.4, 1, 0, 0, 0, -0.5, -0.5, 1, -0.1, -0.5, -0.5, 0.6, 0, -0.5, 1, -0.5, -0.5, 1, -0.5, 0.1, 1, 1, 0.1, 0.1, 1]
axis8_2 = [0.7, 0.2, -0.5, 0.1, 0.8, -0.4, -0.6, -0.1, 0.6, 0.8, 0.0, 0.6, 0.0, 0.3, -0.6, -0.5, 0.7, -0.7, -0.1, 0.9, 0.7, 0.5, -0.8, -0.5, 0.8, -0.6, -0.3, 0.0, 0.8, -0.2, -0.7, -0.5, -0.4, -0.3, 0.8, -0.2, -0.3, -0.8, -0.4, -0.6, -0.5, -0.8, 0.8, 0.7, -0.5, 0.8, -0.6, -0.6, -0.6, -0.6, -0.5, -0.6, 0.8, -0.6, 0.8, -0.7, -0.6, 0.1, -0.4, 0.1, 0.8, -0.4, 0.8, -0.6, -0.6, 0.8, -0.6, 0.2, 0.8, 0.8, 0.8, 0.8, -0.6, -0.4, -0.6, 0.9, 0.8, -0.6, -0.6, -0.6, -0.7, -0.6, -0.6, -0.7, -0.4, -0.6, 0.9, -0.6, -0.4, 0.8, -0.4, 0.8, -0.6, -0.4, -0.6, -0.7, -0.6, -0.4, -0.7, -0.4, -0.6, -0.6, -0.6, -0.6, -0.6, -0.6, -0.6]
axis8_3 = [0.7, 0.3, 0.2, 0.4, 0.8, 0.1, -0.2, 0.2, 0.7, 0.9, 0.4, 0.6, 0.5, 0.5, 0.2, -0.3, 0.8, -0.4, 0.5, 0.9, 0.7, 0.6, -0.8, 0.2, 0.9, 0.3, -0.2, 0.1, 0.8, 0.5, -0.1, -0.2, 0.0, 0.3, 0.8, 0.2, 0.2, -0.8, 0.1, 0.2, 0.3, 0.9, 0.5, 0.3, 0.1, 0.9, 0.8, 0.7, 0.8, 0.8, 0.9, 0.8, 0.8, 0.7, 0.6, 0.8, 0.7, 0.6, 0.9, 0.5, 0.4, 0.2, 0.7, 0.2, 0.7, 0.4, 0.3, 0.7, 0.5, 0.8, 0.5, 0.5, 0.7, 0.6, 0.3, 0.9, 0.4, 0.7, 0.8, 0.1, 0.1, 0.9, 0.8, 0.9, 0.2, 0.1, 0.2, 0.9, 0.1, 0.8, 0.7, 0.8, 0.7, 0.6, 0.7, 0.1, 0.2, 0.7, 0.3, 0.2, 0.1, 0.7, 0.6, 0.7, 0.7, 0.6, 0.6, 0.6, 0.4, 0.1, 0.6, 0.3, 0.7, 0.7, 0.6]
axis9_1 = [0.5, 0.2, -0.6, 0.4, 1, -0.5, -0.7, -0.4, 0.7, 0.5, -0.4, 0.8, 0.3, 0.5, -0.5, -0.7, 0.6, -0.9, -0.2, 1, 0.4, 0.1, -0.9, -0.6, 1, -0.4, -0.6, -0.2, 0.8, -0.3, -0.7, -0.6, -0.4, -0.4, 1, 0.4, -0.2, -1, 0.7, -0.5, -0.7, 0.5, 0.6, 0.5, -0.7, 1, -0.4, 0.7, -0.9, 0.8, 0.4, 0.8, -1, 1, -1, 0.8, -0.5, 1, 1, 0.5, -0.1, -0.2, 0.8, 1, -0.5, 1, 0, 0, 1, 0.6, 0, 0.8, 1, -0.5, -0.7, 0.3, -0.5, 0.9, -0.6, 1, -0.7, 1, 1, -0.5, -1, 0.9, 0.1, -0.5, 0.9, 0, 0, 0, -0.7, -0.7, 1, -0.4, -0.7, -0.7, 0.5, 0, -0.7, 1, -0.7, -0.7, 1, -0.7, 0.2, 1, 1, 0.2, 0.2, 1]
axis9_2 = [0.7, 0.1, -0.8, 0.0, 0.9, -0.7, -0.8, -0.4, 0.5, 0.2, 0.0, 0.7, 0.0, 0.4, -0.7, -0.7, 0.6, -0.9, -0.2, 0.9, 0.1, 0.4, -0.9, -0.7, 0.9, -0.7, -0.5, 0.0, 0.8, -0.5, -0.8, -0.7, -0.6, -0.4, 0.9, -0.3, -0.4, -0.9, -0.5, -0.7, -0.7, -0.9, 0.9, 0.1, -0.6, 0.9, -0.7, -0.7, -0.7, -0.7, -0.6, -0.7, 0.9, -0.7, 0.8, -0.8, -0.7, 0.0, -0.5, 0.0, 0.3, -0.5, 0.7, -0.7, -0.7, 0.8, -0.7, 0.1, 0.9, 0.3, 0.3, 0.3, -0.7, -0.6, -0.7, 0.9, 0.8, -0.7, -0.7, -0.7, -0.8, -0.7, -0.7, -0.8, -0.6, -0.7, 0.9, -0.7, -0.6, 0.3, -0.6, 0.8, -0.7, -0.6, -0.7, -0.8, -0.7, -0.6, -0.8, -0.6, -0.7, -0.7, -0.7, -0.7, -0.7, -0.7, -0.7]
axis9_3 = [0.7, 0.3, 0.1, 0.4, 0.9, 0.0, -0.3, 0.2, 0.6, 0.5, 0.3, 0.7, 0.5, 0.6, 0.1, -0.4, 0.7, -0.5, 0.6, 0.8, 0.4, 0.6, -0.8, 0.1, 0.9, 0.2, -0.3, 0.0, 0.7, 0.5, -0.2, -0.3, -0.1, 0.2, 0.8, 0.1, 0.1, -0.8, 0.0, 0.1, 0.2, 0.8, 0.4, 0.3, 0.0, 0.9, 0.8, 0.7, 0.8, 0.8, 0.9, 0.8, 0.8, 0.7, 0.6, 0.8, 0.7, 0.6, 0.9, 0.4, 0.3, 0.1, 0.7, 0.1, 0.7, 0.3, 0.2, 0.7, 0.5, 0.8, 0.4, 0.4, 0.7, 0.6, 0.2, 1.0, 0.3, 0.7, 0.8, 0.0, 0.0, 0.9, 0.8, 1.0, 0.1, 0.0, 0.1, 1.0, 0.0, 0.8, 0.6, 0.7, 0.6, 0.5, 0.6, 0.0, 0.1, 0.6, 0.2, 0.1, 0.0, 0.6, 0.5, 0.6, 0.6, 0.5, 0.5, 0.5, 0.3, 0.0, 0.5, 0.2, 0.6, 0.6, 0.5]

# combine all the axis data into a list of lists
all_axes = [axis0_1, axis0_2, axis0_3, axis1_1, axis1_2, axis1_3, axis2_1, axis2_2, axis2_3, axis3_1, axis3_2, axis3_3, axis4_1, axis4_2, axis4_3, axis5_1, axis5_2, axis5_3, axis6_1, axis6_2, axis6_3, axis7_1, axis7_2, axis7_3, axis8_1, axis8_2, axis8_3, axis9_1, axis9_2, axis9_3]

print(len(all_axes))

30


In [13]:
# make sure every axis has the same length (100)
for a in range(len(all_axes)):
    axis = all_axes[a]
    all_axes[a] = axis[:100]

In [14]:
# for every group of three axes, calculate the mean and standard deviation for each value in the list
# store in dict
axis_dict = {}

# initialize the dict for each axis 
for i in range(10):
    axis_dict[f"axis{i}"] = {"mean": [], "std": []}

i = 0
while i < len(all_axes):
    # each group of three axes represents the three interpretations of one axis
    interpret1 = all_axes[i]
    interpret2 = all_axes[i+1]
    interpret3 = all_axes[i+2]

    axis_num = i // 3

    axis_dict[f"axis{axis_num}"]["mean"] = [statistics.mean([interpret1[j], interpret2[j], interpret3[j]]) for j in range(len(interpret1))]
    axis_dict[f"axis{axis_num}"]["std"] = [statistics.stdev([interpret1[j], interpret2[j], interpret3[j]]) for j in range(len(interpret1))]
    
    i += 3 # increment by 3 to get to the next group of three axes

In [15]:
# round each value in mean lists to 3 decimal places
# and only save mean std values
for axis in axis_dict:
    axis_dict[axis]["mean"] = [round(val, 3) for val in axis_dict[axis]["mean"]]
    axis_dict[axis]["std"] = statistics.mean(axis_dict[axis]["std"])

In [16]:
# print overall mean std
print(statistics.mean([axis_dict[axis]["std"] for axis in axis_dict]))

0.40803333122455593


Note: still some variability (all values fall between -1 and 1)
- mean sd did increase a bit (before: 0.32)

## comparing old and generated embeddings for first 10 axes

In [28]:
# create dataframe with columns 0-9
df_llm = pd.DataFrame(columns=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# use mean values to populate dataframe; column numbers are the axis numbers
for axis in range(len(axis_dict)):
    axis_title = f"axis{axis}"
    df_llm[axis] = axis_dict[axis_title]["mean"]

# add words as first column
df_llm.insert(0, "word", words)

df_llm.head()

Unnamed: 0,word,0,1,2,3,4,5,6,7,8,9
0,abstract,-0.933,-0.467,0.867,-0.6,-0.333,0.867,-0.733,0.733,0.7,0.633
1,aesthetic,-0.433,-0.167,0.167,-0.033,-0.067,0.333,-0.233,0.3,0.3,0.2
2,acrylic,-0.733,-0.367,-0.333,-0.567,-0.367,-0.267,-0.833,-0.233,-0.233,-0.433
3,artistry,-0.267,-0.2,0.233,0.033,-0.2,0.267,-0.2,0.367,0.333,0.267
4,animation,1.0,0.567,0.8,0.767,0.6,0.767,0.9,0.9,0.867,0.933


In [29]:
# compare values in each column (axis) of df_llm to original dataframe df
# store mean difference and std for each axis

# initialize dict to store mean difference and std for each axis
axis_diff_dict = {}

# initialize dict for each axis
for i in range(10):
    axis_diff_dict[f"axis{i}"] = {"mean_diff": [], "std_diff": []}

# for each axis, calculate mean difference and std
for axis in range(len(axis_diff_dict)):
    diffs = [abs(df_llm[axis][i] - df[axis][i]) for i in range(len(df_llm))]
    mean_diff = statistics.mean(diffs)
    std_diff = statistics.stdev(diffs)

    axis_diff_dict[f"axis{axis}"]["mean_diff"] = mean_diff
    axis_diff_dict[f"axis{axis}"]["std_diff"] = std_diff

# print results
print(axis_diff_dict)

{'axis0': {'mean_diff': 0.5327571161950632, 'std_diff': 0.4100664811753058}, 'axis1': {'mean_diff': 0.5166050121065375, 'std_diff': 0.3529606134695156}, 'axis2': {'mean_diff': 0.5081664438122333, 'std_diff': 0.3537342589331988}, 'axis3': {'mean_diff': 0.49242681473456124, 'std_diff': 0.3644715338619721}, 'axis4': {'mean_diff': 0.44363717948717946, 'std_diff': 0.2883780919648976}, 'axis5': {'mean_diff': 0.4253441826215022, 'std_diff': 0.3141262955185306}, 'axis6': {'mean_diff': 0.5813144927536231, 'std_diff': 0.42067238835753906}, 'axis7': {'mean_diff': 0.5898348923959827, 'std_diff': 0.4151669481767916}, 'axis8': {'mean_diff': 0.46398391923990506, 'std_diff': 0.3384734200749216}, 'axis9': {'mean_diff': 0.5235712274368232, 'std_diff': 0.3503883226996408}}


In [30]:
# print overall mean difference
overall_mean = statistics.mean([axis_diff_dict[axis]["mean_diff"] for axis in axis_diff_dict])
mean_uncertainty = overall_mean / 2
print(f"mean diff: {overall_mean} (uncertainty: {mean_uncertainty:.2f})")

# print overall mean std
overall_std = statistics.mean([axis_diff_dict[axis]["std_diff"] for axis in axis_diff_dict])
std_uncertainty = overall_std / 2
print(f"std of diffs: {overall_std} (uncertainty: {std_uncertainty:.2f})")

mean diff: 0.5077641280783411 (uncertainty: 0.25)
std of diffs: 0.3608438354232314 (uncertainty: 0.18)


Interesting... with more detailed descriptions, mean + std of diffs increased a bit

## cosine similarity

In [33]:
# import cosine similarity function
from sklearn.metrics.pairwise import cosine_similarity

In [40]:
# compare cosine similarity of original and synthesized embeddings
# ❓ question: should we be comparing by axis or word?

# try columns first (by axis)
# compute cosine similarity for each axis in df_llm and df
# store in list
axis_cos_sim_list = []

# also try by word (row)
# compute cosine similarity for each word in df_llm and df
# store in list 
word_cos_sim_list = []

for axis in range(len(axis_dict)):
    axis_title = f"axis{axis}"
    axis_cos_sim = cosine_similarity([df_llm[axis]], [df[axis]])
    axis_cos_sim = axis_cos_sim[0][0]
    axis_cos_sim_list.append(axis_cos_sim)

for word in range(len(df_llm)):
    # crop rows in df to 10 dimensions
    word_cos_sim = cosine_similarity([df_llm.iloc[word][1:]], [df.iloc[word][1:11]])
    word_cos_sim = word_cos_sim[0][0]
    word_cos_sim_list.append(word_cos_sim)

In [41]:
# see results
print(axis_cos_sim_list)
print(word_cos_sim_list)

[-0.003602043227383958, -0.16341921013153243, -0.17235485498635827, -0.0935585422593968, 0.029669460362598668, 0.09148389220452216, -0.11846114554109186, -0.21247530026750175, -0.019203245044967537, -0.11888954281012086]
[-0.2953101039312527, -0.3741989959961967, -0.15670158656761743, -0.4818266647199914, 0.0805700989115944, -0.6334966875556708, -0.12180455144432575, -0.2221732846689293, -0.014703929987126656, 0.08563457530130435, -0.16721813048776252, 0.08955094464477055, -0.30001403425589585, -0.43240577995831353, 0.1654936628569716, 0.6120678871224687, 0.14343733810879955, -0.15227112427319217, -0.36004930604677104, 0.3189442389072261, 0.5830190787917917, -0.3252876122806961, 0.27611285857729745, -0.6847511972043232, 0.26854027187873186, -0.30640289473404975, -0.4123024459428165, -0.09663704681343666, 0.030416541458387132, -0.06888445263392479, -0.5390577239629659, -0.25849028599404933, 0.20877605277765526, 0.3739310667316866, 0.1453925050541071, -0.18534251011823674, -0.34150721046

In [42]:
# take average of each list
axis_cos_sim_avg = statistics.mean(axis_cos_sim_list)
word_cos_sim_avg = statistics.mean(word_cos_sim_list)

# print results
print(f"axis cos sim avg: {axis_cos_sim_avg}")
print(f"word cos sim avg: {word_cos_sim_avg}")

axis cos sim avg: -0.07808105317012326
word cos sim avg: -0.1446461328745161


In [53]:
# also print max, min, and std
print(f"axis cos sim max: {max(axis_cos_sim_list)}")
print(f"axis cos sim min: {min(axis_cos_sim_list)}")
print(f"axis cos sim std: {statistics.stdev(axis_cos_sim_list)}")

print(f"word cos sim max: {max(word_cos_sim_list)}")
print(f"word cos sim min: {min(word_cos_sim_list)}")
print(f"word cos sim std: {statistics.stdev(word_cos_sim_list)}")

axis cos sim max: 0.09148389220452216
axis cos sim min: -0.21247530026750175
axis cos sim std: 0.09840029631197905
word cos sim max: 0.6700976132267067
word cos sim min: -0.7166797246141924
word cos sim std: 0.33015266138914245


Notes: oof, not very good 😅

## test accuracy

In [45]:
# new list of words to test
test_words = ['pointillism',
 'maquette',
 'gesso',
 'decoupage',
 'encaustic',
 'figurative',
 'grid',
 'hatching',
 'intaglio',
 'juxtapose',
 'kiln',
 'lithography',
 'monotype',
 'negative space',
 'ochre',
 'patina',
 'quinacridone',
 'raku',
 'sfumato',
 'tenebrism']

In [46]:
# get new embeddings
# get embedding for each class
# ❗️ note: I am averaging the embeddings for each word in the class
# ❓ question: are we interested in the final contextual embedding for each class? currently, we're looking at the final hidden state.
test_embeddings = []
for i in range(len(test_words)):
    input_ids = torch.tensor(tokenizer.encode(words[i])).unsqueeze(0)
    outputs = model(input_ids)
    last_hidden_states = outputs[0]
    # skip the first token, which is the [CLS] token, and skip the last token, which is the [SEP] token
    # average the rest of the tokens
    test_embeddings.append(last_hidden_states[0][1:-1].mean(dim=0).tolist())

In [47]:
# sanity check
print(len(test_embeddings))
print(len(test_embeddings[0]))

20
768


In [49]:
# round each val in embedding to 3 decimal places
test_embeddings = [list(np.around(np.array(e),3)) for e in test_embeddings]

# create string of all classes and their embeddings & save to text file
# ❗️ note: only taking first 10 axes for now due to context window length
with open("output.txt", "w") as text_file:
    for i in range(len(test_words)):
        class_str = f"{test_words[i]}: {test_embeddings[i][:10]}\n"
        text_file.write(class_str)

In [51]:
# convert to dataframe
df_test = pd.DataFrame(test_embeddings)
df_test.insert(0, 'word', test_words)

# sanity check
df_test.head()

Unnamed: 0,word,0,1,2,3,4,5,6,7,8,...,758,759,760,761,762,763,764,765,766,767
0,pointillism,0.286,0.395,-0.382,-0.242,0.407,0.01,-0.199,0.06,0.203,...,0.734,-0.119,0.476,0.058,0.239,-0.068,0.101,0.026,0.26,-0.127
1,maquette,0.249,0.566,-0.123,-0.117,0.271,0.083,0.036,-0.069,0.083,...,0.516,0.024,0.273,-0.004,0.114,-0.206,0.08,0.043,0.187,0.201
2,gesso,0.217,0.234,-0.019,0.087,0.777,-0.107,-0.655,0.548,-0.061,...,0.254,-0.524,0.176,0.345,0.337,-0.254,-0.499,-0.021,0.162,0.142
3,decoupage,0.147,0.263,-0.044,-0.078,0.66,0.147,-0.043,-0.021,-0.12,...,0.639,-0.331,0.187,0.041,-0.143,-0.029,0.11,-0.205,0.363,-0.308
4,encaustic,-0.006,0.449,-0.484,0.105,0.504,0.228,0.095,0.199,-0.532,...,0.55,-0.206,0.477,0.005,0.14,-0.114,0.244,-0.032,0.513,0.498


In [52]:
# normalize each column to be between -1 and 1
scaler = MinMaxScaler(feature_range=(-1,1))
df_test.iloc[:,1:] = scaler.fit_transform(df_test.iloc[:,1:])

# sanity check
df_test.head()

Unnamed: 0,word,0,1,2,3,4,5,6,7,8,...,758,759,760,761,762,763,764,765,766,767
0,pointillism,0.223359,0.318876,-0.433628,-0.040491,-0.568465,-0.184729,0.028759,-0.191697,0.756272,...,0.048035,0.387226,0.32459,0.111455,0.035758,-0.179551,0.398601,0.252101,0.033604,-0.247505
1,maquette,0.178808,0.593574,0.330383,0.266258,-0.850622,-0.004926,0.414955,-0.506716,0.469534,...,-0.427948,0.672655,-0.340984,-0.080495,-0.272503,-0.523691,0.34965,0.29972,-0.135574,0.407186
2,gesso,0.140277,0.060241,0.637168,0.766871,0.19917,-0.472906,-0.720624,1.0,0.125448,...,-1.0,-0.421158,-0.659016,1.0,0.277435,-0.643392,-1.0,0.120448,-0.193511,0.289421
3,decoupage,0.05599,0.106827,0.563422,0.361963,-0.043568,0.152709,0.285127,-0.389499,-0.015532,...,-0.159389,-0.035928,-0.622951,0.058824,-0.906289,-0.082294,0.41958,-0.394958,0.272306,-0.608782
4,encaustic,-0.128236,0.405622,-0.734513,0.811043,-0.36722,0.352217,0.511915,0.147741,-1.0,...,-0.353712,0.213573,0.327869,-0.052632,-0.208385,-0.294264,0.731935,0.089636,0.61993,1.0


In [54]:
# values assigned by chatgpt
# generated vals for each axis
# ❗️ note: these may not be the right length -- need to check/fix later
axis0_1 = [-0.8, -0.5, -0.7, -0.4, -0.6, -0.3, 0.2, -0.1, -0.5, 0.3, -0.4, -0.6, -0.2, 0.1, -0.3, -0.5, 0.4, -0.2, -0.7, -0.6]
axis0_2 = [-0.8, -0.2, -0.6, -0.4, -0.6, -0.1, 0, -0.2, -0.7, 0.2, -0.5, -0.6, -0.3, 0.1, -0.2, -0.1, 0.3, -0.4, -0.7, -0.8]
axis0_3 = [-0.6, 0, -0.2, 0, -0.4, -0.5, 0.5, -0.1, -0.4, 0.3, -0.3, -0.2, 0, 0.2, -0.2, -0.1, 0.6, 0.1, -0.5, -0.4]
axis1_1 = [-0.6, 0.2, -0.3, -0.2, 0.1, -0.5, -0.7, -0.6, -0.4, 0.3, -0.2, -0.5, -0.1, 0.4, -0.3, -0.4, 0.5, 0.2, -0.6, -0.5]
axis1_2 = [-0.3, -0.2, -0.1, -0.4, -0.2, -0.1, 0.1, -0.2, -0.3, 0.2, -0.2, -0.3, -0.1, 0.1, -0.1, -0.2, 0.3, -0.2, -0.3, -0.4]
axis1_3 = [-0.5, 0, -0.2, 0.1, -0.3, -0.1, 0.4, -0.6, -0.2, 0.3, 0, -0.3, 0, 0.2, 0, -0.1, 0.5, 0, -0.4, 0.6]
axis2_1 = [0.5, 0.3, 0.2, 0.6, 0.4, 0.1, 0.7, 0.5, 0.3, 0.8, 0.2, 0.1, 0.6, 0.9, 0.2, 0.1, 0.7, 0.4, 0.3, 0.2]
axis2_2 = [0.4, 0.2, 0.3, 0.5, 0.3, 0.1, 0.2, 0.1, 0.2, 0.6, 0.2, 0.3, 0.4, 0.5, 0.2, 0.1, 0.6, 0.3, 0.2, 0.3]
axis2_3 = [0.4, 0, 0, 0.5, 0.6, 0.1, 0.2, 0.3, 0.1, 0.7, 0, 0.2, 0.3, 0.5, 0, 0, 0.4, 0, 0.2, 0.3]
axis3_1 = [-0.7, -0.5, -0.6, -0.4, -0.5, -0.3, -0.8, -0.7, -0.6, 0.4, -0.5, -0.6, -0.4, 0.1, -0.3, -0.5, 0.3, -0.2, -0.7, -0.6]
axis3_2 = [-0.6, -0.4, -0.5, -0.3, -0.5, -0.2, -0.1, -0.3, -0.6, 0.2, -0.4, -0.5, -0.3, 0.1, -0.2, -0.3, 0.4, -0.4, -0.6, -0.7]
axis3_3 = [-0.5, 0, -0.4, 0.3, -0.6, -0.7, -0.2, -0.5, -0.3, 0.2, -0.2, -0.4, 0.1, 0.3, -0.3, -0.1, 0.4, 0, -0.6, -0.4]
axis4_1 = [-0.4, -0.2, -0.3, 0.2, -0.1, -0.5, -0.6, -0.5, -0.3, 0.5, -0.2, -0.4, -0.1, 0.3, -0.2, -0.3, 0.4, -0.1, -0.5, -0.4]
axis4_2 = [-0.3, -0.2, -0.1, 0.2, -0.2, -0.1, 0, -0.2, -0.3, 0.3, -0.2, -0.3, -0.1, 0.4, -0.1, -0.2, 0.3, -0.2, -0.3, -0.4]
axis4_3 = [-0.2, 0, 0, 0.3, -0.1, -0.4, 0.5, -0.3, -0.2, 0.4, 0, -0.1, 0.2, 0.6, 0, -0.1, 0.5, 0, -0.3, 0.7]
axis5_1 = [0.6, 0.4, 0.3, 0.7, 0.5, 0.2, 0.8, 0.6, 0.4, 0.9, 0.3, 0.2, 0.7, 1.0, 0.3, 0.2, 0.8, 0.5, 0.4, 0.3]
axis5_2 = [0.5, 0.3, 0.4, 0.6, 0.4, 0.2, 0.3, 0.2, 0.3, 0.7, 0.3, 0.4, 0.5, 0.6, 0.3, 0.2, 0.7, 0.4, 0.3, 0.4]
axis5_3 = [0.5, 0, 0.1, 0.6, 0.7, 0.4, 0.3, 0.2, 0.3, 0.8, 0.1, 0.2, 0.4, 0.6, 0, 0.1, 0.5, 0, 0.3, 0.2]
axis6_1 = [-0.7, -0.5, -0.6, -0.4, -0.6, -0.3, -0.8, -0.7, -0.6, 0.3, -0.5, -0.6, -0.4, 0.2, -0.3, -0.5, 0.4, -0.2, -0.7, -0.6]
axis6_2 = [-0.7, -0.5, -0.6, -0.4, -0.6, -0.3, -0.2, -0.5, -0.7, 0.3, -0.5, -0.6, -0.4, 0.2, -0.3, -0.4, 0.5, -0.5, -0.7, -0.8]
axis6_3 = [-0.7, -0.5, -0.6, -0.3, -0.8, -0.4, 0.2, -0.2, -0.7, 0.4, -0.6, -0.5, 0.1, 0.3, -0.1, -0.2, 0.6, 0, -0.5, -0.3]
axis7_1 = [0.6, 0.4, 0.3, 0.7, 0.5, 0.2, 0.8, 0.6, 0.4, 0.9, 0.3, 0.5, 0.7, 1.0, 0.3, 0.2, 0.8, 0.5, 0.4, 0.3]
axis7_2 = [0.6, 0.4, 0.5, 0.7, 0.5, 0.3, 0.4, 0.3, 0.5, 0.8, 0.4, 0.5, 0.6, 0.7, 0.4, 0.3, 0.8, 0.5, 0.5, 0.6]
axis7_3 = [0.5, 0, 0.1, 0.6, 0.7, 0.4, 0.3, 0.2, 0.3, 0.8, 0.1, 0.2, 0.4, 0.6, 0, 0.1, 0.5, 0, 0.3, 0.2]
axis8_1 = [0.6, 0.4, 0.3, 0.7, 0.5, 0.2, 0.8, 0.6, 0.4, 0.9, 0.3, 0.5, 0.7, 1.0, 0.3, 0.2, 0.8, 0.5, 0.4, 0.3]
axis8_2 = [0.5, 0.3, 0.4, 0.6, 0.4, 0.2, 0.3, 0.2, 0.4, 0.7, 0.3, 0.4, 0.5, 0.6, 0.3, 0.2, 0.7, 0.4, 0.4, 0.5]
axis8_3 = [0.4, 0, 0.2, 0.5, 0.6, 0.3, 0.1, 0.3, 0.2, 0.7, 0, 0.3, 0.4, 0.5, 0, 0, 0.4, 0, 0.2, 0.3]
axis9_1 = [0.5, 0.3, 0.4, 0.6, 0.5, -0.1, 0.7, 0.5, 0.4, 0.8, 0.2, 0.3, 0.6, -0.2, 0.1, 0.3, 0.7, 0.4, 0.5, 0.4]
axis9_2 = [0.4, 0.2, 0.3, 0.5, 0.3, 0.1, 0.2, 0.1, 0.3, 0.6, 0.2, 0.3, 0.4, 0.5, 0.2, 0.1, 0.6, 0.3, 0.2, 0.4]
axis9_3 = [0.3, 0, 0.2, 0.5, 0.4, 0.1, 0.6, 0.2, 0.1, 0.7, 0, 0.3, 0.5, 0.6, -0.1, 0.1, 0.4, 0, 0.3, 0.2]

# combine all the axis data into a list of lists
all_axes = [axis0_1, axis0_2, axis0_3, axis1_1, axis1_2, axis1_3, axis2_1, axis2_2, axis2_3, axis3_1, axis3_2, axis3_3, axis4_1, axis4_2, axis4_3, axis5_1, axis5_2, axis5_3, axis6_1, axis6_2, axis6_3, axis7_1, axis7_2, axis7_3, axis8_1, axis8_2, axis8_3, axis9_1, axis9_2, axis9_3]

print(len(all_axes))

30


In [55]:
# make sure every axis has the same length (20)
for a in range(len(all_axes)):
    axis = all_axes[a]
    all_axes[a] = axis[:20]

In [56]:
# for every group of three axes, calculate the mean and standard deviation for each value in the list
# store in dict
axis_dict = {}

# initialize the dict for each axis 
for i in range(10):
    axis_dict[f"axis{i}"] = {"mean": [], "std": []}

i = 0
while i < len(all_axes):
    # each group of three axes represents the three interpretations of one axis
    interpret1 = all_axes[i]
    interpret2 = all_axes[i+1]
    interpret3 = all_axes[i+2]

    axis_num = i // 3

    axis_dict[f"axis{axis_num}"]["mean"] = [statistics.mean([interpret1[j], interpret2[j], interpret3[j]]) for j in range(len(interpret1))]
    axis_dict[f"axis{axis_num}"]["std"] = [statistics.stdev([interpret1[j], interpret2[j], interpret3[j]]) for j in range(len(interpret1))]
    
    i += 3 # increment by 3 to get to the next group of three axes

In [57]:
# round each value in mean lists to 3 decimal places
# and only save mean std values
for axis in axis_dict:
    axis_dict[axis]["mean"] = [round(val, 3) for val in axis_dict[axis]["mean"]]
    axis_dict[axis]["std"] = statistics.mean(axis_dict[axis]["std"])

# print overall mean std
print(statistics.mean([axis_dict[axis]["std"] for axis in axis_dict]))

0.15485851551435226


smaller std than training data (0.41)

In [59]:
# compare original test embeddings to the generated embeddings
# create dataframe with columns 0-9
df_test_llm = pd.DataFrame(columns=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# use mean values to populate dataframe; column numbers are the axis numbers
for axis in range(len(axis_dict)):
    axis_title = f"axis{axis}"
    df_test_llm[axis] = axis_dict[axis_title]["mean"]

# add words as first column
df_test_llm.insert(0, "word", test_words)

df_test_llm.head()

Unnamed: 0,word,0,1,2,3,4,5,6,7,8,9
0,pointillism,-0.733,-0.467,0.433,-0.6,-0.3,0.533,-0.7,0.567,0.5,0.4
1,maquette,-0.233,0.0,0.167,-0.3,-0.133,0.233,-0.5,0.267,0.233,0.167
2,gesso,-0.5,-0.2,0.167,-0.5,-0.133,0.267,-0.6,0.3,0.3,0.3
3,decoupage,-0.267,-0.167,0.533,-0.133,0.233,0.633,-0.367,0.667,0.6,0.533
4,encaustic,-0.533,-0.133,0.433,-0.533,-0.133,0.533,-0.667,0.567,0.5,0.4


In [60]:
# compare values in each column (axis) of df_llm to original dataframe df
# store mean difference and std for each axis

# initialize dict to store mean difference and std for each axis
axis_diff_dict = {}

# initialize dict for each axis
for i in range(10):
    axis_diff_dict[f"axis{i}"] = {"mean_diff": [], "std_diff": []}

# for each axis, calculate mean difference and std
for axis in range(len(axis_diff_dict)):
    diffs = [abs(df_test_llm[axis][i] - df_test[axis][i]) for i in range(len(df_test_llm))]
    mean_diff = statistics.mean(diffs)
    std_diff = statistics.stdev(diffs)

    axis_diff_dict[f"axis{axis}"]["mean_diff"] = mean_diff
    axis_diff_dict[f"axis{axis}"]["std_diff"] = std_diff

# print results
print(axis_diff_dict)

{'axis0': {'mean_diff': 0.6754966887417219, 'std_diff': 0.3583434990330248}, 'axis1': {'mean_diff': 0.4916833333333334, 'std_diff': 0.3834830105335782}, 'axis2': {'mean_diff': 0.5403995575221239, 'std_diff': 0.3868672607479474}, 'axis3': {'mean_diff': 0.6651049079754602, 'std_diff': 0.40365020321555467}, 'axis4': {'mean_diff': 0.40098485477178425, 'std_diff': 0.24555193552953924}, 'axis5': {'mean_diff': 0.5576098522167489, 'std_diff': 0.33138590815560787}, 'axis6': {'mean_diff': 0.6675563681183236, 'std_diff': 0.39243586794553603}, 'axis7': {'mean_diff': 0.6640804639804639, 'std_diff': 0.4084987757670915}, 'axis8': {'mean_diff': 0.6016786140979689, 'std_diff': 0.3866735927781845}, 'axis9': {'mean_diff': 0.5840078986587182, 'std_diff': 0.40683836259275175}}


In [61]:
# print overall mean difference
overall_mean = statistics.mean([axis_diff_dict[axis]["mean_diff"] for axis in axis_diff_dict])
mean_uncertainty = overall_mean / 2
print(f"mean diff: {overall_mean} (uncertainty: {mean_uncertainty:.2f})")

# print overall mean std
overall_std = statistics.mean([axis_diff_dict[axis]["std_diff"] for axis in axis_diff_dict])
std_uncertainty = overall_std / 2
print(f"std of diffs: {overall_std} (uncertainty: {std_uncertainty:.2f})")

mean diff: 0.5848602539416647 (uncertainty: 0.29)
std of diffs: 0.3703728416298816 (uncertainty: 0.19)


Higher mean and std oops

In [62]:
# try cosine similarity too
# compare cosine similarity of original and synthesized embeddings
# ❓ question: should we be comparing by axis or word?

# try columns first (by axis)
# compute cosine similarity for each axis in df_llm and df
# store in list
axis_cos_sim_list = []

# also try by word (row)
# compute cosine similarity for each word in df_llm and df
# store in list 
word_cos_sim_list = []

for axis in range(len(axis_dict)):
    axis_title = f"axis{axis}"
    axis_cos_sim = cosine_similarity([df_test_llm[axis]], [df_test[axis]])
    axis_cos_sim = axis_cos_sim[0][0]
    axis_cos_sim_list.append(axis_cos_sim)

for word in range(len(df_test_llm)):
    # crop rows in df to 10 dimensions
    word_cos_sim = cosine_similarity([df_test_llm.iloc[word][1:]], [df_test.iloc[word][1:11]])
    word_cos_sim = word_cos_sim[0][0]
    word_cos_sim_list.append(word_cos_sim)

In [63]:
# take average of each list
axis_cos_sim_avg = statistics.mean(axis_cos_sim_list)
word_cos_sim_avg = statistics.mean(word_cos_sim_list)

# print results
print(f"axis cos sim avg: {axis_cos_sim_avg}")
print(f"word cos sim avg: {word_cos_sim_avg}")

# also print max, min, and std
print(f"axis cos sim max: {max(axis_cos_sim_list)}")
print(f"axis cos sim min: {min(axis_cos_sim_list)}")
print(f"axis cos sim std: {statistics.stdev(axis_cos_sim_list)}")

print(f"word cos sim max: {max(word_cos_sim_list)}")
print(f"word cos sim min: {min(word_cos_sim_list)}")
print(f"word cos sim std: {statistics.stdev(word_cos_sim_list)}")

axis cos sim avg: -0.16115959248552528
word cos sim avg: -0.2006675040543418
axis cos sim max: 0.4463631828605409
axis cos sim min: -0.5987086488286419
axis cos sim std: 0.3029051434591887
word cos sim max: 0.3961099288817471
word cos sim min: -0.6635811879790965
word cos sim std: 0.29855974547635933


hmm... yeah the cosine similarity is not great

## grouping repeatitive axes

Initial attempts to group axes with semantically similar/redundant interpretations. Here's the prompt I used:

```
Given this list: 
# insert here

Please produce a dictionary that groups semantically similar/redundant items together. Remember that order matters (the phrase on the left vs. right of "vs")-- so even if two items have the similar phrases semantically, unless they appear in the same general order, they shouldn't be grouped together. Your output should look like this:

{group 1: [item, item, ...], group 2: [item, item, ...], etc.}

But come up with a short phrase to name each group. Try to reduce the number of items in the dictionary from the original list (i.e., since the original list had 10 items, your dictionary should have less).

Please briefly explain your groupings at the end. Remember to format your answer as requested above and please make sure the order of phrases matches in your groupings!
```

In [31]:
# results
groups = {
    "Traditional vs Modern Art Forms": [
        'traditional or classical art forms and techniques vs modern or contemporary art mediums and practices',
        'traditional, physical art mediums and tools vs modern, innovative, conceptual art methods',
        'traditional, classic, monochromatic art styles vs modern, innovative, colorful art techniques'
    ],
    "Artistic Elements and Movements": [
        'basic, linear, and detailed artistic elements vs bold, expressive, and modern art styles and movements',
        'structured, traditional, historical art styles vs imaginative, creative, innovative art expressions',
        'realistic, serious, romantic art forms vs vibrant, expressive, satirical art styles'
    ],
    "Abstract vs Traditional Art": [
        'abstract, unconventional, avant-garde art vs classical, traditional, technique-focused art styles',
        'abstract, modernist, emotionally expressive art vs concrete, traditional, technical art elements'
    ],
    "Expression vs Structure in Art": [
        'contemporary, expressive, vibrant art vs structured, subdued, classic art forms',
        'modern, innovative, institutional art movements vs classic, personal, traditional art concepts'
    ]
}


notes: 
- groups seem reasonable
- ❓ question: do we care about order in the interpretation? e.g., if one axis encodes negative vs. positive and another encodes postive vs. negative, they likely target the same aspect, but in opposite directions.
    - i'm currently taking order into account, but could remove that part. 
    - w/o order, would probably be able to reduce # of groups more(?)