In [None]:
# `plt` must be the pyplot interface: the plotting cells call plt.figure(),
# plt.text(), plt.tight_layout() and plt.show(), none of which are callable on
# the top-level `matplotlib` package.
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

# Load the 'anon' sheet of the game workbook. Later cells read a 'Names'
# column and round columns labelled 1-5 from this frame.
tele_df = pd.read_excel('telestrations-data.xlsx', sheet_name='anon')

In [None]:
# Preview the first two rows of the loaded sheet.
tele_df.head(2)

Adding position column to count order using the index

# Data Transformation

### Adding a position column
The position column represents the order that notebooks follow (assuming that the natural order of the rows in the data corresponds with the Telestrations notebook path)

In [None]:
# reset_index() exposes the current row labels as an 'index' column; those
# labels become each notebook's 'Position' in the passing order.
row_labels = tele_df.reset_index()['index']
tele_df['Position'] = row_labels
tele_df

In [None]:
# List the column labels; the melt step below selects columns by these labels.
tele_df.columns

### Melting the data for better usability

In [None]:
# Reshape from one column per round (1-5) to one row per (notebook, round):
# the round label goes into 'Round' and the cell text into 'Prompt'.
melted_tele_df = tele_df.melt(
    id_vars=['Names', 'Position'],
    value_vars=[1, 2, 3, 4, 5],
    var_name='Round',
    value_name='Prompt',
)
melted_tele_df

Our 'Names' column is a little misleading: each name really just corresponds to whoever started with the notebook, so we'll rename this column to 'Notebook'.

In [None]:
# 'Names' holds whoever started each notebook, so relabel the column accordingly.
notebook_column_names = {'Names': 'Notebook'}
melted_tele_df = melted_tele_df.rename(columns=notebook_column_names)

## Adding columns to support our comparisons

### Finding the Drawer of 'Prompt'

In [None]:
max_position = melted_tele_df['Position'].max()

# 'Position' of the person that drew 'Prompt': one seat past the notebook's
# starting position, plus two more seats for every earlier round, wrapping
# around the table (positions run 0..max_position).
melted_tele_df['drawer_position'] = melted_tele_df.apply(
    lambda row: (row['Position'] + 1 + 2 * (row['Round'] - 1)) % (max_position + 1),
    axis=1,
)

# Build the position -> name table once from the Round 1 rows instead of
# re-filtering the whole frame for every row. setdefault keeps the first match
# per position, mirroring the previous `.iloc[0]`; a position with no Round 1
# row still fails loudly (KeyError).
round_one_rows = melted_tele_df[melted_tele_df['Round'] == 1]
name_by_position = {}
for position, name in zip(round_one_rows['Position'], round_one_rows['Notebook']):
    name_by_position.setdefault(position, name)

# Actual name of the person that drew 'Prompt'
melted_tele_df['Drawer'] = [
    name_by_position[position] for position in melted_tele_df['drawer_position']
]
melted_tele_df

### Finding the Guesser of the drawing of 'Prompt'

In [None]:
# 'Position' of the person that guessed the drawer's rendition of 'Prompt':
# two seats past the notebook's starting position, plus two more seats for
# every earlier round, wrapping around the table. Relies on `max_position`
# computed in the drawer cell.
melted_tele_df['guesser_position'] = melted_tele_df.apply(
    lambda row: (row['Position'] + 2 + 2 * (row['Round'] - 1)) % (max_position + 1),
    axis=1,
)

# Build the position -> name table once from the Round 1 rows instead of
# re-filtering the whole frame for every row. setdefault keeps the first match
# per position, mirroring the previous `.iloc[0]`; a position with no Round 1
# row still fails loudly (KeyError).
guesser_round_one_rows = melted_tele_df[melted_tele_df['Round'] == 1]
guesser_name_by_position = {}
for position, name in zip(guesser_round_one_rows['Position'], guesser_round_one_rows['Notebook']):
    guesser_name_by_position.setdefault(position, name)

# Actual name of the person that guessed the drawer's rendition of 'Prompt'
melted_tele_df['Guesser'] = [
    guesser_name_by_position[position] for position in melted_tele_df['guesser_position']
]
melted_tele_df

In [None]:
# Inspect the result: highest position first, rounds in ascending order within each.
melted_tele_df.sort_values(
    by=['Position', 'Round'],
    ascending=[False, True],
)

### Finding the Guesser's Guess

In [None]:
max_round = melted_tele_df['Round'].max()

# The guess for a prompt is whatever was written in the same notebook in the
# following round; -1 marks the final round, which has no following entry.
melted_tele_df['Next Round'] = melted_tele_df.apply(
    lambda row: row['Round'] + 1 if row['Round'] < max_round else -1,
    axis=1,
)

# Build the (Notebook, Round) -> Prompt table once instead of re-filtering the
# whole frame for every row. setdefault keeps the first match per key,
# mirroring the previous `.iloc[0]`; a missing key still fails loudly (KeyError).
prompt_by_notebook_round = {}
for notebook, round_number, prompt in zip(
    melted_tele_df['Notebook'], melted_tele_df['Round'], melted_tele_df['Prompt']
):
    prompt_by_notebook_round.setdefault((notebook, round_number), prompt)

# Final-round rows get an empty string because nothing follows them.
melted_tele_df['Guess'] = [
    prompt_by_notebook_round[(notebook, next_round)] if next_round != -1 else ''
    for notebook, next_round in zip(melted_tele_df['Notebook'], melted_tele_df['Next Round'])
]
melted_tele_df

Great! Now we have the necessary data to make our calculations. Before we do that we're first going to create a DataFrame that removes unnecessary columns

In [None]:
# Normalise the text on both sides of the comparison: strip apostrophes and
# lower-case, so e.g. "Bird's Nest" and "birds nest" become the same string.
def _drop_apostrophes_and_lower(text):
    """Return `text` with every apostrophe removed, converted to lower case."""
    return text.replace("'", "").lower()

for text_column in ('Prompt', 'Guess'):
    melted_tele_df[text_column] = melted_tele_df[text_column].apply(_drop_apostrophes_and_lower)

In [None]:
# Keep only the columns needed for scoring, and drop the final round: its
# 'Guess' is empty because nothing follows it.
# .copy() makes this an independent frame; later cells add columns to it, and
# assigning into a filtered slice of melted_tele_df triggers
# SettingWithCopyWarning.
scoring_columns = ['Notebook', 'Round', 'Drawer', 'Guesser', 'Prompt', 'Guess']
tele_df_clean = melted_tele_df.loc[melted_tele_df['Round'] < max_round, scoring_columns].copy()

# Finding the similarity between prompts and guesses

### Load Embeddings

In [None]:
import embeddings

# Register every distinct prompt and guess with the project-local `embeddings`
# module, then load the resulting lookup (indexed by text in later cells).
for text_column in ('Prompt', 'Guess'):
    distinct_texts = tele_df_clean[text_column].unique().tolist()
    embeddings.add_embeddings(distinct_texts)

prompt_embeddings = embeddings.load_embeddings()

In [None]:
def _prompt_guess_similarity(row):
    """Cosine similarity between the embeddings of a row's 'Prompt' and 'Guess'."""
    prompt_vector = prompt_embeddings[row['Prompt']]
    guess_vector = prompt_embeddings[row['Guess']]
    return embeddings.get_cosine_similarity(prompt_vector, guess_vector)

tele_df_clean['cosine_similarity'] = tele_df_clean.apply(_prompt_guess_similarity, axis=1)

# Show the least similar prompt/guess pairs first.
tele_df_clean.sort_values('cosine_similarity')

In [None]:
# Summary statistics of the similarities; used to pick the scoring floor below.
tele_df_clean['cosine_similarity'].describe()

## Scoring

We can see that bad guesses still land at around 0.75 cosine similarity, so the new scale treats 0.75 as the floor and 1 as the ceiling: similarities at or below 0.75 score 0, a similarity of 1 scores 10, and everything in between is rescaled onto that 0 to 10 range.

### Linear Scoring

In [None]:
# Linear score: similarities at or below `floor` score 0; above it the score
# rises in a straight line, reaching `max_score` at a similarity of 1.
floor = 0.75
max_score = 10

def _linear_score(similarity):
    """Map one cosine similarity onto the linear 0..max_score scale."""
    if similarity > floor:
        return max_score * ((similarity - floor) / (1 - floor))
    return 0

tele_df_clean['Linear_Score'] = tele_df_clean['cosine_similarity'].apply(_linear_score)

sns.jointplot(x=tele_df_clean['cosine_similarity'], y=tele_df_clean['Linear_Score'])

### Logarithmic Scoring

In [None]:
# Logarithmic score

floor = 0.75
max_score = 10

def compute_a(b, floor=0.75, max_score=10):
    """Return the scale factor `a` for the curve a * log(b * (x - floor) + 1).

    `a` is chosen so that the curve equals `max_score` at x = 1.

    Parameters
    ----------
    b : float
        Curvature of the logarithm; larger values rise faster just above `floor`.
    floor : float, default 0.75
        Similarity at which the score is 0.
    max_score : float, default 10
        Score assigned to a similarity of 1.
    """
    # Previously hard-coded as 10 / log(b * 0.25 + 1), which silently went out
    # of sync with `floor` / `max_score` whenever either was changed.
    return max_score / np.log(b * (1 - floor) + 1)

b = 50
a = compute_a(b, floor=floor, max_score=max_score)

def _log_score(similarity):
    """Map one cosine similarity onto the logarithmic scale; 0 at or below `floor`."""
    if similarity > floor:
        return a * np.log(b * (similarity - floor) + 1)
    return 0

tele_df_clean['Log_Score'] = tele_df_clean['cosine_similarity'].apply(_log_score)
sns.jointplot(x=tele_df_clean['cosine_similarity'], y=tele_df_clean['Log_Score'])
tele_df_clean

### Logistic Scoring

In [None]:
import numpy as np

floor = 0.75
max_score = 10
k = 40  # Adjust for steeper or more gradual transitions
x_0 = 0.875  # Midpoint

def logistic(x):
    """Raw logistic curve with height `max_score`, steepness `k`, midpoint `x_0`."""
    return max_score / (1 + np.exp(-k*(x - x_0)))

def normalized_score(x, floor=0.75, max_val=1, max_score=10):
    """Rescale the logistic curve so `floor` maps to 0 and `max_val` to `max_score`.

    Parameters
    ----------
    x : float
        Cosine similarity to score.
    floor : float, default 0.75
        Similarity below which the score is 0.
    max_val : float, default 1
        Similarity that receives the full `max_score`.
    max_score : float, default 10
        Top of the output scale.

    Returns
    -------
    0 when `x < floor`; otherwise the logistic value at `x`, shifted and
    stretched onto 0..max_score.
    """
    if x < floor:
        return 0
    lowest = logistic(floor)
    highest = logistic(max_val)
    return (logistic(x) - lowest) / (highest - lowest) * max_score

def compute_score(x):
    """Logistic score for one cosine similarity, using the cell's constants."""
    # Pass the similarity bounds themselves. The previous version passed
    # logistic(floor) and logistic(1), which normalized_score then fed through
    # logistic() a second time. That left the curve effectively un-normalised:
    # about 0.07 at the floor instead of 0, and a cut-off near 0.07 instead of 0.75.
    return normalized_score(x, floor=floor, max_val=1, max_score=max_score)

def _arrow_label(row, source, target):
    """Format two cells of `row` as '<source value>-><target value>'."""
    return f'{row[source]}->{row[target]}'

tele_df_clean['Logistic_Score'] = tele_df_clean['cosine_similarity'].apply(compute_score)

# Human-readable labels used to annotate and colour the scatter plot below.
tele_df_clean['Prompt->Guess'] = tele_df_clean.apply(
    _arrow_label, axis=1, args=('Prompt', 'Guess'))
tele_df_clean['Drawer->Guesser'] = tele_df_clean.apply(
    _arrow_label, axis=1, args=('Drawer', 'Guesser'))

sns.jointplot(x=tele_df_clean['cosine_similarity'], y=tele_df_clean['Logistic_Score'])


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from adjustText import adjust_text

# One large figure; the scatter and its labels are drawn on the same axes.
fig, ax = plt.subplots(figsize=(15, 10))

# Scatter of similarity against score, coloured by drawer->guesser pair.
sns.scatterplot(x=tele_df_clean['cosine_similarity'],
                y=tele_df_clean['Logistic_Score'],
                hue=tele_df_clean['Drawer->Guesser'],
                palette='hls',
                ax=ax)

# The first collection on the axes holds the scatter points; reuse each
# point's face colour for its text label.
point_colors = ax.collections[0].get_facecolor()

point_rows = zip(tele_df_clean['cosine_similarity'],
                 tele_df_clean['Logistic_Score'],
                 tele_df_clean['Prompt->Guess'])
texts = [
    ax.text(similarity, score, label,
            horizontalalignment='left',
            size='small',
            color=point_colors[row_number])
    for row_number, (similarity, score, label) in enumerate(point_rows)
]

# Nudge the labels apart to minimize overlaps
adjust_text(texts)

# Show the plot
plt.tight_layout()
plt.show()





# import seaborn as sns
# import matplotlib.pyplot as plt
# from adjustText import adjust_text
#
# # Adjust the figure size
# plt.figure(figsize=(15, 10))
#
# # Create the scatter plot
# ax = sns.scatterplot(x=tele_df_clean['cosine_similarity'],
#                      y=tele_df_clean['Logistic_Score'],
#                      hue=tele_df_clean['Drawer'])
#
# texts = []
# for line in range(0, tele_df_clean.shape[0]):
#     texts.append(ax.text(tele_df_clean['cosine_similarity'].iloc[line],
#                          tele_df_clean['Logistic_Score'].iloc[line],
#                          tele_df_clean['Prompt->Guess'].iloc[line],
#                          horizontalalignment='left',
#                          size='small',
#                          color='black'))
#
# # Adjust text to minimize overlaps
# adjust_text(texts)
#
# # Show the plot
# plt.tight_layout()
# plt.show()


In [None]:
# The logistic curve is the scoring scheme we keep: drop the two alternatives
# and expose it simply as 'Score'.
without_alternative_scores = tele_df_clean.drop(columns=['Linear_Score', 'Log_Score'])
tele_df_final = without_alternative_scores.rename(columns={'Logistic_Score': 'Score'})
tele_df_final.sort_values(by=['Notebook', 'Round'])

# Final Scoreboard

In [None]:
# How much more drawing counts than guessing in the composite score.
drawing_guessing_weight = 2

# Total score each person earned as a drawer and as a guesser.
drawer_scores = tele_df_final.groupby('Drawer')['Score'].sum()
guesser_scores = tele_df_final.groupby('Guesser')['Score'].sum()

# Combine on the union of names. The previous inner merge silently dropped
# anyone who appears in only one role; they now stay on the board with 0 for
# the role they never played.
all_scores = (
    pd.concat(
        [drawer_scores.rename('Score_Drawing'), guesser_scores.rename('Score_Guessing')],
        axis=1,
    )
    .fillna(0)
    .rename_axis('Name')
    .reset_index()
)

# Calculate weighted scores based on drawing_guessing_weight. With that set to
# 2 we value drawing twice as much as guessing.
all_scores['Score_Drawing'] = (all_scores['Score_Drawing'] * drawing_guessing_weight) / (1 + drawing_guessing_weight)
all_scores['Score_Guessing'] = all_scores['Score_Guessing'] / (1 + drawing_guessing_weight)
all_scores['Composite_Score'] = all_scores['Score_Drawing'] + all_scores['Score_Guessing']
all_scores


In [None]:
# Lowest composite score first, so the bars rise from left to right.
all_scores = all_scores.sort_values(by='Composite_Score')

# Draw the bars once; the previous second sns.barplot call painted a duplicate
# set of bars over the same axes.
ax = sns.barplot(x=all_scores['Name'], y=all_scores['Composite_Score'])

# Add chart title
ax.set_title('Telestrations Scoreboard')

# Rotate the x tick labels to vertical. tick_params changes only the rotation,
# so the labels do not have to be read back and re-set.
ax.tick_params(axis='x', labelrotation=90);

# Visualizing Notebooks Deviation Round over Round

To do this, we first need to pull the original prompt into the DataFrame.

In [None]:
# Work on an independent copy: the cells below add columns to analysis_df, and
# a plain assignment would make those columns appear on tele_df_final too.
analysis_df = tele_df_final.copy()
analysis_df

In [None]:
# Pull in original prompt: the Round 1 'Prompt' of each notebook.
# The notebook -> prompt table is built once instead of re-filtering the whole
# frame for every row. setdefault keeps the first match per notebook, mirroring
# the previous `.iloc[0]`; a notebook with no Round 1 row still fails loudly
# (KeyError).
first_round_rows = analysis_df[analysis_df['Round'] == 1]
original_prompt_by_notebook = {}
for notebook, prompt in zip(first_round_rows['Notebook'], first_round_rows['Prompt']):
    original_prompt_by_notebook.setdefault(notebook, prompt)

analysis_df['Original Prompt'] = [
    original_prompt_by_notebook[notebook] for notebook in analysis_df['Notebook']
]
analysis_df.sort_values(by='Notebook')

In [None]:
# Cosine similarity between each round's guess and the notebook's original prompt
def _similarity_to_original(row):
    """Cosine similarity between the embeddings of 'Original Prompt' and 'Guess'."""
    original_vector = prompt_embeddings[row['Original Prompt']]
    guess_vector = prompt_embeddings[row['Guess']]
    return embeddings.get_cosine_similarity(original_vector, guess_vector)

analysis_df['cosine_sim_w_original'] = analysis_df.apply(_similarity_to_original, axis=1)

# One line per notebook. `ax` is kept so a later cell can annotate the lines.
# The sort_values() call that used to sit here was removed: its result was
# neither assigned nor displayed.
ax = sns.lineplot(x=analysis_df['Round'], y=analysis_df['cosine_sim_w_original'], hue=analysis_df['Notebook'])

In [None]:

# Annotate the lines
# Skip line artists that carry no data, which would otherwise fail on [-1].
# NOTE(review): pairing lines with labels assumes seaborn draws one line per
# notebook in order of first appearance - confirm against the rendered legend.
lines = [line for line in ax.get_lines() if len(line.get_xdata()) > 0]
labels = analysis_df['Notebook'].unique()

for line, label in zip(lines, labels):
    # Put the notebook name at the last point of its line, in the line's colour.
    y = line.get_ydata()[-1]
    x = line.get_xdata()[-1]
    ax.text(x, y, f'{label}', color=line.get_color(), weight='bold', verticalalignment='center')

# End on the Figure rather than the Axes: a bare `ax` only shows its repr, so
# the labels added above would never be rendered.
ax.figure

In [None]:
# Spread of each round's similarity to the notebook's original prompt.
sns.boxplot(
    data=analysis_df,
    x='Round',
    y='cosine_sim_w_original',
    palette='rocket',
)

In [None]:
# Spread of each round's prompt-to-guess similarity.
sns.boxplot(
    data=analysis_df,
    x='Round',
    y='cosine_similarity',
    palette='rocket',
)

In [None]:
# Drill into a single notebook to read its chain of prompts and guesses.
is_selected_notebook = analysis_df['Notebook'] == 'Tyler'
analysis_df.loc[is_selected_notebook]

Round over round cosine dissimilarity

Cosine dissimilarity to starting prompt by round

# Next steps to explore

Quantifying how hard a prompt is to draw