In [16]:
import json
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt

In [17]:
# from google.colab import drive
# drive.mount('/content/drive')

In [18]:
confusion_path = os.path.join(os.path.curdir, "confusions")
data_path = os.path.join(os.path.curdir, "data_resplit_v2")

# Configuration

In [19]:
size = 40000 # size of dataset
test_signs = 200 # number of signs that we're trying to learn
train_signs = 400 # number of signs that we trained on (test_signs is a subset)
seed = 43 # 42 - 46 (5 runs each)

# Loading Data

In [20]:
experiment_path = f'size_{size}/test_{test_signs}/train_{train_signs}/seed_{seed}'
metadata_path = os.path.join(data_path, experiment_path, "metadata.json")
target_cm_path = os.path.join(confusion_path, experiment_path, f"cm_size{size}_train{train_signs}_test{test_signs}_seed{seed}_fc-test-subset.csv")
all_cm_path = os.path.join(confusion_path, experiment_path, f"cm_size{size}_train{train_signs}_test{test_signs}_seed{seed}_fc-train.csv")
test_data_path = os.path.join(data_path, experiment_path, "test.csv")

with open(metadata_path, "r") as f:
  metadata = json.load(f)

test_signs = metadata.get("test_signs", [])
train_signs = metadata.get("train_signs", [])


# Extra Signs

In [21]:
additional_train_signs = list(set(train_signs) - set(test_signs))
print("Number of extra signs seen in training: ", len(additional_train_signs))
print(sorted(additional_train_signs))

Number of extra signs seen in training:  200
['all', 'allgone', 'alot', 'am', 'ant', 'any', 'awake', 'baby', 'babysitter', 'bathtub', 'be', 'beads', 'bear', 'because', 'bed', 'bellybutton', 'belt', 'bib', 'big', 'bird', 'bite', 'boy', 'break', 'broken', 'brother', 'bubbles', 'bucket', 'bug', 'build', 'bump', 'button', 'buy', 'can', 'candy', 'cartcarriage', 'catch', 'cereal', 'chase', 'cheese', 'chicken', 'child', 'chocolate', 'coffee', 'could', 'cover', 'dance', 'dark', 'diaper', 'did', 'do', 'doctor', 'dont', 'dress', 'dry', 'duck', 'ear', 'eat', 'empty', 'enter', 'face', 'fast', 'firetruck', 'first', 'flag', 'flower', 'full', 'fullfood', 'garage', 'garbage', 'garden', 'gentle', 'giraffe', 'girl', 'givemefive', 'grandpa', 'grass', 'hammer', 'hand', 'hard', 'hat', 'haveto', 'head', 'hear', 'heavy', 'help', 'hesheit', 'hide', 'hit', 'horse', 'hungry', 'hurry', 'hurt', 'into', 'is', 'jar', 'kick', 'kitty', 'knife', 'lamp', 'lawnmower', 'lightbulb', 'like', 'lips', 'long', 'look', 'loud',

#Setting up a confusion matrix

In [22]:
# Read confusion matrix for classification head on all signs in testing (all train signs) set
all_cm_df = pd.read_csv(all_cm_path)
train_idx_to_label = dict(enumerate(sorted(metadata['train_signs'])))
all_cm_df['true_label'] = all_cm_df['true_idx'].map(train_idx_to_label)
all_cm_df['pred_label'] = all_cm_df['pred_idx'].map(train_idx_to_label)

# # Read confusion matrix for classification head on signs only in target (test_signs) set
cm_df = pd.read_csv(target_cm_path)
test_idx_to_label = dict(enumerate(sorted(metadata['test_signs'])))
cm_df['true_label'] = cm_df['true_idx'].map(test_idx_to_label)
cm_df['pred_label'] = cm_df['pred_idx'].map(test_idx_to_label)

In [23]:
print(all_cm_df.head())
print(cm_df.head())

   true_idx  pred_idx true_label pred_label  count  row_norm
0         0         0          a          a     53  0.841270
1         0       194          a      knock      1  0.015873
2         0       231          a       nuts      2  0.031746
3         0       263          a     puzzle      1  0.015873
4         0       308          a      spill      1  0.015873
   true_idx  pred_idx true_label pred_label  count  row_norm
0         0         0          a          a     60  0.952381
1         0       182          a  underwear      1  0.015873
2         0       194          a     window      2  0.031746
3         1         1      about      about     59  0.819444
4         1         4      about  alligator      1  0.013889


In [24]:
classes = sorted(set(cm_df.true_label).union(cm_df.pred_label))

pivot_cm = cm_df.pivot_table(
    index="true_label",
    columns="pred_label",
    values="count",
    aggfunc="sum",
    fill_value=0
).reindex(index=classes, columns=classes, fill_value=0)


In [25]:
display(pivot_cm)

pred_label,a,about,after,airplane,alligator,an,and,animal,another,apple,...,were,where,who,why,window,wipe,wish,wolf,yellow,zebra
true_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
a,60,0,0,0,0,0,0,0,0,0,...,0,0,0,0,2,0,0,0,0,0
about,0,59,0,0,1,0,0,0,0,0,...,0,0,0,5,0,0,0,0,0,0
after,0,0,83,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
airplane,0,0,0,68,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
alligator,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wipe,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
wish,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,59,0,0,0
wolf,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,99,0,1
yellow,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Recompute Accuracy

In [26]:
diagonal_values = pivot_cm.values.diagonal().sum()
all_values = pivot_cm.values.sum()
accuracy = diagonal_values / all_values
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 86.10%


#Confusion Analysis

In [27]:
confusions = []
for true_label in pivot_cm.index:
    for pred_label in pivot_cm.columns:
        if true_label != pred_label and pivot_cm.loc[true_label, pred_label] > 0:
            confusions.append((pivot_cm.loc[true_label, pred_label], true_label, pred_label))

confusions.sort(reverse=True)

for i in range(10):
  pair = confusions[i]
  print("Top " + str(i+1) + " Confusion - True label: " + pair[1] + ", Predicted label: " + pair[2] + ", Count: " + str(pair[0]))

Top 1 Confusion - True label: see, Predicted label: paint, Count: 32
Top 2 Confusion - True label: finish, Predicted label: stick, Count: 19
Top 3 Confusion - True label: was, Predicted label: have, Count: 16
Top 4 Confusion - True label: bunny, Predicted label: doll, Count: 12
Top 5 Confusion - True label: pen, Predicted label: wait, Count: 11
Top 6 Confusion - True label: have, Predicted label: was, Count: 11
Top 7 Confusion - True label: couch, Predicted label: chair, Count: 11
Top 8 Confusion - True label: see, Predicted label: hate, Count: 10
Top 9 Confusion - True label: hate, Predicted label: paint, Count: 10
Top 10 Confusion - True label: ball, Predicted label: oven, Count: 10


In [28]:
homosigns = [
    ["alot", "much"],
    ["ant", "bug"],
    ["around", "turnaround"],
    ["asleep", "sleep"],
    ["bathtub", "bath"],
    ["bear", "teddy"],
    ["big", "tall"],
    ["break", "broken"],
    ["cartcarriage", "hammer"],
    ["chicken", "bird"],
    ["coats", "jacket"],
    ["cook", "kitchen", "share"],
    ["couch", "bench", "porch", "sofa"],
    ["could", "can"],
    ["deer", "moose"],
    ["dont", "not"],
    ["dress", "shirt"],
    ["eat", "food"],
    ["enter", "into", "slipper"],
    ["gentle", "soft", "wet"],
    ["get", "tights"],
    ["goingto", "gotto", "go"],
    ["hand", "wait", "finger"],
    ["hug", "love"],
    ["little", "short", "child"],
    ["must", "haveto"],
    ["needneedto", "should"],
    ["nightnight", "tonightnight"],
    ["other", "another"],
    ["over", "after"],
    ["party", "play", "yellow"],
    ["dump", "pour"],
    ["present", "gift"],
    ["bunny", "rabbit"],
    ["rock", "stone"],
    ["slide", "slideverb"],
    ["sneaker", "shoe"],
    ["swim", "pool"],
    ["taken", "fast"],
    ["toilet", "bathroom"],
    ["was", "were"],
    ["wind", "windy"],
    ["wish", "hungry"],
    ["awake", "wake"],
    ["glasswindow", "tooth"],
    ["beside", "person"],
    ["pen", "pencil"],
    ["hi", "hello"]
 ]

In [29]:
#print(sorted(train_signs))
print(sorted(test_signs))
print(len(test_signs))

['a', 'about', 'after', 'airplane', 'alligator', 'an', 'and', 'animal', 'another', 'apple', 'are', 'asleep', 'baabaa', 'backyard', 'bad', 'ball', 'balloon', 'basement', 'basket', 'bathroom', 'bee', 'before', 'bicycle', 'black', 'block', 'breakfast', 'brown', 'bunny', 'bus', 'but', 'car', 'carry', 'cat', 'chair', 'chalk', 'cheek', 'children', 'choose', 'clap', 'close', 'cloud', 'clown', 'coats', 'cold', 'couch', 'cow', 'cowboy', 'crib', 'cry', 'cut', 'daddy', 'day', 'does', 'doll', 'door', 'down', 'downtown', 'drive', 'drop', 'dryer', 'dump', 'elephant', 'every', 'fall', 'farm', 'feed', 'find', 'finish', 'fireman', 'fish', 'fit', 'fix', 'food', 'for', 'frenchfries', 'get', 'gift', 'give', 'glasswindow', 'go', 'gopotty', 'green', 'hair', 'hate', 'have', 'hello', 'hi', 'highchair', 'hold', 'hose', 'hot', 'icecream', 'if', 'insidein', 'jacket', 'jelly', 'knock', 'last', 'later', 'letme', 'lion', 'listen', 'love', 'man', 'many', 'melon', 'minemy', 'mitten', 'mouth', 'necklace', 'new', 'nigh

In [30]:
homosigns_in_test = []
homosigns_and_corresponding_in_test = []
additional_homonyms_in_train = []

# goes through all homosigns
for homosign in homosigns:
  for sign in homosign:
    # if a homosign appears in test set
    if sign in set(test_signs):
      homosigns_in_test.append(sign)
      if len(homosign) > 1:
        others = homosign[:]
        others.remove(sign)
        for other in others:
          if other in set(test_signs):
            homosigns_and_corresponding_in_test.append((sign, other))

      # check if corresponding homosigns appear in additional train set
      for other_sign in homosign:
        if other_sign in set(additional_train_signs):
          additional_homonyms_in_train.append((sign, other_sign))

print("Homosigns present in test set:")
print(sorted(homosigns_in_test))
print(len(homosigns_in_test))
print("Homosign pairs present in test set:")
print(homosigns_and_corresponding_in_test)
print(len(homosigns_and_corresponding_in_test) // 2)

print("Corresponding homonyms present in train (homonym_in_test, corresponding)")
print(sorted(additional_homonyms_in_train))
print(len(additional_homonyms_in_train))

Homosigns present in test set:
['after', 'another', 'asleep', 'bathroom', 'bunny', 'coats', 'couch', 'dump', 'food', 'get', 'gift', 'glasswindow', 'go', 'hello', 'hi', 'jacket', 'love', 'pen', 'pencil', 'pool', 'porch', 'shoe', 'short', 'slipper', 'sofa', 'taken', 'teddy', 'toilet', 'turnaround', 'wait', 'was', 'were', 'wish', 'yellow']
34
Homosign pairs present in test set:
[('coats', 'jacket'), ('jacket', 'coats'), ('couch', 'porch'), ('couch', 'sofa'), ('porch', 'couch'), ('porch', 'sofa'), ('sofa', 'couch'), ('sofa', 'porch'), ('toilet', 'bathroom'), ('bathroom', 'toilet'), ('was', 'were'), ('were', 'was'), ('pen', 'pencil'), ('pencil', 'pen'), ('hi', 'hello'), ('hello', 'hi')]
8
Corresponding homonyms present in train (homonym_in_test, corresponding)
[('food', 'eat'), ('get', 'tights'), ('gift', 'present'), ('shoe', 'sneaker'), ('short', 'child'), ('slipper', 'enter'), ('slipper', 'into'), ('taken', 'fast'), ('teddy', 'bear'), ('wait', 'hand'), ('wish', 'hungry')]
11


In [31]:
# merge mapping
mapping = {}
for pair in homosigns_and_corresponding_in_test:
  if pair[0] not in mapping:
    mapping[pair[0]] = pair[0] + "_" + pair[1]
  if pair[1] not in mapping:
    mapping[pair[1]] = pair[0] + "_" + pair[1]

print(mapping)



{'coats': 'coats_jacket', 'jacket': 'coats_jacket', 'couch': 'couch_porch', 'porch': 'couch_porch', 'sofa': 'couch_sofa', 'toilet': 'toilet_bathroom', 'bathroom': 'toilet_bathroom', 'was': 'was_were', 'were': 'was_were', 'pen': 'pen_pencil', 'pencil': 'pen_pencil', 'hi': 'hi_hello', 'hello': 'hi_hello'}


In [32]:
merged_cm = (
    pivot_cm
    .rename(index=mapping, columns=mapping)
    .groupby(level=0, axis=0).sum()
    .groupby(level=0, axis=1).sum()
)

  .groupby(level=0, axis=0).sum()
  .groupby(level=0, axis=1).sum()


In [33]:
merged_cm

pred_label,a,about,after,airplane,alligator,an,and,animal,another,apple,...,waterhose,where,who,why,window,wipe,wish,wolf,yellow,zebra
true_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
a,60,0,0,0,0,0,0,0,0,0,...,0,0,0,0,2,0,0,0,0,0
about,0,59,0,0,1,0,0,0,0,0,...,0,0,0,5,0,0,0,0,0,0
after,0,0,83,0,0,0,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
airplane,0,0,0,68,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
alligator,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
wipe,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
wish,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,59,0,0,0
wolf,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,99,0,1
yellow,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [34]:
merged_diagonal_values = merged_cm.values.diagonal().sum()
merged_all_values = merged_cm.values.sum()
merged_accuracy = merged_diagonal_values / merged_all_values
print(f"Accuracy: {merged_accuracy * 100:.2f}%")

Accuracy: 86.11%


In [35]:
merged_confusions = []
for true_label in merged_cm.index:
    for pred_label in merged_cm.columns:
        if true_label != pred_label and merged_cm.loc[true_label, pred_label] > 0:
            merged_confusions.append((merged_cm.loc[true_label, pred_label], true_label, pred_label))

merged_confusions.sort(reverse=True)

for i in range(10):
  pair = merged_confusions[i]
  print("Top " + str(i+1) + " Confusion - True label: " + pair[1] + ", Predicted label: " + pair[2] + ", Count: " + str(pair[0]))

Top 1 Confusion - True label: see, Predicted label: paint, Count: 32
Top 2 Confusion - True label: finish, Predicted label: stick, Count: 19
Top 3 Confusion - True label: was_were, Predicted label: have, Count: 16
Top 4 Confusion - True label: bunny, Predicted label: doll, Count: 12
Top 5 Confusion - True label: pen_pencil, Predicted label: wait, Count: 11
Top 6 Confusion - True label: have, Predicted label: was_were, Count: 11
Top 7 Confusion - True label: couch_porch, Predicted label: chair, Count: 11
Top 8 Confusion - True label: see, Predicted label: hate, Count: 10
Top 9 Confusion - True label: hate, Predicted label: paint, Count: 10
Top 10 Confusion - True label: ball, Predicted label: oven, Count: 10
