In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchtext
import scipy.stats

from tqdm import tqdm
from collections import defaultdict
from models import MLP1Base, MLP2Base, MetaMLPModel

import matplotlib.pyplot as plt
import numpy as np

In [2]:
df = pd.read_csv('./parallelograms-revisited/experiment4_data_triads/mean_triad_ratings.csv')

In [3]:
df

Unnamed: 0,wordA,wordB,wordC,wordD,mean_rating,num_ratings
0,hat,head,mask,face,6.65,20
1,mask,face,disguise,identity,5.65,20
2,hat,head,disguise,identity,3.2,20
3,foot,shoe,wrist,bracelet,6.2,20
4,wrist,bracelet,wall,painting,5.75,20
5,foot,shoe,wall,painting,3.55,20
6,dog,mailman,cat,mouse,5.5,20
7,cat,mouse,horse,hay,3.95,20
8,dog,mailman,horse,hay,2.3,20
9,nurse,patient,mother,baby,5.55,20


In [4]:
vocab = torchtext.vocab.GloVe(name='840B', dim=300)

In [14]:
torch.manual_seed(42)

<torch._C.Generator at 0x7f9e0f502750>

In [5]:
meta_mlp2_params = torch.load('./models/meta_mlp2.pt', map_location=torch.device('cpu'))

In [6]:
meta_mlp2 = MLP2Base(input_dim=300, hidden_dim=500, output_dim=300)

In [7]:
meta_mlp2.l1.weight.data = meta_mlp2_params['features.layer1.linear.weight']
meta_mlp2.l1.bias.data = meta_mlp2_params['features.layer1.linear.bias']
meta_mlp2.l2.weight.data = meta_mlp2_params['features.layer2.linear.weight']
meta_mlp2.l2.bias.data = meta_mlp2_params['features.layer2.linear.bias']
meta_mlp2.l3.weight.data = meta_mlp2_params['classifier.weight']
meta_mlp2.l3.bias.data = meta_mlp2_params['classifier.bias']

In [49]:
mm2opt = optim.SGD(meta_mlp2.parameters(), lr=0.1)

mm2cache = meta_mlp2.state_dict()

criterion = nn.MSELoss()

def get_m2_loss(a, b, c, d):
    meta_mlp2.load_state_dict(mm2cache)
    mm2opt.zero_grad()
    a_hat = meta_mlp2(a)
    loss = criterion(a_hat, b)
    print(loss)
    loss.backward()
    mm2opt.step()
    c_hat = meta_mlp2(c)
    loss = criterion(c_hat, d)
    return loss.detach().numpy()

In [50]:
def get_categorized_relsim_scores(df, vocab, get_loss):
    d1 = []
    d2 = []
    d3 = []
    
    for idx, row in tqdm(df.iterrows()):
        w1, w2, w3, w4 = row['wordA'], row['wordB'], row['wordC'], row['wordD']
        print(w1, w2, w3, w4)

        if w1 not in vocab.stoi or w2 not in vocab.stoi or w3 not in vocab.stoi or w4 not in vocab.stoi:
            continue        
            
        a = vocab[w1]
        b = vocab[w2]
        c = vocab[w3]
        d = vocab[w4]
        
        score = get_loss(a, b, c, d)
        # print(score)

        if idx % 3 == 0:
            d1.append(score)
        elif idx % 3 == 1:
            d2.append(score)
        else:
            d3.append(score)
        
    return d1, d2, d3

In [51]:
d1, d2, d3 = get_categorized_relsim_scores(df, vocab, get_m2_loss)

0it [00:00, ?it/s]

hat head mask face


16it [00:00,  5.75it/s]

tensor(0.0283, grad_fn=<MseLossBackward>)
mask face disguise identity
tensor(0.0192, grad_fn=<MseLossBackward>)
hat head disguise identity
tensor(0.0237, grad_fn=<MseLossBackward>)
foot shoe wrist bracelet
tensor(0.0341, grad_fn=<MseLossBackward>)
wrist bracelet wall painting
tensor(0.0591, grad_fn=<MseLossBackward>)
foot shoe wall painting
tensor(0.0315, grad_fn=<MseLossBackward>)
dog mailman cat mouse
tensor(0.0642, grad_fn=<MseLossBackward>)
cat mouse horse hay
tensor(0.0975, grad_fn=<MseLossBackward>)
dog mailman horse hay
tensor(0.0629, grad_fn=<MseLossBackward>)
nurse patient mother baby
tensor(0.0422, grad_fn=<MseLossBackward>)
mother baby frog tadpole
tensor(0.0602, grad_fn=<MseLossBackward>)
nurse patient frog tadpole
tensor(0.0394, grad_fn=<MseLossBackward>)
cocoon butterfly egg chicken
tensor(0.0329, grad_fn=<MseLossBackward>)
egg chicken wine vineyard
tensor(0.0668, grad_fn=<MseLossBackward>)
cocoon butterfly wine vineyard
tensor(0.0282, grad_fn=<MseLossBackward>)
person cl

36it [00:00, 75.00it/s]

tensor(0.0174, grad_fn=<MseLossBackward>)
password access key safe
tensor(0.0593, grad_fn=<MseLossBackward>)
veil face key safe
tensor(0.0144, grad_fn=<MseLossBackward>)
pride lion brain neuron
tensor(0.0252, grad_fn=<MseLossBackward>)
brain neuron computer chip
tensor(0.0842, grad_fn=<MseLossBackward>)
pride lion computer chip
tensor(0.0185, grad_fn=<MseLossBackward>)





In [35]:
d1

array([0.9556585 , 0.82968587, 0.8719486 , 0.9111938 , 0.8962018 ,
       0.9177142 , 0.8982647 , 0.9504485 , 0.9069916 , 0.87899077,
       0.8958831 , 0.8742659 ], dtype=float32)

In [39]:
d2

[array(0.22710404, dtype=float32),
 array(0.21097389, dtype=float32),
 array(0.25007328, dtype=float32),
 array(0.24651214, dtype=float32),
 array(0.4364564, dtype=float32),
 array(0.14437854, dtype=float32),
 array(0.21218824, dtype=float32),
 array(0.35399407, dtype=float32),
 array(0.460974, dtype=float32),
 array(0.26283708, dtype=float32),
 array(0.20503333, dtype=float32),
 array(0.20438068, dtype=float32)]

In [40]:
d3

[array(0.22926521, dtype=float32),
 array(0.21049125, dtype=float32),
 array(0.25126407, dtype=float32),
 array(0.24733093, dtype=float32),
 array(0.43577057, dtype=float32),
 array(0.14439394, dtype=float32),
 array(0.21220894, dtype=float32),
 array(0.3540213, dtype=float32),
 array(0.45735356, dtype=float32),
 array(0.2611624, dtype=float32),
 array(0.20499784, dtype=float32),
 array(0.20423287, dtype=float32)]

In [34]:
d1 = np.exp(-np.array(d1))
d2 = np.exp(-np.array(d2))
d3 = np.exp(-np.array(d3))

test = d1 * d2 * d3 / np.minimum(d1, d2, d3) ** 2

In [17]:
test

array([0.8718868 , 0.86214083, 0.83214366, 0.8770245 , 0.66922206,
       0.8841108 , 0.86812836, 0.89333737, 0.78879136, 1.1304805 ,
       0.8170541 , 0.84458864], dtype=float32)

In [18]:
scipy.stats.ttest_1samp(test, 1)

Ttest_1sampResult(statistic=-4.59774225346917, pvalue=0.0007677531171223022)

In [53]:
human = [2.04, 1.66, 1.86, 3.03, 1.64, 1.68, 1.64, 0.87, 1.85, 2.47, 1.52, 1.39]

In [64]:
scipy.stats.pearsonr(test, human)

(-0.25854797897704307, 0.41713535189152723)

In [65]:
df2 = pd.read_csv('./parallelograms-revisited/experiment4_data_triads/control.csv')

In [66]:
d1, d2, d3 = get_categorized_relsim_scores(df2, vocab, get_m2_loss)

36it [00:00, 231.99it/s]


In [38]:
scipy.stats.ttest_1samp(test, 1)

Ttest_1sampResult(statistic=3.580449712909401, pvalue=0.0050079565714423735)

In [19]:
def one_sample_one_tailed(sample_data, popmean, alpha=0.05, alternative='greater'):
    t, p = stats.ttest_1samp(sample_data, popmean)
    print ('t:',t)
    print ('p:',p)
    if alternative == 'greater' and (p/2 < alpha) and t > 0:
        print ('Reject Null Hypothesis for greater-than test')
    if alternative == 'less' and (p/2 < alpha) and t < 0:
        print ('Reject Null Hypothesis for less-than test')

In [20]:
from scipy import stats
one_sample_one_tailed(test, 1)

t: -4.59774225346917
p: 0.0007677531171223022


In [68]:
one_sample_one_tailed(test, 1)

t: -7.639773814600016
p: 1.7587701453873647e-05


In [34]:
df

Unnamed: 0,wordA,wordB,wordC,wordD,mean_rating,num_ratings
0,fruit,grape,song,opera,6.65,20.0
1,song,opera,rodent,mouse,5.65,20.0
2,fruit,grape,rodent,mouse,3.2,20.0
3,planet,Earth,state,Virginia,6.2,20.0
4,state,Virginia,country,Egypt,5.75,20.0
5,planet,Earth,country,Egypt,3.55,20.0
6,dog,tail,fish,fin,5.5,20.0
7,fish,fin,city,block,3.95,20.0
8,dog,tail,city,block,2.3,20.0
9,flock,sheep,album,songs5.55,20.0,
