In [1]:
import torch
import numpy as np
import pandas as pd

from train_model import run_model, MODELS, WordModel, train_model, test_model
from data_processor import DoqProcessor, VerbProcessor, ProstProcessor

In [37]:
doq_train = DoqProcessor('train')
train_labels, _, train_sentences, _ = doq_train.forward()

doq_test = DoqProcessor('test')
test_labels, _, test_sentences, _ = doq_test.forward()

doq = {}
doq['train'] = {
    'labels': train_labels,
    'sents': train_sentences,
}

doq['test'] = {
    'labels': test_labels,
    'sents': test_sentences,
}

In [38]:
prost_train = ProstProcessor('train', batch_size=0, attribute='all')
train_labels, _, train_sentences, _ = prost_train.forward()

prost_test = ProstProcessor('test', batch_size=0, attribute='all')
test_labels, _, test_sentences, _ = prost_test.forward()

prost = {}
prost['train'] = {
    'labels': train_labels,
    'sents': train_sentences,
}

prost['test'] = {
    'labels': test_labels[:1000],
    'sents': test_sentences[:1000],
}

Using custom data configuration default
Reusing dataset prost (/Users/g/.cache/huggingface/datasets/corypaik___prost/default/1.0.1/5723fec956da2bd5e34a0ac6f53f398b771fbb743fc979c6a50b513888d537c7)
Loading cached split indices for dataset at /Users/g/.cache/huggingface/datasets/corypaik___prost/default/1.0.1/5723fec956da2bd5e34a0ac6f53f398b771fbb743fc979c6a50b513888d537c7/cache-556ac204ac069f7c.arrow and /Users/g/.cache/huggingface/datasets/corypaik___prost/default/1.0.1/5723fec956da2bd5e34a0ac6f53f398b771fbb743fc979c6a50b513888d537c7/cache-746b2ee70a966be3.arrow
Using custom data configuration default
Reusing dataset prost (/Users/g/.cache/huggingface/datasets/corypaik___prost/default/1.0.1/5723fec956da2bd5e34a0ac6f53f398b771fbb743fc979c6a50b513888d537c7)
Loading cached split indices for dataset at /Users/g/.cache/huggingface/datasets/corypaik___prost/default/1.0.1/5723fec956da2bd5e34a0ac6f53f398b771fbb743fc979c6a50b513888d537c7/cache-556ac204ac069f7c.arrow and /Users/g/.cache/huggingf

In [None]:
verb = {}
for attr in ['weight', 'speed', 'size', 'strength', 'rigidness']:
    verb_train = VerbProcessor(attr, 20, 'train', 0)
    train_labels, _, train_sentences, _ = verb_train.forward()

    verb_test = VerbProcessor(attr, 20, 'test', 0)
    test_labels, _, test_sentences, _ = verb_test.forward()

    verb[attr] = {}
    verb[attr]['train'] = {
        'labels': train_labels,
        'sents': train_sentences,
    }

    verb[attr]['test'] = {
        'labels': test_labels,
        'sents': test_sentences,
    }

In [4]:
verb = {}
for attr in ['weight', 'speed', 'size', 'strength', 'rigidness']:
    verb_train = VerbProcessor(attr, 20, 'train', 0)
    train_labels, _, train_sentences, _ = verb_train.forward()

    verb_test = VerbProcessor(attr, 20, 'test', 0)
    test_labels, _, test_sentences, _ = verb_test.forward()

    verb[attr] = {}
    verb[attr]['train'] = {
        'labels': train_labels,
        'sents': train_sentences,
    }

    verb[attr]['test'] = {
        'labels': test_labels,
        'sents': test_sentences,
    }

In [1]:
def evaluate_prost(model, try_encoder=True):    
    m = WordModel(model, try_encoder=try_encoder)
    train_features = m.get_features(prost['train']['sents'])
    test_features = m.get_features(prost['test']['sents'])
    
    lr = train_model(train_features, prost['train']['labels'])
    acc, probs = test_model(lr, test_features, prost['test']['labels'], return_probs=True)
    
    assert len(probs) % 4 == 0
    
    correct_count = 0.0
    
    for i in range(0, probs.shape[0], 4):
        example = probs[i:i+4]
        true_pred = np.max(example, axis=0)
        
        for j, ex in enumerate(example):
            if ex[1] == true_pred[1] and prost['test']['labels'][i + j]:
                correct_count += 1
                break
    
    return correct_count / (probs.shape[0] / 4) * 100

In [2]:
# Non-pooled evaluation
for model in MODELS:
    print(model)
    doq_acc = None
    prost_acc = None
    try:
        doq_acc = run_model(model, doq['train']['labels'], doq['train']['sents'], doq['test']['labels'], doq['test']['sents'], try_encoder=True, verbose=False)
        print("doq", doq_acc)
        verb_acc = {}
        for attr in verb:
            verb_acc[attr] = run_model(model, verb[attr]['train']['labels'], verb[attr]['train']['sents'], verb[attr]['test']['labels'], verb[attr]['test']['sents'], try_encoder=True, verbose=False)
            print(f"verb-{attr}", verb_acc[attr])
        prost_acc = evaluate_prost(model, try_encoder=True)
        print("prost", prost_acc)
    except Exception as e:
        print(e)
    
    model_accs[model] = {
        'doq': doq_acc,
        'prost': prost_acc,
    }
    
    for attr in verb:
        model_accs[model][f'verb_{attr}'] = verb_acc[attr]
    model_accs[model]['prost'] = prost_acc

NameError: name 'MODELS' is not defined

In [None]:
model_accs

In [14]:
df = pd.DataFrame(model_accs)

In [18]:
df.T

Unnamed: 0,doq,prost,verb_weight,verb_speed,verb_size,verb_strength,verb_rigidness
clip,66.727273,0.336179,78.510473,69.890511,79.209486,72.966102,67.79949
roberta,64.0,,84.173778,75.273723,82.608696,78.559322,71.452846
roberta_small,59.545455,0.659552,71.683476,62.226277,72.56917,68.983051,64.40102
visualbert,62.818182,0.324973,80.837859,76.824818,83.873518,80.0,74.341546


In [21]:
pf = df.mean(axis=1)

In [33]:
multimodal = df.clip + df.visualbert

TypeError: unsupported operand type(s) for +: 'method' and 'float'

In [50]:
pf = pf.T

In [54]:
multimodal = (pf['clip'] + pf['visualbert'])/2
unimodal = (pf['roberta'] + pf['roberta_small'])/2

In [69]:
models = pd.concat([multimodal, unimodal], axis=1)
models.columns =['visual+language', 'language']
models.T

Unnamed: 0,doq,prost,verb_weight,verb_speed,verb_size,verb_strength,verb_rigidness
visual+language,64.772727,33.057631,79.674166,73.357664,81.541502,76.483051,71.070518
language,61.772727,65.955176,77.928627,68.75,77.588933,73.771186,67.926933


In [4]:
def evaluate_prost(model):
    m = WordModel(model, try_encoder=True)
    train_features = m.get_features(prost['train']['sents'])
    test_features = m.get_features(prost['test']['sents'])
    
    lr = train_model(train_features, prost['train']['labels'])
    acc, probs = test_model(lr, test_features, prost['test']['labels'], return_probs=True)
    
    assert len(probs) % 4 == 0
    
    correct_count = 0.0
    
    for i in range(0, probs.shape[0], 4):
        example = probs[i:i+4]
        true_pred = np.max(example, axis=0)
        
        for j, ex in enumerate(example):
            if ex[1] == true_pred[1] and prost['test']['labels'][i + j]:
                correct_count += 1
                break
    
    return correct_count / (probs.shape[0] / 4) * 100

In [36]:
accs_embed = {}

# Non-pooled evaluation
for model in ['clip', 'roberta_small']:
    doq_acc = None
    prost_acc = None
    try:
        doq_acc = run_model(model, doq['train']['labels'], doq['train']['sents'], doq['test']['labels'], doq['test']['sents'], try_encoder=False, verbose=False)
        print("doq", doq_acc)
        verb_acc = {}
        for attr in verb:
            verb_acc[attr] = run_model(model, verb[attr]['train']['labels'], verb[attr]['train']['sents'], verb[attr]['test']['labels'], verb[attr]['test']['sents'], try_encoder=False, verbose=False)
            print(f"verb-{attr}", verb_acc[attr])
        prost_acc = evaluate_prost(model)
        print("prost", prost_acc)
    except Exception as e:
        print(e)
    
    accs_embed[model] = {
        'doq': doq_acc,
        'prost': prost_acc,
    }
    
    for attr in verb:
        accs_embed[model][f'verb_{attr}'] = verb_acc[attr]

100%|███████████████████████████████████████████| 55/55 [00:01<00:00, 51.48it/s]

Expected 2D array, got 1D array instead:
array=[-2.6959957e-11 -4.3043177e-11  2.5684849e-12 ... -1.0659615e-02
 -2.2763608e-02 -1.0907674e-02].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.





NameError: name 'verb' is not defined

In [28]:
accs_df = accs_df.T

In [17]:
multimodal = (accs_df['clip'] + accs_df['lxmert'] + accs_df['visualbert'])/3
unimodal = (accs_df['roberta'] + accs_df['t5'] + accs_df['roberta_small'])/3

In [18]:
models = pd.concat([multimodal, unimodal], axis=1)
models.columns =['visual+language', 'language']
models.T

Unnamed: 0,doq,prost,verb_weight,verb_speed,verb_size,verb_strength,verb_rigidness
visual+language,63.484848,34.009249,81.872252,71.472019,82.29249,77.966102,71.65109
language,62.606061,49.501957,78.019136,68.248175,78.26087,73.389831,68.677428


In [24]:
from sklearn.decomposition import PCA
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [20]:
m = WordModel('roberta', try_encoder=True)

train_features = m.get_features(prost['train']['sents'])
test_features = m.get_features(prost['test']['sents'])

100%|█████████████████████████████████████████| 100/100 [01:16<00:00,  1.30it/s]
100%|███████████████████████████████████████████| 50/50 [00:36<00:00,  1.36it/s]


In [21]:
train_features.shape

(2000, 15360)

In [22]:
test_features.shape

(1000, 15360)

In [25]:
lr = make_pipeline(StandardScaler(), SGDClassifier(random_state=0, max_iter=1000, loss="log", n_jobs=-1))

with torch.no_grad():
    lr.fit(train_features, prost['train']['labels'])

In [27]:
acc, probs = test_model(lr, test_features, prost['test']['labels'], return_probs=True, verbose=False)

In [30]:
l = []
for i, s in enumerate(prost['test']['sents']):
    if i % 4 == 0:
        l += [s]

for ex, prob in zip(l, probs):
    print(prob, ex)

[1. 0.] The circle around the car takes the least amount of paint. [SEP] A person paints a circle around a car, a house, a book, and a table.
[1. 0.] The lamps are the easiest to stack. [SEP] A person is trying to stack lamps, bottles, boxes, and eggs.
[0. 1.] The mirror is the hardest to roll. [SEP] A person is trying to roll a mirror, a bottle, a ball, and a can.
[1. 0.] The puck hit by the microwave slides the shortest distance. [SEP] A microwave, a brick, a leaf, and an egg moving at identical speeds each collide with a static hockey puck.
[1. 0.] The side of the seesaw with the coin moves up. [SEP] A coin and a brick are placed in baskets on either end of a perfectly balanced seesaw.
[1.00000000e+000 1.49446914e-161] The staircase leading to the top of the book is the easiest to walk up. [SEP] There are four staircases. The first leads to the top of a book, the second leads to the top of a mountain, the third leads to the top of a table, and the fourth leads to the top of a microw

In [19]:
evaluate_prost('roberta')

100%|█████████████████████████████████████████| 100/100 [01:15<00:00,  1.33it/s]
100%|███████████████████████████████████████████| 50/50 [00:35<00:00,  1.41it/s]


91.2