In [1]:
import traceback

import torch
import numpy as np
import pandas as pd

from train_model import run_model, MODELS, WordModel, train_model, test_model
from data_processor import DoqProcessor, VerbProcessor, ProstProcessor

In [3]:
# Load the DoQ train and test splits. forward() yields four values; only the
# first (labels) and third (sentences) are kept, the other two are discarded.
doq_train = DoqProcessor('train')
train_labels, _, train_sentences, _ = doq_train.forward()

doq_test = DoqProcessor('test')
test_labels, _, test_sentences, _ = doq_test.forward()

# Gather both splits under a single dict: doq[split]['labels' | 'sents'].
doq = {
    'train': {'labels': train_labels, 'sents': train_sentences},
    'test': {'labels': test_labels, 'sents': test_sentences},
}

In [4]:
# Load the PROST train and test splits. forward() yields four values; only the
# first (labels) and third (sentences) are kept, the other two are discarded.
# NOTE(review): batch_size=0 presumably turns batching off - confirm in data_processor.
prost_train = ProstProcessor('train', batch_size=0)
train_labels, _, train_sentences, _ = prost_train.forward()

prost_test = ProstProcessor('test', batch_size=0)
test_labels, _, test_sentences, _ = prost_test.forward()

# Gather both splits under a single dict: prost[split]['labels' | 'sents'].
prost = {
    'train': {'labels': train_labels, 'sents': train_sentences},
    'test': {'labels': test_labels, 'sents': test_sentences},
}

Using custom data configuration default
Reusing dataset prost (/Users/g/.cache/huggingface/datasets/corypaik___prost/default/1.0.1/5723fec956da2bd5e34a0ac6f53f398b771fbb743fc979c6a50b513888d537c7)
Loading cached split indices for dataset at /Users/g/.cache/huggingface/datasets/corypaik___prost/default/1.0.1/5723fec956da2bd5e34a0ac6f53f398b771fbb743fc979c6a50b513888d537c7/cache-3ba89f75d16d1d23.arrow and /Users/g/.cache/huggingface/datasets/corypaik___prost/default/1.0.1/5723fec956da2bd5e34a0ac6f53f398b771fbb743fc979c6a50b513888d537c7/cache-dea55751f6b71d2b.arrow


500


Using custom data configuration default
Reusing dataset prost (/Users/g/.cache/huggingface/datasets/corypaik___prost/default/1.0.1/5723fec956da2bd5e34a0ac6f53f398b771fbb743fc979c6a50b513888d537c7)
Loading cached split indices for dataset at /Users/g/.cache/huggingface/datasets/corypaik___prost/default/1.0.1/5723fec956da2bd5e34a0ac6f53f398b771fbb743fc979c6a50b513888d537c7/cache-556ac204ac069f7c.arrow and /Users/g/.cache/huggingface/datasets/corypaik___prost/default/1.0.1/5723fec956da2bd5e34a0ac6f53f398b771fbb743fc979c6a50b513888d537c7/cache-746b2ee70a966be3.arrow


1874


In [11]:
# Build verb[attr][split]['labels' | 'sents'] for each physical attribute.
# NOTE(review): the positional arguments 20 and 0 passed to VerbProcessor are
# not explained here - confirm their meaning in data_processor.
verb = {}
for attr in ('weight', 'speed', 'size', 'strength', 'rigidness'):
    verb_train = VerbProcessor(attr, 20, 'train', 0)
    train_labels, _, train_sentences, _ = verb_train.forward()

    verb_test = VerbProcessor(attr, 20, 'test', 0)
    test_labels, _, test_sentences, _ = verb_test.forward()

    # forward() yields four values; only labels (1st) and sentences (3rd) are stored.
    verb[attr] = {
        'train': {'labels': train_labels, 'sents': train_sentences},
        'test': {'labels': test_labels, 'sents': test_sentences},
    }

In [20]:
# Non-pooled evaluation
def _evaluate(model_name, dataset):
    """Score one model on one dataset through ``run_model``.

    Args:
        model_name: An entry of ``MODELS``; forwarded to ``run_model`` unchanged.
        dataset: Dict with 'train' and 'test' entries, each holding 'labels'
            and 'sents' (the layout of ``doq``, ``prost`` and ``verb[attr]``).

    Returns:
        Whatever ``run_model`` returns for that model/dataset pair.
    """
    train, test = dataset['train'], dataset['test']
    return run_model(model_name, train['labels'], train['sents'], test['labels'], test['sents'])


model_accs = {}     # model name -> {'doq', 'prost', 'verb_<attr>': score}
failed_models = {}  # model name -> repr of the exception that aborted it

for model in MODELS:
    print(model)
    try:
        doq_acc = _evaluate(model, doq)
        prost_acc = _evaluate(model, prost)
        verb_acc = {}
        for attr in verb:
            verb_acc[attr] = _evaluate(model, verb[attr])
    except Exception as e:
        # Stay best-effort (one broken model must not abort the sweep), but say
        # which model failed and why: a bare print(e) drops the exception type
        # and traceback and prints a blank line when the message is empty.
        print(f'{model} failed: {e!r}')
        traceback.print_exc()
        # Keep only the repr so the traceback's frames are not kept alive.
        failed_models[model] = repr(e)
        continue

    # Only models that finished every dataset get a row in the results.
    model_accs[model] = {
        'doq': doq_acc,
        'prost': prost_acc,
    }

    for attr in verb:
        model_accs[model][f'verb_{attr}'] = verb_acc[attr]

if failed_models:
    print('Models skipped after errors:', ', '.join(str(name) for name in failed_models))

clip
roberta
roberta_small
visualbert
t5


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


lxmert
uniter


In [21]:
# Tabulate the scores: outer keys (model names) become the columns and the
# inner keys (dataset names) become the index.
df = pd.DataFrame.from_dict(model_accs)

In [22]:
# Show the table transposed: one row per model, one column per dataset.
df.transpose()

Unnamed: 0,doq,prost,verb_weight,verb_speed,verb_size,verb_strength,verb_rigidness
clip,59.909091,75.0,71.916214,53.740876,77.15415,70.084746,65.165675
roberta,68.727273,75.0,84.406517,73.905109,84.031621,80.423729,73.916737
roberta_small,58.909091,75.0,70.054306,60.583942,70.750988,68.050847,63.636364
visualbert,65.636364,75.0,82.079131,71.624088,83.399209,78.813559,73.746814
t5,64.0,75.0,80.527541,66.332117,81.818182,75.847458,68.054376
lxmert,65.272727,75.0,81.225756,71.076642,82.687747,79.152542,71.792693
uniter,62.545455,75.0,73.235066,62.408759,75.098814,71.610169,67.79949


In [None]:
# Preview the first ten PROST training sentences.
prost['train']['sents'][:10]

In [5]:
# Re-run a single model ('uniter') on PROST step by step so the per-example
# probabilities can be inspected, not just the final accuracy.
m = WordModel('uniter')

# Features are extracted for the train sentences first, then the test sentences.
train_features, test_features = (
    m.get_features(prost['train']['sents']),
    m.get_features(prost['test']['sents']),
)

# NOTE(review): `lr` is presumably a logistic-regression classifier - confirm in train_model.
lr = train_model(train_features, prost['train']['labels'])

# With return_probs=True, test_model hands back two values: accuracy and probabilities.
acc, probs = test_model(lr, test_features, prost['test']['labels'], return_probs=True)

In [8]:
# Print one line per PROST test example: probabilities, gold label, sentence.
# NOTE(review): in the recorded output, consecutive rows share the same sentence
# and the same probabilities while their labels differ, and the results table
# shows 75.0 PROST accuracy for every model - check whether ProstProcessor emits
# a distinct sentence per answer option.
for sentence, prob, label in zip(prost['test']['sents'], probs, prost['test']['labels']):
    print(prob, '[', label, ']', sentence)

[0.74993177 0.25006823] [ False ] The circle around the book takes the least amount of paint. [SEP] A person paints a circle around a car, a house, a book, and a table.
[0.74993177 0.25006823] [ False ] The circle around the book takes the least amount of paint. [SEP] A person paints a circle around a car, a house, a book, and a table.
[0.74993177 0.25006823] [ True ] The circle around the book takes the least amount of paint. [SEP] A person paints a circle around a car, a house, a book, and a table.
[0.74993177 0.25006823] [ False ] The circle around the book takes the least amount of paint. [SEP] A person paints a circle around a car, a house, a book, and a table.
[0.75000702 0.24999298] [ False ] The boxes are the easiest to stack. [SEP] A person is trying to stack lamps, bottles, boxes, and eggs.
[0.75000702 0.24999298] [ False ] The boxes are the easiest to stack. [SEP] A person is trying to stack lamps, bottles, boxes, and eggs.
[0.75000702 0.24999298] [ True ] The boxes are the 