In [1]:
# Dataset files read by the cells below.
TRAINING_DATASET = '/data/patch_features_small/train2014_embeddings.p'
TEST_DATASET = '/data/patch_features_small/test2014_positive.p'

# S3 location of the artifacts written by the training run being evaluated.
S3_BUCKET = 'cse-547'
# The log and the model share one run identifier; keeping it in a single place
# guarantees both keys always point at the same run. See train_s3_model_key.
TRAINING_RUN_ID = '103740db-7181-4771-aa0b-cfa7cc407cf8'
S3_LOG_KEY = 'project/train/training_log_{0}.pkl'.format(TRAINING_RUN_ID)
S3_MODEL_KEY = 'project/train/model_{0}.pkl'.format(TRAINING_RUN_ID)

In [2]:
from cse547.data import CocoPatchesDataset, OneShotDataLoader
from cse547.models import MultiLayerPerceptron
from cse547.s3 import deserialize_object

# Fetch the trained weights and the training log from S3. The log is read
# below for the model hyperparameters ('hidden_units', 'dropout').
state_dict = deserialize_object(S3_BUCKET, S3_MODEL_KEY)
training_log = deserialize_object(S3_BUCKET, S3_LOG_KEY)

dataset = CocoPatchesDataset.from_state_dict_files([TEST_DATASET])
# Take the first batch from the loader. The builtin next() is used because
# Python 3 iterators only guarantee __next__; the `.next()` method is the
# Python 2 protocol and is not present on every iterator class.
# NOTE(review): presumably OneShotDataLoader yields the whole dataset as a
# single batch -- confirm against cse547.data.
eval_data = next(iter(OneShotDataLoader(dataset)))

# Rebuild the network with dimensions taken from the evaluation batch
# (second axis of the features and of the labels) and the hyperparameters
# recorded at training time, then restore the trained weights.
model = MultiLayerPerceptron(
    eval_data['features'].size()[1],
    eval_data['label'].size()[1],
    training_log['model']['hidden_units'],
    training=False,
    dropout=training_log['model']['dropout'])
model.load_state_dict(state_dict)

In [3]:
import torch
from torch.nn import functional

# Inference only, so run the forward pass without autograd tracking.
with torch.no_grad():
    # torch.sigmoid is the supported replacement for the deprecated
    # torch.nn.functional.sigmoid; it squashes the model outputs into (0, 1).
    predictions = torch.sigmoid(model(eval_data['features']))

In [4]:
from sklearn.metrics import average_precision_score

# Overall average precision on the test batch, using sklearn's default
# `average` setting (the per-label breakdown is computed separately).
test_average_precision_score = average_precision_score(
    y_true=eval_data['label'].data.numpy(),
    y_score=predictions.data.numpy())

# Persist the score, formatted to six significant digits, as a .tex snippet.
with open('test_average_precision_score.tex', 'w') as tex_file:
    tex_file.write(format(test_average_precision_score, '.6'))

test_average_precision_score

0.25411190573730397

In [5]:
import numpy as np
import pickle

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point TRAINING_DATASET at files from a trusted source.
with open(TRAINING_DATASET, 'rb') as f:
    # Sum the 'labels' entry along axis 0 to get one integer count per column.
    # (Presumably rows are observations and columns are labels -- confirm
    # against the code that writes this file.)
    # `np.int` was only an alias for the builtin int and was removed in
    # NumPy 1.24, so the builtin is used directly.
    training_observations_cnt = np.sum(
        pickle.load(f)['labels'], axis=0, dtype=int)

In [6]:
from collections import OrderedDict
import pandas as pd

# Convert the tensors to NumPy once; both are reused in several columns below.
test_labels = eval_data['label'].data.numpy()
test_scores = predictions.data.numpy()

# One row per category: its name, how many training and test observations
# carry that label, and the label's own average precision (average=None
# makes sklearn return one score per label column instead of a single value).
# `np.int` was removed in NumPy 1.24; the builtin int is the same dtype.
evaluation_by_label = pd.DataFrame(OrderedDict([
    ('Label', [category['name'] for category in dataset.categories]),
    ('Training Observations', training_observations_cnt),
    ('Test Observations', np.sum(test_labels, axis=0, dtype=int)),
    ('Average Precision Score',
     average_precision_score(test_labels, test_scores, average=None)),
]))
evaluation_by_label

Unnamed: 0,Label,Training Observations,Test Observations,Average Precision Score
0,bicycle,9950,1847,0.027483
1,car,56694,11010,0.36249
2,motorcycle,13163,2757,0.040598
3,airplane,15830,2529,0.07773
4,bus,16103,2985,0.087326
5,train,4568,873,0.040963
6,truck,29670,6045,0.117114
7,boat,15037,3179,0.057605
8,bird,28911,6232,0.314083
9,cat,12070,2778,0.500823


In [7]:
# Export the per-label evaluation table as a LaTeX tabular, omitting the
# DataFrame index column.
with open('test_average_precision_score_by_class.tex', 'w') as latex_file:
    evaluation_by_label.to_latex(buf=latex_file, index=False)