In [1]:
import os
import random

import numpy as np

from rfft.hypothesis import Hypothesis
from rfft.multilayer_perceptron import MultilayerPerceptron

from decoy_mnist import generate_dataset
from decoy_mnist import load_annotations
from decoy_mnist import load_hypothesis

In [2]:
# Build the decoy-MNIST arrays. X/y are used below for training and for the
# "Train" score, Xt/yt for the "Test" score; the remaining outputs
# (Xr, E, Xtr, Et) are not referenced by the cells shown in this notebook.
Xr, X, y, E, Xtr, Xt, yt, Et = generate_dataset()

In [3]:
# Directory that run_experiment() scans for '.xml' files; relative to the
# notebook's working directory.
dirname = 'tagging/decoy_mnist'

In [4]:
def score_model(mlp):
    """Print and return the train/test scores of a fitted model.

    Each split is scored exactly once, so evaluation cost is not doubled
    by printing and returning the same numbers.

    Args:
        mlp: Any object exposing ``score(inputs, labels)``.

    Returns:
        Tuple ``(train_score, test_score)`` computed on the notebook-level
        ``(X, y)`` and ``(Xt, yt)`` arrays respectively.
    """
    train_score = mlp.score(X, y)
    test_score = mlp.score(Xt, yt)
    print('Train: {0}, Test: {1}'.format(train_score, test_score))
    return (train_score, test_score)

In [14]:
def run_experiment(increment=20, weight_per_mask=10, num_epochs=25, seed=None):
    """Train one model per incrementally larger set of annotations.

    Collects the '.xml' files found in the notebook-level ``dirname``,
    shuffles them, and feeds them to ``Hypothesis.incrementally_sample``.
    For every ``(indices, hypothesis)`` pair it yields, a fresh
    ``MultilayerPerceptron`` is fitted on the notebook-level ``X``/``y`` and
    scored with ``score_model``.

    Args:
        increment: Forwarded as ``increment=`` to the sampler; the number of
            indices is asserted to grow by this amount on every step,
            starting from zero.
        weight_per_mask: ``hypothesis.weight`` is set to this value times the
            number of indices in the current step.
        num_epochs: Forwarded as ``num_epochs=`` to ``mlp.fit``.
        seed: Optional seed for the file shuffle. ``None`` (default) uses the
            global ``random`` state. Only the file order is seeded here; any
            randomness inside training is not controlled by this argument.

    Returns:
        List of ``(num_indices, train_acc, test_acc)`` tuples, one per step.

    Raises:
        AssertionError: If the non-zero rows of ``hypothesis.A`` do not match
            ``indices``, or the number of indices is not
            ``step * increment``.
    """
    # Sort before shuffling: os.listdir order is arbitrary, so a seed alone
    # would not make the shuffled order reproducible.
    xml_files = sorted(os.path.join(dirname, name)
                       for name in os.listdir(dirname)
                       if name.endswith('.xml'))
    if seed is None:
        random.shuffle(xml_files)
    else:
        random.Random(seed).shuffle(xml_files)

    accuracies = []
    sampler = Hypothesis.incrementally_sample(xml_files,
                                              load_hypothesis,
                                              X.shape,
                                              increment=increment)
    for iteration, (indices, hypothesis) in enumerate(sampler):
        # Sanity checks: the mask must be non-zero exactly on the sampled
        # rows, and the sample must have grown by `increment` per step.
        non_zero_rows = np.where(hypothesis.A != 0)[0]
        assert set(non_zero_rows) == set(indices)
        assert len(indices) == iteration * increment

        # Scale the hypothesis weight with the number of annotated examples.
        hypothesis.weight = weight_per_mask * len(indices)
        mlp = MultilayerPerceptron()
        mlp.fit(X,
                y,
                hypothesis=hypothesis,
                num_epochs=num_epochs,
                always_include=indices,
                show_progress_every=500,
                verbose=False)

        train_acc, test_acc = score_model(mlp)
        accuracies.append((len(indices), train_acc, test_acc))
    return accuracies

In [15]:
# One entry per run_experiment() call: that run's list of
# (num_annotations, train_acc, test_acc) tuples.
all_accuracies = []

In [None]:
# Run the experiment once and store its accuracy curve.
all_accuracies.extend(run_experiment() for _ in range(1))



In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import make_interp_spline
try:
    from scipy.interpolate import spline  # legacy helper; removed in SciPy 1.3
except ImportError:
    spline = None

In [None]:
# Plot smoothed test accuracy against the number of annotations for the most
# recent run. Index -1 (rather than a fixed position) works however many
# runs have been appended to all_accuracies.
accuracies = np.array(list(map(list, all_accuracies[-1])))
num_annotations = accuracies[:, 0]  # column 0: number of annotations
test_accuracy = accuracies[:, 2]    # column 2: test accuracy
xnew = np.linspace(num_annotations.min(), num_annotations.max(), 30)
# Cubic B-spline through the measured points, evaluated on the dense grid.
# make_interp_spline replaces scipy.interpolate.spline (removed in SciPy 1.3);
# it needs increasing x values and at least four points for k=3.
power_smooth = make_interp_spline(num_annotations, test_accuracy, k=3)(xnew)
plt.title('Number of Annotations vs Accuracy')
plt.xlabel('Number of annotations')
plt.ylabel('Test accuracy')
plt.plot(xnew, power_smooth)
plt.show()