In [1]:
import collections
import h5py
import numpy as np
import os.path
import sklearn
import sklearn.preprocessing
import sklearn.svm

In [2]:
current_path = "/home/vl1019/tetci17/experiments/deprecated/supervised"
test_path = os.path.join(current_path, "memoized_features", "Q=08_test.mat")
hdf5_file = h5py.File(test_path)
X = hdf5_file["/"]["X_features"].value
X = np.squeeze(X)
X = np.transpose(X, (1, 0, 2))

In [4]:
y = np.concatenate([i*np.ones(10, dtype=int) for i in range(10)])
folds = np.mod(np.arange(100), 5)
integration = "late"
compression = "log"

y_predicted = []
accuracies = []

for fold_id in range(5):
    X_test = X[:, folds==fold_id, :]
    y_test = y[folds==fold_id]
    X_training = X[:, folds!=fold_id, :]
    y_training = y[folds!=fold_id]
    
    # Logarithmic compression
    if compression == "logmedian":
        X_training = np.maximum(X_training, 0)
        X_test = np.maximum(X_test, 0)
        medians = np.median(X_training, axis=(0, 1))[np.newaxis, np.newaxis, :]
        X_training = np.log1p(1e2 * X_training / medians)
        X_test = np.log1p(1e2 * X_test / medians)
    if compression == "log":
        X_training = np.log(1e-6 + np.maximum(X_training, 0))
        X_test = np.log1p(1e-6 + np.maximum(X_test, 0))

    # Early integration: summarize features
    if integration == "early":
        X_training = np.sum(X_training, 0)
        X_test = np.sum(X_test, 0)
    
    # Late integration
    if integration == "late":
        y_training = np.reshape(np.tile(y_training, (128, 1)), (80*128, 1)).ravel()
        X_training = np.reshape(X_training,
            (X_training.shape[0]*X_training.shape[1], X_training.shape[2]))
        X_test = np.reshape(X_test,
            (X_test.shape[0]*X_test.shape[1], X_test.shape[2]))

    # Standardize features
    scaler = sklearn.preprocessing.StandardScaler().fit(X_training)
    X_training = scaler.transform(X_training)
    X_test = scaler.transform(X_test)

    # Train linear SVM
    clf = sklearn.svm.LinearSVC(C=1.0)
    clf.fit(X_training, y_training)

    # Predict and evaluate average miss rate
    if integration == "early":
        y_test_predicted = clf.predict(X_test)
    if integration == "late":
        vote_test = clf.predict(X_test)
        vote_test = np.reshape(vote_test, (128, 20))
        votes = [vote_test[:, n] for n in range(20)]
        counters = map(collections.Counter, votes)
        y_test_predicted = np.hstack(
            [ counter.most_common(1)[0][0] for counter in counters ])

    # Report accuracy
    accuracy =\
        sklearn.metrics.accuracy_score(y_test_predicted, y_test)
    accuracies.append(accuracy)
    y_predicted.append(y_test_predicted)

mean_accuracy = np.mean(accuracies)
std_accuracy = np.std(accuracies)
print(str(100 * mean_accuracy) + " +/- " + str(100 * std_accuracy))

10.0 +/- 0.0
