# Example of training a gloss-decoding linear SVM on precalculated model features, saving the trained classifier, and calculating predicted gloss values for probe stimuli
- For example, in the case of the PixelVAE network, the precalculated features are the 10D latent representations of all the training images.


In [None]:
%matplotlib inline

import json
import os
import time

import numpy as np
import pandas as pd
import scipy.misc
from sklearn import svm, preprocessing
from sklearn import linear_model, svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# scikit-learn removed its vendored copy of joblib (sklearn.externals.joblib) in
# version 0.23, so prefer the standalone package and fall back to the vendored
# copy only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

## 1. Load features and labels

In [None]:
# Suffix identifying this model; later cells reuse it when naming the saved
# classifier and the saved probe-image predictions.
savename = 'PixelVAE01'

# Model features stored on disk as a numpy array.
latent = np.load('./notebook_outputs/PixelVAE01_latents_mainset10k.npy')
latent.shape

# alternatively, if features are saved as .csv files
# latent = np.genfromtxt('../models/pretrained_DNN/outputs/feats_10k_mainset/layer069.csv', delimiter=',')
# savename = 'pretrained' # suffix for saving trained classifier (used later for getting predictions for probe ims)
# latent.shape

In [None]:
# Read the scene log: the .csv holding the ground-truth information about each image
scene_log = pd.read_csv('../data/output_images_10k_scenes/scene_log.csv')
print(scene_log.head())  # sanity checking

# Take as many gloss labels as there are feature rows (the leading rows of the
# log) and turn them into an integer numpy array.
num_images = latent.shape[0]
gloss_column = scene_log['gloss_cat'].iloc[:num_images]
gloss_labels = np.asarray(gloss_column).astype('int')

In [None]:
# Standardise the features (cast to float first) so the classifier trains better.
# The scaling is computed over all rows of `latent`, i.e. before the train/test split.
latent_as_float = latent.astype(float)
x_scaled = preprocessing.scale(latent_as_float)

## 2. Train gloss classifier

In [None]:
# start the clock so the whole split/train/evaluate cycle can be timed
tic = time.time()

# Hold out a quarter of the samples for testing.
# Note: no random_state is passed, so the split changes on every run;
# pass a fixed seed to train_test_split to make it repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x_scaled,
    gloss_labels,
    test_size=0.25,
)

print("Training X data shape: {}".format(x_train.shape))
print("Training Y targets shape: {}".format(y_train.shape))

# linear-kernel SVM; fit() returns the fitted estimator itself
clf = svm.SVC(kernel='linear', verbose=True).fit(x_train, y_train)

# accuracy on the held-out and on the training samples
test_acc = accuracy_score(y_test, clf.predict(x_test))
train_acc = accuracy_score(y_train, clf.predict(x_train))
print("Training data accuracy = {}, Test data accuracy = {}".format(train_acc, test_acc))

elapsed_minutes = (time.time() - tic)/60
print("FINISHED. Decoding script took {} minutes.".format(elapsed_minutes))

In [None]:
# Persist the trained gloss classifier; the file name carries the model suffix
classifier_path = './notebook_outputs/trained_gloss_classifier_{}.joblib'.format(savename)
joblib.dump(clf, classifier_path)

## 3. Use the gloss classifier just created to calculate predicted gloss values for probe images

In [None]:
# Read back the classifier stored under this model's suffix
classifier_path = './notebook_outputs/trained_gloss_classifier_{}.joblib'.format(savename)
clf = joblib.load(classifier_path)

In [None]:
# Label for this probe set; used when naming the saved predictions
probename = 'expt1_test_images'

# Features precalculated for the probe images,
# e.g. a network's latent representations of a set of experimental stimuli
probefeats = np.load('./notebook_outputs/PixelVAE01_expt1_test_images.npy')

In [None]:
# calculate predicted gloss values, i.e. the signed decision value of each probe image
# relative to the classifier hyperplane (for a linear SVM this is proportional to the
# signed distance from the hyperplane)
#
# The classifier was trained on standardised features (preprocessing.scale applied to
# `latent`), so the probe features have to go through the same per-feature centring and
# scaling before being projected onto the hyperplane; feeding raw features would evaluate
# them in a different coordinate system from the one the hyperplane was learned in.
# The scaler is fitted on the training features only, never on the probes.
# Requires `latent` from the feature-loading cell (the same cell that defines `savename`).
probe_scaler = preprocessing.StandardScaler().fit(latent.astype(float))
probefeats_scaled = probe_scaler.transform(probefeats.astype(float))
probevals = clf.decision_function(probefeats_scaled)
probevals.shape

In [None]:
# Write the predicted gloss values to disk, tagged with the model and probe-set names
gloss_vals_path = './notebook_outputs/gloss_vals_{}_{}.npy'.format(savename, probename)
np.save(gloss_vals_path, probevals)