In [18]:
from __future__ import division
import numpy as np 
import pickle
import scipy.io as sio
import scipy.sparse as ss 

from sklearn.svm import LinearSVC, NuSVC, SVC 
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier 
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.neural_network import MLPClassifier
from sklearn import metrics

# Seed NumPy's global random number generator with a fixed value.
np.random.seed(12345)

In [7]:
# Training data (the three loads below are commented out, so this cell
# does not define any train_* names):
#   train_feats       = BOW linguistic features
#   train_image_feats = BOC image categorical features
#   train_y           = FOILED or NOT FOILED => 1 if FOILED, else 0
#train_feats = sio.mmread('data/train_feats.mtx')
#train_image_feats = np.load('data/train_image_feats_resnet.npy')
#train_target = np.array(sio.mmread('data/train_y.mtx').todense()).tolist()[0]
#############
# Validation data: same pattern as training, read from the data/test_* files.
# The testing data comes from the karpathy 5k validation set only.
val_feats = sio.mmread('data/test_feats.mtx')
val_image_feats = np.load('data/test_image_feats_resnet.npy')
# Densify the label matrix and keep its first row as a plain Python list.
val_target = np.array(sio.mmread('data/test_y.mtx').todense()).tolist()[0]

In [3]:
# NOTE(review): these three statements repeat the validation loads of the
# preceding cell verbatim; re-running them only re-reads the same files.
val_feats = sio.mmread('data/test_feats.mtx')
val_image_feats = np.load('data/test_image_feats_resnet.npy')
# Densify the label matrix and keep its first row as a plain Python list.
val_target = np.array(sio.mmread('data/test_y.mtx').todense()).tolist()[0]

In [20]:
X_val = val_image_feats
Y_test = np.array(val_target)

import pickle

decisiontree = pickle.load(open('decisiontree_resnet_foil_nouns.pkl', 'rb'))
target_names = ['REAL', 'FAKE']

print 'Accuracy = ', metrics.accuracy_score(Y_test, decisiontree.predict(X_val))
cmat = metrics.confusion_matrix(Y_test, decisiontree.predict(X_val))
print sum(cmat.diagonal() / cmat.sum(axis=1)) / 2
print zip(target_names, cmat.diagonal()/cmat.sum(axis=1))

Accuracy =  0.500106272749
0.500106272749
[('REAL', 0.64710805281755623), ('FAKE', 0.3531044926804644)]


In [21]:
#print val_feats.shape, val_image_feats.shape

# IPython shell escape: report the on-disk size of the pickled decision tree.
!du -h decisiontree_resnet_foil_nouns.pkl

35M	decisiontree_resnet_foil_nouns.pkl


In [22]:
# Data processing, concatinating images with
# linguistic features and image features 
#X_train = np.hstack([train_feats.todense(), train_image_feats])
X_val = np.hstack([val_feats.todense(), val_image_feats])

#Y_train = np.array(train_target)

print 'loaded train sets'
Y_test = np.array(val_target)

loaded train sets


In [25]:
# Logistic Regression with 'l1' penalty (the value passed to the constructor below).
# NOTE(review): in the visible cells X_train / Y_train are only assigned in
# commented-out lines; they must already exist in the kernel for this cell
# to run -- confirm where they come from.
logistic = LogisticRegression(penalty='l1')
logistic.fit(X_train, Y_train)
# Accuracy of the fitted model on the held-out features X_val against Y_test.
print 'Accuracy = ', metrics.accuracy_score(Y_test, logistic.predict(X_val))

Accuracy =  0.751926193576


In [24]:
# Linear Support Vector Classifier with l2 regularizer and hinge loss
linearsvc = LinearSVC(penalty='l2', loss='hinge', verbose=1)
linearsvc.fit(X_train, Y_train)
print 'Accuracy = ', metrics.accuracy_score(Y_test, linearsvc.predict(X_val))

[LibLinear]Accuracy =  0.757186694652




In [None]:
# rbf kernel Support Vector Classifier with l2 regularizer and hinge loss
rbfsvc = SVC(kernel='linear')
rbfsvc.fit(X_train, Y_train)
print 'Accuracy = ', metrics.accuracy_score(Y_test, rbfsvc.predict(X_val))

In [None]:
# Decision tree classifier with a fixed random_state, fitted on the training
# set. fit() returns the estimator itself, so construction and fitting chain.
decisiontree = DecisionTreeClassifier(random_state=0).fit(X_train, Y_train)


In [None]:
print 'saving...' 

filename = 'decisiontree_resnet_foil_nouns.pkl'

import pickle 
pickle.dump(decisiontree, open(filename, 'wb'))

In [24]:
print 'loading model'
filename = 'decisiontree_BOW_resnet_foil_nouns.pkl'
import pickle 
decisiontree = pickle.load(open(filename, 'rb'))
print 'Accuracy = ', metrics.accuracy_score(Y_test, decisiontree.predict(X_val))

loading model
Accuracy =  0.884275618375


In [None]:
cmat = metrics.confusion_matrix(Y_test, decisiontree.predict(X_val))
print zip(target_names, cmat.diagonal()/cmat.sum(axis=1))

print sum(cmat.diagonal()/cmat.sum(axis=1)) / 2

In [23]:
# standard Gradient Boosting Classifier 
gb = GradientBoostingClassifier()
gb.fit(X_train, Y_train)
print 'Accuracy = ', metrics.accuracy_score(Y_test, gb.predict(X_val.toarray()))

Accuracy =  0.826456600866


In [22]:
# extremely randomized tree classifier.
ert = ExtraTreeClassifier(splitter='best')
ert.fit(X_train, Y_train)
print 'Accuracy = ', metrics.accuracy_score(Y_test, ert.predict(X_val.toarray()))

Accuracy =  0.743132123595


In [None]:
# Pickle output is a byte stream, so the file is opened in binary mode;
# the with-block flushes and closes the handle once the dump is done.
with open('decisiontreeclassifiermodel.pkl', 'wb') as model_file:
    pickle.dump(decisiontree, model_file)

In [None]:
# MLP Classifier as in the FOIL paper
mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, random_state=1)
mlp.fit(X_train, Y_train)
print 'Accuracy = ', metrics.accuracy_score(Y_test, mlp.predict(X_val.toarray()))