In [1]:
from __future__ import division

from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import SGD
from pprint import pprint
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import classification_report
from sklearn.preprocessing import Imputer
import json
import numpy as np
import sys
import pickle

Using Theano backend.


# Load

In [2]:
basePath = "/mfs/replicated/tivvit/externalImages/"

In [3]:
imgsProperties = pickle.load(open(basePath + "img_prop.pkl", "rb"))
imgsResults = pickle.load(open(basePath + "img_res.pkl", "rb"))

In [4]:
class NeuralLearner:
    def __init__(self):
        self.model = Sequential()

    def vectorize(self, listOfDicts):
        self.vec = DictVectorizer()
        ldArray = self.vec.fit_transform(listOfDicts).toarray()
        ldArray = Imputer().fit_transform(ldArray)
        ldNames = np.asarray(self.vec.get_feature_names())
        return ldNames, ldArray

    def train(self, XArray, YArray):
        self.model.add(Dense(input_dim=len(XArray[0]),
                             output_dim=len(XArray[0]) / 2,
                             init="uniform"))
        self.model.add(Activation("tanh"))

        self.model.add(Dense(input_dim=len(XArray[0]) / 2,
                             output_dim=len(XArray[0]) / 2,
                             init="uniform"))
        self.model.add(Activation("tanh"))
#       self.model.add(Dropout(0.3))

        self.model.add(Dense(input_dim=len(XArray[0]) / 2,
                             output_dim=42,
                             init="uniform"))
        self.model.add(Activation("tanh"))

        self.model.add(Dense(input_dim=42,
                             output_dim=42,
                             init="uniform"))
        self.model.add(Activation("tanh"))
        self.model.add(Dense(input_dim=42,
                             output_dim=len(YArray[0]),
                             init="uniform"))
        self.model.add(Activation("softmax"))
        
        self.model.compile(loss='categorical_crossentropy',
                           optimizer=SGD(lr=0.05,
                                         momentum=0.9,
                                         decay=0.0001,
                                         nesterov=True),
                           metrics=['accuracy'])

        self.model.fit(XArray, YArray, nb_epoch=4, batch_size=200, verbose=0)

    def evaluate(self, XTest, YTest):
        objective_score = self.model.evaluate(XTest, YTest, batch_size=32)
        classes = self.model.predict_classes(XTest, batch_size=32)     
        #proba = self.model.predict_proba(XTest, batch_size=32)
        
        print("Objective_score:", objective_score)
        YTest = self.prepairYArray(YTest)
        print(classification_report(YTest, classes))

    def prepairYArray(self, array):
        return np.array([np.where(r == 1)[0][0] for r in array])

    def split(self, XArray, YArray):
        stratifiedSplit = StratifiedShuffleSplit(YArray,
                                                 test_size=0.2,
                                                 random_state=0)
        for train_index, test_index in stratifiedSplit:
            X_train, X_test = XArray[train_index], XArray[test_index]
            Y_train, Y_test = YArray[train_index], YArray[test_index]
        
        return X_train, Y_train, X_test, Y_test

    def storeModel(self, prefix):
        json_string = self.model.to_json()
        open(prefix + '.json', 'w').write(json_string)
        self.model.save_weights(prefix + '.h5')

# Vectorize

In [5]:
learner = NeuralLearner()

XNames, XArray = learner.vectorize(imgsProperties)
#pickle.dump(learner.vec, open("xvec.pkl", "wb"))
YNames, YArray = learner.vectorize(imgsResults)
#pickle.dump(YNames, open("ynames.pkl", "wb"))

# Scale

In [6]:
sc = StandardScaler()
sc.fit(XArray)
XArray_std = sc.transform(XArray)

# Split

In [7]:
print(YArray.shape, XArray.shape, XArray_std.shape)
XA_train, YA_train, XA_test, YA_test = learner.split(XArray_std, YArray)

((30327, 3), (30327, 144), (30327, 144))


# Train

In [8]:
learner.train(XA_train, YA_train)

  return K.variable(np.random.uniform(low=-scale, high=scale, size=shape),
  return variable(np.zeros(shape), dtype, name)


# Test

In [9]:
learner.evaluate(XA_test, YA_test)

('Objective_score:', [0.62960746255153111, 0.72590394551471793])
             precision    recall  f1-score   support

          0       0.79      0.65      0.72      5752
          1       0.64      0.72      0.68      6647
          2       0.78      0.80      0.79      5799

avg / total       0.73      0.73      0.73     18198



# Store

In [10]:
#learner.storeModel(storeModelprefix)