In [1]:
import numpy as np
import theano
import theano.tensor as T

import lasagne
import time

In [2]:
class ColorDescriptor:
    def __init__(self, bins):
        # store the number of bins for the 3D histogram
        self.bins = bins
 
    def describe(self, image):
        # convert the image to the HSV color space and initialize
        # the features used to quantify the image
        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        features = []
 
        # grab the dimensions and compute the center of the image
        (h, w) = image.shape[:2]
        (cX, cY) = (int(w * 0.5), int(h * 0.5))
        
        # divide the image into four rectangles/segments (top-left,
        # top-right, bottom-right, bottom-left)
        segments = [(0, cX, 0, cY), (cX, w, 0, cY), (cX, w, cY, h),
            (0, cX, cY, h)]
 
        # construct an elliptical mask representing the center of the
        # image
        (axesX, axesY) = (int(w * 0.75) / 2, int(h * 0.75) / 2)
        ellipMask = np.zeros(image.shape[:2], dtype = "uint8")
        cv2.ellipse(ellipMask, (cX, cY), (axesX, axesY), 0, 0, 360, 255, -1)
 
        # loop over the segments
        for (startX, endX, startY, endY) in segments:
            # construct a mask for each corner of the image, subtracting
            # the elliptical center from it
            cornerMask = np.zeros(image.shape[:2], dtype = "uint8")
            cv2.rectangle(cornerMask, (startX, startY), (endX, endY), 255, -1)
            cornerMask = cv2.subtract(cornerMask, ellipMask)
 
            # extract a color histogram from the image, then update the
            # feature vector
            hist = self.histogram(image, cornerMask)
            features.extend(hist)
 
        # extract a color histogram from the elliptical region and
        # update the feature vector
        hist = self.histogram(image, ellipMask)
        features.extend(hist)
 
        # return the feature vector
        return features
    def histogram(self, image, mask):
        # extract a 3D color histogram from the masked region of the
        # image, using the supplied number of bins per channel; then
        # normalize the histogram
        hist = cv2.calcHist([image], [0, 1, 2], mask, self.bins,
            [0, 180, 0, 256, 0, 256])
        hist = cv2.normalize(hist,hist).flatten()

        # return the histogram
        return hist

In [3]:
def build_model(input_var):
    network = lasagne.layers.InputLayer(shape=(None,1, 100,100), input_var = input_var)
    print lasagne.layers.get_output_shape(network)
    
    network = lasagne.layers.Conv2DLayer(network, num_filters = 100, filter_size = (3,3), stride = (2,2), pad = 'same',
                                         nonlinearity = lasagne.nonlinearities.tanh)
    
    print lasagne.layers.get_output_shape(network)
    
    network = lasagne.layers.Conv2DLayer(network, num_filters = 50, filter_size = (3,3), stride = (2,2), pad = 'same',
                                         nonlinearity = lasagne.nonlinearities.tanh)
    
    print lasagne.layers.get_output_shape(network)
    
    
    network = lasagne.layers.Conv2DLayer(network, num_filters = 25, filter_size = (3,3), stride = (2,2), pad = 'same',
                                         nonlinearity = lasagne.nonlinearities.tanh)
    
    print lasagne.layers.get_output_shape(network)
    
    network = lasagne.layers.DenseLayer(network, num_units = 800, nonlinearity = lasagne.nonlinearities.rectify)
    print lasagne.layers.get_output_shape(network)
    
    network = lasagne.layers.DenseLayer(network, num_units = 25, nonlinearity = lasagne.nonlinearities.softmax)
    print lasagne.layers.get_output_shape(network)
    
    return network

In [4]:
def iterate_minibatches(X, Y, batchsize):
    row, col, z, y = X.shape
    for i in range(int(row/batchsize)) :
        yield X[batchsize*i: batchsize*(i+1),:], Y[batchsize*i: batchsize*(i+1),:]
#     print i*batchsize, row
#     if i*batchsize < row:
#         print "I like you!"
#         yield X[batchsize*(i+1):,:], Y[batchsize*(i+1):,:]

In [5]:
def train_model(X_train, Y_train, X_test, Y_test):
    input_var = T.dtensor4("inputs")
    target_var = T.lmatrix("targets")
    X_train = X_train.reshape((-1,1,100,100))
    X_test = X_test.reshape((-1,1,100,100))
    network = build_model(input_var)
    prediction = lasagne.layers.get_output(network)
    
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    params = lasagne.layers.get_all_params(network, trainable=True)
    #updates = lasagne.updates.rmsprop(loss, params, learning_rate=0.01, rho=0.9, epsilon=1e-06)
    updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), T.argmax(target_var,axis = 1)),
                  dtype=theano.config.floatX)
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
    num_epochs = 20
    
            
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_acc = 0
        train_batches = 0
        start_time = time.time()
        for i,batch in enumerate(iterate_minibatches(X_train, Y_train, 100)):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            err, acc = val_fn(inputs, targets)
            train_acc += acc
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(X_test, Y_test, 100):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  training accuracy:\t\t{:.2f} %".format(
            train_acc / train_batches * 100))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))
    
    return network

In [6]:
import pandas as pd
df = pd.read_csv("./../GenresFullLabels.csv",delimiter=",").fillna("-NA-")
df.head()
print df.shape

(39263, 4)


In [7]:
columns = list(df.columns.values)
print columns

['Genre', 'imdbId', 'Feature1', 'Feature2']


In [14]:
import scipy.misc
from skimage.io import imread
import os
import fnmatch
import numpy as np
import cv2
def load_dataset():
    imageDir = "./../PostersResized40/"  
    imageData = np.zeros((38500,100,100))
    outputData = np.zeros((38500,25),dtype=int)
    cd = ColorDescriptor((8, 12, 3))
    for index, row in df.iterrows(): 
        filename = str(row[columns[1]])+".jpg"
        label = row[columns[3]]
        original_image = imread(imageDir+filename)
        features = cd.describe(original_image)
        print len(features)
        imageData[index,:,:] = original_image
        outputData[index,label] = 1
        if index == 38499:
            break
    return imageData,outputData

In [15]:
imageData,labels = load_dataset()
print imageData.shape

1440


ValueError: could not broadcast input array from shape (100,100,3) into shape (100,100)

In [17]:
print labels[4,:]

[0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [24]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(imageData, labels, test_size=0.25, random_state=42)
network = train_model(X_train, y_train, X_test, y_test)

(None, 1, 100, 100)
(None, 100, 50, 50)
(None, 50, 25, 25)
(None, 25, 13, 13)
(None, 800)
(None, 25)
Epoch 1 of 20 took 997.919s
  training loss:		2.179745
  training accuracy:		29.85 %
  validation loss:		2.147997
  validation accuracy:		28.78 %
Epoch 2 of 20 took 813.223s
  training loss:		2.128612
  training accuracy:		30.93 %
  validation loss:		2.139650
  validation accuracy:		29.62 %
Epoch 3 of 20 took 798.240s
  training loss:		2.109776
  training accuracy:		31.47 %
  validation loss:		2.142338
  validation accuracy:		29.45 %
Epoch 4 of 20 took 1475.308s
  training loss:		2.086660
  training accuracy:		32.54 %
  validation loss:		2.153209
  validation accuracy:		28.87 %
Epoch 5 of 20 took 873.564s
  training loss:		2.049530
  training accuracy:		34.39 %
  validation loss:		2.176484
  validation accuracy:		28.49 %
Epoch 6 of 20 took 735.068s
  training loss:		1.985407
  training accuracy:		38.10 %
  validation loss:		2.212893
  validation accuracy:		27.99 %
Epoch 7 of 20 took 100

KeyboardInterrupt: 

In [61]:
print YVal

[[0 0 1 ..., 0 0 0]
 [0 1 0 ..., 0 0 0]
 [0 1 0 ..., 0 0 0]
 ..., 
 [0 0 1 ..., 0 0 0]
 [0 1 0 ..., 0 0 0]
 [1 0 0 ..., 0 0 0]]


In [11]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
print type(XVal)
def testAccuracy(X_test,Y_test):
    X_test = X_test.reshape((-1,1,100,100))
    predictedLabels = lasagne.layers.get_output(network,inputs = X_test).eval()
    return predictedLabels
predictedLabels = testAccuracy(XVal,YVal)

<type 'numpy.ndarray'>


In [12]:
print predictedLabels.shape
print YVal.shape
Y_Pred = np.zeros(predictedLabels.shape,dtype = int)
rows = np.argmax(predictedLabels, axis=1)
for i in range(YVal.shape[0]):
    Y_Pred[i,rows[i]] = 1
print  Y_Pred


(1000, 20)
(1000, 20)
[[1 0 0 ..., 0 0 0]
 [0 1 0 ..., 0 0 0]
 [0 1 0 ..., 0 0 0]
 ..., 
 [0 1 0 ..., 0 0 0]
 [0 1 0 ..., 0 0 0]
 [0 1 0 ..., 0 0 0]]


In [16]:
Y_test = YVal
accuracyscore = accuracy_score(Y_test, Y_Pred)
f1score = f1_score(Y_test, Y_Pred, average='weighted') 
precisionscore = precision_score(Y_test, Y_Pred, average='weighted')
recallscore = recall_score(Y_test, Y_Pred, average='weighted') 
print("Reported Accuracy is {}".format(accuracyscore))
print("precision_score is {}".format(precisionscore))
print("recall_score is {}".format(recallscore))
print("f1_score is {}".format(f1score))

Reported Accuracy is 0.367
precision_score is 0.349254336852
recall_score is 0.367
f1_score is 0.304249177662
