### Import Modules

<br>

In [1]:
import requests
import json
import time
import itertools
import wget
import os
import pickle
import numpy as np

import random
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.metrics import precision_recall_curve
import scipy

sns.set_style('white')
import tensorflow as tf
import pandas as pd
import keras
from keras.applications.vgg16 import VGG16
from keras.callbacks import LearningRateScheduler
from keras import callbacks
from keras import regularizers as reg
from keras.optimizers import SGD, Adam
from keras.models import Sequential , Model , load_model
from keras.layers import Dense, Dropout, Flatten, Input, concatenate
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
import keras.initializers as init
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
from keras.models import load_model

Using TensorFlow backend.




<br>
### Open the Preprocessed Poster Data

<br>
The image data is stored as NumPy arrays in a pickle dump file, and the numerical features are stored in a CSV file.

<br>
** 1 - Image Features:**

<br>

In [2]:
# Load the pre-processed poster image container from the pickle dump.
# The context manager closes the file handle as soon as the payload has been
# read, instead of leaving it open until garbage collection.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
with open('test_num.pik' , 'rb') as pik_file:
    x_test_dict = pickle.load(pik_file)


<br>
** 2 - Color Numerical Features: **

<br>

In [3]:
# Read the numerical feature table for the test posters
x_test_raw = pd.read_csv('Features_test.csv')

# Columns handed to the model, in this exact order
feature_cols = ['mR' , 'mG' , 'mB' , 'stdR' , 'stdG' , 'stdB' , 'Dc1' , 'Dc2' , 'Dc3']

# Keep every row, restricted to the selected columns, as a plain NumPy array
x_data = x_test_raw.loc[: , feature_cols].values



<br>
### Specify the training/test split

<br>
 - The data will be split into training and test for validation purpose
 - Data augmentation will be subsequently applied for both training and test
 
 <br>

In [4]:

# Test data for the image input:
# ------------------------------
# Pull the image list out of the unpickled container
xi_test_raw = x_test_dict['images']

# Stack it into a single array (no split is performed: every sample is kept)
xi_test  = np.array(xi_test_raw)


# Test data for the numerical input:
# ----------------------------------
# Every row of the selected feature columns is used as-is (no split here either)
xn_test  = x_data

# Both model inputs must describe the same samples, row for row
assert xi_test.shape[0] == xn_test.shape[0], \
    'image / numerical sample counts differ: %d vs %d' % (xi_test.shape[0], xn_test.shape[0])

# Single-argument print(...) produces the same text under Python 2 and Python 3
print('Image Data Shape :  {}'.format(xi_test.shape))
print('Numerical Data Shape :  {}'.format(xn_test.shape))


Image Data Shape :  (1001, 128, 85, 3)
Numerical Data Shape :  (1001, 9)




<br>
### Prepare Data for input

<br>
 - Determine the data axis order according to the Keras configuration
 - Define the input and its attributes
 - Center input data and ensure its type as float 
 - Pre-process labels
 
 <br>
 
** 1 - Arrange Data for Input:**

<br>

** a - Image Data **

<br>

In [5]:
# The image array is treated as channels-last here: (samples, rows, cols, 3)
n_channels = 3

# Image number of rows
img_rows = xi_test.shape[1]

# Image number of columns
img_cols = xi_test.shape[2]

# Fail early with a readable message if the array is not in the expected layout
# (this also triggers if the cell is re-run after the axes were already moved)
if xi_test.ndim != 4 or xi_test.shape[3] != n_channels:
    raise ValueError('Expected images shaped (samples, rows, cols, %d), got %s'
                     % (n_channels, xi_test.shape))

# Channels-first configuration: the channel axis has to be MOVED to position 1.
# A plain reshape keeps the memory order and would scramble the pixel values,
# so the axes are permuted with transpose instead.
if K.image_data_format() == 'channels_first':
    xi_test = np.transpose(xi_test, (0, 3, 1, 2))

    # Define Input Shape
    input_shape = (n_channels, img_rows, img_cols)

# Channels-last configuration: the data is already in the right layout
else:
    # Define Input shape
    input_shape = (img_rows, img_cols, n_channels)




<br>
** b - Numerical Data:**

<br>

In [6]:
# Number of numerical features per sample (size of the second axis)
dat_cols = np.shape(xn_test)[1]



<br>
** 2 - Pre-process Data: **

<br>

In [7]:
# Image Data:
# ----------

# Transform to float (astype returns a new float32 array)
xi_test  = xi_test.astype('float32')

# Scale the pixel values by 1/255 (this rescales, it does not centre;
# presumably maps 8-bit intensities into [0, 1] -- confirm against the source images)
# NOTE: this cell rebinds xi_test / xn_test, so re-running it on its own applies
# the scaling a second time; re-run from the data-loading cells instead.
xi_test  /= 255.0

# Numerical Data:
# ---------------

# Transform to float
xn_test  = xn_test.astype('float32')

# Center each feature column
# NOTE(review): mean and std are computed from the test set itself; if the model
# was trained on data standardised with training-set statistics, those values
# should be reused here -- confirm against the training notebook.
xn_test  -= xn_test.mean(0)

# Scale each feature column to unit standard deviation; a constant column has
# std == 0 and would turn into NaN, so its divisor is replaced by 1 (column stays 0)
col_std = xn_test.std(0)
col_std[col_std == 0] = 1.0
xn_test  /= col_std



# Display image test data attributes
# (single-argument print(...) gives the same text under Python 2 and Python 3)
print('Image Test shape: {}'.format(xi_test.shape))
print('{} test samples'.format(xi_test.shape[0]))

# Display numeric test data attributes
print('Numeric Test shape: {}'.format(xn_test.shape))
print('{} test samples'.format(xn_test.shape[0]))

Image Test shape: (1001, 128, 85, 3)
1001 test samples
Numeric Test shape: (1001, 9)
1001 test samples



<br>
** 3 - Output Data Pre-processing : **

<br>

In [8]:
# Read the label table for the test posters
y_raw = pd.read_csv('Genres_labels_All_test_cleaned2.csv')

# Label matrix: every row, all columns except the first and the last one
# (no train/test split is performed here)
y_test  = y_raw.iloc[:, 1:-1].values

# Define the number of classes (one per remaining label column)
num_classes = y_test.shape[1]

# Single-argument print(...) gives the same text under Python 2 and Python 3
print('number of classes:   {}'.format(num_classes))

number of classes:   14



<br>
### Model Construction

<br>
 - Upload model
 - Evaluate and predict

<br>
** 1 - Load Model: **

<br>

In [10]:
# Restore the previously saved Keras model from disk
model_file = 'comb2_2.h5'
model = load_model(model_file)


<br>
** 2 - Evaluate Model:**

<br>

In [11]:
# Inputs and targets as dictionaries (keys are presumably the model's
# input / output layer names -- verify against the saved model)
eval_inputs  = {'imag_in' : xi_test , 'num_in' : xn_test }
eval_targets = {'comb_out' : y_test}

# Evaluate on the test data; the two returned values are unpacked below
loss , overall_acc = model.evaluate(eval_inputs , eval_targets ,
                                    batch_size = 16 , verbose = 1)



In [12]:
# Display the second value returned by model.evaluate (presumably the accuracy metric)
overall_acc

0.83851874982203156


<br>
** 3 - Generate predictions:**

<br>

In [13]:
# Feed both inputs to the model (same dictionary keys as used for evaluation)
pred_inputs = {'imag_in' : xi_test , 'num_in' : xn_test }

# Model outputs for every test sample
y_predict = model.predict(pred_inputs , batch_size = 16 , verbose = 1)



In [14]:
# Element-wise difference between the true labels and the model outputs
err = y_test - y_predict

# Magnitude of that difference
abs_err = np.absolute(err)

# Prediction proximity measure (1 - absolute error), used for visualization
prox = np.subtract(1.0 , abs_err)


<br>
### Export Results to File:

<br>
** 1 - Export proximity : **

<br>

In [15]:
# Column labels: the label table's columns without the first and the last one
genre_cols = y_raw.columns[1:-1]

# Wrap the proximity matrix in a data frame and keep only the first 100 rows
proxdf = pd.DataFrame(prox , columns = genre_cols).head(100)

# Preview the first rows
proxdf.head()

Unnamed: 0,Action,Adventure,Animation,Comedy,Crime,Drama,Family,History,Horror,Fantasy_Mystery,Romance,Science Fiction,Thriller,Other
0,0.688529,0.229071,0.791669,0.602198,0.751309,0.532789,0.769648,0.197894,0.692969,0.763752,0.694492,0.75747,0.661929,0.745647
1,0.662067,0.725738,0.738332,0.573834,0.29229,0.534625,0.725426,0.748806,0.347365,0.701693,0.650856,0.710061,0.369363,0.702646
2,0.682454,0.773011,0.761736,0.602712,0.748339,0.451948,0.760634,0.784908,0.684295,0.743644,0.684618,0.740243,0.623482,0.741013
3,0.707502,0.807916,0.825127,0.625958,0.761325,0.448137,0.796048,0.817877,0.708494,0.781183,0.730641,0.775859,0.661675,0.779525
4,0.318683,0.267216,0.233159,0.567161,0.7303,0.519631,0.760861,0.800282,0.690217,0.737702,0.691315,0.744089,0.643258,0.741278


In [16]:

# Save the proximity table as CSV
prox_csv = 'comb2_prox.csv'
proxdf.to_csv(prox_csv)


<br>
** 2 - Export Prediction **

<br>

In [17]:
# Column labels: the label table's columns without the first and the last one
pred_cols = y_raw.columns[1:-1]

# Put the model outputs for all test samples into a labelled data frame
y_preddf = pd.DataFrame(data = y_predict , columns = pred_cols)

# Save the predictions as CSV
y_preddf.to_csv('comb2pred.csv')