### Import Modules

<br>

In [1]:
import requests
import json
import time
import itertools
import wget
import os
import pickle
import numpy as np

import random
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.metrics import precision_recall_curve
import scipy

sns.set_style('white')
import tensorflow as tf
import pandas as pd
import keras
from keras.applications.vgg16 import VGG16
from keras.callbacks import LearningRateScheduler
from keras import callbacks
from keras import regularizers as reg
from keras.optimizers import SGD, Adam
from keras.models import Sequential , Model , load_model
from keras.layers import Dense, Dropout, Flatten, Input, GlobalAveragePooling2D
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
import keras.initializers as init
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
from keras.models import load_model

Using TensorFlow backend.




<br>
### Open the Preprocessed Poster Data

<br>
The image data is stored as NumPy arrays inside a pickle dump file.

<br>

In [2]:
# Load the preprocessed test container from its pickle dump.
# The file is opened in a `with` block so the handle is closed as soon as
# unpickling finishes (the bare open() used before was never closed).
# NOTE(review): pickle.load can execute arbitrary code - only use it on
# dump files that come from a trusted source.
with open('vgg16_test_num2.pik' , 'rb') as pickle_file:
    x_test_dict = pickle.load(pickle_file)



<br>
### Extract data

<br>

In [3]:
# The loaded container keeps the input arrays under its 'images' entry
x_test_raw = x_test_dict['images']

# Materialise that entry as one numpy array (np.array copies its input)
x_test = np.array(x_test_raw, copy=True)




<br>
### Prepare Data for input

<br>
 - Determine the data axis order according to the keras configuration
 - Define the input and its attributes
 - Scale the input data by 1/255 and cast it to float
 - Pre-process labels
 
 <br>
 
**1 - Arrange Data for Input:**

<br>

In [4]:
# Arrange the test array in the axis order the Keras backend expects and
# record the matching per-sample input shape.
#
# The array is read as (samples, rows, cols, 3): axis 1 is taken as the row
# count and axis 2 as the column count, with 3 colour channels last.

# Define the image number of rows
img_rows = x_test.shape[1]

# Define the image number of columns
img_cols = x_test.shape[2]

# Validate the channels-last layout; reshape raises if the sizes do not match
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 3)

# Re-arrange if channels first configuration
if K.image_data_format() == 'channels_first':
    # Move the channel axis to position 1. A plain reshape to
    # (samples, 3, rows, cols) would only reinterpret the memory and
    # scramble the pixels of a 3-channel image, so transpose instead.
    x_test = np.transpose(x_test, (0, 3, 1, 2))

    # Define Input Shape
    input_shape = (3, img_rows, img_cols)

# Keep as is if channels last configuration
else:
    # Define Input shape
    input_shape = (img_rows, img_cols, 3)


<br>
**2 - Pre-process Data:**

<br>

In [5]:
# Cast to float32 before the in-place division below
x_test  = x_test.astype('float32')

# Scale the values by 1/255. This only rescales; no mean is subtracted.
# (assumes 8-bit intensities in 0..255 - TODO confirm against the dump file)
# NOTE: re-running this cell on its own divides by 255 a second time.
x_test  /= 255.0

# Display test data attributes.
# Single-argument print calls emit the same text under Python 2 and
# Python 3, unlike the Python-2-only print statement.
print('x_test shape: %s' % (x_test.shape,))
print('%d test samples' % x_test.shape[0])

x_test shape: (1001, 224, 149, 3)
1001 test samples



<br>
**3 - Output Data Pre-processing:**

<br>

In [6]:
# Read the label table from file
y_raw = pd.read_csv('Genres_labels_All_test_cleaned2.csv')

# Keep every column except the first and the last as the label matrix
# (no train/test split happens here - the file holds test labels only)
y_test  = y_raw.iloc[:, 1:-1].values

# Define the number of classes: one per retained label column
num_classes = y_test.shape[1]

# Single-argument print call: same text under Python 2 and Python 3
print('number of classes:   %d' % num_classes)

number of classes:   14



<br>
### Model Construction


<br>
**1 - Import model:**

<br>

In [7]:
# Restore the saved Keras model from its HDF5 file
model = load_model(filepath = 'vgg16_2.h5')


<br>
**2 - Perform Evaluation:**

<br>

In [11]:
# Score the model on the test set in batches of 16.
# The result is unpacked into two values, so the loaded model is expected to
# report its loss plus exactly one metric.
loss , overall_acc = model.evaluate(
    x = x_test ,
    y = y_test ,
    batch_size = 16 ,
    verbose = 1
)



In [12]:
# Show the second value returned by model.evaluate on the test set
overall_acc

0.83916096325282685


<br>
**3 - Perform prediction:**

<br>

In [13]:
# Run the model over the test inputs in batches of 16
y_predict = model.predict(
    x = x_test ,
    batch_size = 16 ,
    verbose = 1
)



In [14]:
# Element-wise absolute difference between the labels and the model output
abs_err = np.absolute(np.subtract(y_test , y_predict))

# Prediction proximity measure (1 - absolute error), kept for visualization
prox = np.subtract(1.0 , abs_err)


<br>
### Export Results to File:

<br>
**1 - Export proximity:**

<br>

In [15]:
# Wrap the proximity values in a data frame labelled with the label columns
# of y_raw (first and last column excluded) and keep only the first 100 rows
proxdf = pd.DataFrame(data = prox , columns = y_raw.columns[1:-1]).head(100)

# Preview the first rows
proxdf.head()

Unnamed: 0,Action,Adventure,Animation,Comedy,Crime,Drama,Family,History,Horror,Fantasy_Mystery,Romance,Science Fiction,Thriller,Other
0,0.803529,0.081861,0.956873,0.697093,0.885304,0.532725,0.926014,0.037256,0.831142,0.876231,0.853815,0.914309,0.762955,0.895208
1,0.757463,0.890704,0.936856,0.7131,0.135609,0.542985,0.905118,0.942459,0.232795,0.840973,0.843491,0.876127,0.29547,0.867242
2,0.804159,0.907894,0.94819,0.659484,0.879976,0.447333,0.913597,0.961436,0.834918,0.876785,0.841019,0.910418,0.781981,0.889937
3,0.745301,0.893364,0.940551,0.756476,0.869903,0.463411,0.908536,0.936392,0.734735,0.823106,0.85695,0.864241,0.661507,0.85621
4,0.292146,0.167363,0.11113,0.688136,0.822628,0.54756,0.850347,0.901248,0.709675,0.788062,0.802473,0.816224,0.665415,0.81597


In [16]:

# Save the proximity table as CSV
proxdf.to_csv(path_or_buf = 'simple_prox.csv')


<br>
**2 - Export prediction:**

<br>

In [17]:
# Wrap the raw predictions in a data frame labelled with the label columns
# of y_raw (first and last column excluded)
y_preddf = pd.DataFrame(data = y_predict , columns = y_raw.columns[1:-1])

# Save the prediction table as CSV
y_preddf.to_csv(path_or_buf = 'simplepred.csv')