Dataset: https://www.kaggle.com/neha1703/movie-genre-from-its-poster/

### Research on the topic:
**Movie Genre Classification based on Poster Images with Deep Neural Networks**<br>https://www.cs.ccu.edu.tw/~wtchu/papers/2017MUSA-chu.pdf<br>**Movie poster classification into genres based on low-level features**<br>https://ieeexplore.ieee.org/document/6859750

In [1]:
from numpy import array, asarray, ndarray
import numpy as np
import tensorflow as tf

# Show which TensorFlow version is installed in this environment.
print(tf.__version__)

  from ._conv import register_converters as _register_converters


1.9.0


### multilabel
https://github.com/tholor/keras/commit/29ceafca3c4792cb480829c5768510e4bdb489c5

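Note on IMDb ID: the dataset stores the numeric part of the IMDb title ID, without the `tt` prefix or leading zeros (e.g. https://www.imdb.com/title/tt0114709/ is stored as `114709`).<br>**Only the first listed genre of each movie is used as its label.**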

In [2]:
import os
import shutil

def create_datagen_genre_folders(genre_set):
    """Create one sub-folder per genre under ``datagen_images/``.

    Args:
        genre_set: Iterable of genre names; each name becomes a directory.

    For every genre a message is printed: a failure message when the
    directory already exists, a success message otherwise.
    """
    folder_path = "datagen_images/"
    # Iterate over the argument (not the notebook-level ``genres_set`` global)
    # so the function honours whatever collection the caller passes in.
    for genre in genre_set:
        path = folder_path+genre
        try:
            # makedirs also creates the parent folder when it is missing, and
            # still raises FileExistsError when ``path`` itself already exists.
            os.makedirs(path)
        except FileExistsError:
            print("Creation of directory %s failed" % path)
        else:
            print("Successfully created the directory %s" % path)

def create_train_genre_folders(genre_set):
    """Create one sub-folder per genre under ``train_images/``.

    Args:
        genre_set: Iterable of genre names; each name becomes a directory.

    For every genre a message is printed: a failure message when the
    directory already exists, a success message otherwise.
    """
    folder_path = "train_images/"
    # Iterate over the argument (not the notebook-level ``genres_set`` global)
    # so the function honours whatever collection the caller passes in.
    for genre in genre_set:
        path = folder_path+genre
        try:
            # makedirs also creates the parent folder when it is missing, and
            # still raises FileExistsError when ``path`` itself already exists.
            os.makedirs(path)
        except FileExistsError:
            print("Creation of directory %s failed" % path)
        else:
            print("Successfully created the directory %s" % path)

def create_val_genre_folders(genre_set):
    """Create one sub-folder per genre under ``val_images/``.

    Args:
        genre_set: Iterable of genre names; each name becomes a directory.

    For every genre a message is printed: a failure message when the
    directory already exists, a success message otherwise.
    """
    folder_path = "val_images/"
    # Iterate over the argument (not the notebook-level ``genres_set`` global)
    # so the function honours whatever collection the caller passes in.
    for genre in genre_set:
        path = folder_path+genre
        try:
            # makedirs also creates the parent folder when it is missing, and
            # still raises FileExistsError when ``path`` itself already exists.
            os.makedirs(path)
        except FileExistsError:
            print("Creation of directory %s failed" % path)
        else:
            print("Successfully created the directory %s" % path)

def delete_datagen_genre_folders(genres_set):
    """Remove each genre's sub-folder (and its contents) from ``datagen_images/``.

    Args:
        genres_set: Iterable of genre names whose folders should be removed.

    A folder that does not exist is reported with a failure message and
    skipped; every removed folder is reported with a success message.
    """
    base_dir = "datagen_images/"
    for genre_name in genres_set:
        path = base_dir + genre_name
        try:
            shutil.rmtree(path)
        except FileNotFoundError:
            print("Deletion of directory %s failed" % path)
            continue
        print("Successfully deleted the directory %s" % path)
            

            
def delete_train_genre_folders(genres_set):
    """Remove each genre's sub-folder (and its contents) from ``train_images/``.

    Args:
        genres_set: Iterable of genre names whose folders should be removed.

    A folder that does not exist is reported with a failure message and
    skipped; every removed folder is reported with a success message.
    """
    base_dir = "train_images/"
    for genre_name in genres_set:
        path = base_dir + genre_name
        try:
            shutil.rmtree(path)
        except FileNotFoundError:
            print("Deletion of directory %s failed" % path)
            continue
        print("Successfully deleted the directory %s" % path)
            
def delete_val_genre_folders(genres_set):
    """Remove each genre's sub-folder (and its contents) from ``val_images/``.

    Args:
        genres_set: Iterable of genre names whose folders should be removed.

    A folder that does not exist is reported with a failure message and
    skipped; every removed folder is reported with a success message.
    """
    base_dir = "val_images/"
    for genre_name in genres_set:
        path = base_dir + genre_name
        try:
            shutil.rmtree(path)
        except FileNotFoundError:
            print("Deletion of directory %s failed" % path)
            continue
        print("Successfully deleted the directory %s" % path)

In [3]:
import glob
import csv
import cv2

# training controls
batch_size = 25
epochs = 1
training_size = 0.7 # fraction of posters used for training; the remaining 30% is used for validation

# input image dimensions
img_rows, img_cols = 268, 182

# data holders
x_test, x_train = [], []
y_test, y_train = [], []
tempY = {}  # integer IMDb id -> first listed genre of that movie

# list of image files in SampleMoviePosters folder
flist = glob.glob('SampleMoviePosters/*.jpg')

# size of training data
length = int(len(flist)*training_size)

# extract data from CSV file
folder_path = "SampleMoviePosters/"
genres_set = set()
unk_count, tot_count = 0, 0

# Open the dataset inside a ``with`` block so the file handle is closed once
# parsing is done; ``newline=''`` is what the csv module expects of its input.
with open('data/MovieGenre.csv', encoding='utf8', errors='replace', newline='') as csv_file:
    dataset = csv.reader(csv_file, delimiter=',')

    # skip header line
    next(dataset)

    for imdb_id, link, title, score, genre, poster in dataset:
        # genres are '|'-separated; an empty string means no genre was given
        g_list = genre.split('|')
        for g in g_list:
            if g == '':
                unk_count += 1
            else:
                genres_set.add(g)
        # NOTE(review): movies without a genre are stored with the label ''
        # here — confirm that downstream cells are meant to keep them.
        tempY[int(imdb_id)] = g_list[0]
        tot_count += 1

print("{:d} movies out of {:d} have unspecified genre.".format(unk_count,tot_count))
print("{:d} unique genres (unknown genre not included):\n".format(len(genres_set)))
for g in genres_set:
    print(g)
print()


145 movies out of 40108 have unspecified genre.
28 unique genres (unknown genre not included):

Sport
Film-Noir
Sci-Fi
Crime
Short
Adult
Animation
Music
Thriller
Adventure
Drama
Musical
Comedy
Reality-TV
Action
Mystery
War
Family
News
Horror
History
Talk-Show
Fantasy
Western
Biography
Game-Show
Romance
Documentary



In [4]:
#### CREATE IMAGE DATA FOLDERS FOR SORTING ####
# Build the per-genre folder tree for the datagen, training and validation
# image sets, in that order.
for make_genre_folders in (create_datagen_genre_folders,
                           create_train_genre_folders,
                           create_val_genre_folders):
    make_genre_folders(genres_set)

Creation of directory datagen_images/Sport failed
Creation of directory datagen_images/Film-Noir failed
Creation of directory datagen_images/Sci-Fi failed
Creation of directory datagen_images/Crime failed
Creation of directory datagen_images/Short failed
Creation of directory datagen_images/Adult failed
Creation of directory datagen_images/Animation failed
Creation of directory datagen_images/Music failed
Creation of directory datagen_images/Thriller failed
Creation of directory datagen_images/Adventure failed
Creation of directory datagen_images/Drama failed
Creation of directory datagen_images/Musical failed
Creation of directory datagen_images/Comedy failed
Creation of directory datagen_images/Reality-TV failed
Creation of directory datagen_images/Action failed
Creation of directory datagen_images/Mystery failed
Creation of directory datagen_images/War failed
Creation of directory datagen_images/Family failed
Creation of directory datagen_images/News failed
Creation of directory dat

In [5]:
from IPython.display import Image, display
from shutil import copyfile

# extract image data
# Each poster is copied into the per-genre folder trees and loaded into memory.
# The first ``length`` files of ``flist`` form the training set and the rest
# the validation/test set; the position counts every file, including skipped ones.
for i, filename in enumerate(flist):
    # The file stem is the numeric IMDb id. os.path handles both '/' and '\\'
    # separators, and the globbed path is used as the copy source so the file
    # is found even if its name differs from str(name) (e.g. leading zeros).
    name = int(os.path.splitext(os.path.basename(filename))[0])
    if i == 0:
        # preview the first poster as a sanity check
        print(filename)
        display(Image(filename))

    genre = tempY.get(name)
    if genre is None:
        # poster has no matching row in the CSV
        continue

    img = cv2.imread(filename)
    if img is None:
        # cv2.imread returns None instead of raising for unreadable files
        print("Skipping unreadable image %s" % filename)
        continue

    # NOTE(review): a movie without a genre has genre == '', so its copies
    # land directly in the root of each folder tree — confirm this is intended.
    target_name = str(name)+'.jpg'
    copyfile(filename, 'datagen_images/'+genre+'/'+target_name)
    if i < length:
        copyfile(filename, 'train_images/'+genre+'/'+target_name)
        x_train.append(img)
        y_train.append(genre)
    else:
        copyfile(filename, 'val_images/'+genre+'/'+target_name)
        x_test.append(img)
        y_test.append(genre)

#converting the data from lists to numpy arrays
# assumes every poster has the same pixel dimensions — TODO confirm
x_train = asarray(x_train,dtype=float)
x_test = asarray(x_test,dtype=float)
y_train = asarray(y_train,dtype=str)
y_test = asarray(y_test,dtype=str)

#scaling the 0-255 pixel values down to the 0-1 range
x_train /= 255
x_test /= 255

#printing stats about the features
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

print("total examples: {:d}".format(x_train.shape[0]+x_test.shape[0]))

SampleMoviePosters/23556.jpg


<IPython.core.display.Image object>

x_train shape: (697, 268, 182, 3)
697 train samples
300 test samples
total examples: 997


In [10]:
from keras.preprocessing.image import ImageDataGenerator

# Augmenting generator for the training images.
# The train/validation split is already materialised on disk (train_images/
# vs. val_images/), so no ``validation_split`` is configured here: that option
# only takes effect when flow_from_directory is called with ``subset=``.
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=False)

# Validation images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

# Class labels are taken from the sub-folder names (one folder per genre).
train_generator = train_datagen.flow_from_directory(
        directory='train_images/',
        target_size=(224, 224), # images will be resized for AlexNet
        color_mode='rgb',
        batch_size=32,
        class_mode='categorical')

valid_generator = test_datagen.flow_from_directory(
        directory='val_images/',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical')

Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.


In [9]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Input
from keras.layers import Conv2D, MaxPooling2D, Activation
from keras.layers.normalization import BatchNormalization

np.random.seed(1000)

# One output unit per genre folder, because the directory generators derive
# their categorical labels from those folders. (This was hard-coded to 17,
# which does not match the genre set extracted from the CSV.)
num_classes = len(genres_set)

#Instantiate an empty model
model = Sequential()

# 1st Convolutional Layer
model.add(Conv2D(filters=96, input_shape=(224,224,3), kernel_size=(11,11), strides=(4,4), padding='valid'))
model.add(Activation('relu'))
# Max Pooling
model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='valid'))

# 2nd Convolutional Layer
# NOTE(review): the original AlexNet uses a 5x5 kernel here — confirm that
# 11x11 is intentional.
model.add(Conv2D(filters=256, kernel_size=(11,11), strides=(1,1), padding='valid'))
model.add(Activation('relu'))
# Max Pooling
model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='valid'))

# 3rd Convolutional Layer
model.add(Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='valid'))
model.add(Activation('relu'))

# 4th Convolutional Layer
model.add(Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='valid'))
model.add(Activation('relu'))

# 5th Convolutional Layer
model.add(Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), padding='valid'))
model.add(Activation('relu'))
# Max Pooling
model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='valid'))

# Passing it to a Fully Connected layer
model.add(Flatten())
# 1st Fully Connected Layer
# (no input_shape here: the layer takes its input size from Flatten's output)
model.add(Dense(4096))
model.add(Activation('relu'))
# Add Dropout to prevent overfitting
model.add(Dropout(0.4))

# 2nd Fully Connected Layer
model.add(Dense(4096))
model.add(Activation('relu'))
# Add Dropout
model.add(Dropout(0.4))

# 3rd Fully Connected Layer
model.add(Dense(1000))
model.add(Activation('relu'))
# Add Dropout
model.add(Dropout(0.4))

# Output Layer
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.summary()

# Compile the model
model.compile(loss=keras.losses.categorical_crossentropy, optimizer='adam', metrics=['accuracy'])

# Training is not run yet. With the directory generators it would look like:
#model.fit_generator(
#        train_generator,
#        steps_per_epoch=2000,
#        epochs=20,
#        validation_data=valid_generator,
#        validation_steps=800)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 54, 54, 96)        34944     
_________________________________________________________________
activation_10 (Activation)   (None, 54, 54, 96)        0         
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 27, 27, 96)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 17, 17, 256)       2973952   
_________________________________________________________________
activation_11 (Activation)   (None, 17, 17, 256)       0         
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 8, 8, 256)         0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 6, 6, 384)         885120    
__________

FileNotFoundError: [Errno 2] No such file or directory: 'train_images/Short/13472.jpg'

In [8]:
#### CLEAN UP ####
# Remove the per-genre folders of the datagen, training and validation image
# sets, in that order.
for remove_genre_folders in (delete_datagen_genre_folders,
                             delete_train_genre_folders,
                             delete_val_genre_folders):
    remove_genre_folders(genres_set)

Successfully deleted the directory datagen_images/Sport
Successfully deleted the directory datagen_images/Film-Noir
Successfully deleted the directory datagen_images/Sci-Fi
Successfully deleted the directory datagen_images/Crime
Successfully deleted the directory datagen_images/Short
Successfully deleted the directory datagen_images/Adult
Successfully deleted the directory datagen_images/Animation
Successfully deleted the directory datagen_images/Music
Successfully deleted the directory datagen_images/Thriller
Successfully deleted the directory datagen_images/Adventure
Successfully deleted the directory datagen_images/Drama
Successfully deleted the directory datagen_images/Musical
Successfully deleted the directory datagen_images/Comedy
Successfully deleted the directory datagen_images/Reality-TV
Successfully deleted the directory datagen_images/Action
Successfully deleted the directory datagen_images/Mystery
Successfully deleted the directory datagen_images/War
Successfully deleted th