In [1]:
import pandas as pd
import numpy as np
import pickle

from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout, Activation
from keras.layers import Conv2D, MaxPooling2D, LeakyReLU
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

In [2]:
# preprocess dataframe
poster_dir = "../data/posters/"
poster_df = pd.read_csv("../data/MovieGenre.csv", encoding = "ISO-8859-1")
poster_df = poster_df.drop_duplicates(subset=['imdbId'], keep="last")
poster_df["Genre"] = poster_df["Genre"].str.split("|")
poster_df["filename"] = poster_df["imdbId"].astype(str) + ".jpg"
poster_df = poster_df[poster_df["Genre"].apply(lambda t: isinstance(t, list))]

In [3]:
# create image data generator
datagen = ImageDataGenerator(rescale=1./255., horizontal_flip=True, validation_split=0.25)

train_generator = datagen.flow_from_dataframe(dataframe=poster_df,
                                              directory=poster_dir,
                                              x_col="filename",
                                              y_col="Genre",
                                              subset="training",
                                              batch_size=500,
                                              shuffle=True,
                                              seed=42,
                                              class_mode="categorical",
                                              target_size=(182, 268))


valid_generator=datagen.flow_from_dataframe(dataframe=poster_df,
                                            directory=poster_dir,
                                            x_col="filename",
                                            y_col="Genre",
                                            subset="validation",
                                            batch_size=500,
                                            shuffle=True,
                                            seed=42,
                                            class_mode="categorical",
                                            target_size=(182, 268))

num_classes = len(train_generator.class_indices)
print(train_generator.class_indices)



Found 27333 validated image filenames belonging to 28 classes.
Found 9110 validated image filenames belonging to 28 classes.
{'Action': 0, 'Adult': 1, 'Adventure': 2, 'Animation': 3, 'Biography': 4, 'Comedy': 5, 'Crime': 6, 'Documentary': 7, 'Drama': 8, 'Family': 9, 'Fantasy': 10, 'Film-Noir': 11, 'Game-Show': 12, 'History': 13, 'Horror': 14, 'Music': 15, 'Musical': 16, 'Mystery': 17, 'News': 18, 'Reality-TV': 19, 'Romance': 20, 'Sci-Fi': 21, 'Short': 22, 'Sport': 23, 'Talk-Show': 24, 'Thriller': 25, 'War': 26, 'Western': 27}




In [None]:
# use CNN model for classification
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=(182,268,3)))
model.add(LeakyReLU(alpha=0.1))
model.add(Conv2D(64, (3, 3)))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(Conv2D(64, (3, 3)))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Conv2D(128, (3, 3), padding='same'))
model.add(LeakyReLU(alpha=0.1))
model.add(Conv2D(128, (3, 3)))
model.add(LeakyReLU(alpha=0.1))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(512))
model.add(LeakyReLU(alpha=0.1))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

adam = Adam(learning_rate=1e-4, clipnorm=1.0)
callback = EarlyStopping(patience=2)
model.compile(optimizer=adam, loss='categorical_crossentropy',
              metrics=['accuracy'])

STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size

history = model.fit(x=train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    epochs=10,
                    validation_data=valid_generator,
                    validation_steps=STEP_SIZE_VALID,
                    callbacks=[callback])

Epoch 1/10
 6/54 [==>...........................] - ETA: 1:44:29 - loss: 7.4232 - accuracy: 0.1647

In [None]:
# save the model for deployment
pickle.dump(model, open('poster_predictor.pkl', 'wb+'))

Reference:
1. https://godatadriven.com/blog/keras-multi-label-classification-with-imagedatagenerator/