In [21]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.layers as tfl
from sklearn.metrics import classification_report

In [22]:
film_data = pd.read_csv('movie.csv', encoding = 'latin-1')
remove_data = pd.read_csv('NoPosterFound_Links.csv')
genre_data = film_data['genres'].to_numpy()
id_data = film_data['movieId'].to_numpy()
l = []
for i in genre_data:
    l = l + str(i).split('|')
s = list(set(l))
s.sort()
s.pop(0)
remove_data_id = remove_data['movieId'].to_numpy()
print(id_data[10000])
film_data.head(5)
remove_data.head(5)

32892


Unnamed: 0,movieId,imdbId,tmdbId
0,604,115978,
1,644,116485,339428.0
2,654,118026,278978.0
3,721,114103,
4,727,114894,365371.0


In [6]:
print(len(genre_data))
print(genre_data[1])
genres = np.zeros((26938, 19))
for i in range(len(genre_data)):
    for j in str(genre_data[i]).split('|'):
        for k in range(len(s)):
            if j == s[k]:
                genres[i][k] = 1
print(genres.shape)

26938
Adventure|Children|Fantasy
(26938, 19)


In [7]:
index = []
for i in remove_data_id:
    index.append(np.where(id_data == i))
id_data = np.delete(id_data, index)
genre_data = np.delete(genre_data, index)
print(genres.shape)

(26938, 19)


In [8]:
dataset_training = tf.keras.preprocessing.image_dataset_from_directory(
        directory = 'MLP-20M', labels = genres[:].tolist(), label_mode = 'int',
        color_mode='rgb', batch_size=32, image_size=(150,
        150), validation_split = 0.3, subset = "training", shuffle=True, seed=42)
dataset_test = tf.keras.preprocessing.image_dataset_from_directory(
        directory = 'MLP-20M', labels = genres[:].tolist(), label_mode = 'int',
        color_mode='rgb', batch_size=32, image_size=(150,
        150), validation_split = 0.3, subset = "validation", shuffle=True, seed=42)

Found 26938 files belonging to 1 classes.
Using 18857 files for training.
Found 26938 files belonging to 1 classes.
Using 8081 files for validation.


In [9]:
def poster_model():
    poster_shape = (150, 150, 3)
    base_model = tf.keras.applications.ResNet50(include_top = False, weights = 'imagenet', input_shape = poster_shape)
    base_model.trainable = False
    input_layer = tf.keras.Input(poster_shape)
    x = tf.keras.applications.resnet.preprocess_input(input_layer)
    x = base_model(x, training = False)
    x = tfl.GlobalAveragePooling2D()(x)
    x = tfl.Dropout(0.2)(x)
    prediction_layer = tfl.Dense(19, 'sigmoid')
    outputs = prediction_layer(x) 
    model = tf.keras.Model(input_layer, outputs)
    return model

In [10]:
model = poster_model()
print(model.summary())

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 150, 150, 3)]     0         
_________________________________________________________________
tf.__operators__.getitem (Sl (None, 150, 150, 3)       0         
_________________________________________________________________
tf.nn.bias_add (TFOpLambda)  (None, 150, 150, 3)       0         
_________________________________________________________________
resnet50 (Functional)        (None, 5, 5, 2048)        23587712  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2048)              0         
_________________________________________________________________
dropout (Dropout)            (None, 2048)              0         
_________________________________________________________________
dense (Dense)                (None, 19)                38931 

In [11]:
base_learning_rate = 0.01
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  metrics=['accuracy'])

In [12]:
initial_epochs = 3
history = model.fit(dataset_training , epochs=initial_epochs)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [13]:
model.evaluate(dataset_test)
test_pred = model.predict(dataset_test)



[0.6482888460159302, 0.1635936200618744]

In [17]:
single_poster_check = tf.keras.preprocessing.image_dataset_from_directory(
        directory = 'single_test_image',  label_mode = None,
        color_mode='rgb', batch_size=32, image_size=(150,150) ,shuffle=True, seed=42)


Found 1 files belonging to 1 classes.


In [18]:
single_pred = np.where(single_poster_check > 0.5, s)
print(single_pred)

[[9.98530626e-01 9.64655280e-01 5.47164679e-03 1.45521586e-08
  6.99463248e-01 3.93417478e-03 1.67012215e-04 4.15668160e-01
  2.77617382e-05 1.06883266e-10 3.28252614e-02 1.31131260e-11
  9.88863321e-05 4.30333912e-02 9.70454931e-01 3.43249440e-02
  7.38859177e-02 6.50233414e-05 2.52950191e-03]]


In [17]:
test_pred_bool = np.where(test_pred[:, 0] > 0.5, 1, 0)
for i in range(1, 19):
    temp = np.where(test_pred[:, i] > 0.5, 1, 0)
    test_pred_bool = np.vstack([test_pred_bool, temp])
print(test_pred_bool.shape)

(19, 8081)


In [18]:
y = np.concatenate([y for x, y in dataset_test], axis=0)

In [19]:
for i in range(19):
    print(s[i])
    print(classification_report(y[:,i], test_pred_bool[i]))

Action
              precision    recall  f1-score   support

         0.0       0.87      0.88      0.88      7086
         1.0       0.11      0.10      0.10       995

    accuracy                           0.79      8081
   macro avg       0.49      0.49      0.49      8081
weighted avg       0.78      0.79      0.78      8081

Adventure
              precision    recall  f1-score   support

         0.0       0.91      0.75      0.82      7396
         1.0       0.08      0.25      0.12       685

    accuracy                           0.71      8081
   macro avg       0.50      0.50      0.47      8081
weighted avg       0.84      0.71      0.76      8081

Animation
              precision    recall  f1-score   support

         0.0       0.96      1.00      0.98      7776
         1.0       0.04      0.00      0.01       305

    accuracy                           0.96      8081
   macro avg       0.50      0.50      0.49      8081
weighted avg       0.93      0.96      0.94    

In [20]:
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("model.h5")
