In [1]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)

In [2]:
# Dependencies
import numpy as np
import pandas as pd

## Data Pre-Processing

In [3]:
film = pd.read_csv('Resources/clean_genre.csv')
film = film.drop('Unnamed: 0', axis = 1)
film = film.drop('Total Nominations', axis =1)
film.head()

Unnamed: 0,movie score,budget,gross,runtime,votes,star avg score,director avg score,writer avg score,genre avg score,rating avg score,country avg score,company avg score,Total Awards
0,7.1,30000000,31743332,79,117268,6.864286,7.1,7.1,6.746931,6.254035,6.21679,6.839474,0.0
1,6.8,35000000,81159365,123,201705,6.875,5.957143,6.2,6.756322,6.254035,6.21679,6.315741,0.0
2,5.0,1500000,779820,102,11945,5.9,5.666667,5.666667,5.687004,6.453213,6.660602,5.0,0.0
3,7.5,40000000,95001343,128,71006,7.5,6.74,6.63,7.04039,6.254035,6.21679,6.384354,0.0
4,5.8,13000000,16574731,93,28791,6.44,5.8,5.8,6.71572,6.453213,6.21679,5.8,0.0


In [4]:
film = film.rename(columns = {"Total Awards":"Award"})

film['Award'].loc[film['Award'] >= 1] = "Yes"
film['Award'].loc[film['Award'] == 0] = "No"

film.head(10)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


Unnamed: 0,movie score,budget,gross,runtime,votes,star avg score,director avg score,writer avg score,genre avg score,rating avg score,country avg score,company avg score,Award
0,7.1,30000000,31743332,79,117268,6.864286,7.1,7.1,6.746931,6.254035,6.21679,6.839474,No
1,6.8,35000000,81159365,123,201705,6.875,5.957143,6.2,6.756322,6.254035,6.21679,6.315741,No
2,5.0,1500000,779820,102,11945,5.9,5.666667,5.666667,5.687004,6.453213,6.660602,5.0,No
3,7.5,40000000,95001343,128,71006,7.5,6.74,6.63,7.04039,6.254035,6.21679,6.384354,No
4,5.8,13000000,16574731,93,28791,6.44,5.8,5.8,6.71572,6.453213,6.21679,5.8,No
5,7.7,65000000,210614939,117,648211,6.53,7.057143,6.666667,6.106086,6.453213,6.21679,6.384354,No
6,6.2,0,99603,110,14114,6.61,7.25,7.225,6.71572,6.453213,6.660602,6.796296,No
7,6.8,25000000,71985628,104,226504,6.615385,6.78,6.8,5.687004,6.254035,6.21679,5.726316,No
8,5.8,0,5979011,95,3154,7.029412,5.8,5.75,6.71572,6.453213,6.21679,5.34,No
9,5.8,200000000,166112167,158,303766,6.615385,6.227273,5.766667,6.106086,6.254035,6.21679,6.315741,No


In [5]:
X = film.drop("Award", axis=1)
y = film["Award"]
print(X.shape, y.shape)

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1)
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

(6820, 12) (6820,)


## Create a Deep Learning Model

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(units=18, activation='relu', input_dim=12))
model.add(Dense(units=18, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [7]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 18)                234       
_________________________________________________________________
dense_1 (Dense)              (None, 18)                342       
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 38        
Total params: 614
Trainable params: 614
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Compile and fit the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [9]:
# Fit the model to the training data
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=60,
    shuffle=True,
    verbose=2
)

Train on 5115 samples
Epoch 1/60
5115/5115 - 1s - loss: 0.3417 - accuracy: 0.9441
Epoch 2/60
5115/5115 - 0s - loss: 0.2060 - accuracy: 0.9449
Epoch 3/60
5115/5115 - 0s - loss: 0.1858 - accuracy: 0.9449
Epoch 4/60
5115/5115 - 0s - loss: 0.1767 - accuracy: 0.9449
Epoch 5/60
5115/5115 - 0s - loss: 0.1716 - accuracy: 0.9449
Epoch 6/60
5115/5115 - 0s - loss: 0.1684 - accuracy: 0.9449
Epoch 7/60
5115/5115 - 0s - loss: 0.1660 - accuracy: 0.9449
Epoch 8/60
5115/5115 - 0s - loss: 0.1647 - accuracy: 0.9449
Epoch 9/60
5115/5115 - 0s - loss: 0.1638 - accuracy: 0.9449
Epoch 10/60
5115/5115 - 0s - loss: 0.1641 - accuracy: 0.9449
Epoch 11/60
5115/5115 - 0s - loss: 0.1631 - accuracy: 0.9449
Epoch 12/60
5115/5115 - 0s - loss: 0.1620 - accuracy: 0.9449
Epoch 13/60
5115/5115 - 0s - loss: 0.1631 - accuracy: 0.9449
Epoch 14/60
5115/5115 - 0s - loss: 0.1640 - accuracy: 0.9449
Epoch 15/60
5115/5115 - 0s - loss: 0.1619 - accuracy: 0.9449
Epoch 16/60
5115/5115 - 0s - loss: 0.1611 - accuracy: 0.9449
Epoch 17/60

<tensorflow.python.keras.callbacks.History at 0x1a3eefcfd0>

## Quantify our Trained Model

In [10]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1705/1 - 0s - loss: 0.2270 - accuracy: 0.9390
Normal Neural Network - Loss: 0.1845116556914321, Accuracy: 0.9390029311180115


# Loading a Model

In [11]:
# Load the model
from tensorflow.keras.models import load_model
film_model = load_model("film_model_trained.h5")

## Make Predictions

In [12]:
encoded_predictions = model.predict_classes(X_test_scaled[:5])
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [13]:
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted classes: ['No' 'No' 'No' 'No' 'No']
Actual Labels: ['No', 'No', 'No', 'No', 'No']
