In [1]:
# Fix NumPy's global random seed so NumPy-driven randomness repeats between runs.
# NOTE(review): this call seeds NumPy's legacy global generator only; random
# state owned by other libraries (e.g. TensorFlow) is not affected by it.
from numpy.random import seed

seed(1)

In [2]:
import numpy as np
import pandas as pd

import warnings
warnings.simplefilter('ignore', FutureWarning)

## Data Pre-Processing

In [11]:
# Load the cleaned film data and discard the 'Unnamed: 0' column
# (presumably a row index written out when the CSV was saved -- verify).
film = pd.read_csv('Resources/clean.csv').drop(columns='Unnamed: 0')


In [15]:
# Bin edges for the movie score, and one display label per resulting interval
# (five edges -> four intervals).
size_bins = [
    0,
    2,
    5,
    7,
    10,
]
size_group_names = [
    "Bad (<2)",
    "Not Great (2-5)",
    "Good (5-7)",
    "Great (7-10)",
]

In [16]:
# Replace the continuous 'movie score' column with the categorical
# 'movie scores' column used as the classification target.
#
# pd.cut builds right-closed intervals by default: (0, 2], (2, 5], (5, 7],
# (7, 10]. include_lowest=True also closes the first interval on the left, so a
# score of exactly 0 is labelled "Bad (<2)" instead of silently becoming NaN.
#
# The guard keeps the cell re-runnable: after the first run the source column
# has been removed, and an unguarded re-run would raise a KeyError.
if 'movie score' in film.columns:
    film['movie scores'] = pd.cut(
        film['movie score'],
        bins=size_bins,
        labels=size_group_names,
        include_lowest=True,
    )
    del film['movie score']

In [17]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

# Features are every column except the binned score; the binned score is the target.
X = film.drop("movie scores", axis=1)
y = film["movie scores"]
print(X.shape, y.shape)

# Hold out a test split (scikit-learn's default split size) with a fixed
# random_state so the same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1)

# Fit the scaler on the training rows only, then apply it to both splits, so
# no statistics from the test rows influence the scaling.
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Step 1: Label-encode data set
# The encoder is fitted on the full label column: it only learns the set of
# class names, and doing so guarantees that a class which happens to appear
# only in the test split can still be transformed instead of raising ValueError.
label_encoder = LabelEncoder()
label_encoder.fit(y)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
# Pass the class count explicitly. Left to itself, to_categorical infers the
# width from the largest label present, so a split that lacks the highest-coded
# class would yield a narrower matrix than the other split / the model output.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

(6820, 9) (6820,)


In [30]:
# Preview the first five rows of the prepared frame.
film.head(5)

Unnamed: 0,budget,gross,runtime,votes,star avg score,director avg score,writer avg score,Total Nominations,Total Awards,movie scores
0,30000000,31743332,79,117268,6.864286,7.1,7.1,2.0,0.0,Great (7-10)
1,35000000,81159365,123,201705,6.875,5.957143,6.2,1.0,0.0,Good (5-7)
2,1500000,779820,102,11945,5.9,5.666667,5.666667,1.0,0.0,Not Great (2-5)
3,40000000,95001343,128,71006,7.5,6.74,6.63,1.0,0.0,Great (7-10)
4,13000000,16574731,93,28791,6.44,5.8,5.8,1.0,0.0,Good (5-7)


# Create a Deep Learning Model

In [33]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# The NumPy seed set at the top of the notebook does not control TensorFlow's
# own random ops, so seed TensorFlow too (same value) before any layer is
# created. This is intended to make weight initialisation and training
# repeatable between fresh runs.
# NOTE(review): some GPU kernels may still be non-deterministic -- verify.
tf.random.set_seed(1)

# Derive the layer sizes from the prepared arrays rather than hard-coding them,
# so the network keeps matching the data if a feature or class is added/removed.
n_inputs = X_train_scaled.shape[1]        # number of feature columns
n_outputs = y_train_categorical.shape[1]  # width of the one-hot target

# Two 100-unit ReLU hidden layers followed by a softmax over the score classes.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=n_inputs))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=n_outputs, activation='softmax'))

In [34]:
# Configure training: Adam optimiser, categorical cross-entropy loss (the
# targets are one-hot encoded), and accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 60 epochs on the scaled training split, reshuffling the samples
# each epoch; verbose=0 silences the per-epoch progress log.
model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    epochs=60,
    verbose=0,
    shuffle=True,
)

<tensorflow.python.keras.callbacks.History at 0x11a30c450>

## Quantify our Trained Model

In [35]:
# Score the trained network on the held-out test split. evaluate() returns the
# loss followed by each compiled metric (here: accuracy).
model_loss, model_accuracy = model.evaluate(
    x=X_test_scaled,
    y=y_test_categorical,
    verbose=2,
)
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1705/1 - 0s - loss: 0.4529 - accuracy: 0.8757
Normal Neural Network - Loss: 0.3134642156338062, Accuracy: 0.8756598234176636


# Saving a Trained Model
We can save a trained model in the HDF5 binary format, which uses the file extension `.h5`.

In [36]:
# Write the trained network to an HDF5 file in the working directory.
# NOTE(review): only the Keras model is saved here; the fitted X_scaler and
# label_encoder are not persisted by this cell.
model.save(filepath="film_model_trained.h5")

# Loading a Model

In [39]:
# Read the saved network back from disk into a separate variable so it can be
# compared against the in-memory `model`.
from tensorflow.keras.models import load_model

film_model = load_model(filepath="film_model_trained.h5")

## Evaluating the Loaded Model

In [40]:
# Re-run the test-set evaluation with the model restored from disk; matching
# the earlier loss/accuracy confirms the save/load round trip kept the weights.
model_loss, model_accuracy = film_model.evaluate(
    x=X_test_scaled,
    y=y_test_categorical,
    verbose=2,
)
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1705/1 - 0s - loss: 0.4529 - accuracy: 0.8757
Normal Neural Network - Loss: 0.3134642156338062, Accuracy: 0.8756598234176636
