In [2]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)

In [3]:
import numpy as np
import pandas as pd

import warnings
warnings.simplefilter('ignore', FutureWarning)

## Data Pre-Processing

In [4]:
film = pd.read_csv('csv/clean.csv')
film = film.drop('Unnamed: 0', axis = 1)


In [5]:
size_bins = [0, 2, 5, 7, 10]
size_group_names = ["Bad (<2)", "Not Great (2-5)", "Good (5-7)", "Great (7-10)"]

In [6]:
film['movie scores'] = pd.cut(film['movie score'],size_bins, labels = size_group_names)
del film['movie score']

In [7]:
X = film.drop("movie scores", axis=1)
y = film["movie scores"]
print(X.shape, y.shape)

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1)
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

(6820, 13) (6820,)


In [8]:
film.head()

Unnamed: 0,budget,gross,runtime,votes,star avg score,director avg score,writer avg score,genre avg score,rating avg score,country avg score,company avg score,Total Nominations,Total Awards,movie scores
0,30000000,31743332,79,117268,6.864286,7.1,7.1,6.746931,6.254035,6.21679,6.839474,2.0,0.0,Great (7-10)
1,35000000,81159365,123,201705,6.875,5.957143,6.2,6.756322,6.254035,6.21679,6.315741,1.0,0.0,Good (5-7)
2,1500000,779820,102,11945,5.9,5.666667,5.666667,5.687004,6.453213,6.660602,5.0,1.0,0.0,Not Great (2-5)
3,40000000,95001343,128,71006,7.5,6.74,6.63,7.04039,6.254035,6.21679,6.384354,1.0,0.0,Great (7-10)
4,13000000,16574731,93,28791,6.44,5.8,5.8,6.71572,6.453213,6.21679,5.8,1.0,0.0,Good (5-7)


# Create a Deep Learning Model

In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=13))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=4, activation='softmax'))

In [22]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=60,
    shuffle=True,
    verbose=2
)

Train on 5115 samples
Epoch 1/60
5115/5115 - 1s - loss: 0.3089 - accuracy: 0.8706
Epoch 2/60
5115/5115 - 1s - loss: 0.3096 - accuracy: 0.8712
Epoch 3/60
5115/5115 - 1s - loss: 0.3109 - accuracy: 0.8692
Epoch 4/60
5115/5115 - 1s - loss: 0.3090 - accuracy: 0.8723
Epoch 5/60
5115/5115 - 1s - loss: 0.3030 - accuracy: 0.8757
Epoch 6/60
5115/5115 - 1s - loss: 0.3104 - accuracy: 0.8719
Epoch 7/60
5115/5115 - 1s - loss: 0.3111 - accuracy: 0.8766
Epoch 8/60
5115/5115 - 1s - loss: 0.3101 - accuracy: 0.8702
Epoch 9/60
5115/5115 - 1s - loss: 0.3047 - accuracy: 0.8753
Epoch 10/60
5115/5115 - 1s - loss: 0.3050 - accuracy: 0.8768
Epoch 11/60
5115/5115 - 1s - loss: 0.3056 - accuracy: 0.8759
Epoch 12/60
5115/5115 - 1s - loss: 0.3029 - accuracy: 0.8759
Epoch 13/60
5115/5115 - 1s - loss: 0.3039 - accuracy: 0.8759
Epoch 14/60
5115/5115 - 1s - loss: 0.3026 - accuracy: 0.8786
Epoch 15/60
5115/5115 - 1s - loss: 0.3025 - accuracy: 0.8733
Epoch 16/60
5115/5115 - 1s - loss: 0.3022 - accuracy: 0.8778
Epoch 17/60

<tensorflow.python.keras.callbacks.History at 0x1a3fdb53d0>

## Quantify our Trained Model

In [13]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1705/1 - 0s - loss: 0.4305 - accuracy: 0.8716
Normal Neural Network - Loss: 0.32035887115512074, Accuracy: 0.8715542554855347


# Saving a Trained Model
We can save our trained models using the HDF5 binary format with the extension `.h5`

In [14]:
# Save the model
model.save("film_model_trained.h5")

# Loading a Model

In [15]:
# Load the model
from tensorflow.keras.models import load_model
film_model = load_model("film_model_trained.h5")

## Evaluating the loaded model

In [16]:
model_loss, model_accuracy = film_model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1705/1 - 0s - loss: 0.4305 - accuracy: 0.8716
Normal Neural Network - Loss: 0.32035887115512074, Accuracy: 0.8715542554855347


In [43]:
d = {'budget': [11000000], 'gross': [266000000], 'runtime': [132], 'votes': [415741], 'star avg score': [7.725], 'director avg score': [7.775], 'writer avg score': [6.378], 'genre avg score': [5.778], 'rating avg score': [5], 'country avg score': [7.345], 'company avg score': [8.6], 'Total Nominations': [9], 'Total Awards': [6], }
df = pd.DataFrame(data=d)
df

Unnamed: 0,budget,gross,runtime,votes,star avg score,director avg score,writer avg score,genre avg score,rating avg score,country avg score,company avg score,Total Nominations,Total Awards
0,11000000,266000000,132,415741,7.725,7.775,6.378,5.778,5,7.345,8.6,9,6


In [85]:
da = [[11000000,266000000, 132, 415741,  7.725, 7.775, 6.378, 5.778, 5, 7.345, 8.6, 9, 6 ]]

In [86]:
test = model.predict(da)

In [90]:
test

array([[0., 0., 1., 0.]], dtype=float32)

In [94]:
t = [(1100,26600, 132, 415,  1 , 1, 1, 1, 1, 1, 1, 9, 6 )]

In [97]:
test = model.predict_classes(t)

In [98]:
test

array([2])