In [1]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)

In [2]:
import numpy as np
import pandas as pd


## Data Pre-Processing

In [3]:
film = pd.read_csv('csv/clean.csv')
film = film.drop('Unnamed: 0', axis = 1)
film = film.drop('rating avg score', axis = 1)
film = film.drop('country avg score', axis = 1)
film = film.drop('Total Awards', axis = 1)


In [4]:
size_bins = [0, 4, 6, 7, 8, 10]
size_group_names = ["<4", "4-6", "6-7", "7-8", "8-10"]

In [5]:
film['movie scores'] = pd.cut(film['movie score'],size_bins, labels = size_group_names)
del film['movie score']

In [6]:
film.loc[film['movie scores'] == '8-10']

Unnamed: 0,budget,gross,runtime,votes,star avg score,director avg score,writer avg score,genre avg score,company avg score,Total Nominations,movie scores
29,20000000,56671993,134,503763,6.933333,7.633333,6.566667,7.040390,6.042424,15.0,8-10
61,0,6532908,170,250762,8.044444,8.300000,8.300000,6.354847,8.400000,1.0,8-10
68,1000000,238507,88,41640,8.100000,7.766667,7.766667,6.756322,8.100000,1.0,8-10
99,58000000,170742341,135,681379,7.011111,6.840000,6.725000,7.040390,6.396358,13.0,8-10
183,500000,7098492,123,170427,8.400000,8.050000,8.050000,6.715720,8.400000,3.0,8-10
...,...,...,...,...,...,...,...,...,...,...,...
6659,3300000,13092000,107,503754,6.500000,8.350000,8.350000,6.715720,6.840000,6.0,8-10
6718,0,3210139,128,51522,8.100000,6.740000,7.366667,6.715720,7.700000,1.0,8-10
6776,0,1136776,173,14617,8.100000,8.100000,8.100000,6.715720,8.100000,1.0,8-10
6795,0,5017246,106,54503,8.050000,8.500000,8.500000,6.746931,8.500000,1.0,8-10


In [7]:
X = film.drop("movie scores", axis=1)
y = film["movie scores"]
print(X.shape, y.shape)

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1)
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

(6820, 10) (6820,)


In [8]:
film.head()

Unnamed: 0,budget,gross,runtime,votes,star avg score,director avg score,writer avg score,genre avg score,company avg score,Total Nominations,movie scores
0,30000000,31743332,79,117268,6.864286,7.1,7.1,6.746931,6.839474,2.0,7-8
1,35000000,81159365,123,201705,6.875,5.957143,6.2,6.756322,6.315741,1.0,6-7
2,1500000,779820,102,11945,5.9,5.666667,5.666667,5.687004,5.0,1.0,4-6
3,40000000,95001343,128,71006,7.5,6.74,6.63,7.04039,6.384354,1.0,7-8
4,13000000,16574731,93,28791,6.44,5.8,5.8,6.71572,5.8,1.0,4-6


# Create a Deep Learning Model

In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(units=30, activation='relu', input_dim=10))
model.add(Dense(units=30, activation='relu'))
model.add(Dense(units=5, activation='softmax'))

In [10]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

Train on 5115 samples
Epoch 1/100
5115/5115 - 1s - loss: 1.2958 - accuracy: 0.3722
Epoch 2/100
5115/5115 - 0s - loss: 1.1800 - accuracy: 0.4239
Epoch 3/100
5115/5115 - 0s - loss: 1.0720 - accuracy: 0.5441
Epoch 4/100
5115/5115 - 0s - loss: 0.9024 - accuracy: 0.6774
Epoch 5/100
5115/5115 - 0s - loss: 0.7716 - accuracy: 0.7283
Epoch 6/100
5115/5115 - 0s - loss: 0.6889 - accuracy: 0.7599
Epoch 7/100
5115/5115 - 0s - loss: 0.6429 - accuracy: 0.7627
Epoch 8/100
5115/5115 - 0s - loss: 0.6034 - accuracy: 0.7679
Epoch 9/100
5115/5115 - 0s - loss: 0.5753 - accuracy: 0.7738
Epoch 10/100
5115/5115 - 0s - loss: 0.5603 - accuracy: 0.7777
Epoch 11/100
5115/5115 - 0s - loss: 0.5466 - accuracy: 0.7832
Epoch 12/100
5115/5115 - 0s - loss: 0.5391 - accuracy: 0.7851
Epoch 13/100
5115/5115 - 0s - loss: 0.5298 - accuracy: 0.7894
Epoch 14/100
5115/5115 - 0s - loss: 0.5313 - accuracy: 0.7865
Epoch 15/100
5115/5115 - 0s - loss: 0.5166 - accuracy: 0.7932
Epoch 16/100
5115/5115 - 0s - loss: 0.5131 - accuracy: 0.

<tensorflow.python.keras.callbacks.History at 0x1a47011d90>

## Quantify our Trained Model

In [11]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1705/1 - 0s - loss: 0.9493 - accuracy: 0.7672
Normal Neural Network - Loss: 0.5306910063164675, Accuracy: 0.7671554088592529


# Saving a Trained Model
We can save our trained models using the HDF5 binary format with the extension `.h5`

In [12]:
# Save the model
model.save("film_model_trained.h5")

# Loading a Model

In [13]:
# Load the model
from tensorflow.keras.models import load_model
film_model = load_model("film_model_trained.h5")

## Evaluating the loaded model

In [14]:
model_loss, model_accuracy = film_model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1705/1 - 0s - loss: 0.9493 - accuracy: 0.7672
Normal Neural Network - Loss: 0.5306910063164675, Accuracy: 0.7671554088592529


# Predictions

In [15]:
parasite = np.array([[11000000,266000000, 132, 415741,  7.725, 7.775, 6.378, 5.778, 8.6, 9 ]])

In [16]:
print(f"Predicted Score : {model.predict_classes(parasite)}")

Predicted Score : [2]


In [17]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 30)                330       
_________________________________________________________________
dense_1 (Dense)              (None, 30)                930       
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 155       
Total params: 1,415
Trainable params: 1,415
Non-trainable params: 0
_________________________________________________________________
