In [47]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)

In [48]:
import numpy as np
import pandas as pd


## Data Pre-Processing

In [49]:
film = pd.read_csv('csv/clean.csv')
film = film.drop('Unnamed: 0', axis = 1)
film = film.drop('rating avg score', axis = 1)
film = film.drop('country avg score', axis = 1)
film = film.drop('Total Awards', axis = 1)
film = film.drop('gross', axis = 1)
film = film.drop('runtime', axis = 1)
film = film.drop('genre avg score', axis = 1)
film = film.drop('Total Nominations', axis = 1)

In [50]:
size_bins = [0, 4, 6, 7, 8, 10]
size_group_names = ["<4", "4-6", "6-7", "7-8", "8-10"]

In [51]:
film['movie scores'] = pd.cut(film['movie score'],size_bins, labels = size_group_names)
del film['movie score']

In [52]:
#film.loc[film['movie scores'] == '8-10']

In [53]:
X = film.drop("movie scores", axis=1)
y = film["movie scores"]
print(X.shape, y.shape)

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1)
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

(6820, 6) (6820,)


# Create a Deep Learning Model

In [56]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(units=10, activation='relu', input_dim=6))
model.add(Dense(units=10, activation='relu'))
model.add(Dense(units=5, activation='softmax'))

In [57]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

Train on 5115 samples
Epoch 1/100
5115/5115 - 1s - loss: 1.3440 - accuracy: 0.3977
Epoch 2/100
5115/5115 - 0s - loss: 1.2030 - accuracy: 0.4440
Epoch 3/100
5115/5115 - 0s - loss: 1.1485 - accuracy: 0.5210
Epoch 4/100
5115/5115 - 0s - loss: 1.0660 - accuracy: 0.5806
Epoch 5/100
5115/5115 - 0s - loss: 0.9738 - accuracy: 0.6309
Epoch 6/100
5115/5115 - 0s - loss: 0.8946 - accuracy: 0.6686
Epoch 7/100
5115/5115 - 0s - loss: 0.8339 - accuracy: 0.7021
Epoch 8/100
5115/5115 - 0s - loss: 0.7859 - accuracy: 0.7202
Epoch 9/100
5115/5115 - 0s - loss: 0.7470 - accuracy: 0.7273
Epoch 10/100
5115/5115 - 0s - loss: 0.7134 - accuracy: 0.7384
Epoch 11/100
5115/5115 - 0s - loss: 0.6866 - accuracy: 0.7439
Epoch 12/100
5115/5115 - 0s - loss: 0.6637 - accuracy: 0.7494
Epoch 13/100
5115/5115 - 0s - loss: 0.6452 - accuracy: 0.7566
Epoch 14/100
5115/5115 - 0s - loss: 0.6283 - accuracy: 0.7627
Epoch 15/100
5115/5115 - 0s - loss: 0.6128 - accuracy: 0.7646
Epoch 16/100
5115/5115 - 0s - loss: 0.5995 - accuracy: 0.

<tensorflow.python.keras.callbacks.History at 0x1a43716610>

## Quantify our Trained Model

In [58]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1705/1 - 0s - loss: 0.9375 - accuracy: 0.7748
Normal Neural Network - Loss: 0.5447074505590623, Accuracy: 0.7747800350189209


# Saving a Trained Model
We can save our trained models using the HDF5 binary format with the extension `.h5`

In [59]:
# Save the model
model.save("film_model_trained.h5")

# Loading a Model

In [60]:
# Load the model
from tensorflow.keras.models import load_model
film_model = load_model("film_model_trained.h5")

## Evaluating the loaded model

In [61]:
model_loss, model_accuracy = film_model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1705/1 - 0s - loss: 0.9375 - accuracy: 0.7748
Normal Neural Network - Loss: 0.5447074505590623, Accuracy: 0.7747800350189209


# Predictions

In [62]:
encoded_predictions = model.predict_classes(X_test_scaled[:5])
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [63]:
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted classes: ['6-7' '4-6' '7-8' '6-7' '4-6']
Actual Labels: ['6-7', '<4', '7-8', '6-7', '4-6']


In [67]:
parasite = np.array([[11000000, 415741,  7.725000, 7.775000, 5.778000, 8.6000]])

In [68]:
par = model.predict_classes(parasite)
pred = label_encoder.inverse_transform(par)

In [69]:
print(f"Predicted class: {pred}")

Predicted class: ['<4']


In [22]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 20)                160       
_________________________________________________________________
dense_1 (Dense)              (None, 20)                420       
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 105       
Total params: 685
Trainable params: 685
Non-trainable params: 0
_________________________________________________________________
