# 1) Creating a Neural Network in Keras

## 1.1) Preprocessing Training Data

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load training data and test data
training_data_df = pd.read_csv('data/sales_data_training.csv')
testing_data_df = pd.read_csv('data/sales_data_test.csv')

In [3]:
# Scale the data
scaler = MinMaxScaler(feature_range=(0,1))

scaled_training_data = scaler.fit_transform(training_data_df)
scaled_testing_data = scaler.transform(testing_data_df)

In [4]:
# Print out the adjustment that the scaler applied to the total_earnings column of data
print("Note: total_earnings values were scaled by multiplying by {:.10f} and adding {:.6f}".format(scaler.scale_[8], scaler.min_[8]))

Note: total_earnings values were scaled by multiplying by 0.0000036968 and adding -0.115913


In [5]:
training_data_df.columns.values

array(['critic_rating', 'is_action', 'is_exclusive_to_us', 'is_portable',
       'is_role_playing', 'is_sequel', 'is_sports', 'suitable_for_kids',
       'total_earnings', 'unit_price'], dtype=object)

In [6]:
# Create new pandas DataFrame objects from the scaled data
scaled_training_df = pd.DataFrame(data=scaled_training_data, columns=training_data_df.columns.values)
scaled_testing_df = pd.DataFrame(data=scaled_testing_data, columns=testing_data_df.columns.values)

In [15]:
# Save scaled data dataframes to new CSV files
scaled_training_df.to_csv('data/sales_data_training_scaled.csv', index=False)
scaled_testing_df.to_csv('data/sales_data_test_scaled.csv', index=False)

-------

# 1.2) Define Keras Model using Sequential API
- we want to predict `Total Earnings` for the games 

In [16]:
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [17]:
training_data_df = pd.read_csv('data/sales_data_training_scaled.csv')
training_data_df.head()

Unnamed: 0,critic_rating,is_action,is_exclusive_to_us,is_portable,is_role_playing,is_sequel,is_sports,suitable_for_kids,total_earnings,unit_price
0,0.5,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.374714,1.0
1,0.833333,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.192425,0.5
2,0.333333,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.114852,0.5
3,0.833333,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.142452,0.0
4,0.666667,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.480682,1.0


In [18]:
X = training_data_df.drop('total_earnings', axis=1).values
y = training_data_df[['total_earnings']].values

In [19]:
X.shape, y.shape

((1000, 9), (1000, 1))

In [20]:
y[:5]

array([[0.37471396],
       [0.19242528],
       [0.11485185],
       [0.14245208],
       [0.48068243]])

In [21]:
# Define the model
model = Sequential()

model.add(Dense(50, input_dim=9, activation='relu')) # as our number of features is 9
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))

model.add(Dense(1, activation='linear')) # linear is default too. we are predicting single value

model.compile(
    loss='mse',
    optimizer='adam'
)

In [24]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 50)                500       
_________________________________________________________________
dense_5 (Dense)              (None, 100)               5100      
_________________________________________________________________
dense_6 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 51        
Total params: 10,701
Trainable params: 10,701
Non-trainable params: 0
_________________________________________________________________


-------

# 2) Training Models

## 2.1) Training and Evaluation of Model

In [22]:
# Train the model
model.fit(
    X, 
    y,
    epochs=50,
    shuffle = True, #shuffle is true by default though
    verbose=2
)

Epoch 1/50
32/32 - 2s - loss: 0.0207
Epoch 2/50
32/32 - 0s - loss: 0.0028
Epoch 3/50
32/32 - 0s - loss: 0.0011
Epoch 4/50
32/32 - 0s - loss: 4.5699e-04
Epoch 5/50
32/32 - 0s - loss: 3.1295e-04
Epoch 6/50
32/32 - 0s - loss: 1.9573e-04
Epoch 7/50
32/32 - 0s - loss: 1.4639e-04
Epoch 8/50
32/32 - 0s - loss: 1.0866e-04
Epoch 9/50
32/32 - 0s - loss: 9.4893e-05
Epoch 10/50
32/32 - 0s - loss: 9.5402e-05
Epoch 11/50
32/32 - 0s - loss: 7.9593e-05
Epoch 12/50
32/32 - 0s - loss: 6.1124e-05
Epoch 13/50
32/32 - 0s - loss: 5.8854e-05
Epoch 14/50
32/32 - 0s - loss: 4.7164e-05
Epoch 15/50
32/32 - 0s - loss: 5.2579e-05
Epoch 16/50
32/32 - 0s - loss: 4.5479e-05
Epoch 17/50
32/32 - 0s - loss: 3.3523e-05
Epoch 18/50
32/32 - 0s - loss: 3.8041e-05
Epoch 19/50
32/32 - 0s - loss: 3.2778e-05
Epoch 20/50
32/32 - 0s - loss: 2.9138e-05
Epoch 21/50
32/32 - 0s - loss: 3.6182e-05
Epoch 22/50
32/32 - 0s - loss: 2.6849e-05
Epoch 23/50
32/32 - 0s - loss: 3.3815e-05
Epoch 24/50
32/32 - 0s - loss: 3.0148e-05
Epoch 25/50
3

<tensorflow.python.keras.callbacks.History at 0x9049269c40>

## Testing and Evaluation

In [27]:
# load separte test data
test_data_df = pd.read_csv('data/sales_data_test_scaled.csv')

X_test = test_data_df.drop('total_earnings', axis=1).values
y_test = test_data_df[['total_earnings']].values

In [28]:
test_error_rate = model.evaluate(X_test, y_test)



In [29]:
print('The mean squared error MSE for the test data set is {}'.format(test_error_rate))

The mean squared error MSE for the test data set is 8.224092744057998e-05


## 2.2) Making Predictions
- future of sale of new video game

In [34]:
# Load the data that we want to use to predict
# the values are already pre-scaled, so we will skip scaling
new_product_X = pd.read_csv('data/proposed_new_product.csv').values

In [35]:
new_product_X

array([[0.7, 1. , 1. , 1. , 0. , 1. , 0. , 1. , 0.8]])

In [38]:
# Make predictions with the neural network
prediction = model.predict(new_product_X)
prediction

array([[0.8559272]], dtype=float32)

In [39]:
# Grab just the first element of the first prediction (since that's the only have one)
prediction = prediction[0][0]

In [40]:
# Re-scale the data from the 0-to-1 range back to dollars
# These constants are from when the data was originally scaled down to the 0-to-1 range
prediction = prediction + 0.1159
prediction = prediction / 0.0000036968


In [41]:
print("Earnings Prediction for Proposed Product - ${}".format(prediction))

Earnings Prediction for Proposed Product - $262883.3664054351


# 2.3) Saving and Loading Models

### Save Model

In [43]:
model.save('models/trained_model.h5')
print('model saved to disk!')

model saved to disk!


### Load Model and make prediction

In [44]:
from tensorflow.keras.models import load_model

In [45]:
loaded_model = load_model('models/trained_model.h5')

In [47]:
prediction = loaded_model.predict(new_product_X)
prediction = prediction[0][0]

In [48]:
prediction = prediction + 0.1159
prediction = prediction / 0.0000036968

print("Earnings Prediction for Proposed Product - ${}".format(prediction))

Earnings Prediction for Proposed Product - $262883.3664054351
