Preprocess data

In [2]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [3]:
# Load training data set from CSV file
training_data_df =pd.read_csv("sales_data_training.csv")

# Load testing data set from CSV file
test_data_df =pd.read_csv("sales_data_test.csv")

In [4]:
training_data_df.head(3)

Unnamed: 0,critic_rating,is_action,is_exclusive_to_us,is_portable,is_role_playing,is_sequel,is_sports,suitable_for_kids,total_earnings,unit_price
0,3.5,1,0,1,0,1,0,0,132717,59.99
1,4.5,0,0,0,0,1,1,0,83407,49.99
2,3.0,0,0,0,0,1,1,0,62423,49.99


In [5]:
test_data_df.head(3)

Unnamed: 0,critic_rating,is_action,is_exclusive_to_us,is_portable,is_role_playing,is_sequel,is_sports,suitable_for_kids,total_earnings,unit_price
0,3.5,1,1,1,0,1,0,1,247537,59.99
1,2.5,0,0,0,1,1,0,0,73960,59.99
2,3.5,0,0,0,0,1,1,0,82671,59.99


In [6]:
# Data needs to be scaled to a small range like 0 to 1 for the neural
# network to work well.
scaler = MinMaxScaler(feature_range=(0,1))

In [7]:
# Scale both the training inputs and outputs
scaled_training = scaler.fit_transform(training_data_df)
scaled_testing = scaler.transform(test_data_df)

# Print out the adjustment that the scaler applied to the total_earnings column of data
print("Note: total_earnings values were scaled by multiplying by {:.10f} and adding {:.6f}".format(scaler.scale_[8], scaler.min_[8]))

Note: total_earnings values were scaled by multiplying by 0.0000036968 and adding -0.115913


In [8]:
type(scaled_testing)

numpy.ndarray

In [9]:
# Create new pandas DataFrame objects from the scaled data
scaled_training_df = pd.DataFrame(scaled_training, columns=training_data_df.columns.values)
scaled_testing_df = pd.DataFrame(scaled_testing, columns=test_data_df.columns.values)

In [10]:
scaled_testing_df.head(3)

Unnamed: 0,critic_rating,is_action,is_exclusive_to_us,is_portable,is_role_playing,is_sequel,is_sports,suitable_for_kids,total_earnings,unit_price
0,0.5,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.799179,1.0
1,0.166667,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.157502,1.0
2,0.5,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.189704,1.0


In [11]:
# Save scaled data dataframes to new CSV files
scaled_training_df.to_csv("sales_data_training_scaled.csv", index=False)
scaled_testing_df.to_csv("sales_data_test_scaled.csv", index=False)

In [12]:
training_data_df = pd.read_csv("sales_data_training_scaled.csv")

In [13]:
training_data_df.head(3)

Unnamed: 0,critic_rating,is_action,is_exclusive_to_us,is_portable,is_role_playing,is_sequel,is_sports,suitable_for_kids,total_earnings,unit_price
0,0.5,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.374714,1.0
1,0.833333,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.192425,0.5
2,0.333333,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.114852,0.5


In [14]:
X = training_data_df.drop('total_earnings', axis=1).values
Y = training_data_df[['total_earnings']].values

In [19]:
# Define the model
from keras.models import Sequential
from keras.layers import *
model = Sequential()
model.add(Dense(50,input_dim = 9,activation='relu',name='layer_1'))
model.add(Dense(100,activation='relu',name='layer_2'))
model.add(Dense(50,activation='relu',name='layer_3'))
model.add(Dense(1,activation='linear',name='output_layer'))
model.compile(loss="mean_squared_error",optimizer="adam")

In [20]:
import keras
# Create a TensorBoard logger
logger = keras.callbacks.TensorBoard(
    log_dir='logs',
    write_graph=True,
    histogram_freq=5
)


In [21]:
#Train the model
model.fit(X,Y,
          epochs=50,
          shuffle=True,
          verbose=2,
          callbacks=[logger])

Epoch 1/50
32/32 - 1s - loss: 0.0103
Epoch 2/50
32/32 - 0s - loss: 0.0014
Epoch 3/50
32/32 - 0s - loss: 5.4881e-04
Epoch 4/50
32/32 - 0s - loss: 2.7483e-04
Epoch 5/50
32/32 - 0s - loss: 1.8502e-04
Epoch 6/50
32/32 - 0s - loss: 1.4086e-04
Epoch 7/50
32/32 - 0s - loss: 1.1869e-04
Epoch 8/50
32/32 - 0s - loss: 1.0863e-04
Epoch 9/50
32/32 - 0s - loss: 7.3082e-05
Epoch 10/50
32/32 - 0s - loss: 6.6052e-05
Epoch 11/50
32/32 - 0s - loss: 7.8320e-05
Epoch 12/50
32/32 - 0s - loss: 6.6515e-05
Epoch 13/50
32/32 - 0s - loss: 4.2608e-05
Epoch 14/50
32/32 - 0s - loss: 5.3067e-05
Epoch 15/50
32/32 - 0s - loss: 3.5204e-05
Epoch 16/50
32/32 - 0s - loss: 3.2998e-05
Epoch 17/50
32/32 - 0s - loss: 3.3933e-05
Epoch 18/50
32/32 - 0s - loss: 3.4996e-05
Epoch 19/50
32/32 - 0s - loss: 3.2750e-05
Epoch 20/50
32/32 - 0s - loss: 2.7186e-05
Epoch 21/50
32/32 - 0s - loss: 4.1206e-05
Epoch 22/50
32/32 - 0s - loss: 3.8985e-05
Epoch 23/50
32/32 - 0s - loss: 2.7616e-05
Epoch 24/50
32/32 - 0s - loss: 2.8479e-05
Epoch 25/

<keras.callbacks.History at 0x7f8be76baf50>

In [22]:
# Load the separate test data set
test_data_df = pd.read_csv("sales_data_test_scaled.csv")

X_test = test_data_df.drop('total_earnings', axis=1).values
Y_test = test_data_df[['total_earnings']].values

test_error_rate =model.evaluate(X_test,Y_test,verbose=0)
print("The mean squared error (MSE) for the test data set is: {}".format(test_error_rate))

The mean squared error (MSE) for the test data set is: 8.514883666066453e-05


In [24]:
# Load the data we make to use to make a prediction
X = pd.read_csv("proposed_new_product.csv").values

# Make a prediction with the neural network
prediction =model.predict(X)

# Grab just the first element of the first prediction (since that's the only have one)
prediction = prediction[0][0]

# Re-scale the data from the 0-to-1 range back to dollars
# These constants are from when the data was originally scaled down to the 0-to-1 range
prediction = prediction + 0.1159
prediction = prediction / 0.0000036968

print("Earnings Prediction for Proposed Product - ${}".format(prediction))

Earnings Prediction for Proposed Product - $261768.391291782


In [25]:
model.save("trained_model.h5")
print("Model saved to disk.")

Model saved to disk.


In [26]:
from keras.models import load_model

In [27]:
model1=load_model("trained_model.h5")

In [28]:
X = pd.read_csv("proposed_new_product.csv").values
prediction = model1.predict(X)

# Grab just the first element of the first prediction (since we only have one)
prediction = prediction[0][0]

# Re-scale the data from the 0-to-1 range back to dollars
# These constants are from when the data was originally scaled down to the 0-to-1 range
prediction = prediction + 0.1159
prediction = prediction / 0.0000036968

print("Earnings Prediction for Proposed Product - ${}".format(prediction))

Earnings Prediction for Proposed Product - $261768.391291782
