In [53]:
import pandas as pd
import numpy as np

from datetime import datetime 

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as MSE

from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.optimizers import SGD

In [32]:
dat = pd.read_csv('../Data/dat_features.csv')

dat = dat.loc[:, "Access":]

In [33]:
unseen = dat[(dat.Impressions.eq(0.0)) & (dat.GRP.eq(0.0)) | (dat.active_flag.eq(1))]
training = dat[dat.Impressions > 0.0]

In [34]:
dat.columns

Index(['Access', 'DMA', 'Date_Aired', 'Estimate', 'GRP', 'Impressions',
       'Length', 'Market', 'Media', 'Spot_Cost', 'Spot_Type', 'Station_ID',
       'Time_Aired', 'DOW', 'daypart', 'Hours', 'Month', 'Quarter', 'creative',
       'is_outlier', 'Station_ID_', 'bins', 'bin_1', 'bin_2', 'bin_3', 'bin_4',
       'bin_5', 'BP', 'DC', 'DE', 'DP', 'GD', 'GX', 'PL', 'PM', 'PN', 'PT',
       'SR', 'SV', 'TN', 'VE', 'Q119', 'Q219', 'Q319', 'Q419', 'Cable',
       'DirecTV', 'Dish_Network', 'National_Network', 'Over-the-top_content',
       'Friday', 'Monday', 'Saturday', 'Sunday', 'Thursday', 'Tuesday',
       'Wednesday', 'Q1', 'Q2', 'Q3', 'Q4', 'Daytime', 'Early_Fringe',
       'Late_Fringe', 'Late_Night', 'Morning', 'Overnight', 'Primetime',
       'midnight', 'one_am', 'two_am', 'three_am', 'four_am', 'five_am',
       'six_am', 'seven_am', 'eight_am', 'nine_am', 'ten_am', 'eleven_am',
       'noon', 'one_pm', 'two_pm', 'three_pm', 'four_pm', 'five_pm', 'six_pm',
       'seven_pm', 'eig

In [35]:
labels = [
        'Q119', 'Q219',
        'Q319', 'Q419',
        'BP', 'DC', 'DE', 'DP',
        'GD', 'GX', 'PL', 'PM', 
        'PN', 'PT', 'SR', 'SV', 
        'TN', 'VE',
        "Length",
        "Spot_Cost",
        "Cable", 
        "DirecTV",
        "Dish_Network",
        "National_Network",
        "Over-the-top_content",
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
        'is_outlier',
        'bin_1', 'bin_2',                
        'bin_3', 'bin_4', 'bin_5',
        'midnight', 'one_am', 'two_am', 'three_am', 'four_am', 
        'five_am', 'six_am', 'seven_am', 'eight_am', 'nine_am', 
        'ten_am', 'eleven_am', 'noon', 'one_pm', 'two_pm',
        'three_pm', 'four_pm', 'five_pm', 'six_pm', 'seven_pm', 
        'eight_pm', 'nine_pm', 'ten_pm', 'eleven_pm'
    ]

X = training.loc[:, labels]
y = training.loc[:, "Impressions"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

In [36]:
dim = len(X_train.columns)

In [67]:
filepath = '../nn_files/weights.hdf5'
logspath = '../nn_files/logs/scalars/' + datetime.now().strftime("%Y%m%d-%H%M%S")

callbacks = [TensorBoard(log_dir=logspath),
             EarlyStopping(monitor='val_mean_squared_error', patience=10, mode='min'),
             ModelCheckpoint(filepath, monitor='val_mean_squared_error', verbose=1, mode='min', period=1, save_best_only=True),
             ReduceLROnPlateau(monitor='val_mean_squared_error', patience=3, verbose=1, factor=0.25, min_lr=0.000000001, mode='min')]

model = Sequential()
model.add(Dense(512, input_dim=dim, activation='relu'))
model.add(Dense(dim, activation='relu'))
model.add(Dense(dim, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(1))

sgd = SGD(lr=0.00001)
model.compile(loss='mean_squared_error', 
              optimizer='adam',
              metrics=['mean_squared_error'])

model.fit(X_train, y_train, 
          epochs = 100, validation_split=0.2, callbacks=callbacks)

Train on 2744 samples, validate on 686 samples
Epoch 1/100

Epoch 00001: val_mean_squared_error improved from inf to 3010.02808, saving model to ../nn_files/weights.hdf5
Epoch 2/100
  32/2744 [..............................] - ETA: 0s - loss: 2277.8362 - mean_squared_error: 2277.8362




Epoch 00002: val_mean_squared_error improved from 3010.02808 to 2926.35229, saving model to ../nn_files/weights.hdf5
Epoch 3/100

Epoch 00003: val_mean_squared_error improved from 2926.35229 to 2680.24658, saving model to ../nn_files/weights.hdf5
Epoch 4/100

Epoch 00004: val_mean_squared_error improved from 2680.24658 to 2331.39478, saving model to ../nn_files/weights.hdf5
Epoch 5/100

Epoch 00005: val_mean_squared_error improved from 2331.39478 to 2223.48755, saving model to ../nn_files/weights.hdf5
Epoch 6/100

Epoch 00006: val_mean_squared_error improved from 2223.48755 to 2149.69458, saving model to ../nn_files/weights.hdf5
Epoch 7/100

Epoch 00007: val_mean_squared_error improved from 2149.69458 to 2133.04761, saving model to ../nn_files/weights.hdf5
Epoch 8/100

Epoch 00008: val_mean_squared_error improved from 2133.04761 to 2110.39209, saving model to ../nn_files/weights.hdf5
Epoch 9/100

Epoch 00009: val_mean_squared_error improved from 2110.39209 to 2092.48535, saving model 


Epoch 00032: val_mean_squared_error did not improve from 1546.14246
Epoch 33/100

Epoch 00033: val_mean_squared_error did not improve from 1546.14246
Epoch 34/100

Epoch 00034: val_mean_squared_error did not improve from 1546.14246

Epoch 00034: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 35/100

Epoch 00035: val_mean_squared_error improved from 1546.14246 to 1440.51086, saving model to ../nn_files/weights.hdf5
Epoch 36/100

Epoch 00036: val_mean_squared_error improved from 1440.51086 to 1437.65979, saving model to ../nn_files/weights.hdf5
Epoch 37/100

Epoch 00037: val_mean_squared_error improved from 1437.65979 to 1424.82654, saving model to ../nn_files/weights.hdf5
Epoch 38/100

Epoch 00038: val_mean_squared_error did not improve from 1424.82654
Epoch 39/100

Epoch 00039: val_mean_squared_error improved from 1424.82654 to 1417.24585, saving model to ../nn_files/weights.hdf5
Epoch 40/100

Epoch 00040: val_mean_squared_error did not improve from 1417.2458


Epoch 00061: val_mean_squared_error did not improve from 1364.06018
Epoch 62/100

Epoch 00062: val_mean_squared_error did not improve from 1364.06018
Epoch 63/100

Epoch 00063: val_mean_squared_error improved from 1364.06018 to 1363.41199, saving model to ../nn_files/weights.hdf5
Epoch 64/100

Epoch 00064: val_mean_squared_error improved from 1363.41199 to 1363.11267, saving model to ../nn_files/weights.hdf5
Epoch 65/100

Epoch 00065: val_mean_squared_error did not improve from 1363.11267
Epoch 66/100

Epoch 00066: val_mean_squared_error improved from 1363.11267 to 1362.69202, saving model to ../nn_files/weights.hdf5
Epoch 67/100

Epoch 00067: val_mean_squared_error improved from 1362.69202 to 1362.63074, saving model to ../nn_files/weights.hdf5
Epoch 68/100

Epoch 00068: val_mean_squared_error improved from 1362.63074 to 1362.16028, saving model to ../nn_files/weights.hdf5
Epoch 69/100

Epoch 00069: val_mean_squared_error did not improve from 1362.16028
Epoch 70/100

Epoch 00070: val


Epoch 00090: val_mean_squared_error improved from 1357.05896 to 1356.61047, saving model to ../nn_files/weights.hdf5
Epoch 91/100

Epoch 00091: val_mean_squared_error did not improve from 1356.61047
Epoch 92/100

Epoch 00092: val_mean_squared_error did not improve from 1356.61047
Epoch 93/100

Epoch 00093: val_mean_squared_error improved from 1356.61047 to 1356.20996, saving model to ../nn_files/weights.hdf5
Epoch 94/100

Epoch 00094: val_mean_squared_error improved from 1356.20996 to 1356.06750, saving model to ../nn_files/weights.hdf5
Epoch 95/100

Epoch 00095: val_mean_squared_error did not improve from 1356.06750
Epoch 96/100

Epoch 00096: val_mean_squared_error did not improve from 1356.06750
Epoch 97/100

Epoch 00097: val_mean_squared_error did not improve from 1356.06750

Epoch 00097: ReduceLROnPlateau reducing learning rate to 3.906250185536919e-06.
Epoch 98/100

Epoch 00098: val_mean_squared_error did not improve from 1356.06750
Epoch 99/100

Epoch 00099: val_mean_squared_err

<keras.callbacks.callbacks.History at 0x2353da84880>

In [68]:
preds = model.predict(X_test)

In [69]:
print(np.sqrt(MSE(y_test, preds)))

42.71950086570726
