In [1]:
import pandas
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import OneHotEncoder
import plotly
from datetime import datetime

In [2]:
pseudo_rtr_feed = pandas.read_csv('//pwv-dc-fs03/SRI/Analytics Joint Pipeline/Sprint 5/4 - Short term demand spikes/data/pseudo_rtr_feed.csv',
                                 parse_dates = ['CASE_DATE', 'DATE_CALL_START_FINCAD'],
                                 infer_datetime_format = True)

In [3]:
pseudo_rtr_feed['HOUR_CALL_START_FINCAD'] = pseudo_rtr_feed['DATE_CALL_START_FINCAD'].astype('datetime64[h]')
pseudo_rtr_feed['hour'] = pseudo_rtr_feed['DATE_CALL_START_FINCAD'].dt.hour.astype(str)
pseudo_rtr_feed['month'] = pseudo_rtr_feed['DATE_CALL_START_FINCAD'].dt.month_name()
pseudo_rtr_feed['weekday'] = pseudo_rtr_feed['DATE_CALL_START_FINCAD'].dt.day_name()

pseudo_rtr_feed['month_weekday'] = pseudo_rtr_feed['month'] + '_' + pseudo_rtr_feed['weekday']
    
pseudo_rtr_feed['weekday_hour'] = pseudo_rtr_feed['weekday'] + '_' + pseudo_rtr_feed['hour']
pseudo_rtr_feed.reset_index(inplace=True)

In [4]:
#print(pseudo_rtr_feed.shape)
print(pseudo_rtr_feed.dtypes)
print(pseudo_rtr_feed.describe())
#print(pseudo_rtr_feed.head())

index                              int64
RN                                 int64
CASE_DATE                 datetime64[ns]
DATE_CALL_START_FINCAD    datetime64[ns]
LOCATION_DESC                     object
X_COORD                          float64
Y_COORD                          float64
COMMONEVENTID                    float64
EVENT_NUM                         object
INITIAL_PRIORITY_CODE              int64
INITIAL_ET_DESCRIPTION            object
INITIAL_EVENT_TYPE                object
FINAL_ET_DESCRIPTION              object
FINAL_EVENT_TYPE                  object
HOUR_CALL_START_FINCAD    datetime64[ns]
hour                              object
month                             object
weekday                           object
month_weekday                     object
weekday_hour                      object
dtype: object
              index         RN       X_COORD       Y_COORD  COMMONEVENTID  \
count  1.344192e+06  1344192.0  1.344076e+06  1.344076e+06   1.344191e+06   
mean   6.720

In [5]:
hourly_rtr = pseudo_rtr_feed[['hour', 'weekday', 'month', 'month_weekday', 'weekday_hour', 'HOUR_CALL_START_FINCAD', 'EVENT_NUM']].groupby(['hour', 'weekday', 'month', 'month_weekday', 'weekday_hour', 'HOUR_CALL_START_FINCAD']).count().reset_index()
hourly_rtr.sort_values('HOUR_CALL_START_FINCAD', inplace = True)
print(hourly_rtr.shape)
hourly_rtr = pandas.get_dummies(hourly_rtr, drop_first = True)
print(hourly_rtr.shape)


(18120, 7)
(18120, 292)


In [6]:
def build_model(train_dataset):
    model = keras.Sequential([
        layers.Dense(128, activation=tf.nn.relu, kernel_regularizer=keras.regularizers.l2(0.01), input_shape=[len(train_dataset.keys())]),
        keras.layers.Dropout(0.2),
        layers.Dense(64, activation=tf.nn.relu, kernel_regularizer=keras.regularizers.l2(0.01)),
        keras.layers.Dropout(0.2),
        layers.Dense(1)
    ])
    
    optimizer = tf.train.RMSPropOptimizer(0.001)
    
    model.compile(loss='mse',
                  optimizer=optimizer,
                  metrics=['mae', 'mse'])
    return model

In [7]:
hourly_rtr['weight'] = 1 + (hourly_rtr['EVENT_NUM'] - hourly_rtr['EVENT_NUM'].min())/(hourly_rtr['EVENT_NUM'].max()-hourly_rtr['EVENT_NUM'].min())
train_dataset = hourly_rtr.copy().loc[0:(0.8 * hourly_rtr.shape[0])]
test_dataset = hourly_rtr.copy().drop(train_dataset.index)
print(train_dataset.shape)
print(test_dataset.shape)
print(train_dataset['weight'].describe())


(11133, 293)
(6987, 293)
count    11133.000000
mean         1.312996
std          0.107662
min          1.000000
25%          1.216450
50%          1.337662
75%          1.398268
max          1.731602
Name: weight, dtype: float64


In [8]:
class PrintDot(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        if epoch % 100 == 0: print('')
        print('.', end='')

def prepData(df):
    return df.drop(columns=['HOUR_CALL_START_FINCAD', 'EVENT_NUM'])

early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=50)

In [9]:
model = build_model(prepData(train_dataset))

history = model.fit(
    prepData(train_dataset),
    train_dataset['EVENT_NUM'],
    epochs=1000,
    validation_split = 0.2,
    verbose=0,
    sample_weight = train_dataset.weight.values,
    callbacks=[early_stop, PrintDot()])

InternalError: Blas GEMM launch failed : a.shape=(32, 291), b.shape=(291, 128), m=32, n=128, k=291
	 [[Node: dense/MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](_arg_dense_input_0_0/_43, dense/MatMul/ReadVariableOp)]]
	 [[Node: metrics/mean_absolute_error/Mean_1/_69 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_380_metrics/mean_absolute_error/Mean_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

In [None]:
hist = pandas.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()

In [None]:
import matplotlib.pyplot as plt

def plot_history(history):
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Mean Abs Error')
    plt.plot(hist['epoch'], hist['mean_absolute_error'],
             label='Train Error')
    plt.plot(hist['epoch'], hist['val_mean_absolute_error'],
             label = 'Val Error')
    plt.legend()

    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Mean Square Error')
    plt.plot(hist['epoch'], hist['mean_squared_error'],
             label='Train Error')
    plt.plot(hist['epoch'], hist['val_mean_squared_error'],
             label = 'Val Error')
    plt.legend()

plot_history(history)

In [None]:
def plot_predictions(fit):
    plt.figure()
    plt.xlabel('Date')
    plt.ylabel('Incidents')
    plt.plot(fit['HOUR_CALL_START_FINCAD'], fit['actual'],
             label='Actual')
    plt.plot(fit['HOUR_CALL_START_FINCAD'], fit['fitted'],
             label = 'Fitted')
    plt.xlim([datetime(2018, 11, 1), fit['HOUR_CALL_START_FINCAD'].max()])
    plt.legend()


test_predictions = pandas.DataFrame({'HOUR_CALL_START_FINCAD' : test_dataset['HOUR_CALL_START_FINCAD'],
                                    'actual' : test_dataset['EVENT_NUM'],
                                    'fitted' : model.predict(prepData(test_dataset)).flatten()})
test_predictions.sort_values('HOUR_CALL_START_FINCAD', inplace = True)
#print(test_predictions)
plot_predictions(test_predictions)