# Setup

In [25]:
import pandas as pd
import numpy as np
import pickle
import time
import tensorflow as tf

from custom_methods import model_eval

# Directory that holds the input data, given relative to the notebook's working directory.
datapath = '../Data/'

# Wall-clock start; the last cell subtracts this from time.time() to report total run time.
startTime = time.time()

In [16]:
# Load the prepared modelling frame from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so this must only ever be pointed at a pickle from a trusted source.
filename = 'ann_ready.pickle'

# The context manager closes the handle even if unpickling raises, which the
# previous open()/close() pair did not guarantee.
with open(datapath + filename, 'rb') as infile:
    df = pickle.load(infile)

# Show the available columns as a quick schema check.
df.columns.to_list()

['MONTH',
 'BREAK_ARRANGEMENT',
 'BREAK_PAY_PLAN',
 'CALL_OUT',
 'CALL_OUT_MANUAL',
 'DUE_DATE',
 'FINAL_NOTICE',
 'PAST_DUE',
 'SEVERANCE_ELECTRIC',
 'SEVERANCE_GAS',
 'TOTAL_30_DAYS_AMT',
 'TOTAL_60_DAYS_AMT',
 'TOTAL_90_DAYS_AMT',
 'TOTAL_CUR_BALANCE',
 'SPA_PER_ID',
 'CMIS_MATCH',
 'HAS_COTENANT',
 'NUM_PREM_FOR_PER',
 'NUM_PER_FOR_PREM',
 'MULTI_DWELL_SIZE']

In [17]:
# Column roles used throughout the rest of the notebook:
# the target flag and the person identifier.
event_col = 'CMIS_MATCH'
id_col = 'SPA_PER_ID'

# Drop MONTH and cast every remaining column to float.
# NOTE: this rebinds `df`, so re-running the cell without reloading the data
# raises KeyError because MONTH has already been removed.
df = df.drop(columns=['MONTH']).astype(float)

## Same Number of Positives, Negatives

In [None]:
# TODO: wrap the resampling steps below in a helper, e.g. resample_people(df: pd.DataFrame, ...)

In [33]:
# Unique person IDs, split by the row-level match flag.
# NOTE(review): the filter is applied per row, so a person with both matched
# and unmatched rows ends up in both series -- confirm that is intended.
person_ids = df['SPA_PER_ID']
match_flag = df['CMIS_MATCH']

pos = pd.Series(person_ids[match_flag == 1.0].unique())
neg = pd.Series(person_ids[match_flag == 0.0].unique())

In [57]:
# Oversample the positive person IDs (with replacement, fixed seed) until there
# are as many draws as there are negative IDs.
pos_sampled_ids = pos.sample(n=neg.size, replace=True, random_state=42)
pos_sampled_ids.name = 'positives'

# This used to be a bare comparison in the middle of the cell, whose result was
# silently discarded; an assert actually enforces the invariant.
assert pos_sampled_ids.size == neg.size, (
    f"expected {neg.size} sampled positives, got {pos_sampled_ids.size}"
)

# Number of times each positive ID was drawn.
pos_resample = pos_sampled_ids.value_counts()
pos_resample.head()

77728.0     322
86238.0     320
222471.0    318
212524.0    318
271530.0    316
Name: positives, dtype: int64

In [59]:
# Sanity check: the draw counts over all positive IDs add up to the number of negative IDs.
neg.size == pos_resample.sum()

True

# Split Data

In [18]:
# Split into train and test sets by person, keyed on the ID column.
# NOTE(review): presumably this keeps all rows of one person on the same side
# of the split -- confirm against model_eval.split_on_people.
split_frames = model_eval.split_on_people(df=df, id_col=id_col)
df_train, df_test = split_frames

# Model

In [19]:
from keras.callbacks import EarlyStopping

# Source: https://analyticsindiamag.com/how-to-create-your-first-artificial-neural-network-in-python/
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from keras.optimizers import SGD

from custom_methods import model_eval

# Early stopping on the `loss` metric (the training loss): stop after 2 epochs
# without an improvement of at least 0.001, then restore the best weights seen.
callback = EarlyStopping(
    monitor='loss',
    min_delta=0.001,
    patience=2,
    restore_best_weights=True,
)

# Every column except the target and the person ID is a model input.
feature_columns = df.drop(columns=[event_col, id_col]).columns
n_features = len(feature_columns)

# Feed-forward binary classifier: 10 -> 20 hidden ReLU units, one sigmoid output.
model = Sequential([
    # Input layer + first hidden layer
    Dense(units=10, activation='relu', input_dim=n_features),
    # Second hidden layer
    Dense(units=20, activation='relu', kernel_initializer='uniform'),
    # Output layer producing a probability-like score in (0, 1)
    Dense(units=1, activation='sigmoid', kernel_initializer='uniform'),
])

# Plain SGD with default settings, optimising binary cross-entropy.
model.compile(
    loss='binary_crossentropy',
    optimizer=SGD(),
    metrics=['accuracy'],
)

In [20]:
# Training inputs: all columns except the target and the person ID.
train_features = df_train.drop(columns=[event_col, id_col])
train_target = df_train[event_col]

# Class weighting stays disabled here (`class_weight=class_weights` was
# commented out; `class_weights` is not defined in the visible notebook).
model.fit(
    x=train_features,
    y=train_target,
    batch_size=10,
    epochs=10,
    callbacks=[callback],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10

KeyboardInterrupt: 

In [None]:
# Score the held-out rows; model.predict yields one column of scores,
# which is flattened into a 1-D Series.
test_features = df_test.drop(columns=[event_col, id_col])
raw_scores = model.predict(x=test_features)
predictions = pd.Series(raw_scores[:, 0])

# Re-index the true labels 0..n-1 so they line up positionally with `predictions`.
actuals = df_test[event_col].reset_index(drop=True)

In [None]:
# Compare the predicted scores against the true labels cast to booleans.
actuals_bool = actuals.astype(bool)
results = model_eval.get_model_metrics(predictions, actuals_bool)
results

In [None]:
# Disabled k-fold evaluation, kept as a bare string literal so it is not executed.
# NOTE(review): as the last expression of the cell the string itself is echoed as
# output; inside it, `k = 10` is assigned but the call hard-codes k=10 rather
# than passing `k`.
'''
k = 10
results, summary = model_eval.k_fold_models(
    df=df, 
    event_col='CMIS_MATCH',
    id_col='SPA_PER_ID', 
    k=10, 
    model=model, 
    cutoffs=[0.05, 0.10, 0.15]
)
summary
'''

# Time and Save

In [None]:
from custom_methods.calc_time import calc_time_from_sec

# Total wall-clock time since `startTime` was recorded in the setup cell.
elapsed_seconds = time.time() - startTime
calc_time_from_sec(elapsed_seconds)