# Neural Network

## Enviroment

In [9]:
import os
import pandas as pd
import numpy as np
import matplotlib.pylab as plt

import sys
sys.path.append('../')

from Preprocessing import preprocessing
from Preprocessing.single_set import SingleSet

## Data Preprocessing

Features: data.data_features
Targets: data.data_targets (click, bidprice, payprice)

In [69]:
train_data_path = '/Data/train.csv'
train_data = SingleSet(relative_path=data_path,use_numerical_labels=True)

val_data_path = '/Data/validation.csv'
val_data = SingleSet(relative_path=data_path,use_numerical_labels=True)

## potentially drop unnecessary features

-- data loaded --
-- data loaded --


In [86]:
def prepare_data_for_classification(data):

    ## features
    features = np.asarray(data.data_features.values)

    ## targets
    labels = np.asarray(data.data_targets.values)

    return features, labels

x_train, y_train = prepare_data_for_classification(train_data)
x_val, y_val = prepare_data_for_classification(val_data)

## Model - Neural Network

In [172]:
from sklearn.model_selection import train_test_split, StratifiedKFold
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from keras import regularizers, initializers
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from keras.models import Model, Input
from keras import optimizers

import matplotlib.pylab as plt
from random import shuffle
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import model_from_json

import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
import warnings
warnings.filterwarnings('ignore', '.*do not.*',)



def create_model(input_shape, output_shape):

    ## sequential model
    model = Sequential()
    
    #model.add(Dense(10, input_dim=input_shape, activation='tanh'))
    #model.add(Dense(12, activation='relu'))
    #model.add(Dense(3, input_dim=input_shape, activation='relu'))

    # working model___________
    #model.add(Dense(3, input_dim=input_shape, activation='relu'))
    #model.add(Dense(12, activation='relu'))
    #model.add(Dense(12, activation='relu'))


    #model.add(Dropout(0.5))
    #model.add(Dropout(0.1))
    model.add(Dense(output_shape, input_dim=input_shape, activation='relu'))

    ## other way of defining model
    
    #inputs = Input(shape=(input_shape,))
    #outputs = Dense(output_shape, activation="relu", kernel_regularizer=regularizers.l2(5e-4),kernel_initializer=initializers.he_normal(seed=13))(inputs)
    #model = Model(inputs=inputs, outputs=outputs, name='bidder')

    return model


def plot_curves(history):

    # Loss Curves
    plt.figure(figsize=[8, 6])
    plt.plot(history.history['loss'], 'r', linewidth=3.0)
    plt.plot(history.history['val_loss'], 'b', linewidth=3.0)
    plt.legend(['Training loss', 'Validation Loss'], fontsize=18)
    plt.xlabel('Epochs ', fontsize=16)
    plt.ylabel('Loss', fontsize=16)
    plt.title('Loss Curves', fontsize=16)
    plt.savefig('Results/neural_network/'  + '/loss_curve.png')
    #plt.show()

    # Accuracy Curves
    plt.figure(figsize=[8, 6])
    plt.plot(history.history['acc'], 'r', linewidth=3.0)
    plt.plot(history.history['val_acc'], 'b', linewidth=3.0)
    plt.legend(['Training Accuracy', 'Validation Accuracy'], fontsize=18)
    plt.xlabel('Epochs ', fontsize=16)
    plt.ylabel('Accuracy', fontsize=16)
    plt.title('Accuracy Curves', fontsize=16)
    plt.savefig('Results/neural_network/'  + '/accuracy_curve.png')
    plt.show()


In [175]:
input_shape = x_train.shape[1]
output_shape = y_train.shape[1]

print("features: ", input_shape, "targets: ", output_shape)

# Clear model, and create it
model = create_model(input_shape, output_shape)
print(model.summary())



## save model to disk_____________________________________

# serialize model to JSON
model_json = model.to_json()

#if not os.path.exists('Results/neural_network'):
#    os.makedirs('Results/neural_network/')

#with open('Results/neural_network/' + 'model_architecture.json', "w") as json_file:
#    json_file.write(model_json)

    

## hyperparameters_______________________________________
    

## model compile and train
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# specify learning rate
#sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

#model.compile(loss='mean_squared_error', optimizer=sgd, metrics=['accuracy'])


# Set callback functions to early stop training and save the best model so far
callbacks = [EarlyStopping(monitor='loss', patience=100),
             ModelCheckpoint('Results/neural_network/' + 'trained_weights.h5', monitor='loss', save_best_only=True)]

# Fit the model on the batches
batch_size = 4
epochs = 2
history = model.fit(x=x_train, y=y_train, batch_size=batch_size, 
                    epochs=epochs, validation_data=(x_val, y_val))
# verbose=1, shuffle=True, callbacks=callbacks

## model evaluate
plot_curves(history)

features:  22 targets:  3
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_223 (Dense)            (None, 3)                 69        
Total params: 69
Trainable params: 69
Non-trainable params: 0
_________________________________________________________________
None
Train on 303925 samples, validate on 303925 samples
Epoch 1/2


KeyboardInterrupt: 

## Prediction

In [147]:
## Load Model for Testing_______________________________________________________________

# Model reconstruction from JSON file
#with open('Results/neural_network/' + 'model_architecture.json', 'r') as f:
#    model_test = model_from_json(f.read())

# Load weights into the new model
#model_test.load_weights('Results/neural_network/' + 'trained_weights.h5')

#model_test.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
#model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


predictions = model.predict(x_val, verbose=1)
predictions_df = pd.DataFrame(predictions, columns= ['click', 'bidprice', 'payprice'])

print(predictions_df['click'].value_counts())
print(predictions_df)

#test_score = model.evaluate(x = x_val, y = y_val)
#print('\ntest accuracy:', test_score[1])

0.0    303925
Name: click, dtype: int64
        click  bidprice  payprice
0         0.0  4.418545  1.229837
1         0.0  7.026196  1.929624
2         0.0  4.265340  1.177227
3         0.0  4.418545  1.229837
4         0.0  6.331016  1.811837
5         0.0  4.480392  1.236434
6         0.0  7.026196  1.929624
7         0.0  5.526566  1.577196
8         0.0  4.265340  1.177227
9         0.0  8.523921  2.426386
10        0.0  4.418545  1.229837
11        0.0  4.418545  1.229837
12        0.0  7.026196  1.929624
13        0.0  3.527709  0.980420
14        0.0  6.289568  1.846444
15        0.0  8.523921  2.426386
16        0.0  7.026196  1.929624
17        0.0  7.026196  1.929624
18        0.0  4.418545  1.229837
19        0.0  4.418545  1.229837
20        0.0  5.871641  1.654992
21        0.0  4.418545  1.229837
22        0.0  4.480392  1.236434
23        0.0  4.418545  1.229837
24        0.0  4.265340  1.177227
25        0.0  5.649624  1.576489
26        0.0  8.523921  2.426386
27      