In [None]:
import datetime, time, os
import numpy as np
import pandas as pd

import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras import layers
from tensorflow.keras import regularizers

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
    
print('Using TensorFlow version: %s' % tf.__version__)

# Single seed shared by the train/test split and by the NumPy / TensorFlow
# random number generators, so that weight initialisation and shuffling are
# repeatable after "Restart Kernel -> Run All".
RSEED = 1337
np.random.seed(RSEED)
tf.random.set_seed(RSEED)

In [None]:
!pip install -q git+https://github.com/tensorflow/docs
    
import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
import tensorflow_docs.plots

In [None]:
# Import data
# Three prepared CSV files are available; exactly one line should be active.

#data = pd.read_csv('data/data_prep_reg.csv', index_col=[0]) # statistics of the features, location D only
data = pd.read_csv('data/data_prep_feat.csv', index_col=[0]) # all values as individual features, location D only
#data = pd.read_csv('data/data_prep_reg_all.csv', index_col=[0]) # statistics of the features, all locations

# Report (rows, columns) of the loaded frame
print(f'Data: {data.shape}')

In [None]:
# Impute missing values with the mean of the respective column.
# numeric_only=True restricts the means to numeric columns: pandas >= 2.0
# raises a TypeError when asked to average non-numeric columns, and older
# versions silently skipped them, so this keeps the same result everywhere.
# NOTE(review): 'location' is presumably non-numeric - NaNs in such columns
# are left untouched, exactly as before.
data = data.fillna(data.mean(numeric_only=True))

In [None]:
# Preview the first rows of the frame after imputation
data.head()

In [None]:
# Columns excluded from the feature matrix: the positional block 2..43 of the
# frame plus the 'target' and 'location' columns.
# NOTE(review): the 2:44 slice depends on the column order of the CSV - confirm.
columns_to_drop = [*data.columns[2:44], 'target', 'location']

In [None]:
# Features: everything except the columns collected in `columns_to_drop`.
# Target: the 'target' column.
X = data.drop(columns=columns_to_drop)
y = data['target']

# Hold out 30 % of the rows as test set; the split is seeded with RSEED.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RSEED
)

# Shapes of the full set and of both splits
print(f'X: {X.shape}')
print(f'y: {y.shape}')

print(f'X_train: {X_train.shape}')
print(f'y_train: {y_train.shape}')

print(f'X_test: {X_test.shape}')
print(f'y_test: {y_test.shape}')

In [None]:
# Preview the first rows of the training features
X_train.head()

In [None]:
# Training configuration

N_TRAIN, N_VAL = len(X_train), len(X_test)  # N_VAL is the row count of the test split
BATCH_SIZE = 96
EPOCHS = 5000
STEPS_PER_EPOCH = N_TRAIN // BATCH_SIZE  # number of full batches that fit into X_train

In [None]:
# Preparation for TensorBoard

# Parent folder that collects the log directories of all runs
root_logdir = os.path.join(os.curdir, "my_logs")


def get_run_logdir():
    """Return a log directory path below ``root_logdir`` named after the current time."""
    return os.path.join(root_logdir, time.strftime('run_%d_%m_%Y-%H_%M_%S'))


# Log directory of this notebook session (evaluated once, when the cell runs)
run_logdir = get_run_logdir()
def get_callbacks(name):
    """Create the TensorBoard callback for the model called ``name``.

    Args:
        name (str): model name; used as sub-directory of ``run_logdir`` so
            that every model of a run logs into its own folder.

    Returns:
        tf.keras.callbacks.TensorBoard: callback writing to
        ``<run_logdir>/<name>`` with ``histogram_freq=1``.
    """
    # os.path.join inserts the path separator that plain string concatenation
    # (run_logdir + name) left out, which glued the name onto the timestamp.
    return tf.keras.callbacks.TensorBoard(os.path.join(run_logdir, name),
                                          histogram_freq=1)

In [None]:
# Checkpointing

# Target path of the checkpoint and the folder that contains it
checkpoint_path = "DNN/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Callback that writes a checkpoint to `checkpoint_path`. save_weights_only is
# False, so the callback is asked to save the model rather than only its weights.
# NOTE(review): cp_callback is not handed to any fit() call in the visible
# cells - confirm whether it is still needed.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=False,
    verbose=1,  # set verbose != 0 to get output during training
)


In [None]:
# Plotting function for MSE
def plot_metric(history):
    """Draw the training and validation MSE curves of a fit.

    Args:
        history: object whose ``history`` attribute is a mapping with the
            keys 'mse' and 'val_mse' (one value per epoch).
    """
    curves = history.history
    for key in ('mse', 'val_mse'):
        plt.plot(curves[key])
    plt.title('Model MSE')
    plt.xlabel('Epoch')
    plt.ylabel('MSE')
    # Legend entries follow the plotting order above: train first, then validation
    plt.legend(['train', 'validation'], loc='upper right')
    plt.show()

In [None]:
# Plotting function for loss
def plot_loss(history):
    """Draw the training and validation loss curves of a fit.

    Args:
        history: object whose ``history`` attribute is a mapping with the
            keys 'loss' and 'val_loss' (one value per epoch).
    """
    records = history.history
    # Each curve is labelled with the key it was read from
    for series in ('loss', 'val_loss'):
        plt.plot(records[series], label=series)
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Error')
    plt.ylim([0, 10])  # fixed y-range; larger losses are cut off
    plt.grid(True)
    plt.legend()

In [None]:
def rmse(y_true, y_pred):
    """Return the root of ``mean_squared_error(y_true, y_pred)``."""
    squared_error = mean_squared_error(y_true, y_pred)
    return np.sqrt(squared_error)

def error_analysis(y_test, y_pred_test):
    """Generate a true-vs-predicted plot and a residual scatter plot.

    Both inputs are flattened to 1-D before use. Without this, predictions
    shaped ``(n, 1)`` (a single output column) subtracted from a 1-D target
    broadcast to an ``(n, n)`` matrix instead of ``n`` residuals.

    Args:
        y_test (array-like): true target values.
        y_pred_test (array-like): model predictions for the same samples,
            shape ``(n,)`` or ``(n, 1)``.
    """
    y_true = np.asarray(y_test).ravel()
    y_hat = np.asarray(y_pred_test).ravel()

    # Residual = true value minus prediction, one per sample
    residuals = y_true - y_hat

    fig, ax = plt.subplots(1, 2, figsize=(15, 5))
    plt.subplots_adjust(right=1)
    plt.suptitle('Error Analysis')

    # Left panel: true vs. predicted values with the identity line as reference
    ax[0].scatter(y_hat, y_true, color="#FF5A36", alpha=0.7)
    ax[0].plot([-400, 350], [-400, 350], color="#193251")
    ax[0].set_title("True vs. predicted values", fontsize=16)
    ax[0].set_xlabel("predicted values")
    ax[0].set_ylabel("true values")
    ax[0].set_ylim((y_true.min()-40), (y_true.max()+40))

    # Right panel: residuals over predictions with the zero line as reference
    ax[1].scatter(y_hat, residuals, color="#FF5A36", alpha=0.7)
    ax[1].plot([-400, 350], [0, 0], color="#193251")
    ax[1].set_title("Residual Scatter Plot", fontsize=16)
    ax[1].set_xlabel("predicted values")
    ax[1].set_ylabel("residuals")

In [None]:
def evaluate(model, X_test, y_test, X_train, y_train, name='first model'):
    """Plot the training curves of a fitted model and report its errors.

    Steps: plot MSE and loss history, run ``model.evaluate`` on the test set,
    compute train/test RMSE from ``model.predict`` and draw true-vs-predicted
    scatter plots for the test split (left) and the training split (right).

    Args:
        model: fitted model exposing ``history``, ``evaluate`` and ``predict``.
        X_test, y_test: test features and targets.
        X_train, y_train: training features and targets.
        name (str): key under which ``[rmse_train, rmse_test]`` is stored in
            the module-level ``test_results`` dict. Defaults to the key that
            was previously hard-coded.
    """
    # The plot helpers read `<arg>.history[...]`, i.e. they expect the History
    # object of the fit, not the model itself. The model keeps that object in
    # `model.history`; passing the model made the helpers index the History
    # object instead of its dict.
    fit_history = model.history
    plot_metric(fit_history)
    plot_loss(fit_history)

    # Loss and metric of the compiled model on the test set
    loss, mse = model.evaluate(X_test, y_test, verbose=2)
    print(f'Model MSE: {mse}')
    print('--------'*5)

    # Predictions for both splits
    y_pred = model.predict(X_test)
    y_pred_train = model.predict(X_train)

    rmse_train = np.sqrt(mean_squared_error(y_train, y_pred_train))
    rmse_test = np.sqrt(mean_squared_error(y_test, y_pred))

    test_results[name] = [rmse_train, rmse_test]

    # Distinct labels: both lines used to be printed as plain 'RMSE:'
    print('RMSE (test):', rmse_test)
    print('RMSE (train):', rmse_train)

    fig, ax = plt.subplots(1, 2, figsize=(15, 5))
    plt.subplots_adjust(right=1)
    plt.suptitle('Error Analysis')

    # Every panel pairs predictions with the targets of the SAME split; the
    # train panel previously used y_test, whose length differs from the
    # number of train predictions.
    panels = (
        (ax[0], y_pred, y_test, "true values TEST"),
        (ax[1], y_pred_train, y_train, "true values Train"),
    )
    for axis, predicted, actual, ylabel in panels:
        axis.scatter(predicted, actual, color="#FF5A36", alpha=0.7)
        axis.plot([-400, 350], [-400, 350], color="#193251")  # identity line
        axis.set_title("True vs. predicted values", fontsize=16)
        axis.set_xlabel("predicted values")
        axis.set_ylabel(ylabel)
        axis.set_ylim((actual.min()-40), (actual.max()+40))

In [None]:
# Result stores shared by the helper functions:
#   training_history - filled by model_compile_and_fit, keyed by model name
#   test_results     - filled by evaluate with [rmse_train, rmse_test]
training_history, test_results = {}, {}

In [None]:
def model_compile_and_fit(X, y, model, name, optimizer, max_epochs=30):
    """Compile ``model``, train it and record the result under ``name``.

    The model is compiled with MAE as loss and MSE as additional metric and
    trained with 20 % of the given data held out for validation.

    Args:
        X: training features handed to ``model.fit``.
        y: training targets handed to ``model.fit``.
        model: model exposing ``compile`` and ``fit``.
        name (str): key in ``training_history`` and name given to
            ``get_callbacks``.
        optimizer: optimizer (instance or identifier) for ``model.compile``.
        max_epochs (int): number of epochs to train. This argument was
            previously ignored in favour of the global ``EPOCHS``.

    Returns:
        The object returned by ``model.fit``; it is also stored in
        ``training_history[name]``.
    """
    # metrics is passed as a list, the form documented for compile()
    model.compile(optimizer=optimizer,
                  loss='mae',
                  metrics=['mse'])

    history = model.fit(
        X,
        y,
        validation_split=0.2,
        verbose=1,
        # NOTE(review): STEPS_PER_EPOCH is derived from BATCH_SIZE, but no
        # batch_size is passed to fit() here - confirm this is intended.
        steps_per_epoch=STEPS_PER_EPOCH,
        epochs=max_epochs,
        # fit() expects a list of callbacks; get_callbacks returns a single one
        callbacks=[get_callbacks(name)],
    )
    training_history[name] = history
    return history

In [None]:
with tf.device('/cpu:0'):
    # Width of the input layer is taken from the training features instead of
    # the former hard-coded 726, so the model keeps matching X_train when the
    # selected CSV file or `columns_to_drop` changes.
    n_features = X_train.shape[1]

    # Four hidden ReLU layers with 512 units each, followed by a single
    # linear output unit for the regression target.
    all_features_model = tf.keras.Sequential([
        tf.keras.layers.Dense(512, kernel_initializer='uniform', activation='relu', input_dim=n_features),
        tf.keras.layers.Dense(512, kernel_initializer='uniform', activation='relu'),
        tf.keras.layers.Dense(512, kernel_initializer='uniform', activation='relu'),
        tf.keras.layers.Dense(512, kernel_initializer='uniform', activation='relu'),
        tf.keras.layers.Dense(1, kernel_initializer='uniform'),
    ])

In [None]:
with tf.device('/cpu:0'):
    # Compile and train the model on the training split; the returned object
    # is kept for later inspection of the training curves.
    all_features_model_history = model_compile_and_fit(
        X_train,
        y_train,
        all_features_model,
        name='all_features_model',
        optimizer='Adam',
        max_epochs=EPOCHS,
    )

In [None]:
with tf.device('/cpu:0'):
    # Training curves, test-set metrics and true-vs-predicted plots
    evaluate(
        model=all_features_model,
        X_test=X_test,
        y_test=y_test,
        X_train=X_train,
        y_train=y_train,
    )

In [None]:
# Save the entire trained model as a SavedModel.
# os.makedirs replaces the shell call `!mkdir -p`, so the cell also runs
# where no POSIX shell is available.
os.makedirs('saved_model', exist_ok=True)
all_features_model.save('saved_model/dnn_model')

In [None]:

# Show the stored fit results, keyed by model name
training_history

In [None]:


# Show the stored [rmse_train, rmse_test] pairs
test_results