In [180]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras import regularizers
from tensorflow.keras import callbacks
from tensorflow.keras import layers
from tensorflow.keras import backend as K
import tensorflow as tf

from random import seed
seed(1)

from os.path import join

In [181]:
def delete_params(params):
    DEL_PARAMS_LIST = [2,8,15,16,21,22,23]
    return np.delete(params,DEL_PARAMS_LIST , axis=1)

In [182]:
def prep_data():

    # load data from preprocess pipeline
    dataset = np.load(join('../data/d_dataset_t2/aug_data','training_subsets.npz'))
    X_train =dataset['X_train']
    y_train= dataset['y_train']
    X_val = dataset['X_val']
    y_val = dataset['y_val']
    X_test = dataset['X_test']
    y_test = dataset['y_test']

    y_train = delete_params(y_train)
    y_val = delete_params(y_val)
    y_test = delete_params(y_test)

    return X_train, X_val, X_test, y_train, y_val, y_test

In [183]:
X_train, X_val, X_test, y_train, y_val, y_test = prep_data()

In [184]:
def rmse(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))

def R2(y_true, y_pred):
    SS_res =  K.sum(K.square( y_true-y_pred ))
    SS_tot = K.sum(K.square( y_true - K.mean(y_true, axis = 0) ))
    return   1 - (SS_res/(SS_tot + K.epsilon()))

def r2(y_true, y_pred):
    SS_res =  K.sum(K.square(y_true - y_pred), axis = 0) 
    SS_tot = K.sum(K.square(y_true - K.mean(y_true, axis = 0)), axis = 0) 
    return K.mean(1 - (SS_res/SS_tot), axis=0)

def baseline(input_shape_1,input_shape_2):
    model = tf.keras.Sequential()
    model.add(Flatten(input_shape=(input_shape_1,input_shape_2)))
    model.add(Dense(17, activation='linear'))
    model.summary()
    return model

In [185]:
X_train = X_train[0:512]
y_train = y_train[0:512]
X_val = X_val[0:512]
y_val = y_val[0:512]

In [186]:
tf.random.set_seed(42)
model = baseline(X_train.shape[1], X_train.shape[2])

model.compile(optimizer='adam',loss='mse',metrics=[rmse, r2])

history = model.fit(X_train,y_train,
        batch_size=256,
        epochs=1,
        validation_data=(X_val,y_val),
        verbose=1)

Model: "sequential_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_16 (Flatten)         (None, 936)               0         
_________________________________________________________________
dense_16 (Dense)             (None, 17)                15929     
Total params: 15,929
Trainable params: 15,929
Non-trainable params: 0
_________________________________________________________________
Train on 512 samples, validate on 512 samples


In [187]:
from sklearn.metrics import r2_score
def compute_rmse(actual,pred, axis=0):
    return np.sqrt((np.square(actual - pred)).mean(axis=axis)).mean(axis=axis)

def numpy_R2(actual,pred):
    SSR = np.sum(np.square(actual-pred), axis=0)
    SST = np.sum(np.square(actual-np.mean(actual, axis=0)), axis=0)
    return np.mean(1 - (SSR/SST))

In [188]:
def compare_result(feature, actual, model):
    print('[SK] R2: %.4f, RMSE: %.4f'%(r2_score(actual,model.predict(feature)),compute_rmse(actual,model.predict(feature))))
    res = model.evaluate(feature,actual,verbose=False)
    print('[TF] R2: %.4f, RMSE: %.4f'%(res[2],res[1]))

In [189]:
compare_result(X_train, y_train, model)

[SK] R2: -6.6824, RMSE: 1.2073
[TF] R2: -7.3960, RMSE: 1.1715


In [190]:
compare_result(X_val, y_val, model)

[SK] R2: -6.5806, RMSE: 1.2097
[TF] R2: -7.1838, RMSE: 1.1817


## Breakdown

In [191]:
y_pred = model.predict(X_train)

### SSR

In [192]:
np.sum(np.square(y_train-y_pred), axis=0)

array([1034.44012006,  596.44214731,  539.48515714,  611.26677043,
        612.95225564,  821.82963648,  973.79616587,  823.94333075,
        796.56202416,  832.38496613,  702.17198335,  568.82452522,
        751.79345445,  749.44477869,  781.88009753,  640.35734209,
        963.54336283])

In [193]:
K.sum(K.square(y_train - y_pred), axis = 0) 

<tf.Tensor: id=25713, shape=(17,), dtype=float64, numpy=
array([1034.44012006,  596.44214731,  539.48515714,  611.26677043,
        612.95225564,  821.82963648,  973.79616587,  823.94333075,
        796.56202416,  832.38496613,  702.17198335,  568.82452522,
        751.79345445,  749.44477869,  781.88009753,  640.35734209,
        963.54336283])>

## SST

In [194]:
np.sum(np.square(y_train-np.mean(y_train, axis=0)), axis=0)

array([100.59569011, 107.02731931,  57.42602122,  93.18631381,
       159.78934101, 141.74241398, 194.46468125, 151.20102942,
        87.40076859, 119.63628027,  74.07852528,  87.84156185,
       107.00587661, 104.17985248,  78.83435227,  87.27858287,
        63.39210709])

In [195]:
y_train_tensor = tf.convert_to_tensor(y_train)
K.sum(K.square(y_train_tensor - K.mean(y_train_tensor, axis = 0)), axis = 0)

<tf.Tensor: id=25720, shape=(17,), dtype=float64, numpy=
array([100.59569011, 107.02731931,  57.42602122,  93.18631381,
       159.78934101, 141.74241398, 194.46468125, 151.20102942,
        87.40076859, 119.63628027,  74.07852528,  87.84156185,
       107.00587661, 104.17985248,  78.83435227,  87.27858287,
        63.39210709])>

## R2 

In [196]:
SSR = np.sum(np.square(y_train-y_pred), axis=0)
SST = np.sum(np.square(y_train-np.mean(y_train, axis=0)), axis=0)
np.mean(1 - (SSR/SST))

-6.682410482590197

In [197]:
SSR = K.sum(K.square(y_train - y_pred), axis = 0) 
SST = K.sum(K.square(y_train_tensor - K.mean(y_train_tensor, axis = 0)), axis = 0)
K.mean(1 - SSR/(SST + K.epsilon()), axis=-1)

<tf.Tensor: id=25737, shape=(), dtype=float64, numpy=-6.682410473981724>

In [198]:
r2(y_train_tensor, y_pred)

<tf.Tensor: id=25753, shape=(), dtype=float64, numpy=-6.682410482590197>