Ref: 

http://gaussianprocess.org/gpml/data/

Split ratio: https://github.com/Kaixhin/SARCOS

In [1]:
import sys
import tensorflow as tf
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

sys.path.append("../tf_ifenet")

In [2]:
print(tf.__version__)

2.14.1


In [3]:
from model import IFENetRegressor
from config import DataConfig, ModelConfig
from utility import dataframe_to_dataset

In [4]:
filepath_train = 'sarcos_inv.csv'
filepath_test = 'sarcos_inv_test.csv'

num_col_names = [str(c) for c in range(0,21)]
cat_col_names = []
target_columns = [str(c) for c in range(21,28)]

# read training set
train = pd.read_csv(filepath_train)

# read test set
test = pd.read_csv(filepath_test)

In [5]:
from sklearn.model_selection import train_test_split

train, vald = train_test_split(train, test_size=4500, random_state=0)

print(f'Training set: {train.shape}')
print(f'Validation set: {vald.shape}')
print(f'Test set: {test.shape}')

batch_size = 2048
train_ds = dataframe_to_dataset(train, target_columns, batch_size=batch_size)
vald_ds = dataframe_to_dataset(vald, target_columns, shuffle=False, batch_size=batch_size)
test_ds = dataframe_to_dataset(test, target_columns, shuffle=False, batch_size=batch_size)

Training set: (39984, 28)
Validation set: (4500, 28)
Test set: (4449, 28)


In [6]:
data_config = DataConfig(categorical_column_names=cat_col_names, 
                         numerical_column_names=num_col_names,
                         category_output_mode='one_hot',
                         is_normalization=False)
model_config = ModelConfig(num_att=16,
                           r=3.5,
                           clf_num_layers=1,
                           clf_hidden_units=[32],
                           reduction_layer='flatten')

model = IFENetRegressor(data_config, model_config)
model.build_model(train_ds)

In [7]:
loss_fn = tf.keras.losses.MeanSquaredError()

lr = 0.015
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

checkpoint_path = 'checkpoints/ifeNet_sarcos.h5'
patience = 20
callbacks = [tf.keras.callbacks.EarlyStopping(patience=patience, monitor='val_loss'),
             tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True, monitor='val_loss')]

epochs = 2
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['mse'])

In [8]:
saved_model_path = 'saved_model/ifeNet_sarcos.keras'
model.fit(train_ds, validation_data=vald_ds, epochs=epochs, callbacks=callbacks, verbose=2)
model.load_weights(checkpoint_path)
model.save(saved_model_path)

Epoch 1/2
20/20 - 12s - loss: 285.0869 - mse: 285.0869 - val_loss: 269.0504 - val_mse: 269.0504 - 12s/epoch - 580ms/step
Epoch 2/2
20/20 - 6s - loss: 126.2772 - mse: 126.2772 - val_loss: 150.8378 - val_mse: 150.8378 - 6s/epoch - 302ms/step


In [9]:
saved_model_path = 'saved_model/ifeNet_sarcos.keras'
ifenet = tf.keras.models.load_model(saved_model_path, safe_mode=False)

In [10]:
y_pred = np.empty((0,len(target_columns)))
y_test = np.empty((0,len(target_columns)))

for data,label in test_ds:
    y_hat = ifenet(data)
    y_pred = np.append(y_pred, y_hat, axis=0)
    
    label = label.numpy()
    y_test = np.append(y_test, label, axis=0)

In [11]:
print(f'R2 Score: {r2_score(y_test, y_pred)}')
print(f'MSE: {mean_squared_error(y_test, y_pred)}')
print(f'MAE: {mean_absolute_error(y_test, y_pred)}')

R2 Score: 0.13582362557153282
MSE: 152.3602556024732
MAE: 8.000952721007106


In [12]:
ifenet(next(iter(test_ds.map(lambda x,y: x))))
df = ifenet.get_feature_importance()
df

NameError: name 'pd' is not defined

In [79]:
saved_model_path = 'saved_model/ifeNet_sarcos.keras'
ifenet = tf.keras.models.load_model(saved_model_path)

ValueError: Requested the deserialization of a Lambda layer with a Python `lambda` inside it. This carries a potential risk of arbitrary code execution and thus it is disallowed by default. If you trust the source of the saved model, you can pass `safe_mode=False` to the loading function in order to allow Lambda layer loading.

In [19]:
model.get_feature_importance(train.columns)

TypeError: get_feature_importance() takes 1 positional argument but 2 were given

In [9]:
from ife import IFENetRegressor

n_features = X_train.shape[1]
_, counts = np.unique(y_train, return_counts=True)
n_response = len(targets)
ife_num_layers = 1
clf_num_layers = 1
clf_hidden_units = [128]
reduction_layer = 'flatten'
num_att = 128
r = 4.0

print(f'n_response: {n_response}')
print(f'n_features: {n_features}')

ife_params = {'n_features': n_features,
              'n_outputs': n_response,
              'num_att': num_att,
              'r': r,
              'ife_num_layers': ife_num_layers, 
              'clf_num_layers': clf_num_layers,
              'clf_hidden_units': clf_hidden_units,
              'reduction_layer': reduction_layer
             }
model = IFENetRegressor(**ife_params)
model.build(input_shape=(None,n_features))

path_saved_model = 'saved_model/ifeNet_sarcos_att_128.h5'
model.load_weights(path_saved_model)

n_response: 7
n_features: 21


In [14]:
feat_scores = model.input_scores
feat_scores = np.mean(feat_scores, axis=(0,1))

feat_rank = {}
for col,score in zip(features,feat_scores):
    #print(f'{col}: {score}')
    feat_rank[col] = score

df_feat_rank = pd.DataFrame(list(feat_rank.items()), columns=['Feature', 'Score'])
df_feat_rank.sort_values(by='Score', ascending=False)

Unnamed: 0,Feature,Score
14,14,0.19208
0,0,0.096904
17,17,0.077443
1,1,0.071178
2,2,0.063495
15,15,0.063181
7,7,0.049514
16,16,0.048306
3,3,0.038565
12,12,0.032125
