Ref: http://gaussianprocess.org/gpml/data/

In [2]:
import tensorflow as tf
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [3]:
print(tf.__version__)

2.14.1


In [7]:
filepath = 'superconductivity.csv'
columns = list(df.columns)
target = 'critical_temp'

# read dataset
df = pd.read_csv(filepath)

df.shape

['number_of_elements', 'mean_atomic_mass', 'wtd_mean_atomic_mass', 'gmean_atomic_mass', 'wtd_gmean_atomic_mass', 'entropy_atomic_mass', 'wtd_entropy_atomic_mass', 'range_atomic_mass', 'wtd_range_atomic_mass', 'std_atomic_mass', 'wtd_std_atomic_mass', 'mean_fie', 'wtd_mean_fie', 'gmean_fie', 'wtd_gmean_fie', 'entropy_fie', 'wtd_entropy_fie', 'range_fie', 'wtd_range_fie', 'std_fie', 'wtd_std_fie', 'mean_atomic_radius', 'wtd_mean_atomic_radius', 'gmean_atomic_radius', 'wtd_gmean_atomic_radius', 'entropy_atomic_radius', 'wtd_entropy_atomic_radius', 'range_atomic_radius', 'wtd_range_atomic_radius', 'std_atomic_radius', 'wtd_std_atomic_radius', 'mean_Density', 'wtd_mean_Density', 'gmean_Density', 'wtd_gmean_Density', 'entropy_Density', 'wtd_entropy_Density', 'range_Density', 'wtd_range_Density', 'std_Density', 'wtd_std_Density', 'mean_ElectronAffinity', 'wtd_mean_ElectronAffinity', 'gmean_ElectronAffinity', 'wtd_gmean_ElectronAffinity', 'entropy_ElectronAffinity', 'wtd_entropy_ElectronAffini

(21263, 82)

In [48]:
len([target])

1

In [35]:
from sklearn.model_selection import train_test_split

y_train = df_train[target].values
X_train = df_train.drop(columns=[target]).values
y_test = df_test[target].values
X_test = df_test.drop(columns=[target]).values

X_train, X_vald, y_train, y_vald = train_test_split(X_train, y_train, train_size=4500, random_state=0)

print(f'X_train.shape: {X_train.shape}')
print(f'X_vald.shape: {X_vald.shape}')
print(f'X_test.shape: {X_test.shape}')


X_train.shape: (4500, 21)
X_vald.shape: (39984, 21)
X_test.shape: (4449, 21)


In [36]:
# Normalize target column

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
y_train = scaler.fit_transform(y_train.reshape((-1,1)))
y_vald = scaler.transform(y_vald.reshape((-1,1)))
y_test = scaler.transform(y_test.reshape((-1,1)))

y_train = y_train.ravel()
y_vald = y_vald.ravel()
y_test = y_test.ravel()

In [37]:
def array_to_dataset(data, target, shuffle=True, batch_size=128):
    ds = tf.data.Dataset.from_tensor_slices((data, target))
    if shuffle:
        ds = ds.shuffle(batch_size*2).batch(batch_size).prefetch(batch_size)
    else:
        ds = ds.batch(batch_size)
    return ds

batch_size = 4096
train_ds = array_to_dataset(X_train, y_train, batch_size=batch_size)
vald_ds = array_to_dataset(X_vald, y_vald, shuffle=False, batch_size=batch_size)
test_ds = array_to_dataset(X_test, y_test, shuffle=False, batch_size=batch_size)

In [38]:
import IterativeFeatureExclusion as IFE

n_features = X_train.shape[1]
_, counts = np.unique(y_train, return_counts=True)
n_response = 1
ife_num_layers = 1
clf_hidden_size = 65
num_att = 8
r = 5.9675

print(f'n_target: {n_target}')
print(f'n_features: {n_features}')

ife_params = {'n_features': n_features,
              'n_response': n_response,
              'num_att': num_att,
              'r': r,
              'ife_num_layers': ife_num_layers, 
              'clf_hidden_size': clf_hidden_size,              
             }
model = IFE.IFENetRegressor(**ife_params)

n_target: 1
n_features: 21


In [43]:
loss_fn = tf.keras.losses.MeanSquaredError()

lr = 0.015
lr_scheduler = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=lr, 
                                                              decay_steps=2000,
                                                              decay_rate=0.95,
                                                              staircase=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_scheduler)

checkpoint_path = 'checkpoints/ifeNet_sarcos.h5'
patience = 100
callbacks = [tf.keras.callbacks.EarlyStopping(patience=patience, monitor='val_loss'),
             tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True, monitor='val_loss')]

epochs = 100
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['mae'])

In [44]:
saved_model_path = 'saved_model/ifeNet_sarcos.h5'
model.fit(train_ds, validation_data=vald_ds, epochs=epochs, callbacks=callbacks, verbose=2)
model.load_weights(checkpoint_path)
model.save_weights(saved_model_path)

Epoch 1/100
2/2 - 6s - loss: 0.6081 - mae: 0.5076 - val_loss: 0.6176 - val_mae: 0.5477 - 6s/epoch - 3s/step
Epoch 2/100
2/2 - 1s - loss: 0.6109 - mae: 0.5139 - val_loss: 0.6190 - val_mae: 0.5450 - 1s/epoch - 531ms/step
Epoch 3/100
2/2 - 1s - loss: 0.6130 - mae: 0.5183 - val_loss: 0.6215 - val_mae: 0.5485 - 1s/epoch - 505ms/step
Epoch 4/100
2/2 - 1s - loss: 0.6104 - mae: 0.5128 - val_loss: 0.6160 - val_mae: 0.5458 - 1s/epoch - 543ms/step
Epoch 5/100
2/2 - 1s - loss: 0.6083 - mae: 0.5087 - val_loss: 0.6170 - val_mae: 0.5471 - 977ms/epoch - 489ms/step
Epoch 6/100
2/2 - 1s - loss: 0.6090 - mae: 0.5108 - val_loss: 0.6163 - val_mae: 0.5457 - 996ms/epoch - 498ms/step
Epoch 7/100
2/2 - 1s - loss: 0.6078 - mae: 0.5077 - val_loss: 0.6128 - val_mae: 0.5419 - 978ms/epoch - 489ms/step
Epoch 8/100
2/2 - 1s - loss: 0.6081 - mae: 0.5090 - val_loss: 0.6097 - val_mae: 0.5374 - 994ms/epoch - 497ms/step
Epoch 9/100
2/2 - 1s - loss: 0.6077 - mae: 0.5075 - val_loss: 0.6134 - val_mae: 0.5404 - 1s/epoch - 508

In [16]:
y_pred = np.empty((0,))
y_test = np.empty((0,))

for data,label in test_ds:
    y_hat = model(data)
    y_pred = np.append(y_pred, y_hat.numpy().ravel())

    label = label.numpy()
    y_test = np.append(y_test, label.ravel())

In [24]:
y_pred.max()

0.6583020687103271

In [25]:
print(r2_score(y_test, y_pred))
print(mean_squared_error(y_test, y_pred))
print(mean_absolute_error(y_test, y_pred))

0.4528119753756158
0.0054882658180464205
0.056801059710301555
