Train a neural network using tensorflow to correlate basic oxide perovskite properties to OER activities.

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from sklearn.inspection import permutation_importance
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import Callback

Load, normalize and split the data.

In [3]:
# Load data
df = pd.read_excel("data_gp.xlsx")
df = df.drop(['mu/t', 'mu*RA', 'mu*t', 'RA*t', 'mu/RA', 'RA/t'], axis=1)
df

Unnamed: 0,t,mu,RA,XA,XB,QA,Nd,VRHE
0,0.993,0.43,1.36,1.1,1.55,3.0,4.0,1.791583
1,0.998,0.422,1.36,1.1,1.73,3.0,5.5,1.72275
2,1.003,0.415,1.36,1.1,1.91,3.0,7.0,1.707833
3,0.988,0.437,1.36,1.1,1.725,3.0,6.0,1.774417
4,1.004,0.414,1.36,1.1,1.902,3.0,6.8,1.790833
5,1.004,0.413,1.36,1.1,1.894,3.0,6.6,1.753917
6,1.009,0.407,1.36,1.1,1.83,3.0,5.0,1.759083
7,1.01,0.407,1.365,1.115,1.83,3.0,5.0,1.724667
8,1.012,0.407,1.37,1.13,1.83,3.0,5.0,1.755583
9,1.011,0.404,1.36,1.1,1.88,3.0,6.0,1.720583


In [4]:
# Select input features and target
features = ['t', 'mu', 'RA', 'XA', 'XB', 'QA', 'Nd']
target = 'VRHE'
X = df[features].values
y = df[target].values

# Normalize input features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train = X_scaled[:18, :]
y_train = y[:18]
X_val = X_scaled[18:, :]
y_val = y[18:]

X_train, y_train, X_val, y_val

(array([[-1.08223085,  1.62676953, -0.77345319,  0.86166497, -3.46038823,
          0.97887708, -1.88632043],
        [-0.95938445,  1.18516573, -0.77345319,  0.86166497, -1.30350668,
          0.97887708,  0.04477912],
        [-0.83653805,  0.79876242, -0.77345319,  0.86166497,  0.85337488,
          0.97887708,  1.97587866],
        [-1.20507726,  2.01317285, -0.77345319,  0.86166497, -1.36342005,
          0.97887708,  0.68847897],
        [-0.81196877,  0.74356194, -0.77345319,  0.86166497,  0.75751347,
          0.97887708,  1.71839873],
        [-0.81196877,  0.68836147, -0.77345319,  0.86166497,  0.66165207,
          0.97887708,  1.46091879],
        [-0.68912236,  0.35715862, -0.77345319,  0.86166497, -0.10523915,
          0.97887708, -0.59892073],
        [-0.66455308,  0.35715862, -0.71123065,  1.0650686 , -0.10523915,
          0.97887708, -0.59892073],
        [-0.61541452,  0.35715862, -0.64900811,  1.26847224, -0.10523915,
          0.97887708, -0.59892073],
        [-

Build a 4-layer neural network with early stopping and learning rate decay, and train with 500 epochs.

In [24]:
class EarlyStoppingWithWarmup(Callback):
    def __init__(self, monitor='val_mae', mode='min', patience=5, warmup_epochs=150, restore_best_weights=True, verbose=1):
        super().__init__()
        self.monitor = monitor
        self.patience = patience
        self.warmup_epochs = warmup_epochs
        self.restore_best_weights = restore_best_weights
        self.verbose = verbose
        
        self.wait = 0
        self.best_weights = None
        self.stopped_epoch = 0
        
        if mode == 'min':
            self.monitor_op = np.less
            self.best = np.inf
        else:
            self.monitor_op = np.greater
            self.best = -np.inf

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        current = logs.get(self.monitor)
        if current is None:
            print(f"Warning: EarlyStopping requires {self.monitor} available!")
            return
        
        # Warmup: Skip monitoring during the warmup period
        if epoch < self.warmup_epochs:
            if self.verbose:
                print(f"Epoch {epoch+1}: Warmup phase ({epoch+1}/{self.warmup_epochs}) - {self.monitor}: {current:.4f}")
            return

        if self.monitor_op(current, self.best):
            self.best = current
            self.best_weights = self.model.get_weights()
            self.wait = 0
            if self.verbose:
                print(f"Epoch {epoch+1}: {self.monitor} improved to {current:.4f}")
        else:
            self.wait += 1
            if self.verbose:
                print(f"Epoch {epoch+1}: {self.monitor} did not improve. Wait count: {self.wait}/{self.patience}")
            if self.wait >= self.patience:
                self.stopped_epoch = epoch
                self.model.stop_training = True
                if self.restore_best_weights:
                    if self.best_weights is not None:
                        self.model.set_weights(self.best_weights)
                        print(f"Restoring model weights from epoch {self.stopped_epoch - self.patience + 1}")
                print(f"EarlyStopping triggered at epoch {self.stopped_epoch + 1}")

    def on_train_end(self, logs=None):
        if self.stopped_epoch > 0 and self.verbose:
            print(f"Training stopped at epoch {self.stopped_epoch + 1} due to early stopping.")


# Setting learning rate decay
initial_lr = 0.001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_lr,
    decay_steps=400,
    decay_rate=0.9,
    staircase=True
)

# Create an EarlyStopping callback
early_stop = EarlyStoppingWithWarmup(
    monitor='val_mae',          # Metric to monitor
    patience=5,                 # Wait 5 epochs after min before stopping
    mode='min',                 # Stop when val_mae stops decreasing
    verbose=1,
    restore_best_weights=True
)

model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.07), input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.06)),
    tf.keras.layers.Dense(16, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.06)),
    tf.keras.layers.Dense(1)
])

# Compile model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule), loss='mse', metrics=['mae'])

# Train model
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=600, verbose=1, callbacks=[early_stop])

Epoch 1/600
Epoch 1: Warmup phase (1/150) - val_mae: 1.4173[0m [1m0s[0m 2s/step - loss: 10.2921 - mae: 1.6133
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 10.2921 - mae: 1.6133 - val_loss: 9.6172 - val_mae: 1.4173
Epoch 2/600
Epoch 2: Warmup phase (2/150) - val_mae: 1.3367[0m [1m0s[0m 24ms/step - loss: 9.9765 - mae: 1.5323
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step - loss: 9.9765 - mae: 1.5323 - val_loss: 9.3347 - val_mae: 1.3367
Epoch 3/600
Epoch 3: Warmup phase (3/150) - val_mae: 1.2541[0m [1m0s[0m 22ms/step - loss: 9.6785 - mae: 1.4523
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - loss: 9.6785 - mae: 1.4523 - val_loss: 9.0623 - val_mae: 1.2541
Epoch 4/600
Epoch 4: Warmup phase (4/150) - val_mae: 1.1752[0m [1m0s[0m 22ms/step - loss: 9.4048 - mae: 1.3760
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - loss: 9.4048 - mae: 1.3760 - val_loss: 8.8137 - val_mae: 1.175

The MAE for training set is 0.0236 eV after 402 epochs of training. Evaluate the model with cross-validation set.

In [25]:
val_loss, val_mae = model.evaluate(X_val, y_val)
print(f"Validation MAE from model.evaluate: {val_mae:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0791 - mae: 0.0243
Validation MAE from model.evaluate: 0.0243


In [26]:
df_ml = pd.DataFrame({
    'epoch': np.arange(1, len(history.history['loss']) + 1),
    'training loss': history.history['loss'],
    'training mae': history.history['mae'],
    'validation loss': history.history['val_loss'],
    'validation mae': history.history['val_mae']
})

df_ml.to_csv('training_metrics.csv', index=False)

In [27]:
y_pred = model.predict(X_scaled)

df_mlresult = pd.DataFrame({
    'y_target': np.ravel(y),  # Flatten y to 1D too, if needed
    'y_pred': np.ravel(y_pred)
})
df_mlresult

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 299ms/step


Unnamed: 0,y_target,y_pred
0,1.791583,1.76304
1,1.72275,1.74317
2,1.707833,1.724792
3,1.774417,1.759123
4,1.790833,1.724371
5,1.753917,1.724576
6,1.759083,1.727178
7,1.724667,1.726471
8,1.755583,1.725015
9,1.720583,1.71667


In [28]:
from sklearn.inspection import permutation_importance

r = permutation_importance(model, X_val, y_val,
                           scoring='r2',
                           n_repeats=30,
                           random_state=0)

for i in r.importances_mean.argsort()[::-1]:
    print(f"{features[i]:<8}"
            f"{r.importances_mean[i]:.3f}"
            f" +/- {r.importances_std[i]:.3f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 298ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

In [29]:
r.importances.shape

(7, 30)

In [30]:
df_imp = pd.DataFrame()

for i, feature in enumerate(features):
    df_imp[feature] = r.importances[i]

df_imp

Unnamed: 0,t,mu,RA,XA,XB,QA,Nd
0,0.107764,0.011662,0.152442,-0.117109,0.047277,-0.055348,-0.081956
1,0.100802,0.392042,0.074133,-0.207469,0.01128,-0.05257,0.15316
2,0.002187,0.042981,0.022382,-0.109571,-0.053612,-0.066202,-0.15847
3,0.044259,0.333255,-0.092739,-0.19548,-0.041295,-0.094024,-0.02106
4,0.046117,-0.062226,0.094421,0.07893,-0.038429,0.065467,-0.166033
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.100802,0.392042,0.074133,-0.207469,0.01128,-0.05257,0.15316
7,-0.037752,0.163215,-0.13587,0.031019,-0.051722,0.025151,-0.053818
8,0.127643,0.243443,0.138632,-0.251461,-0.039035,-0.092929,-0.059419
9,0.072737,-0.101542,0.124974,-0.091007,-0.038317,-0.035744,-0.278937


In [32]:
from tensorflow.keras.utils import plot_model

plot_model(model, to_file='nn_structure.png', sshow_shapes=True, show_layer_names=True, dpi=300)

You must install graphviz (see instructions at https://graphviz.gitlab.io/download/) for `plot_model` to work.
