In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import pandas as pd
from sklearn.model_selection import LeaveOneOut
from binaryapproximator import *


In [None]:
# Load the octet dataset and split it into network inputs and the regression target.
octet = pd.read_csv('octet.csv')

# The 16 columns fed to the network as inputs.
feature_columns = [
    'IP(A)', 'EA(A)', 'H(A)', 'L(A)', 'rs(A)', 'rp(A)', 'rd(A)',
    'IP(B)', 'EA(B)', 'H(B)', 'L(B)', 'rs(B)', 'rp(B)', 'rd(B)',
    'EN(A)', 'EN(B)',
]
target_columns = ['sign(Delta E)', 'Delta E']
octet_selected = octet[target_columns + feature_columns]

X_r = octet_selected[feature_columns].to_numpy()
# Double brackets keep the target as a single-column 2-D array.
y = octet_selected[['Delta E']].to_numpy()
# Alternative 0/1 target built from the sign column (disabled):
# y = octet_selected[['sign(Delta E)']].to_numpy()
# y = np.where( y>=0, np.float32(1), np.float32(0))

# normalise_data's result is unpacked into three values; only the first is kept.
X, _, _ = normalise_data(X_r)

In [None]:
# Leave-one-out splitter used by the evaluation cells, and a torch MSE criterion.
loo_cv = LeaveOneOut()
loss_criterion = nn.MSELoss()

# Layer sizes passed to GeneralRegressionNN in the cells below.
# NOTE: `input` shadows the Python builtin of the same name; it is kept
# because other cells reference it as `input_size=input`.
input, hidden1, hidden2, output = 16, 32, 16, 1
# hidden3 = 32

# dataset = ToTensor(X,y)
# dataloader = DataLoader(dataset=dataset, batch_size=int(len(X)), shuffle=True)

# Method 0: Regular NN Training
1. Initialise the Weights
    
Do for $t < \tau$:

2. $y_{pred} = Model(X,W_t, \theta_t)$ (Forward Step)
3. $loss(y, y_{pred})$ 
4. $\nabla_{w_t} = \frac{\partial loss}{\partial W_t}$ and $\nabla_{\theta_t} = \frac{\partial loss}{\partial \theta_t}$ (Backward Step)
5. $W_{t+1} = W_t - \eta \nabla_{w_t}$ and $\theta_{t+1} = \theta_t - \eta \nabla_{\theta_t}$ (Update Step)

In [None]:
# Method 0: repeated leave-one-out (LOO) evaluation of the base network,
# trained with LossFunction(mse=True) and no penalty arguments.
n_reps = 10
n_samples = len(X)  # LeaveOneOut yields exactly one fold per row of X

# Result containers indexed as [repetition, held-out sample].
model_finals_0 = np.empty(shape=(n_reps, n_samples), dtype=object)  # allocated but not filled below
epoch_losses_0 = np.empty(shape=(n_reps, n_samples), dtype=object)  # second return value of nn_training, stored as-is
test_loss_0 = np.empty(shape=(n_reps, n_samples), dtype=float)
train_loss_0 = np.empty(shape=(n_reps, n_samples), dtype=float)

for reps in range(n_reps):
    print(reps)  # progress indicator
    for set_ind, (train_ind, test_ind) in enumerate(loo_cv.split(X)):
        # A new model is constructed and trained for every fold.
        model_base = GeneralRegressionNN(input_size=input, hidden_size1=hidden1, hidden_size2=hidden2, output_size=output, balancing_bias=False)
        tr = TrainNN(model=model_base, x=X[train_ind], y=y[train_ind], gradient_threshold=0.005, learning_rate=0.001)
        loss_func = LossFunction(mse=True)
        model_final, epoch_loss = tr.nn_training(loss_func=loss_func)
        # model_finals_0[reps, set_ind] = model_final
        epoch_losses_0[reps, set_ind] = epoch_loss

        # Squared error on the single held-out sample, then MSE on the training fold.
        y_pred_test = model_final.predict(X[test_ind])
        test_loss_0[reps, set_ind] = np.mean((y[test_ind]-y_pred_test)**2)
        y_pred_train = model_final.predict(X[train_ind])
        train_loss_0[reps, set_ind] = np.mean((y[train_ind]-y_pred_train)**2)

# Std of each sample's test loss across repetitions (length n_samples).
test_loss_0_std = test_loss_0.std(axis=0)
# Std of the training loss across folds within each repetition (length n_reps).
train_loss_0_std = train_loss_0.std(axis=1)

In [None]:
# Per-sample LOO test loss of the base network, plotted against the target value.
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
fig.suptitle(' Scenario 0: Base NN ', fontsize=15)
# Error bars are 1.96 * std / sqrt(n_reps) around the mean over repetitions.
ax.errorbar(y, test_loss_0.mean(axis=0), yerr=np.array(test_loss_0_std)*1.96/(n_reps**0.5), marker='o', linestyle=' ', label='Test')
# Raw string: "\D" is not a valid Python escape sequence and triggers a
# SyntaxWarning on recent Python versions; the rendered label is unchanged.
ax.set_xlabel(r"$\Delta$E")
ax.set_ylabel("loss")
ax.set_title(f'Mean and Std of MSE per Test Sample over {n_reps} Repetitions', fontsize=10)
ax.legend();

In [None]:
# Flatten every named parameter of `model_base` (the model object left over
# from the last fold of the Method 0 loop) and write one CSV row per parameter.
base_dict = {
    param_name: tensor.detach().numpy().flatten()
    for param_name, tensor in model_base.named_parameters()
}

base_df_0 = pd.DataFrame.from_dict(base_dict, orient='index')
base_df_0.to_csv('base_model_0.csv')

# Method 1: Adding L1 and Binary Penalty + Binarising the Proposition Layer After Full Training
1. Initialise the Weights
    
Do for $t < \tau$:

2. $y_{pred} = Model(X,W_t, \theta_t)$ (Forward Step)
3. $loss(y, y_{pred}, p_{l_1}(\theta_t), p_{binary}(W_t))$ 
4. $\nabla_{w_t} = \frac{\partial loss}{\partial W_t}$ and $\nabla_{\theta_t} = \frac{\partial loss}{\partial \theta_t}$ (Backward Step)
5. $W_{t+1} = W_t - \eta \nabla_{w_t}$ and $\theta_{t+1} = \theta_t - \eta \nabla_{\theta_t}$ (Update Step)

After converging

6. $W = Binarise(W_{\tau})$

In [None]:
# Grid search over (l1_lambda, binary_lambda) pairs with 4-fold cross-validation.
# Models are trained with TrainNN, binarised afterwards, and scored twice on the
# validation fold: with predict_binary ("stp") and with predict ("sig").
from sklearn.model_selection import KFold

kf = KFold(n_splits=4, shuffle=True, random_state=529)
lambda_range = np.logspace(-3, -1, num=10)

# Rows index l1_lambda, columns index binary_lambda.
grid_shape = (len(lambda_range), len(lambda_range))
valid_loss_per_pair_lambda_stp = np.zeros(grid_shape)
valid_loss_per_pair_lambda_sig = np.zeros(grid_shape)

for l1_ind, l1 in enumerate(lambda_range):
    for lb_ind, lb in enumerate(lambda_range):
        fold_mse_stp, fold_mse_sig = [], []
        for train_ind, valid_ind in kf.split(X):
            model_ = GeneralRegressionNN(input_size=input, hidden_size1=hidden1, hidden_size2=hidden2, output_size=output, balancing_bias=True)
            tr = TrainNN(model=model_, x=X[train_ind], y=y[train_ind], gradient_threshold=0.005, learning_rate=0.001)
            loss_func = LossFunction(mse=True, l1_lambda=l1, binary_lambda=lb)
            model_final, _ = tr.nn_training(loss_func=loss_func)
            model_final.binarise_model()

            y_valid = y[valid_ind]
            pred_stp = model_final.predict_binary(X[valid_ind])
            fold_mse_stp.append(np.mean((y_valid - pred_stp)**2))
            pred_sig = model_final.predict(X[valid_ind])
            fold_mse_sig.append(np.mean((y_valid - pred_sig)**2))

        # Average validation MSE over the folds for this lambda pair.
        valid_loss_per_pair_lambda_stp[l1_ind, lb_ind] = np.mean(fold_mse_stp)
        valid_loss_per_pair_lambda_sig[l1_ind, lb_ind] = np.mean(fold_mse_sig)

In [None]:
# Method 1, step-activation variant: repeated leave-one-out evaluation using the
# lambda pair with the lowest `predict_binary` validation loss from the grid search.
best_l1_ind, best_lb_ind = np.unravel_index(np.argmin(valid_loss_per_pair_lambda_stp), valid_loss_per_pair_lambda_stp.shape)
l1_lambda_stp_1 = lambda_range[best_l1_ind]
binary_lambda_stp_1 = lambda_range[best_lb_ind]

n_reps = 10
n_samples = len(X)  # LeaveOneOut yields exactly one fold per row of X

# Result containers indexed as [repetition, held-out sample].
model_finals_1 = np.empty(shape=(n_reps, n_samples), dtype=object)  # allocated but not filled below
epoch_losses_1_stp = np.empty(shape=(n_reps, n_samples), dtype=object)  # second return value of nn_training, stored as-is
test_loss_1_stp = np.empty(shape=(n_reps, n_samples), dtype=float)
train_loss_1_stp = np.empty(shape=(n_reps, n_samples), dtype=float)

for reps in range(n_reps):
    print(reps)  # progress indicator
    for set_ind, (train_ind, test_ind) in enumerate(loo_cv.split(X)):
        model_1 = GeneralRegressionNN(input_size=input, hidden_size1=hidden1, hidden_size2=hidden2, output_size=output, balancing_bias=True)
        tr = TrainNN(model=model_1, x=X[train_ind], y=y[train_ind], gradient_threshold=0.005, learning_rate=0.001)
        loss_func = LossFunction(mse=True, l1_lambda=l1_lambda_stp_1, binary_lambda=binary_lambda_stp_1)
        model_final, epoch_loss = tr.nn_training(loss_func=loss_func)
        model_final.binarise_model()
        # model_finals_1[reps, set_ind] = model_final
        epoch_losses_1_stp[reps, set_ind] = epoch_loss

        # The "stp" results use predict_binary, matching how the "stp" validation
        # loss was computed in the grid search and how the other methods' "stp"
        # results are evaluated. (Previously this cell called predict, which made
        # it differ from the "sig" cell only in the chosen lambdas.)
        y_pred_test = model_final.predict_binary(X[test_ind])
        test_loss_1_stp[reps, set_ind] = np.mean((y[test_ind]-y_pred_test)**2)
        y_pred_train = model_final.predict_binary(X[train_ind])
        train_loss_1_stp[reps, set_ind] = np.mean((y[train_ind]-y_pred_train)**2)

# Std of each sample's test loss across repetitions (length n_samples).
test_loss_1_std_stp = test_loss_1_stp.std(axis=0)
# Std of the training loss across folds within each repetition (length n_reps).
train_loss_1_std_stp = train_loss_1_stp.std(axis=1)

In [None]:
# Method 1, sigmoid variant: repeated leave-one-out evaluation using the lambda
# pair with the lowest `predict` validation loss from the grid search.
best_l1_ind, best_lb_ind = np.unravel_index(np.argmin(valid_loss_per_pair_lambda_sig), valid_loss_per_pair_lambda_sig.shape)
l1_lambda_sig_1 = lambda_range[best_l1_ind]
binary_lambda_sig_1 = lambda_range[best_lb_ind]

n_reps = 10
n_samples = len(X)  # LeaveOneOut yields exactly one fold per row of X

# Result containers indexed as [repetition, held-out sample].
model_finals_1 = np.empty(shape=(n_reps, n_samples), dtype=object)  # allocated but not filled below
epoch_losses_1_sig = np.empty(shape=(n_reps, n_samples), dtype=object)  # second return value of nn_training, stored as-is
test_loss_1_sig = np.empty(shape=(n_reps, n_samples), dtype=float)
train_loss_1_sig = np.empty(shape=(n_reps, n_samples), dtype=float)

for reps in range(n_reps):
    print(reps)  # progress indicator
    for set_ind, (train_ind, test_ind) in enumerate(loo_cv.split(X)):
        model_1 = GeneralRegressionNN(input_size=input, hidden_size1=hidden1, hidden_size2=hidden2, output_size=output, balancing_bias=True)
        tr = TrainNN(model=model_1, x=X[train_ind], y=y[train_ind], gradient_threshold=0.005, learning_rate=0.001)
        loss_func = LossFunction(mse=True, l1_lambda=l1_lambda_sig_1, binary_lambda=binary_lambda_sig_1)
        model_final, epoch_loss = tr.nn_training(loss_func=loss_func)
        model_final.binarise_model()
        # model_finals_1[reps, set_ind] = model_final
        epoch_losses_1_sig[reps, set_ind] = epoch_loss

        # Squared error on the single held-out sample, then MSE on the training fold.
        y_pred_test = model_final.predict(X[test_ind])
        test_loss_1_sig[reps, set_ind] = np.mean((y[test_ind]-y_pred_test)**2)
        y_pred_train = model_final.predict(X[train_ind])
        train_loss_1_sig[reps, set_ind] = np.mean((y[train_ind]-y_pred_train)**2)

# Std of each sample's test loss across repetitions (length n_samples).
test_loss_1_std_sig = test_loss_1_sig.std(axis=0)
# Std of the training loss across folds within each repetition (length n_reps).
train_loss_1_std_sig = train_loss_1_sig.std(axis=1)

# Method 2: Binarising the Proposition Layer During Training
1. Initialise the Weights (the hidden-layer weights are intentionally initialised to random integers from -1 to 1)

Do for $t < \tau$:

2. Keeping $W_t, \theta_t$ 
3. $W_b = Binarise(W_t)$ 
4. $y_{pred} = Model(X,W_b, \theta_t)$ (Forward Step)
5. $loss(y, y_{pred})$ 
6. $\nabla_{w_b} = \frac{\partial loss}{\partial W_b}$ and $\nabla_{\theta_t} = \frac{\partial loss}{\partial \theta_t}$ (Backward Step)
7. $W_{t+1} = W_t - \eta \nabla_{w_b}$ and $\theta_{t+1} = \theta_t - \eta \nabla_{\theta_t}$ (Update Step)

After converging

8. $W = Binarise(W_{\tau})$ 

In [None]:
# Method 2: repeated leave-one-out evaluation of a network trained with
# BinaryTrainNN (MSE loss only) and binarised after training. Each fold is
# scored with both predict_binary ("stp") and predict ("sig").
n_reps = 10
n_samples = len(X)  # LeaveOneOut yields exactly one fold per row of X

# Result containers indexed as [repetition, held-out sample].
model_finals_2 = np.empty(shape=(n_reps, n_samples), dtype=object)  # allocated but not filled below
epoch_losses_2 = np.empty(shape=(n_reps, n_samples), dtype=object)  # second return value of nn_training, stored as-is
test_loss_2_stp = np.empty(shape=(n_reps, n_samples), dtype=float)
test_loss_2_sig = np.empty(shape=(n_reps, n_samples), dtype=float)
train_loss_2_stp = np.empty(shape=(n_reps, n_samples), dtype=float)
train_loss_2_sig = np.empty(shape=(n_reps, n_samples), dtype=float)

for reps in range(n_reps):
    print(reps)  # progress indicator
    for set_ind, (train_ind, test_ind) in enumerate(loo_cv.split(X)):
        model_2 = GeneralRegressionNN(input_size=input, hidden_size1=hidden1, hidden_size2=hidden2, output_size=output, balancing_bias=True)
        tr = BinaryTrainNN(model=model_2, x=X[train_ind], y=y[train_ind], gradient_threshold=0.005, learning_rate=0.001)
        loss_func = LossFunction(mse=True)
        model_final, epoch_loss = tr.nn_training(loss_func=loss_func)
        model_final.binarise_model()
        # model_finals_2[reps, set_ind] = model_final
        epoch_losses_2[reps, set_ind] = epoch_loss

        # "stp" scores: predictions from predict_binary.
        y_pred_test = model_final.predict_binary(X[test_ind])
        test_loss_2_stp[reps, set_ind] = np.mean((y[test_ind]-y_pred_test)**2)
        y_pred_train = model_final.predict_binary(X[train_ind])
        train_loss_2_stp[reps, set_ind] = np.mean((y[train_ind]-y_pred_train)**2)

        # "sig" scores: predictions from predict.
        y_pred_test = model_final.predict(X[test_ind])
        test_loss_2_sig[reps, set_ind] = np.mean((y[test_ind]-y_pred_test)**2)
        y_pred_train = model_final.predict(X[train_ind])
        train_loss_2_sig[reps, set_ind] = np.mean((y[train_ind]-y_pred_train)**2)

# Std of each sample's test loss across repetitions (length n_samples).
test_loss_2_std_stp = test_loss_2_stp.std(axis=0)
test_loss_2_std_sig = test_loss_2_sig.std(axis=0)
# Std of the training loss across folds within each repetition (length n_reps).
train_loss_2_std_stp = train_loss_2_stp.std(axis=1)
train_loss_2_std_sig = train_loss_2_sig.std(axis=1)

In [None]:
# Method 2 results. Left: per-sample LOO test loss against the target value.
# Right: mean training loss per repetition. Error bars are 1.96 * std / sqrt(n_reps).
fig, ax = plt.subplots(1, 2, figsize=(12, 5))
fig.suptitle(' Scenario 2 ', fontsize=15)

ax[0].errorbar(y, test_loss_2_stp.mean(axis=0), yerr=np.array(test_loss_2_std_stp)*1.96/(n_reps**0.5), marker='o', linestyle=' ', label='Test (step activation)')
ax[0].errorbar(y, test_loss_2_sig.mean(axis=0), yerr=np.array(test_loss_2_std_sig)*1.96/(n_reps**0.5), marker='o', linestyle=' ', label='Test (sigmoid activation)')
# ax[0].scatter(range(82), train_loss_0.mean(axis=0), label='Train')
# Raw string: "\D" is not a valid Python escape sequence and triggers a
# SyntaxWarning on recent Python versions; the rendered label is unchanged.
ax[0].set_xlabel(r"$\Delta$E")
ax[0].set_ylabel("loss")
# ax[0].set_yscale("log")
ax[0].set_title(f'Mean and Std of MSE for Test Set over {n_reps} Repetitions', fontsize=10)
ax[0].legend()

ax[1].errorbar(range(n_reps), train_loss_2_stp.mean(axis=1), yerr=np.array(train_loss_2_std_stp)*1.96/(n_reps**0.5), marker='o', linestyle='--', label='Train (step activation)')
ax[1].errorbar(range(n_reps), train_loss_2_sig.mean(axis=1), yerr=np.array(train_loss_2_std_sig)*1.96/(n_reps**0.5), marker='o', linestyle='--', label='Train (sigmoid activation)')
ax[1].set_xlabel("Repetition No.")
# ax[1].set_yscale("log")
ax[1].set_ylabel("loss")
ax[1].set_title(f'Mean and Std of MSE for Train Set over {n_reps} Repetitions', fontsize=10)
ax[1].legend();

In [None]:
# Flatten every named parameter of `model_2` (the model object left over from
# the last fold of the Method 2 loop) and write one CSV row per parameter.
binary_dict = {
    param_name: tensor.detach().numpy().flatten()
    for param_name, tensor in model_2.named_parameters()
}

binary_df_2 = pd.DataFrame.from_dict(binary_dict, orient='index')
binary_df_2.to_csv('binary_model_2.csv')


# Method 3: Sparsifying the Base NN (by Adding an L1 Penalty)
1. Initialise the Weights
    
Do for $t < \tau$:

2. $y_{pred} = Model(X,W_t, \theta_t)$ (Forward Step)
3. $loss(y, y_{pred}, p_{l_1}(\theta_t))$  
4. $\nabla_{w_t} = \frac{\partial loss}{\partial W_t}$ and $\nabla_{\theta_t} = \frac{\partial loss}{\partial \theta_t}$ (Backward Step)
5. $W_{t+1} = W_t - \eta \nabla_{w_t}$ and $\theta_{t+1} = \theta_t - \eta \nabla_{\theta_t}$ (Update Step)

In [None]:
# 1-D search for the L1 penalty weight of the base network using 4-fold
# cross-validation; one mean validation MSE is recorded per candidate lambda.
from sklearn.model_selection import KFold

kf = KFold(n_splits=4, shuffle=True, random_state=529)
lambda_range = np.logspace(-3, -1, num=10)

valid_loss_per_lambda = []
for l in lambda_range:
    fold_mse = []
    for train_ind, valid_ind in kf.split(X):
        model_ = GeneralRegressionNN(input_size=input, hidden_size1=hidden1, hidden_size2=hidden2, output_size=output, balancing_bias=False)
        tr = TrainNN(model=model_, x=X[train_ind], y=y[train_ind], gradient_threshold=0.008, learning_rate=0.001)
        loss_func = LossFunction(mse=True, l1_lambda=l)
        model_final, _ = tr.nn_training(loss_func=loss_func)
        residual = y[valid_ind] - model_final.predict(X[valid_ind])
        fold_mse.append(np.mean(residual**2))
    valid_loss_per_lambda.append(np.mean(fold_mse))

# Cell output: the lambda with the smallest mean validation MSE.
lambda_range[valid_loss_per_lambda.index(min(valid_loss_per_lambda))]

In [None]:
# Method 3: repeated leave-one-out evaluation of the base network with an L1
# penalty, using the lambda that minimised the cross-validated loss.
l1_lambda_3 = lambda_range[valid_loss_per_lambda.index(min(valid_loss_per_lambda))]

n_reps = 10
n_samples = len(X)  # LeaveOneOut yields exactly one fold per row of X

# Result containers indexed as [repetition, held-out sample].
model_finals_3 = np.empty(shape=(n_reps, n_samples), dtype=object)  # allocated but not filled below
epoch_losses_3 = np.empty(shape=(n_reps, n_samples), dtype=object)  # second return value of nn_training, stored as-is
test_loss_3 = np.empty(shape=(n_reps, n_samples), dtype=float)
train_loss_3 = np.empty(shape=(n_reps, n_samples), dtype=float)

for reps in range(n_reps):
    print(reps)  # progress indicator
    for set_ind, (train_ind, test_ind) in enumerate(loo_cv.split(X)):
        model_3 = GeneralRegressionNN(input_size=input, hidden_size1=hidden1, hidden_size2=hidden2, output_size=output, balancing_bias=False)
        tr = TrainNN(model=model_3, x=X[train_ind], y=y[train_ind], gradient_threshold=0.008, learning_rate=0.001)
        loss_func = LossFunction(mse=True, l1_lambda=l1_lambda_3)
        model_final, epoch_loss = tr.nn_training(loss_func=loss_func)
        # model_finals_3[reps, set_ind] = model_final
        epoch_losses_3[reps, set_ind] = epoch_loss

        # Squared error on the single held-out sample, then MSE on the training fold.
        y_pred_test = model_final.predict(X[test_ind])
        test_loss_3[reps, set_ind] = np.mean((y[test_ind]-y_pred_test)**2)
        y_pred_train = model_final.predict(X[train_ind])
        train_loss_3[reps, set_ind] = np.mean((y[train_ind]-y_pred_train)**2)

# Std of each sample's test loss across repetitions (length n_samples).
test_loss_3_std = test_loss_3.std(axis=0)
# Std of the training loss across folds within each repetition (length n_reps).
train_loss_3_std = train_loss_3.std(axis=1)

In [None]:
# Per-sample LOO test loss of the L1-penalised base network, plotted against the target value.
# A single Axes is requested: with plt.subplots(1, 2) `ax` is an array of Axes
# and the `ax.errorbar(...)` call below would raise AttributeError.
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
# Title numbered to match this method (it previously read "Scenario 2").
fig.suptitle(' Scenario 3: Base NN + L1 Penalty ', fontsize=15)
# Error bars are 1.96 * std / sqrt(n_reps) around the mean over repetitions.
ax.errorbar(y, test_loss_3.mean(axis=0), yerr=np.array(test_loss_3_std)*1.96/(n_reps**0.5), marker='o', linestyle=' ', label='Test')
# ax.scatter(range(82), train_loss_0.mean(axis=0), label='Train')
# Raw string: "\D" is not a valid Python escape sequence and triggers a
# SyntaxWarning on recent Python versions; the rendered label is unchanged.
ax.set_xlabel(r"$\Delta$E")
ax.set_ylabel("loss")
# ax.set_yscale("log")
ax.set_title(f'Mean and Std of MSE for Test Set over {n_reps} Repetitions', fontsize=10)
ax.legend();

In [None]:
# Flatten every named parameter of `model_3` (the model object left over from
# the last fold of the Method 3 loop) and write one CSV row per parameter.
base_dict = {
    param_name: tensor.detach().numpy().flatten()
    for param_name, tensor in model_3.named_parameters()
}

base_df_3 = pd.DataFrame.from_dict(base_dict, orient='index')
base_df_3.to_csv('base_model_3.csv')

# Method 4: Adding L1 and Binary Penalty + Binarising the Proposition Layer During Training
1. Initialise the Weights (the hidden-layer weights are intentionally initialised to random integers from -1 to 1)

Do for $t < \tau$:

2. Keeping $W_t, \theta_t$ 
3. $W_b = Binarise(W_t)$ 
4. $y_{pred} = Model(X,W_b, \theta_t)$ (Forward Step)
5. $loss(y, y_{pred},p_{l_1}(\theta_t), p_{binary}(W_t))$ 
6. $\nabla_{w_b} = \frac{\partial loss}{\partial W_b}$ and $\nabla_{\theta_t} = \frac{\partial loss}{\partial \theta_t}$ (Backward Step)
7. $W_{t+1} = W_t - \eta \nabla_{w_b}$ and $\theta_{t+1} = \theta_t - \eta \nabla_{\theta_t}$ (Update Step)

After converging

8. $W = Binarise(W_{\tau})$ 

In [None]:
# Grid search over (l1_lambda, binary_lambda) pairs with 4-fold cross-validation.
# Models are trained with BinaryTrainNN, binarised afterwards, and scored twice
# on the validation fold: with predict_binary ("stp") and with predict ("sig").
# NOTE: this reuses (and overwrites) the result arrays of the earlier grid search.
from sklearn.model_selection import KFold

kf = KFold(n_splits=4, shuffle=True, random_state=529)
lambda_range = np.logspace(-3, -1, num=10)

# Rows index l1_lambda, columns index binary_lambda.
grid_shape = (len(lambda_range), len(lambda_range))
valid_loss_per_pair_lambda_stp = np.zeros(grid_shape)
valid_loss_per_pair_lambda_sig = np.zeros(grid_shape)

for l1_ind, l1 in enumerate(lambda_range):
    for lb_ind, lb in enumerate(lambda_range):
        fold_mse_stp, fold_mse_sig = [], []
        for train_ind, valid_ind in kf.split(X):
            model_ = GeneralRegressionNN(input_size=input, hidden_size1=hidden1, hidden_size2=hidden2, output_size=output, balancing_bias=True)
            tr = BinaryTrainNN(model=model_, x=X[train_ind], y=y[train_ind], gradient_threshold=0.005, learning_rate=0.001)
            loss_func = LossFunction(mse=True, l1_lambda=l1, binary_lambda=lb)
            model_final, _ = tr.nn_training(loss_func=loss_func)
            model_final.binarise_model()

            y_valid = y[valid_ind]
            pred_stp = model_final.predict_binary(X[valid_ind])
            fold_mse_stp.append(np.mean((y_valid - pred_stp)**2))
            pred_sig = model_final.predict(X[valid_ind])
            fold_mse_sig.append(np.mean((y_valid - pred_sig)**2))

        # Average validation MSE over the folds for this lambda pair.
        valid_loss_per_pair_lambda_stp[l1_ind, lb_ind] = np.mean(fold_mse_stp)
        valid_loss_per_pair_lambda_sig[l1_ind, lb_ind] = np.mean(fold_mse_sig)

In [None]:
# Method 4, step-activation variant: repeated leave-one-out evaluation using the
# lambda pair with the lowest `predict_binary` validation loss from the grid search.
best_l1_ind, best_lb_ind = np.unravel_index(np.argmin(valid_loss_per_pair_lambda_stp), valid_loss_per_pair_lambda_stp.shape)
l1_lambda_4_stp = lambda_range[best_l1_ind]
binary_lambda_4_stp = lambda_range[best_lb_ind]

n_reps = 10
n_samples = len(X)  # LeaveOneOut yields exactly one fold per row of X

# Result containers indexed as [repetition, held-out sample].
model_finals_4 = np.empty(shape=(n_reps, n_samples), dtype=object)  # allocated but not filled below
epoch_losses_4_stp = np.empty(shape=(n_reps, n_samples), dtype=object)  # second return value of nn_training, stored as-is
test_loss_4_stp = np.empty(shape=(n_reps, n_samples), dtype=float)
train_loss_4_stp = np.empty(shape=(n_reps, n_samples), dtype=float)

for reps in range(n_reps):
    print(reps)  # progress indicator
    for set_ind, (train_ind, test_ind) in enumerate(loo_cv.split(X)):
        model_4 = GeneralRegressionNN(input_size=input, hidden_size1=hidden1, hidden_size2=hidden2, output_size=output, balancing_bias=True)
        tr = BinaryTrainNN(model=model_4, x=X[train_ind], y=y[train_ind], gradient_threshold=0.005, learning_rate=0.001)
        loss_func = LossFunction(mse=True, l1_lambda=l1_lambda_4_stp, binary_lambda=binary_lambda_4_stp)
        model_final, epoch_loss = tr.nn_training(loss_func=loss_func)
        model_final.binarise_model()
        # model_finals_4[reps, set_ind] = model_final
        epoch_losses_4_stp[reps, set_ind] = epoch_loss

        # Squared error on the single held-out sample, then MSE on the training fold.
        y_pred_test = model_final.predict_binary(X[test_ind])
        test_loss_4_stp[reps, set_ind] = np.mean((y[test_ind]-y_pred_test)**2)
        y_pred_train = model_final.predict_binary(X[train_ind])
        train_loss_4_stp[reps, set_ind] = np.mean((y[train_ind]-y_pred_train)**2)

# Std of each sample's test loss across repetitions (length n_samples).
test_loss_4_std_stp = test_loss_4_stp.std(axis=0)
# Std of the training loss across folds within each repetition (length n_reps).
train_loss_4_std_stp = train_loss_4_stp.std(axis=1)

In [None]:
# Method 4, sigmoid variant: repeated leave-one-out evaluation using the lambda
# pair with the lowest `predict` validation loss from the grid search.
best_l1_ind, best_lb_ind = np.unravel_index(np.argmin(valid_loss_per_pair_lambda_sig), valid_loss_per_pair_lambda_sig.shape)
l1_lambda_4_sig = lambda_range[best_l1_ind]
binary_lambda_4_sig = lambda_range[best_lb_ind]

n_reps = 10
n_samples = len(X)  # LeaveOneOut yields exactly one fold per row of X

# Result containers indexed as [repetition, held-out sample].
model_finals_4 = np.empty(shape=(n_reps, n_samples), dtype=object)  # allocated but not filled below
epoch_losses_4_sig = np.empty(shape=(n_reps, n_samples), dtype=object)  # second return value of nn_training, stored as-is
test_loss_4_sig = np.empty(shape=(n_reps, n_samples), dtype=float)
train_loss_4_sig = np.empty(shape=(n_reps, n_samples), dtype=float)

for reps in range(n_reps):
    print(reps)  # progress indicator
    for set_ind, (train_ind, test_ind) in enumerate(loo_cv.split(X)):
        model_4 = GeneralRegressionNN(input_size=input, hidden_size1=hidden1, hidden_size2=hidden2, output_size=output, balancing_bias=True)
        tr = BinaryTrainNN(model=model_4, x=X[train_ind], y=y[train_ind], gradient_threshold=0.005, learning_rate=0.001)
        loss_func = LossFunction(mse=True, l1_lambda=l1_lambda_4_sig, binary_lambda=binary_lambda_4_sig)
        model_final, epoch_loss = tr.nn_training(loss_func=loss_func)
        model_final.binarise_model()
        # model_finals_4[reps, set_ind] = model_final
        epoch_losses_4_sig[reps, set_ind] = epoch_loss

        # Squared error on the single held-out sample, then MSE on the training fold.
        y_pred_test = model_final.predict(X[test_ind])
        test_loss_4_sig[reps, set_ind] = np.mean((y[test_ind]-y_pred_test)**2)
        y_pred_train = model_final.predict(X[train_ind])
        train_loss_4_sig[reps, set_ind] = np.mean((y[train_ind]-y_pred_train)**2)

# Std of each sample's test loss across repetitions (length n_samples).
test_loss_4_std_sig = test_loss_4_sig.std(axis=0)
# Std of the training loss across folds within each repetition (length n_reps).
train_loss_4_std_sig = train_loss_4_sig.std(axis=1)

In [None]:
# Method 4 results. Left: per-sample LOO test loss against the target value.
# Right: mean training loss per repetition. Error bars are 1.96 * std / sqrt(n_reps).
fig, ax = plt.subplots(1, 2, figsize=(12, 5))
# Title numbered to match this method (it previously read "Scenario 2").
fig.suptitle(' Scenario 4: Binarised NN + Binary Penalty + L1 Penalty ', fontsize=15)

ax[0].errorbar(y, test_loss_4_stp.mean(axis=0), yerr=np.array(test_loss_4_std_stp)*1.96/(n_reps**0.5), marker='o', linestyle=' ', label='Test (step activation)')
ax[0].errorbar(y, test_loss_4_sig.mean(axis=0), yerr=np.array(test_loss_4_std_sig)*1.96/(n_reps**0.5), marker='o', linestyle=' ', label='Test (sigmoid activation)')
# ax[0].scatter(range(82), train_loss_0.mean(axis=0), label='Train')
# Raw string: "\D" is not a valid Python escape sequence and triggers a
# SyntaxWarning on recent Python versions; the rendered label is unchanged.
ax[0].set_xlabel(r"$\Delta$E")
ax[0].set_ylabel("loss")
# ax[0].set_yscale("log")
ax[0].set_title(f'Mean and Std of MSE for Test Set over {n_reps} Repetitions', fontsize=10)
ax[0].legend()

ax[1].errorbar(range(n_reps), train_loss_4_stp.mean(axis=1), yerr=np.array(train_loss_4_std_stp)*1.96/(n_reps**0.5), marker='o', linestyle='--', label='Train (step activation)')
ax[1].errorbar(range(n_reps), train_loss_4_sig.mean(axis=1), yerr=np.array(train_loss_4_std_sig)*1.96/(n_reps**0.5), marker='o', linestyle='--', label='Train (sigmoid activation)')
ax[1].set_xlabel("Repetition No.")
# ax[1].set_yscale("log")
ax[1].set_ylabel("loss")
ax[1].set_title(f'Mean and Std of MSE for Train Set over {n_reps} Repetitions', fontsize=10)
ax[1].legend();

In [None]:
# Flatten every named parameter of `model_4` (the model object left over from
# the last fold of the most recent Method 4 loop) and write one CSV row per parameter.
binary_dict = {
    param_name: tensor.detach().numpy().flatten()
    for param_name, tensor in model_4.named_parameters()
}

binary_df_4 = pd.DataFrame.from_dict(binary_dict, orient='index')
binary_df_4.to_csv('binary_model_4.csv')

In [None]:
# Overlay Methods 0, 2, 3 and 4 in one figure.
# Left: per-sample LOO test loss (mean over repetitions, error bars 1.96 * std / sqrt(n_reps)).
# Right: cumulative sum over repetitions of the per-repetition mean training loss.
yaxis_test_loss = [test_loss_0, test_loss_2_stp, test_loss_3, test_loss_4_stp, test_loss_2_sig, test_loss_4_sig]
yaxis_std_test_loss = [test_loss_0_std, test_loss_2_std_stp, test_loss_3_std, test_loss_4_std_stp, test_loss_2_std_sig, test_loss_4_std_sig]
# Entries 1 and 4 now reference the Method 2 *train* losses; they previously
# pointed at test_loss_2_stp / test_loss_2_sig, so the train panel showed test data.
yaxis_train_loss = [train_loss_0, train_loss_2_stp, train_loss_3, train_loss_4_stp, train_loss_2_sig, train_loss_4_sig]
yaxis_std_train_loss = [train_loss_0_std, train_loss_2_std_stp, train_loss_3_std, train_loss_4_std_stp, train_loss_2_std_sig, train_loss_4_std_sig]
axs_title = ['Base NN', 'Binarised NN (step act.)', 'Base NN + L1 Penalty', 'Binarised NN + L1 & Binary Penalty (step act.)', 'Binarised NN (sigm act.)', 'Binarised NN + L1 & Binary Penalty (sigm act.)']

fig, ax = plt.subplots(1, 2, figsize=(20, 5))
for test_loss, test_std, train_loss, series_label in zip(yaxis_test_loss, yaxis_std_test_loss, yaxis_train_loss, axs_title):
    ax[0].errorbar(y, test_loss.mean(axis=0), yerr=np.array(test_std)*1.96/(n_reps**0.5), marker='o', linestyle=' ', label=series_label)
    ax[1].plot(range(n_reps), np.cumsum(train_loss.mean(axis=1)), marker='o', linestyle='--', label=series_label)

# Axis decoration does not depend on the series, so it is applied once after plotting.
# Raw string: "\D" is not a valid Python escape sequence and triggers a
# SyntaxWarning on recent Python versions; the rendered label is unchanged.
ax[0].set_xlabel(r"$\Delta$E")
ax[0].set_ylabel("loss")
# ax[0].set_yscale("log")
ax[0].set_title(f'Mean and Std of MSE for Test Set over {n_reps} Repetitions', fontsize=10)
ax[0].legend()
ax[1].set_xlabel("Repetition No.")
# ax[1].set_yscale("log")
ax[1].set_ylabel("loss")
ax[1].set_title(f'Cumulative Mean of MSE for Train Set over {n_reps} Repetitions', fontsize=10)
ax[1].legend();
