In [24]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Imports

In [25]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datasets import load_dataset
from sklearn.impute import SimpleImputer
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import StandardScaler

from Adam import AdamOptim
from irls_optimizer import IRLS
from Logreg import LogisticRegression
from SGD import SGD
from utils import fitComparisonModels, createFeatureInteractions

# Silence NumPy floating-point warnings (division by zero, invalid operation,
# overflow) for the rest of the session. The last call is the cell's final
# expression, so the error settings that were active before it are displayed.
np.seterr(divide = 'ignore') 
np.seterr(invalid='ignore')
np.seterr(over ='ignore')

{'divide': 'ignore', 'over': 'ignore', 'under': 'ignore', 'invalid': 'ignore'}

# Blood (small dataset)

In [26]:
# Load the "mstz/blood" dataset via `datasets.load_dataset` and materialise
# its "train" split as a pandas DataFrame.
dataset_name = "mstz/blood"
ds = load_dataset(dataset_name)
Xy = ds["train"].with_format("pandas")[:]

# Every column except the last is used as a feature; the last one is the label.
X, y = Xy.iloc[:, :-1], Xy.iloc[:, -1]

print(Xy.shape)
Xy.head()

(748, 4)


Unnamed: 0,months_since_last_donation,total_donation,total_blood_donated_in_cc,has_donated_last_month
0,2,50,12500,1
1,0,13,3250,1
2,1,16,4000,1
3,2,20,5000,1
4,1,24,6000,0


In [27]:
def optimizer_factory(name, **kwargs):
    """Build the optimizer selected by ``name``.

    ``"sgd"`` returns ``SGD(**kwargs)``, ``"adam"`` returns
    ``AdamOptim(**kwargs)`` and ``"irls"`` returns ``IRLS()``.

    Args:
        name: one of ``"irls"``, ``"adam"`` or ``"sgd"``.
        **kwargs: forwarded to the Adam and SGD constructors; not passed to IRLS.

    Raises:
        ValueError: if ``name`` is not one of the three supported identifiers.
    """
    if name == "sgd":
        return SGD(**kwargs)
    if name == "adam":
        return AdamOptim(**kwargs)
    if name == "irls":
        # IRLS is constructed without arguments; any kwargs are ignored here.
        return IRLS()
    raise ValueError("irls, adam and sgd only supported")

In [28]:
# Benchmark on the dataset loaded above: for every split seed, train the
# logistic regression with each optimizer, fit the reference classifiers, and
# record the balanced accuracy each model reaches on the held-out test split.
results = {}  # (model name, split seed) -> balanced accuracy on the test split
losses = {}   # optimizer name -> log_reg.losses recorded for the seed-42 split

splitting_seeds = [42, 43, 44, 45, 46]
optimizer_to_kwargs = {"irls": {}, "adam": {"eta": 0.001}, "sgd": {"eta": 0.01}}

for seed in splitting_seeds:
    # Hold out 20% for testing, then 20% of the remainder for validation.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=seed)

    # Fit the scaler on the training split only, then apply it to all three splits.
    ss = StandardScaler()
    X_train = ss.fit_transform(X_train)
    X_val = ss.transform(X_val)
    X_test = ss.transform(X_test)

    for optimizer_name, kwargs in optimizer_to_kwargs.items():
        print(f"Running {optimizer_name}")
        optimizer = optimizer_factory(optimizer_name, **kwargs)
        log_reg = LogisticRegression(input_dim=X_train.shape[1])
        # batch_size is set to the full training-set size.
        log_reg.train(X_train, y_train, X_val=X_val, y_val=y_val, optimizer=optimizer, patience=5, epochs=100, batch_size=X_train.shape[0])

        y_pred = log_reg.predict(X_test).round()
        # balanced_accuracy_score takes (y_true, y_pred). The metric averages
        # recall over the true classes, so swapping the arguments changes it.
        ba_score = balanced_accuracy_score(y_test, y_pred)

        # Store the results
        results[(optimizer_name, seed)] = ba_score
        if seed == 42:
            losses[optimizer_name] = log_reg.losses

    # Other models, fitted on the same (scaled) split
    y_lda, y_qda, y_dt, y_rf = fitComparisonModels(X_train, y_train, X_test)
    models = ['lda', 'qda', 'dt', 'rf']
    y_preds = [y_lda, y_qda, y_dt, y_rf]

    for model, y_pred in zip(models, y_preds):
        ba_score = balanced_accuracy_score(y_test, y_pred)
        results[(model, seed)] = ba_score

# Converting results to DataFrame: one row per model (sorted by name), one
# "seed_<seed>_acc" column per split, indexed by the dataset name.
scores_by_model = {}
for (model, seed), ba_score in results.items():
    scores_by_model.setdefault(model, {})[f"seed_{seed}_acc"] = ba_score
data = [{"optimizer": name, **scores} for name, scores in sorted(scores_by_model.items())]

df = pd.DataFrame(data)
df.index = pd.Index([dataset_name] * len(df), name="data_set")
# This write (default mode "w") replaces the file and emits the header row.
df.to_csv("model_performance.csv", index=True)

for optimizer, loss in losses.items():
    loss_path = f"results/loss/{optimizer}/{dataset_name.replace('/', '-')}.npy"
    # Create the per-optimizer directory so the save also works on a fresh checkout.
    os.makedirs(os.path.dirname(loss_path), exist_ok=True)
    np.save(loss_path, np.array(loss))


Running irls


Epoch::  14%|█▍        | 14/100 [00:00<00:01, 44.75it/s]


Early stopping after epoch 14
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 100/100 [00:00<00:00, 473.14it/s]


Running sgd


Epoch:: 100%|██████████| 100/100 [00:00<00:00, 1287.20it/s]


Running irls


Epoch::   5%|▌         | 5/100 [00:00<00:01, 74.43it/s]


Early stopping after epoch 5
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 100/100 [00:00<00:00, 836.23it/s]


Running sgd


Epoch:: 100%|██████████| 100/100 [00:00<00:00, 1125.53it/s]


Running irls


Epoch::   7%|▋         | 7/100 [00:00<00:02, 44.39it/s]


Early stopping after epoch 7
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 100/100 [00:00<00:00, 975.98it/s]


Running sgd


Epoch:: 100%|██████████| 100/100 [00:00<00:00, 784.77it/s]


Running irls


Epoch::  10%|█         | 10/100 [00:00<00:02, 40.02it/s]


Early stopping after epoch 10
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 100/100 [00:00<00:00, 705.83it/s]


Running sgd


Epoch:: 100%|██████████| 100/100 [00:00<00:00, 1114.84it/s]


Running irls


Epoch::   5%|▌         | 5/100 [00:00<00:02, 44.24it/s]

Early stopping after epoch 5
Reverting to the weights corresponding to the lowest validation loss





Running adam


Epoch:: 100%|██████████| 100/100 [00:00<00:00, 1503.65it/s]


Running sgd


Epoch:: 100%|██████████| 100/100 [00:00<00:00, 1537.16it/s]


# Blood (small dataset) with interactions

In [29]:
# Same benchmark as the plain Blood run, but on the feature matrix returned by
# createFeatureInteractions. Model names get a "+int" suffix in the results.
results = {}  # (model name + "+int", split seed) -> balanced accuracy on the test split
losses = {}   # optimizer name -> log_reg.losses recorded for the seed-42 split

splitting_seeds = [42, 43, 44, 45, 46]
optimizer_to_kwargs = {"irls": {}, "adam": {"eta": 0.001}, "sgd": {"eta": 0.01}}

# The expanded matrix depends only on X, so it is built once instead of per seed.
# NOTE(review): assumes createFeatureInteractions is deterministic — confirm in utils.
X_int = createFeatureInteractions(X.values)

# NOTE(review): unlike the plain Blood run, no StandardScaler is applied to the
# splits here — confirm this is intentional.
for seed in splitting_seeds:
    # Hold out 20% for testing, then 20% of the remainder for validation.
    X_train, X_test, y_train, y_test = train_test_split(X_int, y, test_size=0.2, random_state=seed)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=seed)

    for optimizer_name, kwargs in optimizer_to_kwargs.items():
        print(f"Running {optimizer_name}+int")
        optimizer = optimizer_factory(optimizer_name, **kwargs)
        log_reg = LogisticRegression(input_dim=X_train.shape[1])
        # batch_size is set to the full training-set size.
        log_reg.train(X_train, y_train, X_val=X_val, y_val=y_val, optimizer=optimizer, patience=5, epochs=100, batch_size=X_train.shape[0])

        y_pred = log_reg.predict(X_test).round()
        # balanced_accuracy_score takes (y_true, y_pred). The metric averages
        # recall over the true classes, so swapping the arguments changes it.
        ba_score = balanced_accuracy_score(y_test, y_pred)

        # Store the results
        results[(optimizer_name+"+int", seed)] = ba_score
        if seed == 42:
            losses[optimizer_name] = log_reg.losses

    # Other models, fitted on the same split
    y_lda, y_qda, y_dt, y_rf = fitComparisonModels(X_train, y_train, X_test)
    models = ['lda+int', 'qda+int', 'dt+int', 'rf+int']
    y_preds = [y_lda, y_qda, y_dt, y_rf]

    for model, y_pred in zip(models, y_preds):
        ba_score = balanced_accuracy_score(y_test, y_pred)
        results[(model, seed)] = ba_score

# Converting results to DataFrame: one row per model (sorted by name), one
# "seed_<seed>_acc" column per split, indexed by the dataset name.
scores_by_model = {}
for (model, seed), ba_score in results.items():
    scores_by_model.setdefault(model, {})[f"seed_{seed}_acc"] = ba_score
data = [{"optimizer": name, **scores} for name, scores in sorted(scores_by_model.items())]

df = pd.DataFrame(data)
df.index = pd.Index([dataset_name] * len(df), name="data_set")
# Appended without a header: the file is expected to exist already with the
# same column layout. Re-running this cell appends the rows again.
df.to_csv("model_performance.csv", index=True, mode="a",  header=False)


for optimizer, loss in losses.items():
    loss_path = f"results/loss/{optimizer}/{dataset_name.replace('/', '-')}Int.npy"
    # Create the per-optimizer directory so the save also works on a fresh checkout.
    os.makedirs(os.path.dirname(loss_path), exist_ok=True)
    np.save(loss_path, np.array(loss))

Running irls+int


Epoch::   6%|▌         | 6/100 [00:00<00:03, 30.07it/s]

Early stopping after epoch 6
Reverting to the weights corresponding to the lowest validation loss





Running adam+int


Epoch::   7%|▋         | 7/100 [00:00<00:00, 534.35it/s]


Early stopping after epoch 7
Reverting to the weights corresponding to the lowest validation loss
Running sgd+int


Epoch::   4%|▍         | 4/100 [00:00<00:00, 440.24it/s]

Early stopping after epoch 4
Reverting to the weights corresponding to the lowest validation loss





Running irls+int


Epoch::   5%|▌         | 5/100 [00:00<00:03, 26.02it/s]


Early stopping after epoch 5
Reverting to the weights corresponding to the lowest validation loss
Running adam+int


Epoch::   8%|▊         | 8/100 [00:00<00:00, 1164.84it/s]


Early stopping after epoch 8
Reverting to the weights corresponding to the lowest validation loss
Running sgd+int


Epoch::   4%|▍         | 4/100 [00:00<00:00, 272.01it/s]


Early stopping after epoch 4
Reverting to the weights corresponding to the lowest validation loss
Running irls+int


Epoch::   6%|▌         | 6/100 [00:00<00:04, 21.63it/s]


Early stopping after epoch 6
Reverting to the weights corresponding to the lowest validation loss
Running adam+int


Epoch::   7%|▋         | 7/100 [00:00<00:00, 767.44it/s]


Early stopping after epoch 7
Reverting to the weights corresponding to the lowest validation loss
Running sgd+int


Epoch::   4%|▍         | 4/100 [00:00<00:00, 150.48it/s]


Early stopping after epoch 4
Reverting to the weights corresponding to the lowest validation loss
Running irls+int


Epoch::  11%|█         | 11/100 [00:00<00:01, 48.19it/s]


Early stopping after epoch 11
Reverting to the weights corresponding to the lowest validation loss
Running adam+int


Epoch::   6%|▌         | 6/100 [00:00<00:00, 642.13it/s]


Early stopping after epoch 6
Reverting to the weights corresponding to the lowest validation loss
Running sgd+int


Epoch::   4%|▍         | 4/100 [00:00<00:00, 487.34it/s]


Early stopping after epoch 4
Reverting to the weights corresponding to the lowest validation loss
Running irls+int


Epoch::   5%|▌         | 5/100 [00:00<00:01, 53.28it/s]


Early stopping after epoch 5
Reverting to the weights corresponding to the lowest validation loss
Running adam+int


Epoch::   6%|▌         | 6/100 [00:00<00:00, 1057.65it/s]


Early stopping after epoch 6
Reverting to the weights corresponding to the lowest validation loss
Running sgd+int


Epoch::   4%|▍         | 4/100 [00:00<00:00, 307.30it/s]


Early stopping after epoch 4
Reverting to the weights corresponding to the lowest validation loss


# UCI Shopper (big dataset)

In [30]:
# Load the "jlh/uci-shopper" dataset via `datasets.load_dataset` and
# materialise its "train" split as a pandas DataFrame.
dataset_name = "jlh/uci-shopper"
ds = load_dataset(dataset_name)
Xy = ds["train"].with_format("pandas")[:]

# Every column except the last is used as a feature; the last one is the label.
X, y = Xy.iloc[:, :-1], Xy.iloc[:, -1]

print(Xy.shape)
Xy.head()

(12330, 18)


Unnamed: 0,Administrative,Administrative_Duration,Informational,Informational_Duration,ProductRelated,ProductRelated_Duration,BounceRates,ExitRates,PageValues,SpecialDay,Month,OperatingSystems,Browser,Region,TrafficType,VisitorType,Weekend,Revenue
0,0,0.0,0,0.0,1,0.0,0.2,0.2,0.0,0.0,Feb,1,1,1,1,Returning_Visitor,False,0
1,0,0.0,0,0.0,2,64.0,0.0,0.1,0.0,0.0,Feb,2,2,1,2,Returning_Visitor,False,0
2,0,0.0,0,0.0,1,0.0,0.2,0.2,0.0,0.0,Feb,4,1,9,3,Returning_Visitor,False,0
3,0,0.0,0,0.0,2,2.666667,0.05,0.14,0.0,0.0,Feb,3,2,2,4,Returning_Visitor,False,0
4,0,0.0,0,0.0,10,627.5,0.02,0.05,0.0,0.0,Feb,3,3,1,4,Returning_Visitor,True,0


In [31]:
# Ordinal-encode the non-numeric feature columns so the models receive numbers.
# NOTE(review): with default settings OrdinalEncoder orders categories by their
# sorted unique values, so "Month" is encoded alphabetically rather than
# chronologically — confirm this is acceptable.
oe = OrdinalEncoder()
oe_cols = ["Month", "VisitorType", "Weekend"]

# X was sliced out of Xy with iloc. Work on an explicit copy so the column
# assignment below never targets a view of Xy (chained-assignment hazard) and
# the raw frame stays untouched.
X = X.copy()
X[oe_cols] = oe.fit_transform(X[oe_cols])

In [32]:
# Benchmark on the dataset loaded above: for every split seed, train the
# logistic regression with each optimizer, fit the reference classifiers, and
# record the balanced accuracy each model reaches on the held-out test split.
results = {}  # (model name, split seed) -> balanced accuracy on the test split
losses = {}   # optimizer name -> log_reg.losses recorded for the seed-42 split

splitting_seeds = [42, 43, 44, 45, 46]
optimizer_to_kwargs = {"irls": {}, "adam": {"eta": 0.001}, "sgd": {"eta": 0.01}}

# NOTE(review): unlike the plain Blood run, no StandardScaler is applied to the
# splits here — confirm this is intentional.
for seed in splitting_seeds:
    # Hold out 20% for testing, then 20% of the remainder for validation.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=seed)

    for optimizer_name, kwargs in optimizer_to_kwargs.items():
        print(f"Running {optimizer_name}")
        optimizer = optimizer_factory(optimizer_name, **kwargs)
        log_reg = LogisticRegression(input_dim=X_train.shape[1])
        # batch_size is set to the full training-set size.
        log_reg.train(X_train, y_train, X_val=X_val, y_val=y_val, optimizer=optimizer, patience=5, epochs=100, batch_size=X_train.shape[0])

        y_pred = log_reg.predict(X_test).round()
        # balanced_accuracy_score takes (y_true, y_pred). The metric averages
        # recall over the true classes, so swapping the arguments changes it.
        ba_score = balanced_accuracy_score(y_test, y_pred)

        # Store the results
        results[(optimizer_name, seed)] = ba_score
        if seed == 42:
            losses[optimizer_name] = log_reg.losses

    # Other models, fitted on the same split
    y_lda, y_qda, y_dt, y_rf = fitComparisonModels(X_train, y_train, X_test)
    models = ['lda', 'qda', 'dt', 'rf']
    y_preds = [y_lda, y_qda, y_dt, y_rf]

    for model, y_pred in zip(models, y_preds):
        ba_score = balanced_accuracy_score(y_test, y_pred)
        results[(model, seed)] = ba_score

# Converting results to DataFrame: one row per model (sorted by name), one
# "seed_<seed>_acc" column per split, indexed by the dataset name.
scores_by_model = {}
for (model, seed), ba_score in results.items():
    scores_by_model.setdefault(model, {})[f"seed_{seed}_acc"] = ba_score
data = [{"optimizer": name, **scores} for name, scores in sorted(scores_by_model.items())]

df = pd.DataFrame(data)
df.index = pd.Index([dataset_name] * len(df), name="data_set")
# Appended without a header: the file is expected to exist already with the
# same column layout. Re-running this cell appends the rows again.
df.to_csv("model_performance.csv", index=True, mode="a",  header=False)

for optimizer, loss in losses.items():
    loss_path = f"results/loss/{optimizer}/{dataset_name.replace('/', '-')}.npy"
    # Create the per-optimizer directory so the save also works on a fresh checkout.
    os.makedirs(os.path.dirname(loss_path), exist_ok=True)
    np.save(loss_path, np.array(loss))

Running irls


Epoch::   8%|▊         | 8/100 [00:07<01:27,  1.05it/s]


Early stopping after epoch 8
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch::  13%|█▎        | 13/100 [00:00<00:01, 44.37it/s]


Early stopping after epoch 13
Reverting to the weights corresponding to the lowest validation loss
Running sgd


Epoch::   4%|▍         | 4/100 [00:00<00:01, 62.18it/s]


Early stopping after epoch 4
Reverting to the weights corresponding to the lowest validation loss
Running irls


Epoch::  12%|█▏        | 12/100 [00:07<00:56,  1.54it/s]


Early stopping after epoch 12
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 100/100 [00:01<00:00, 96.09it/s]


Running sgd


Epoch::   4%|▍         | 4/100 [00:00<00:01, 64.53it/s]


Early stopping after epoch 4
Reverting to the weights corresponding to the lowest validation loss
Running irls


Epoch::   8%|▊         | 8/100 [00:05<01:00,  1.51it/s]


Early stopping after epoch 8
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 100/100 [00:01<00:00, 92.90it/s]


Running sgd


Epoch::   4%|▍         | 4/100 [00:00<00:02, 44.93it/s]


Early stopping after epoch 4
Reverting to the weights corresponding to the lowest validation loss
Running irls


Epoch::  12%|█▏        | 12/100 [00:07<00:56,  1.57it/s]


Early stopping after epoch 12
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 100/100 [00:01<00:00, 97.27it/s]


Running sgd


Epoch::   6%|▌         | 6/100 [00:00<00:02, 41.26it/s]


Early stopping after epoch 6
Reverting to the weights corresponding to the lowest validation loss




Running irls


Epoch::   7%|▋         | 7/100 [00:04<01:03,  1.46it/s]


Early stopping after epoch 7
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 100/100 [00:00<00:00, 106.89it/s]


Running sgd


Epoch::   4%|▍         | 4/100 [00:00<00:01, 59.29it/s]


Early stopping after epoch 4
Reverting to the weights corresponding to the lowest validation loss


# mstz/heart_failure (big dataset)

In [33]:
# Load the "mstz/heart_failure" dataset via `datasets.load_dataset` and
# materialise its "train" split as a pandas DataFrame. Features and label are
# separated in the next cell, after encoding.
dataset_name = "mstz/heart_failure"
ds = load_dataset(dataset_name)

train_split = ds["train"].with_format("pandas")
Xy = train_split[:]

print(Xy.shape)
Xy.head()

(299, 13)


Unnamed: 0,age,has_anaemia,creatinine_phosphokinase_concentration_in_blood,has_diabetes,heart_ejection_fraction,has_high_blood_pressure,platelets_concentration_in_blood,serum_creatinine_concentration_in_blood,serum_sodium_concentration_in_blood,is_male,is_smoker,days_in_study,is_dead
0,75,False,582.0,False,20.0,True,265000.0,1.9,130.0,True,False,4,1
1,55,False,7861.0,False,38.0,False,263358.03,1.1,136.0,True,False,6,1
2,65,False,146.0,False,20.0,False,162000.0,1.3,129.0,True,True,7,1
3,50,True,111.0,False,20.0,False,210000.0,1.9,137.0,True,False,7,1
4,65,True,160.0,True,20.0,False,327000.0,2.7,116.0,False,False,8,1


In [34]:
# Ordinal-encode the flag columns of the heart-failure frame, then split it
# into features (all columns but the last) and label (last column).
oe_cols = ["has_anaemia", "has_diabetes", "has_high_blood_pressure", "is_male", "is_smoker"]
oe = OrdinalEncoder()

# Replace the columns wholesale instead of writing through `.loc[:, cols]`.
# The `.loc` form tries to store the encoder's floats in place in the existing
# columns, and pandas >= 2.1 deprecates in-place writes of an incompatible dtype.
Xy[oe_cols] = oe.fit_transform(Xy[oe_cols])

X = Xy.iloc[:, :-1]
y = Xy.iloc[:, -1]
X.head()

Unnamed: 0,age,has_anaemia,creatinine_phosphokinase_concentration_in_blood,has_diabetes,heart_ejection_fraction,has_high_blood_pressure,platelets_concentration_in_blood,serum_creatinine_concentration_in_blood,serum_sodium_concentration_in_blood,is_male,is_smoker,days_in_study
0,75,0.0,582.0,0.0,20.0,1.0,265000.0,1.9,130.0,1.0,0.0,4
1,55,0.0,7861.0,0.0,38.0,0.0,263358.03,1.1,136.0,1.0,0.0,6
2,65,0.0,146.0,0.0,20.0,0.0,162000.0,1.3,129.0,1.0,1.0,7
3,50,1.0,111.0,0.0,20.0,0.0,210000.0,1.9,137.0,1.0,0.0,7
4,65,1.0,160.0,1.0,20.0,0.0,327000.0,2.7,116.0,0.0,0.0,8


In [35]:
# Benchmark on the dataset loaded above: for every split seed, train the
# logistic regression with each optimizer, fit the reference classifiers, and
# record the balanced accuracy each model reaches on the held-out test split.
results = {}  # (model name, split seed) -> balanced accuracy on the test split
losses = {}   # optimizer name -> log_reg.losses recorded for the seed-42 split

splitting_seeds = [42, 43, 44, 45, 46]
optimizer_to_kwargs = {"irls": {}, "adam": {"eta": 0.001}, "sgd": {"eta": 0.01}}

# NOTE(review): unlike the plain Blood run, no StandardScaler is applied to the
# splits here — confirm this is intentional.
for seed in splitting_seeds:
    # Hold out 20% for testing, then 20% of the remainder for validation.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=seed)

    for optimizer_name, kwargs in optimizer_to_kwargs.items():
        print(f"Running {optimizer_name}")
        optimizer = optimizer_factory(optimizer_name, **kwargs)
        log_reg = LogisticRegression(input_dim=X_train.shape[1])
        # batch_size is set to the full training-set size.
        log_reg.train(X_train, y_train, X_val=X_val, y_val=y_val, optimizer=optimizer, patience=5, epochs=100, batch_size=X_train.shape[0])

        y_pred = log_reg.predict(X_test).round()
        # balanced_accuracy_score takes (y_true, y_pred). The metric averages
        # recall over the true classes, so swapping the arguments changes it.
        ba_score = balanced_accuracy_score(y_test, y_pred)

        # Store the results
        results[(optimizer_name, seed)] = ba_score
        if seed == 42:
            losses[optimizer_name] = log_reg.losses

    # Other models, fitted on the same split
    y_lda, y_qda, y_dt, y_rf = fitComparisonModels(X_train, y_train, X_test)
    models = ['lda', 'qda', 'dt', 'rf']
    y_preds = [y_lda, y_qda, y_dt, y_rf]

    for model, y_pred in zip(models, y_preds):
        ba_score = balanced_accuracy_score(y_test, y_pred)
        results[(model, seed)] = ba_score

# Converting results to DataFrame: one row per model (sorted by name), one
# "seed_<seed>_acc" column per split, indexed by the dataset name.
scores_by_model = {}
for (model, seed), ba_score in results.items():
    scores_by_model.setdefault(model, {})[f"seed_{seed}_acc"] = ba_score
data = [{"optimizer": name, **scores} for name, scores in sorted(scores_by_model.items())]

df = pd.DataFrame(data)
df.index = pd.Index([dataset_name] * len(df), name="data_set")
# Appended without a header: the file is expected to exist already with the
# same column layout. Re-running this cell appends the rows again.
df.to_csv("model_performance.csv", index=True, mode="a",  header=False)

for optimizer, loss in losses.items():
    loss_path = f"results/loss/{optimizer}/{dataset_name.replace('/', '-')}.npy"
    # Create the per-optimizer directory so the save also works on a fresh checkout.
    os.makedirs(os.path.dirname(loss_path), exist_ok=True)
    np.save(loss_path, np.array(loss))

Running irls


Epoch::  15%|█▌        | 15/100 [00:00<00:01, 84.25it/s]


Early stopping after epoch 15
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch::   7%|▋         | 7/100 [00:00<00:00, 384.47it/s]


Early stopping after epoch 7
Reverting to the weights corresponding to the lowest validation loss
Running sgd


Epoch::   4%|▍         | 4/100 [00:00<00:00, 513.76it/s]


Early stopping after epoch 4
Reverting to the weights corresponding to the lowest validation loss
Running irls


Epoch::   7%|▋         | 7/100 [00:00<00:00, 163.17it/s]


Early stopping after epoch 7
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch::  14%|█▍        | 14/100 [00:00<00:00, 510.70it/s]


Early stopping after epoch 14
Reverting to the weights corresponding to the lowest validation loss
Running sgd


Epoch::   4%|▍         | 4/100 [00:00<00:00, 291.14it/s]


Early stopping after epoch 4
Reverting to the weights corresponding to the lowest validation loss
Running irls


Epoch::   5%|▌         | 5/100 [00:00<00:00, 121.44it/s]


Early stopping after epoch 5
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch::   8%|▊         | 8/100 [00:00<00:00, 241.23it/s]


Early stopping after epoch 8
Reverting to the weights corresponding to the lowest validation loss
Running sgd


Epoch::   4%|▍         | 4/100 [00:00<00:00, 298.57it/s]


Early stopping after epoch 4
Reverting to the weights corresponding to the lowest validation loss
Running irls


Epoch::  13%|█▎        | 13/100 [00:00<00:00, 219.88it/s]


Early stopping after epoch 13
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch::   9%|▉         | 9/100 [00:00<00:00, 323.06it/s]


Early stopping after epoch 9
Reverting to the weights corresponding to the lowest validation loss
Running sgd


Epoch::   4%|▍         | 4/100 [00:00<00:00, 227.64it/s]


Early stopping after epoch 4
Reverting to the weights corresponding to the lowest validation loss
Running irls


Epoch::  10%|█         | 10/100 [00:00<00:01, 62.91it/s]


Early stopping after epoch 10
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch::  10%|█         | 10/100 [00:00<00:00, 309.91it/s]


Early stopping after epoch 10
Reverting to the weights corresponding to the lowest validation loss
Running sgd


Epoch::   4%|▍         | 4/100 [00:00<00:00, 555.33it/s]


Early stopping after epoch 4
Reverting to the weights corresponding to the lowest validation loss
