In [35]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Imports

In [36]:
from datasets import load_dataset
from irls_optimizer import IRLS
from Adam import AdamOptim
from SGD import SGD
from Logreg import LogisticRegression 
from utils import fitComparisonModels, createFeatureInteractions

from sklearn.impute import SimpleImputer
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
import matplotlib.pyplot as plt

# Silence NumPy floating-point warnings for division by zero, invalid
# operations and overflow for the rest of the notebook. A single call sets all
# three; the trailing semicolon keeps the returned previous-settings dict out
# of the cell output.
np.seterr(divide='ignore', invalid='ignore', over='ignore');

{'divide': 'ignore', 'over': 'ignore', 'under': 'ignore', 'invalid': 'ignore'}

# Blood (small dataset)

In [37]:
# Load the dataset identified by `dataset_name` through `datasets.load_dataset`.
dataset_name = "mstz/blood"
ds = load_dataset(dataset_name)

# Materialise the whole "train" split as a single pandas DataFrame.
Xy = ds["train"].with_format("pandas")[:]
# The last column is used as the target; every column before it is a feature.
X, y = Xy.iloc[:, :-1], Xy.iloc[:, -1]
print(Xy.shape)
Xy.head()

(748, 4)


Unnamed: 0,months_since_last_donation,total_donation,total_blood_donated_in_cc,has_donated_last_month
0,2,50,12500,1
1,0,13,3250,1
2,1,16,4000,1
3,2,20,5000,1
4,1,24,6000,0


In [38]:
def optimizer_factory(name, **kwargs):
    """Build the optimizer registered under ``name``.

    Args:
        name: One of ``"irls"``, ``"adam"`` or ``"sgd"``.
        **kwargs: Forwarded to ``AdamOptim`` / ``SGD``. ``IRLS`` is always
            constructed without arguments, so kwargs are ignored for it.

    Returns:
        A new ``IRLS``, ``AdamOptim`` or ``SGD`` instance.

    Raises:
        ValueError: If ``name`` is not one of the three supported names.
    """
    # Reject unknown names up front so the happy path below stays flat.
    if name not in ("irls", "adam", "sgd"):
        raise ValueError("irls, adam and sgd only supported")

    if name == "irls":
        return IRLS()

    optimizer_cls = AdamOptim if name == "adam" else SGD
    return optimizer_cls(**kwargs)

In [39]:
# Benchmark on the Blood dataset. For each split seed: split into
# train/validation/test, standardise, train the logistic regression once per
# optimizer, fit the comparison models, and record every model's balanced
# accuracy on the test split.
results = {}  # (model name, seed) -> balanced accuracy on the test split
losses = {}   # optimizer name -> `log_reg.losses` from the seed-42 run

splitting_seeds = [42, 43, 44, 45, 46]
optimizer_to_kwargs = {"irls": {}, "adam": {"eta": 0.001}, "sgd": {"eta": 0.01}}

for seed in splitting_seeds:
    # 80/20 train/test split, then a further 80/20 train/validation split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=seed)

    # The scaler is fitted on the training split only and then applied to all three.
    ss = StandardScaler()
    ss.fit(X_train)
    X_train = ss.transform(X_train)
    X_val = ss.transform(X_val)
    X_test = ss.transform(X_test)

    for optimizer_name, kwargs in optimizer_to_kwargs.items():
        print(f"Running {optimizer_name}")
        optimizer = optimizer_factory(optimizer_name, **kwargs)
        log_reg = LogisticRegression(input_dim=X_train.shape[1])
        # IRLS is given the whole training set as one batch; the others use mini-batches.
        batch_size = 32
        if isinstance(optimizer, IRLS):
            batch_size = X_train.shape[0]
        log_reg.train(X_train, y_train, X_val=X_val, y_val=y_val, optimizer=optimizer, patience=5, epochs=500, batch_size=batch_size)

        y_pred = log_reg.predict(X_test).round()
        # balanced_accuracy_score takes (y_true, y_pred) and is not symmetric,
        # so the ground truth must come first -- the same order used for the
        # comparison models below.
        ba_score = balanced_accuracy_score(y_test, y_pred)

        # Store the results
        results[(optimizer_name, seed)] = ba_score
        if seed == 42:
            losses[optimizer_name] = log_reg.losses

    # Other models: predictions returned by fitComparisonModels, labelled in return order.
    y_lda, y_qda, y_dt, y_rf = fitComparisonModels(X_train, y_train, X_test)
    models = ['lda', 'qda', 'dt', 'rf']
    y_preds = [y_lda, y_qda, y_dt, y_rf]

    for model, y_pred in zip(models, y_preds):
        ba_score = balanced_accuracy_score(y_test, y_pred)
        results[(model, seed)] = ba_score

# Converting results to DataFrame: one row per model, one column per seed.
data = []
for (model, seed), ba_score in results.items():
    data.append({"optimizer": model, f"seed_{seed}_acc": ba_score})

df = pd.DataFrame(data)
# Each record fills a single seed column; groupby(...).first() keeps the first
# non-null value per column, collapsing the records into one row per model.
df = df.groupby('optimizer').first().reset_index()
df["data_set"] = dataset_name
df.index = df['data_set']
df.drop('data_set', axis=1, inplace=True)
# Default write mode: creates/overwrites the CSV and writes the header row.
df.to_csv("results/balanced_accuracy_2.csv", index=True)

# Persist the seed-42 loss history of each optimizer ('/' in the dataset id is
# replaced so that it can be used as a file name).
for optimizer, loss in losses.items():
    np.save(f"results/loss/{optimizer}/{dataset_name.replace('/', '-')}.npy", np.array(loss))


Running irls


Epoch::   3%|▎         | 14/500 [00:00<00:16, 29.25it/s]


Early stopping after epoch 14
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 500/500 [00:01<00:00, 339.93it/s]


Running sgd


Epoch:: 100%|██████████| 500/500 [00:01<00:00, 386.90it/s]


Running irls


Epoch::   1%|          | 5/500 [00:00<00:14, 33.69it/s]


Early stopping after epoch 5
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch::  69%|██████▉   | 345/500 [00:01<00:00, 328.17it/s]


Early stopping after epoch 345
Reverting to the weights corresponding to the lowest validation loss
Running sgd


Epoch::   9%|▉         | 47/500 [00:00<00:01, 352.00it/s]


Early stopping after epoch 47
Reverting to the weights corresponding to the lowest validation loss
Running irls


Epoch::   1%|▏         | 7/500 [00:00<00:09, 51.47it/s]


Early stopping after epoch 7
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 500/500 [00:02<00:00, 201.96it/s]


Running sgd


Epoch::  49%|████▊     | 243/500 [00:00<00:00, 362.46it/s]


Early stopping after epoch 243
Reverting to the weights corresponding to the lowest validation loss
Running irls


Epoch::   2%|▏         | 10/500 [00:00<00:22, 22.01it/s]


Early stopping after epoch 10
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 500/500 [00:01<00:00, 321.05it/s]


Running sgd


Epoch:: 100%|██████████| 500/500 [00:01<00:00, 404.23it/s]


Running irls


Epoch::   1%|          | 5/500 [00:00<00:14, 34.49it/s]


Early stopping after epoch 5
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 500/500 [00:01<00:00, 342.89it/s]


Running sgd


Epoch::  18%|█▊        | 90/500 [00:00<00:01, 347.60it/s]


Early stopping after epoch 90
Reverting to the weights corresponding to the lowest validation loss


# Blood (small dataset) with interactions

In [40]:
# Benchmark on the Blood dataset with the features produced by
# createFeatureInteractions. Same protocol as the plain Blood run; every model
# name is suffixed with "+int" in the results.
results = {}  # (model name + "+int", seed) -> balanced accuracy on the test split
losses = {}   # optimizer name -> `log_reg.losses` from the seed-42 run

splitting_seeds = [42, 43, 44, 45, 46]
optimizer_to_kwargs = {"irls": {}, "adam": {"eta": 0.001}, "sgd": {"eta": 0.01}}

for seed in splitting_seeds:
    # NOTE(review): this call does not depend on `seed`; it could be hoisted out
    # of the loop if createFeatureInteractions is deterministic -- confirm.
    X_int = createFeatureInteractions(X.values)

    # 80/20 train/test split, then a further 80/20 train/validation split.
    X_train, X_test, y_train, y_test = train_test_split(X_int, y, test_size=0.2, random_state=seed)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=seed)

    # The scaler is fitted on the training split only and then applied to all three.
    ss = StandardScaler()
    ss.fit(X_train)
    X_train = ss.transform(X_train)
    X_val = ss.transform(X_val)
    X_test = ss.transform(X_test)

    for optimizer_name, kwargs in optimizer_to_kwargs.items():
        print(f"Running {optimizer_name}+int")
        optimizer = optimizer_factory(optimizer_name, **kwargs)
        log_reg = LogisticRegression(input_dim=X_train.shape[1])
        # IRLS is given the whole training set as one batch; the others use mini-batches.
        batch_size = 32
        if isinstance(optimizer, IRLS):
            batch_size = X_train.shape[0]
        log_reg.train(X_train, y_train, X_val=X_val, y_val=y_val, optimizer=optimizer, patience=5, epochs=500, batch_size=batch_size)

        y_pred = log_reg.predict(X_test).round()
        # balanced_accuracy_score takes (y_true, y_pred) and is not symmetric,
        # so the ground truth must come first -- the same order used for the
        # comparison models below.
        ba_score = balanced_accuracy_score(y_test, y_pred)

        # Store the results
        results[(optimizer_name+"+int", seed)] = ba_score
        if seed == 42:
            losses[optimizer_name] = log_reg.losses

    # Other models: predictions returned by fitComparisonModels, labelled in return order.
    y_lda, y_qda, y_dt, y_rf = fitComparisonModels(X_train, y_train, X_test)
    models = ['lda+int', 'qda+int', 'dt+int', 'rf+int']
    y_preds = [y_lda, y_qda, y_dt, y_rf]

    for model, y_pred in zip(models, y_preds):
        ba_score = balanced_accuracy_score(y_test, y_pred)
        results[(model, seed)] = ba_score

# Converting results to DataFrame: one row per model, one column per seed.
data = []
for (model, seed), ba_score in results.items():
    data.append({"optimizer": model, f"seed_{seed}_acc": ba_score})

df = pd.DataFrame(data)
# Each record fills a single seed column; groupby(...).first() keeps the first
# non-null value per column, collapsing the records into one row per model.
df = df.groupby('optimizer').first().reset_index()
df["data_set"] = dataset_name
df.index = df['data_set']
df.drop('data_set', axis=1, inplace=True)
# Append to the existing CSV without repeating the header row.
df.to_csv("results/balanced_accuracy_2.csv", index=True, mode="a",  header=False)


# Persist the seed-42 loss history of each optimizer; the "Int" suffix keeps
# these files apart from the ones saved without the suffix.
for optimizer, loss in losses.items():
    np.save(f"results/loss/{optimizer}/{dataset_name.replace('/', '-')}Int.npy", np.array(loss))

Running irls+int


Epoch::   1%|          | 6/500 [00:00<00:13, 37.81it/s]


Early stopping after epoch 6
Reverting to the weights corresponding to the lowest validation loss
Running adam+int


Epoch:: 100%|██████████| 500/500 [00:01<00:00, 326.76it/s]


Running sgd+int


Epoch:: 100%|██████████| 500/500 [00:01<00:00, 388.93it/s]


Running irls+int


Epoch::   1%|          | 5/500 [00:00<00:07, 69.22it/s]


Early stopping after epoch 5
Reverting to the weights corresponding to the lowest validation loss
Running adam+int


Epoch::  67%|██████▋   | 336/500 [00:01<00:00, 331.46it/s]


Early stopping after epoch 336
Reverting to the weights corresponding to the lowest validation loss
Running sgd+int


Epoch::   9%|▉         | 47/500 [00:00<00:01, 382.82it/s]


Early stopping after epoch 47
Reverting to the weights corresponding to the lowest validation loss
Running irls+int


Epoch::   1%|          | 6/500 [00:00<00:12, 38.41it/s]


Early stopping after epoch 6
Reverting to the weights corresponding to the lowest validation loss
Running adam+int


Epoch:: 100%|██████████| 500/500 [00:02<00:00, 248.90it/s]


Running sgd+int


Epoch::  35%|███▍      | 173/500 [00:00<00:01, 277.74it/s]


Early stopping after epoch 173
Reverting to the weights corresponding to the lowest validation loss
Running irls+int


Epoch::   2%|▏         | 11/500 [00:00<00:06, 80.95it/s]


Early stopping after epoch 11
Reverting to the weights corresponding to the lowest validation loss
Running adam+int


Epoch:: 100%|██████████| 500/500 [00:01<00:00, 301.25it/s]


Running sgd+int


Epoch:: 100%|██████████| 500/500 [00:01<00:00, 333.23it/s]


Running irls+int


Epoch::   1%|          | 5/500 [00:00<00:16, 29.84it/s]


Early stopping after epoch 5
Reverting to the weights corresponding to the lowest validation loss
Running adam+int


Epoch:: 100%|██████████| 500/500 [00:01<00:00, 317.24it/s]


Running sgd+int


Epoch::  15%|█▍        | 74/500 [00:00<00:01, 376.04it/s]


Early stopping after epoch 74
Reverting to the weights corresponding to the lowest validation loss




# UCI Shopper (big dataset)

In [41]:
dataset_name = "jlh/uci-shopper"
ds = load_dataset(dataset_name)

Xy = ds["train"].with_format("pandas")[:]
X = Xy.iloc[:, :-1]
y = Xy.iloc[:, -1]
print(Xy.shape)
Xy.head()

(12330, 18)


Unnamed: 0,Administrative,Administrative_Duration,Informational,Informational_Duration,ProductRelated,ProductRelated_Duration,BounceRates,ExitRates,PageValues,SpecialDay,Month,OperatingSystems,Browser,Region,TrafficType,VisitorType,Weekend,Revenue
0,0,0.0,0,0.0,1,0.0,0.2,0.2,0.0,0.0,Feb,1,1,1,1,Returning_Visitor,False,0
1,0,0.0,0,0.0,2,64.0,0.0,0.1,0.0,0.0,Feb,2,2,1,2,Returning_Visitor,False,0
2,0,0.0,0,0.0,1,0.0,0.2,0.2,0.0,0.0,Feb,4,1,9,3,Returning_Visitor,False,0
3,0,0.0,0,0.0,2,2.666667,0.05,0.14,0.0,0.0,Feb,3,2,2,4,Returning_Visitor,False,0
4,0,0.0,0,0.0,10,627.5,0.02,0.05,0.0,0.0,Feb,3,3,1,4,Returning_Visitor,True,0


In [42]:
# Replace the non-numeric feature columns with ordinal codes so that the
# feature matrix can be standardised and fed to the models.
oe_cols = ["Month", "VisitorType", "Weekend"]
oe = OrdinalEncoder()
# fit_transform learns the categories and encodes the columns in one step.
X[oe_cols] = oe.fit_transform(X[oe_cols])

# seed = 42
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
# X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=seed)

In [43]:
# Benchmark on the UCI Shopper dataset. For each split seed: split into
# train/validation/test, standardise, train the logistic regression once per
# optimizer, fit the comparison models, and record every model's balanced
# accuracy on the test split.
results = {}  # (model name, seed) -> balanced accuracy on the test split
losses = {}   # optimizer name -> `log_reg.losses` from the seed-42 run

splitting_seeds = [42, 43, 44, 45, 46]
optimizer_to_kwargs = {"irls": {}, "adam": {"eta": 0.001}, "sgd": {"eta": 0.01}}

for seed in splitting_seeds:
    # 80/20 train/test split, then a further 80/20 train/validation split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=seed)

    # The scaler is fitted on the training split only and then applied to all three.
    ss = StandardScaler()
    ss.fit(X_train)
    X_train = ss.transform(X_train)
    X_val = ss.transform(X_val)
    X_test = ss.transform(X_test)

    for optimizer_name, kwargs in optimizer_to_kwargs.items():
        print(f"Running {optimizer_name}")
        optimizer = optimizer_factory(optimizer_name, **kwargs)
        log_reg = LogisticRegression(input_dim=X_train.shape[1])
        # IRLS is given the whole training set as one batch; the others use mini-batches.
        batch_size = 32
        if isinstance(optimizer, IRLS):
            batch_size = X_train.shape[0]
        log_reg.train(X_train, y_train, X_val=X_val, y_val=y_val, optimizer=optimizer, patience=5, epochs=500, batch_size=batch_size)

        y_pred = log_reg.predict(X_test).round()
        # balanced_accuracy_score takes (y_true, y_pred) and is not symmetric,
        # so the ground truth must come first -- the same order used for the
        # comparison models below.
        ba_score = balanced_accuracy_score(y_test, y_pred)

        # Store the results
        results[(optimizer_name, seed)] = ba_score
        if seed == 42:
            losses[optimizer_name] = log_reg.losses

    # Other models: predictions returned by fitComparisonModels, labelled in return order.
    y_lda, y_qda, y_dt, y_rf = fitComparisonModels(X_train, y_train, X_test)
    models = ['lda', 'qda', 'dt', 'rf']
    y_preds = [y_lda, y_qda, y_dt, y_rf]

    for model, y_pred in zip(models, y_preds):
        ba_score = balanced_accuracy_score(y_test, y_pred)
        results[(model, seed)] = ba_score

# Converting results to DataFrame: one row per model, one column per seed.
data = []
for (model, seed), ba_score in results.items():
    data.append({"optimizer": model, f"seed_{seed}_acc": ba_score})

df = pd.DataFrame(data)
# Each record fills a single seed column; groupby(...).first() keeps the first
# non-null value per column, collapsing the records into one row per model.
df = df.groupby('optimizer').first().reset_index()
df["data_set"] = dataset_name
df.index = df['data_set']
df.drop('data_set', axis=1, inplace=True)
# Append to the existing CSV without repeating the header row.
df.to_csv("results/balanced_accuracy_2.csv", index=True, mode="a",  header=False)

# Persist the seed-42 loss history of each optimizer ('/' in the dataset id is
# replaced so that it can be used as a file name).
for optimizer, loss in losses.items():
    np.save(f"results/loss/{optimizer}/{dataset_name.replace('/', '-')}.npy", np.array(loss))

Running irls


Epoch::   2%|▏         | 8/500 [00:07<07:18,  1.12it/s]


Early stopping after epoch 8
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch::  32%|███▏      | 159/500 [00:14<00:31, 10.92it/s]


Early stopping after epoch 159
Reverting to the weights corresponding to the lowest validation loss
Running sgd


Epoch::   6%|▌         | 28/500 [00:02<00:41, 11.44it/s]


Early stopping after epoch 28
Reverting to the weights corresponding to the lowest validation loss
Running irls


Epoch::   3%|▎         | 16/500 [00:13<06:35,  1.22it/s]


Early stopping after epoch 16
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch::  34%|███▍      | 171/500 [00:12<00:24, 13.18it/s]


Early stopping after epoch 171
Reverting to the weights corresponding to the lowest validation loss
Running sgd


Epoch::   8%|▊         | 38/500 [00:02<00:30, 15.17it/s]


Early stopping after epoch 38
Reverting to the weights corresponding to the lowest validation loss
Running irls


Epoch::   2%|▏         | 8/500 [00:06<06:37,  1.24it/s]


Early stopping after epoch 8
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch::  32%|███▏      | 159/500 [00:11<00:25, 13.26it/s]


Early stopping after epoch 159
Reverting to the weights corresponding to the lowest validation loss
Running sgd


Epoch::   9%|▉         | 44/500 [00:03<00:35, 12.79it/s]


Early stopping after epoch 44
Reverting to the weights corresponding to the lowest validation loss
Running irls


Epoch::   2%|▏         | 12/500 [00:09<06:21,  1.28it/s]


Early stopping after epoch 12
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 500/500 [00:40<00:00, 12.28it/s]


Running sgd


Epoch::  38%|███▊      | 192/500 [00:12<00:20, 15.11it/s]


Early stopping after epoch 192
Reverting to the weights corresponding to the lowest validation loss
Running irls


Epoch::   1%|▏         | 7/500 [00:04<05:12,  1.58it/s]


Early stopping after epoch 7
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch::  71%|███████▏  | 357/500 [00:22<00:08, 15.99it/s]


Early stopping after epoch 357
Reverting to the weights corresponding to the lowest validation loss
Running sgd


Epoch::   5%|▍         | 23/500 [00:01<00:29, 15.94it/s]


Early stopping after epoch 23
Reverting to the weights corresponding to the lowest validation loss


# mstz/heart_failure (big dataset)

In [44]:
# Load the dataset identified by `dataset_name` through `datasets.load_dataset`.
dataset_name = "mstz/heart_failure"
ds = load_dataset(dataset_name)

# Materialise the whole "train" split as a single pandas DataFrame; the
# feature/target split happens after the boolean columns are encoded.
Xy = ds["train"].with_format("pandas")[:]
print(Xy.shape)
Xy.head()

(299, 13)


Unnamed: 0,age,has_anaemia,creatinine_phosphokinase_concentration_in_blood,has_diabetes,heart_ejection_fraction,has_high_blood_pressure,platelets_concentration_in_blood,serum_creatinine_concentration_in_blood,serum_sodium_concentration_in_blood,is_male,is_smoker,days_in_study,is_dead
0,75,False,582.0,False,20.0,True,265000.0,1.9,130.0,True,False,4,1
1,55,False,7861.0,False,38.0,False,263358.03,1.1,136.0,True,False,6,1
2,65,False,146.0,False,20.0,False,162000.0,1.3,129.0,True,True,7,1
3,50,True,111.0,False,20.0,False,210000.0,1.9,137.0,True,False,7,1
4,65,True,160.0,True,20.0,False,327000.0,2.7,116.0,False,False,8,1


In [45]:
# Ordinal-encode the True/False indicator columns, then separate features from
# the target (the last column).
oe_cols = ["has_anaemia", "has_diabetes", "has_high_blood_pressure", "is_male", "is_smoker"]
oe = OrdinalEncoder()
# fit_transform learns the categories and encodes the columns in one step.
encoded_flags = oe.fit_transform(Xy[oe_cols])
Xy.loc[:, oe_cols] = encoded_flags
X, y = Xy.iloc[:, :-1], Xy.iloc[:, -1]
X.head()

Unnamed: 0,age,has_anaemia,creatinine_phosphokinase_concentration_in_blood,has_diabetes,heart_ejection_fraction,has_high_blood_pressure,platelets_concentration_in_blood,serum_creatinine_concentration_in_blood,serum_sodium_concentration_in_blood,is_male,is_smoker,days_in_study
0,75,0.0,582.0,0.0,20.0,1.0,265000.0,1.9,130.0,1.0,0.0,4
1,55,0.0,7861.0,0.0,38.0,0.0,263358.03,1.1,136.0,1.0,0.0,6
2,65,0.0,146.0,0.0,20.0,0.0,162000.0,1.3,129.0,1.0,1.0,7
3,50,1.0,111.0,0.0,20.0,0.0,210000.0,1.9,137.0,1.0,0.0,7
4,65,1.0,160.0,1.0,20.0,0.0,327000.0,2.7,116.0,0.0,0.0,8


In [46]:
# Benchmark on the heart_failure dataset. For each split seed: split into
# train/validation/test, standardise, train the logistic regression once per
# optimizer, fit the comparison models, and record every model's balanced
# accuracy on the test split.
results = {}  # (model name, seed) -> balanced accuracy on the test split
losses = {}   # optimizer name -> `log_reg.losses` from the seed-42 run

splitting_seeds = [42, 43, 44, 45, 46]
optimizer_to_kwargs = {"irls": {}, "adam": {"eta": 0.001}, "sgd": {"eta": 0.01}}

for seed in splitting_seeds:
    # 80/20 train/test split, then a further 80/20 train/validation split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=seed)

    # The scaler is fitted on the training split only and then applied to all three.
    ss = StandardScaler()
    ss.fit(X_train)
    X_train = ss.transform(X_train)
    X_val = ss.transform(X_val)
    X_test = ss.transform(X_test)

    for optimizer_name, kwargs in optimizer_to_kwargs.items():
        print(f"Running {optimizer_name}")
        optimizer = optimizer_factory(optimizer_name, **kwargs)
        log_reg = LogisticRegression(input_dim=X_train.shape[1])
        # IRLS is given the whole training set as one batch; the others use mini-batches.
        batch_size = 32
        if isinstance(optimizer, IRLS):
            batch_size = X_train.shape[0]
        log_reg.train(X_train, y_train, X_val=X_val, y_val=y_val, optimizer=optimizer, patience=5, epochs=500, batch_size=batch_size)

        y_pred = log_reg.predict(X_test).round()
        # balanced_accuracy_score takes (y_true, y_pred) and is not symmetric,
        # so the ground truth must come first -- the same order used for the
        # comparison models below.
        ba_score = balanced_accuracy_score(y_test, y_pred)

        # Store the results
        results[(optimizer_name, seed)] = ba_score
        if seed == 42:
            losses[optimizer_name] = log_reg.losses

    # Other models: predictions returned by fitComparisonModels, labelled in return order.
    y_lda, y_qda, y_dt, y_rf = fitComparisonModels(X_train, y_train, X_test)
    models = ['lda', 'qda', 'dt', 'rf']
    y_preds = [y_lda, y_qda, y_dt, y_rf]

    for model, y_pred in zip(models, y_preds):
        ba_score = balanced_accuracy_score(y_test, y_pred)
        results[(model, seed)] = ba_score

# Converting results to DataFrame: one row per model, one column per seed.
data = []
for (model, seed), ba_score in results.items():
    data.append({"optimizer": model, f"seed_{seed}_acc": ba_score})

df = pd.DataFrame(data)
# Each record fills a single seed column; groupby(...).first() keeps the first
# non-null value per column, collapsing the records into one row per model.
df = df.groupby('optimizer').first().reset_index()
df["data_set"] = dataset_name
df.index = df['data_set']
df.drop('data_set', axis=1, inplace=True)
# Append to the existing CSV without repeating the header row.
df.to_csv("results/balanced_accuracy_2.csv", index=True, mode="a",  header=False)

# Persist the seed-42 loss history of each optimizer ('/' in the dataset id is
# replaced so that it can be used as a file name).
for optimizer, loss in losses.items():
    np.save(f"results/loss/{optimizer}/{dataset_name.replace('/', '-')}.npy", np.array(loss))

Running irls


Epoch::   2%|▏         | 11/500 [00:00<00:09, 51.49it/s]


Early stopping after epoch 11
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 500/500 [00:00<00:00, 622.32it/s]


Running sgd


Epoch:: 100%|██████████| 500/500 [00:00<00:00, 835.69it/s]


Running irls


Epoch::   1%|▏         | 7/500 [00:00<00:10, 45.62it/s]


Early stopping after epoch 7
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 500/500 [00:00<00:00, 626.06it/s]


Running sgd


Epoch:: 100%|██████████| 500/500 [00:00<00:00, 802.56it/s]


Running irls


Epoch::   1%|          | 5/500 [00:00<00:07, 62.64it/s]


Early stopping after epoch 5
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 500/500 [00:00<00:00, 641.66it/s]


Running sgd


Epoch::  45%|████▍     | 224/500 [00:00<00:00, 752.87it/s]


Early stopping after epoch 224
Reverting to the weights corresponding to the lowest validation loss
Running irls


Epoch::   3%|▎         | 13/500 [00:00<00:06, 70.47it/s]


Early stopping after epoch 13
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 500/500 [00:00<00:00, 591.63it/s]


Running sgd


Epoch:: 100%|██████████| 500/500 [00:00<00:00, 811.15it/s]


Running irls


Epoch::   3%|▎         | 14/500 [00:00<00:05, 89.93it/s]


Early stopping after epoch 14
Reverting to the weights corresponding to the lowest validation loss
Running adam


Epoch:: 100%|██████████| 500/500 [00:00<00:00, 627.23it/s]


Running sgd


Epoch:: 100%|██████████| 500/500 [00:00<00:00, 815.54it/s]
