In [1]:
import epoch_based_evolution as ebe
import load_data as ld # for loading and transforming data

# Preprocessing

In [2]:
# Identifier of the dataset; handed to the loader as `dataset_id`.
DATA_ID = 41143

# The loader's result is unpacked into feature/target pairs named for the
# train, validation and test splits (requested as tensors, with scaling on).
X_train, y_train, X_val, y_val, X_test, y_test = ld.get_preprocessed_data(
    dataset_id=DATA_ID,
    scaling=True,
    random_seed=13,
    return_as='tensor',
)


Categorical features detected: ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21', 'V22', 'V24', 'V25', 'V26', 'V27', 'V28', 'V29', 'V30', 'V31', 'V32', 'V33', 'V34', 'V35', 'V36', 'V37', 'V38', 'V39', 'V40', 'V41', 'V42', 'V44', 'V46', 'V47', 'V48', 'V49', 'V50', 'V51', 'V52', 'V53', 'V54', 'V55', 'V57', 'V58', 'V60', 'V61', 'V62', 'V63', 'V64', 'V65', 'V66', 'V67', 'V68', 'V69', 'V70', 'V71', 'V72', 'V73', 'V74', 'V75', 'V76', 'V77', 'V78', 'V79', 'V80', 'V81', 'V82', 'V83', 'V84', 'V85', 'V86', 'V87', 'V88', 'V89', 'V90', 'V91', 'V92', 'V93', 'V94', 'V95', 'V96', 'V97', 'V98', 'V99', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V127', 'V128', 'V129', 'V130', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V138', 'V139', 'V140', 'V141', 'V142'

# Epoch-based evolution (EBE)

In [3]:
# The model needs its input and output sizes (integers); they are obtained
# from the training features and targets.
tensor_sizes = ld.get_tensor_sizes(X_train, y_train)
input_size, output_size = tensor_sizes

In [4]:
# Define the architecture search space. Only the input and output sizes are
# set here; further arguments can be modified - check the class definition
# of ebe.SearchSpace.
search_space = ebe.SearchSpace(
    input_size=input_size,
    output_size=output_size,
)

In [5]:
# Settings for the evolution run.
N_EPOCHS = 5            # passed to run_ebe as n_epochs
N_INDIVIDUALS = 100     # amount of architectures evaluated as a starting point
percentile_drop = 25    # the worst 25% of architectures are dropped after each epoch

In [6]:
# Build the generation from the search space and the number of individuals.
generation = ebe.Generation(
    search_space,
    N_INDIVIDUALS,
)

In [7]:
# Run the epoch-based evolution on the training and validation data.
generation.run_ebe(
    n_epochs=N_EPOCHS,
    percentile_drop=percentile_drop,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)

# Collect the outcome of the run as a DataFrame for inspection.
results_df = generation.return_df()

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [8]:
# Show the first ten rows of the results table.
top_rows = results_df.head(10)
display(top_rows)

Unnamed: 0,hidden_layers,activation_fn,dropout_rate,optimizer_type,learning_rate,weight_decay,momentum,batch_size,use_skip_connections,initializer,lr_scheduler,scheduler_params,train_loss,train_acc,val_loss,val_acc
0,"[70, 415, 111, 372, 345]",<class 'torch.nn.modules.activation.GELU'>,0.1,<class 'torch.optim.adamw.AdamW'>,0.006822,1e-05,,32,True,kaiming_uniform,step,"{'step_size': 30, 'gamma': 0.1}",0.420701,0.80461,0.442724,0.776151
1,"[489, 193, 342, 34]",<class 'torch.nn.modules.activation.GELU'>,0.1,<class 'torch.optim.adamw.AdamW'>,0.0003,0.01,,256,True,kaiming_uniform,none,{},0.408037,0.8088,0.443033,0.769874
2,"[435, 105, 437, 305, 487, 497, 464]",<class 'torch.nn.modules.activation.GELU'>,0.2,<class 'torch.optim.rmsprop.RMSprop'>,0.00281,1e-06,,128,False,xavier_normal,none,{},0.490455,0.753798,0.458377,0.767782
3,"[105, 375, 410, 417, 62]",<class 'torch.nn.modules.activation.GELU'>,0.2,<class 'torch.optim.adamw.AdamW'>,0.000453,1e-06,,128,True,kaiming_normal,step,"{'step_size': 30, 'gamma': 0.1}",0.452052,0.77999,0.443045,0.76569
4,"[187, 14, 21, 152, 128, 100, 97]",<class 'torch.nn.modules.activation.ELU'>,0.0,<class 'torch.optim.adam.Adam'>,0.001751,0.01,,1024,True,xavier_normal,none,{},0.413033,0.801467,0.455774,0.76569
5,"[172, 402, 455, 266]",<class 'torch.nn.modules.activation.LeakyReLU'>,0.5,<class 'torch.optim.adamw.AdamW'>,0.008607,0.01,,256,True,kaiming_normal,cosine,{'T_max': 50},0.459291,0.778418,0.447765,0.761506
6,"[137, 214, 461, 383, 340, 427]",<class 'torch.nn.modules.activation.ReLU'>,0.3,<class 'torch.optim.adam.Adam'>,0.00086,0.001,,32,True,kaiming_uniform,step,"{'step_size': 30, 'gamma': 0.9}",0.450167,0.788895,0.445986,0.759414
7,"[65, 123, 37]",<class 'torch.nn.modules.activation.ELU'>,0.3,<class 'torch.optim.adam.Adam'>,0.029568,0.0001,,128,True,xavier_normal,cosine,{'T_max': 10},0.458199,0.78418,0.455454,0.759414
8,"[341, 99, 106, 21]",<class 'torch.nn.modules.activation.ReLU'>,0.2,<class 'torch.optim.rmsprop.RMSprop'>,0.002528,0.01,,256,True,kaiming_uniform,cosine,{'T_max': 100},0.417077,0.807229,0.462433,0.757322
9,"[261, 252, 214]",<class 'torch.nn.modules.activation.LeakyReLU'>,0.3,<class 'torch.optim.rmsprop.RMSprop'>,0.006624,0.01,,64,False,xavier_uniform,none,{},0.453061,0.780513,0.464886,0.75523


# Train the top model

In [9]:
# Rebuild a model from the first row of the results table.
# NOTE(review): this assumes results_df is ordered best-first; the displayed
# head is in descending val_acc order - confirm in return_df.
best_row = results_df.iloc[0]
best_model = ebe.create_model_from_row(
    best_row,
    input_size=input_size,
    output_size=output_size,
)

In [20]:
# Batch size used for all three loaders below.
# NOTE(review): the selected results row also carries its own `batch_size`
# value; confirm that a fixed 64 is intended here.
batch_size = 64

# Training needs DataLoader objects: one per split, created in the order
# train, validation, test.
train_loader, val_loader, test_loader = (
    ebe.create_dataloaders(X=features, y=targets, batch_size=batch_size)
    for features, targets in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

In [21]:
# Train the selected model; the four returned values are kept under
# best_* names for the train and validation loss/accuracy.
(
    best_train_loss,
    best_train_acc,
    best_val_loss,
    best_val_acc,
) = best_model.es_train(
    train_loader=train_loader,
    val_loader=val_loader,
    max_epochs=1000,  # cap for epochs
    es_patience=50,   # epochs without improvement
    verbose=True,     # print training progress
)

New best acc found: 0.7510460251046025
Epoch 1: Train Loss=0.4751, Train Acc=0.7643, Val Loss=0.4627, Val Acc=0.7510
Epoch 2: Train Loss=0.4159, Train Acc=0.8062, Val Loss=0.4671, Val Acc=0.7448
New best acc found: 0.7531380753138075
Epoch 3: Train Loss=0.3699, Train Acc=0.8303, Val Loss=0.4920, Val Acc=0.7531
New best acc found: 0.7573221757322176
Epoch 4: Train Loss=0.3273, Train Acc=0.8617, Val Loss=0.4897, Val Acc=0.7573
Epoch 5: Train Loss=0.2745, Train Acc=0.8937, Val Loss=0.5846, Val Acc=0.7490
Epoch 6: Train Loss=0.2396, Train Acc=0.8910, Val Loss=0.6755, Val Acc=0.7134
Epoch 7: Train Loss=0.2109, Train Acc=0.9162, Val Loss=0.6460, Val Acc=0.7469
New best acc found: 0.7594142259414226
Epoch 8: Train Loss=0.1654, Train Acc=0.9314, Val Loss=0.7804, Val Acc=0.7594
Epoch 9: Train Loss=0.1278, Train Acc=0.9455, Val Loss=0.8703, Val Acc=0.7469
Epoch 10: Train Loss=0.1330, Train Acc=0.9471, Val Loss=0.8067, Val Acc=0.7406
Epoch 11: Train Loss=0.1002, Train Acc=0.9612, Val Loss=0.9279,

In [None]:
# Evaluate the trained model on the test loader and report both metrics.
test_metrics = best_model.evaluate(test_loader)
test_loss, test_accuracy = test_metrics
print(f"Test loss: {test_loss:.4f}, Test accuracy: {test_accuracy:.4f}")

Validation loss: 0.8538, Validation accuracy: 0.7688


The trained model can now also be used like a regular `nn.Module`:

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# Inference: run the trained model on the test features.

# Use the GPU when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move the model and the test features to the chosen device.
# NOTE(review): `model` is not referenced again in this cell; the calls
# below go through `best_model`.
model = best_model.to(device)
X_test_tensor = X_test.to(device)

# Switch to evaluation mode and run the forward pass without tracking gradients.
best_model.eval()
with torch.no_grad():
    predictions = best_model(X_test_tensor)

In [None]:
# Predicted class per sample: index of the largest score along dim 1.
# `argmax` returns only the indices, so no values tensor is built and thrown
# away, and nothing is assigned to `_`. In IPython/Jupyter `_` holds the
# previous cell output; rebinding it stops the shell from updating it.
predicted_classes = predictions.argmax(dim=1)

In [None]:
# Bare expression: the cell displays the tensor of predicted class indices.
predicted_classes

tensor([0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1,
        1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
        1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1,
        1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1,
        0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,
        0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1,
        1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0,
        0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1,
        0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0,
        0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1,
        0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,