In [8]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F

from netam import framework, models
from epam.torch_common import pick_device, print_parameter_count

In [9]:
# Read the complete SHMoof CSV so we can look at sequence lengths before any filtering.
shmoof_data_path = "/Users/matsen/data/shmoof_edges_11-Jan-2023_NoNode0_iqtree_K80+R_masked.csv"
all_df = pd.read_csv(shmoof_data_path)

# Fraction of rows whose "parent" string is longer than 410 characters:
# count the rows over the threshold and divide by the total row count.
all_df["parent"].str.len().gt(410).sum() / all_df.shape[0]

0.00403216242498992

In [10]:
# Build the training/validation dataframes from the same CSV inspected above.
# Reuse `shmoof_data_path` (defined in the preceding cell) rather than repeating
# the path literal, so the two cells cannot drift apart.
# val_nickname="51" selects which subset is held out for validation
# (NOTE(review): exact nickname semantics live in framework.load_shmoof_dataframes — confirm).
# During development the call was also tried with sample_count=5000
# (presumably subsamples rows for faster iteration — confirm before relying on it).
train_df, val_df = framework.load_shmoof_dataframes(shmoof_data_path, val_nickname="51")


In [11]:
# Encoding settings shared by the training and validation datasets.
kmer_length = 3
max_length = 410

# Wrap each dataframe in a SHMoofDataset using identical settings;
# the generator is consumed in order, so train is built before val.
train_dataset, val_dataset = (
    framework.SHMoofDataset(frame, kmer_length=kmer_length, max_length=max_length)
    for frame in (train_df, val_df)
)

# Pick the compute device once and hand it to both datasets.
# The return value of .to() is not used here.
device = pick_device()
train_dataset.to(device)
val_dataset.to(device)

print(f"we have {len(train_dataset)} training examples and {len(val_dataset)} validation examples")

Using Metal Performance Shaders
we have 26592 training examples and 22424 validation examples


In [6]:
# --- Search space -----------------------------------------------------------
# Categorical hyperparameters: each maps to the list of values to choose from.
cat_params = {
    "kernel_size": [9, 11, 13, 15],
    "dropout": [0.0, 0.1, 0.2, 0.3, 0.4],
}
# Integer hyperparameters: each maps to a pair of range bounds
# (NOTE(review): inclusivity of the bounds is decided by HyperBurrito — confirm).
int_params = {
    "embedding_dim": (7, 20),
    "num_filters": (7, 20),
}
# No float or log-scaled float hyperparameters are searched in this run.
float_params = {}
log_float_params = {}

# --- Fixed values -----------------------------------------------------------
# Any key that also appears in one of the search dictionaries above is taken
# from the search dictionary; the value given here is then not used.
fixed_hyperparams = {
    "num_filters": 12,
    "embedding_dim": 18,
    "kernel_size": 13,
    "batch_size": 1024,
    "learning_rate": 0.1,
    "min_learning_rate": 1e-3,  # early stopping!
    "dropout": 0.1,
    "l2_regularization_coeff": 1e-6,
    "max_parameter_count": 2048,
}

# --- Run budget -------------------------------------------------------------
epochs = 100
optuna_steps = 45

# pick_device() is called afresh here, so this cell does not rely on `device`
# having been assigned by an earlier cell.
hyper_burrito = framework.HyperBurrito(
    pick_device(),
    train_dataset,
    val_dataset,
    models.CNNModel,
    epochs=epochs,
)

# Launch the Optuna search for `optuna_steps` steps over the space defined above.
hyper_burrito.optuna_optimize(
    optuna_steps,
    cat_params,
    int_params,
    float_params,
    log_float_params,
    fixed_hyperparams,
)

[I 2023-11-23 13:00:49,822] A new study created in memory with name: no-name-09bcbeb1-b157-4546-ad3f-77aea9a05d7a


Using Metal Performance Shaders


Epoch:  49%|████▉     | 49/100 [04:09<04:19,  5.09s/it, loss_diff=1.178e-07, lr=3.2e-5]  
[I 2023-11-23 13:05:03,310] Trial 0 finished with value: 0.06076791437686319 and parameters: {'kernel_size': 11, 'dropout': 0.2, 'embedding_dim': 10, 'num_filters': 7}. Best is trial 0 with value: 0.06076791437686319.
[I 2023-11-23 13:05:03,313] Trial 1 finished with value: 1000000000.0 and parameters: {'kernel_size': 15, 'dropout': 0.0, 'embedding_dim': 18, 'num_filters': 9}. Best is trial 0 with value: 0.06076791437686319.
[I 2023-11-23 13:05:03,316] Trial 2 finished with value: 1000000000.0 and parameters: {'kernel_size': 15, 'dropout': 0.1, 'embedding_dim': 14, 'num_filters': 14}. Best is trial 0 with value: 0.06076791437686319.
[I 2023-11-23 13:05:03,319] Trial 3 finished with value: 1000000000.0 and parameters: {'kernel_size': 13, 'dropout': 0.3, 'embedding_dim': 18, 'num_filters': 19}. Best is trial 0 with value: 0.06076791437686319.
[I 2023-11-23 13:05:03,322] Trial 4 finished with value: 

Trial rejected because model has 3619 > 2048 parameters.
Trial rejected because model has 3879 > 2048 parameters.
Trial rejected because model has 5655 > 2048 parameters.
Trial rejected because model has 3221 > 2048 parameters.
Trial rejected because model has 2491 > 2048 parameters.


Epoch:  46%|████▌     | 46/100 [04:35<05:23,  5.98s/it, loss_diff=9.142e-07, lr=3.2e-5]  
[I 2023-11-23 13:09:42,897] Trial 6 finished with value: 0.06077127770663286 and parameters: {'kernel_size': 9, 'dropout': 0.2, 'embedding_dim': 11, 'num_filters': 8}. Best is trial 0 with value: 0.06076791437686319.
Epoch:  35%|███▌      | 35/100 [03:22<06:15,  5.78s/it, loss_diff=-1.076e-06, lr=3.2e-5] 
[I 2023-11-23 13:13:10,114] Trial 7 finished with value: 0.06077457853179934 and parameters: {'kernel_size': 13, 'dropout': 0.3, 'embedding_dim': 10, 'num_filters': 7}. Best is trial 0 with value: 0.06076791437686319.
Epoch:  53%|█████▎    | 53/100 [05:18<04:42,  6.01s/it, loss_diff=8.847e-07, lr=3.2e-5]  
[I 2023-11-23 13:18:33,877] Trial 8 finished with value: 0.0606860629842084 and parameters: {'kernel_size': 9, 'dropout': 0.2, 'embedding_dim': 7, 'num_filters': 15}. Best is trial 8 with value: 0.0606860629842084.
[I 2023-11-23 13:18:33,880] Trial 9 finished with value: 1000000000.0 and parame

Trial rejected because model has 2121 > 2048 parameters.


Epoch:  38%|███▊      | 38/100 [03:55<06:23,  6.19s/it, loss_diff=3.119e-08, lr=3.2e-5]  
[I 2023-11-23 13:22:33,652] Trial 10 finished with value: 0.060782141727004954 and parameters: {'kernel_size': 9, 'dropout': 0.4, 'embedding_dim': 7, 'num_filters': 12}. Best is trial 8 with value: 0.0606860629842084.
Epoch:  38%|███▊      | 38/100 [03:39<05:58,  5.78s/it, loss_diff=-2.351e-07, lr=3.2e-5] 
[I 2023-11-23 13:26:17,316] Trial 11 finished with value: 0.06068952841457621 and parameters: {'kernel_size': 11, 'dropout': 0.2, 'embedding_dim': 7, 'num_filters': 17}. Best is trial 8 with value: 0.0606860629842084.
Epoch:  61%|██████    | 61/100 [05:49<03:43,  5.73s/it, loss_diff=-1.383e-06, lr=3.2e-5] 
[I 2023-11-23 13:32:11,032] Trial 12 finished with value: 0.061609826055682645 and parameters: {'kernel_size': 11, 'dropout': 0.2, 'embedding_dim': 7, 'num_filters': 16}. Best is trial 8 with value: 0.0606860629842084.
Epoch:  56%|█████▌    | 56/100 [05:36<04:24,  6.01s/it, loss_diff=-5.221e-0

Trial rejected because model has 3442 > 2048 parameters.


Epoch:  48%|████▊     | 48/100 [04:51<05:15,  6.06s/it, loss_diff=-2.168e-07, lr=3.2e-5] 
[I 2023-11-23 13:42:48,164] Trial 15 finished with value: 0.0607248074240528 and parameters: {'kernel_size': 9, 'dropout': 0.0, 'embedding_dim': 8, 'num_filters': 12}. Best is trial 13 with value: 0.06064266746825845.
Epoch:  54%|█████▍    | 54/100 [05:35<04:45,  6.21s/it, loss_diff=6.876e-07, lr=3.2e-5]  
[I 2023-11-23 13:48:28,435] Trial 16 finished with value: 0.06062803018467366 and parameters: {'kernel_size': 9, 'dropout': 0.4, 'embedding_dim': 9, 'num_filters': 13}. Best is trial 16 with value: 0.06062803018467366.
Epoch:  51%|█████     | 51/100 [04:50<04:39,  5.71s/it, loss_diff=-1.075e-06, lr=3.2e-5] 
[I 2023-11-23 13:53:23,570] Trial 17 finished with value: 0.06067485788247647 and parameters: {'kernel_size': 9, 'dropout': 0.4, 'embedding_dim': 9, 'num_filters': 12}. Best is trial 16 with value: 0.06062803018467366.
[I 2023-11-23 13:53:23,581] Trial 18 finished with value: 1000000000.0 and

Trial rejected because model has 4395 > 2048 parameters.
Trial rejected because model has 3069 > 2048 parameters.


Epoch:  44%|████▍     | 44/100 [04:38<05:54,  6.32s/it, loss_diff=-1.162e-06, lr=3.2e-5] 
[I 2023-11-23 13:58:06,910] Trial 20 finished with value: 0.06076915809619524 and parameters: {'kernel_size': 9, 'dropout': 0.0, 'embedding_dim': 9, 'num_filters': 11}. Best is trial 16 with value: 0.06062803018467366.
Epoch:  45%|████▌     | 45/100 [04:35<05:36,  6.13s/it, loss_diff=-5.024e-07, lr=3.2e-5] 
[I 2023-11-23 14:02:47,953] Trial 21 finished with value: 0.060661875734871896 and parameters: {'kernel_size': 9, 'dropout': 0.4, 'embedding_dim': 9, 'num_filters': 13}. Best is trial 16 with value: 0.06062803018467366.
Epoch:  42%|████▏     | 42/100 [04:23<06:04,  6.28s/it, loss_diff=-6.076e-07, lr=3.2e-5] 
[I 2023-11-23 14:07:16,695] Trial 22 finished with value: 0.06067127836492714 and parameters: {'kernel_size': 9, 'dropout': 0.4, 'embedding_dim': 9, 'num_filters': 14}. Best is trial 16 with value: 0.06062803018467366.
Epoch:  50%|█████     | 50/100 [05:18<05:18,  6.37s/it, loss_diff=-1.461

Trial rejected because model has 2431 > 2048 parameters.


Epoch:  47%|████▋     | 47/100 [04:30<05:05,  5.76s/it, loss_diff=-4.281e-07, lr=3.2e-5] 
[I 2023-11-23 14:17:14,652] Trial 25 finished with value: 0.06076239533735691 and parameters: {'kernel_size': 9, 'dropout': 0.4, 'embedding_dim': 8, 'num_filters': 10}. Best is trial 16 with value: 0.06062803018467366.
[I 2023-11-23 14:17:14,663] Trial 26 finished with value: 1000000000.0 and parameters: {'kernel_size': 9, 'dropout': 0.0, 'embedding_dim': 15, 'num_filters': 16}. Best is trial 16 with value: 0.06062803018467366.
[I 2023-11-23 14:17:14,673] Trial 27 finished with value: 1000000000.0 and parameters: {'kernel_size': 15, 'dropout': 0.4, 'embedding_dim': 11, 'num_filters': 11}. Best is trial 16 with value: 0.06062803018467366.
[I 2023-11-23 14:17:14,680] Trial 28 finished with value: 1000000000.0 and parameters: {'kernel_size': 13, 'dropout': 0.1, 'embedding_dim': 9, 'num_filters': 18}. Best is trial 16 with value: 0.06062803018467366.


Trial rejected because model has 3168 > 2048 parameters.
Trial rejected because model has 2553 > 2048 parameters.
Trial rejected because model has 2728 > 2048 parameters.


Epoch:  48%|████▊     | 48/100 [05:07<05:33,  6.41s/it, loss_diff=-3.992e-07, lr=3.2e-5] 
[I 2023-11-23 14:22:26,937] Trial 29 finished with value: 0.060651904456463616 and parameters: {'kernel_size': 11, 'dropout': 0.0, 'embedding_dim': 8, 'num_filters': 14}. Best is trial 16 with value: 0.06062803018467366.
Epoch:  55%|█████▌    | 55/100 [05:44<04:41,  6.26s/it, loss_diff=-7.843e-07, lr=3.2e-5] 
[I 2023-11-23 14:28:16,548] Trial 30 finished with value: 0.060582415494819304 and parameters: {'kernel_size': 11, 'dropout': 0.0, 'embedding_dim': 8, 'num_filters': 16}. Best is trial 30 with value: 0.060582415494819304.
[I 2023-11-23 14:28:16,557] Trial 31 finished with value: 1000000000.0 and parameters: {'kernel_size': 11, 'dropout': 0.0, 'embedding_dim': 8, 'num_filters': 17}. Best is trial 30 with value: 0.060582415494819304.


Trial rejected because model has 2051 > 2048 parameters.


Epoch:  42%|████▏     | 42/100 [04:15<05:53,  6.10s/it, loss_diff=3.925e-07, lr=3.2e-5]  
[I 2023-11-23 14:32:36,826] Trial 32 finished with value: 0.06058643012840196 and parameters: {'kernel_size': 11, 'dropout': 0.0, 'embedding_dim': 8, 'num_filters': 16}. Best is trial 30 with value: 0.060582415494819304.
[I 2023-11-23 14:32:36,837] Trial 33 finished with value: 1000000000.0 and parameters: {'kernel_size': 11, 'dropout': 0.0, 'embedding_dim': 10, 'num_filters': 16}. Best is trial 30 with value: 0.060582415494819304.


Trial rejected because model has 2443 > 2048 parameters.


Epoch:  63%|██████▎   | 63/100 [07:08<04:11,  6.80s/it, loss_diff=7.557e-07, lr=3.2e-5]  
[I 2023-11-23 14:39:50,625] Trial 34 finished with value: 0.06057477781241424 and parameters: {'kernel_size': 11, 'dropout': 0.0, 'embedding_dim': 7, 'num_filters': 19}. Best is trial 34 with value: 0.06057477781241424.
[I 2023-11-23 14:39:50,634] Trial 35 finished with value: 1000000000.0 and parameters: {'kernel_size': 11, 'dropout': 0.0, 'embedding_dim': 8, 'num_filters': 19}. Best is trial 34 with value: 0.06057477781241424.
[I 2023-11-23 14:39:50,642] Trial 36 finished with value: 1000000000.0 and parameters: {'kernel_size': 11, 'dropout': 0.0, 'embedding_dim': 10, 'num_filters': 20}. Best is trial 34 with value: 0.06057477781241424.


Trial rejected because model has 2231 > 2048 parameters.
Trial rejected because model has 2891 > 2048 parameters.


Epoch:  40%|████      | 40/100 [04:13<06:20,  6.34s/it, loss_diff=2.006e-07, lr=3.2e-5]  
[I 2023-11-23 14:44:08,371] Trial 37 finished with value: 0.06059439282278227 and parameters: {'kernel_size': 11, 'dropout': 0.3, 'embedding_dim': 7, 'num_filters': 19}. Best is trial 34 with value: 0.06057477781241424.
[I 2023-11-23 14:44:08,382] Trial 38 finished with value: 1000000000.0 and parameters: {'kernel_size': 11, 'dropout': 0.3, 'embedding_dim': 17, 'num_filters': 19}. Best is trial 34 with value: 0.06057477781241424.


Trial rejected because model has 4697 > 2048 parameters.


Epoch:  42%|████▏     | 42/100 [04:45<06:33,  6.79s/it, loss_diff=-3.354e-07, lr=3.2e-5] 
[I 2023-11-23 14:48:59,372] Trial 39 finished with value: 0.06061472934200599 and parameters: {'kernel_size': 11, 'dropout': 0.3, 'embedding_dim': 7, 'num_filters': 18}. Best is trial 34 with value: 0.06057477781241424.
[I 2023-11-23 14:48:59,381] Trial 40 finished with value: 1000000000.0 and parameters: {'kernel_size': 11, 'dropout': 0.3, 'embedding_dim': 11, 'num_filters': 20}. Best is trial 34 with value: 0.06057477781241424.


Trial rejected because model has 3176 > 2048 parameters.


Epoch:  43%|████▎     | 43/100 [04:50<06:25,  6.76s/it, loss_diff=1.457e-06, lr=3.2e-5]  
[I 2023-11-23 14:53:55,211] Trial 41 finished with value: 0.060635300496040906 and parameters: {'kernel_size': 11, 'dropout': 0.3, 'embedding_dim': 7, 'num_filters': 18}. Best is trial 34 with value: 0.06057477781241424.
Epoch:  55%|█████▌    | 55/100 [05:53<04:49,  6.43s/it, loss_diff=7.344e-07, lr=3.2e-5]  
[I 2023-11-23 14:59:53,423] Trial 42 finished with value: 0.06055767963900551 and parameters: {'kernel_size': 11, 'dropout': 0.3, 'embedding_dim': 7, 'num_filters': 19}. Best is trial 42 with value: 0.06055767963900551.
[I 2023-11-23 14:59:53,435] Trial 43 finished with value: 1000000000.0 and parameters: {'kernel_size': 11, 'dropout': 0.3, 'embedding_dim': 8, 'num_filters': 19}. Best is trial 42 with value: 0.06055767963900551.
[I 2023-11-23 14:59:53,445] Trial 44 finished with value: 1000000000.0 and parameters: {'kernel_size': 15, 'dropout': 0.3, 'embedding_dim': 7, 'num_filters': 20}. Bes

Trial rejected because model has 2231 > 2048 parameters.
Trial rejected because model has 2596 > 2048 parameters.
Best Hyperparameters: {'kernel_size': 11, 'dropout': 0.3, 'embedding_dim': 7, 'num_filters': 19}
Best Validation Loss: 0.06055767963900551
