In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F

from netam import framework, models
from netam.common import pick_device, print_parameter_count

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location of the SHMoof edges CSV. Setting the SHMOOF_DATA_PATH environment
# variable overrides the machine-specific default below, so the notebook can be
# run on another machine without editing this cell. An unset or empty variable
# falls back to the original path.
shmoof_data_path = (
    os.environ.get("SHMOOF_DATA_PATH")
    or "/Users/matsen/data/shmoof_edges_11-Jan-2023_NoNode0_iqtree_K80+R_masked.csv"
)

# Load the training and validation frames; "51" is passed as val_nickname.
# NOTE(review): this cell previously carried a commented-out `sample_count=5000`
# argument, presumably for quick subsampled runs -- confirm against the
# framework API before re-enabling it.
train_df, val_df = framework.load_shmoof_dataframes(shmoof_data_path, val_nickname="51")


In [3]:
# Dataset settings: single-base "k-mers", with max_length forwarded to SHMoofDataset.
kmer_length = 1
max_length = 410

# Build the training dataset first and the validation dataset second, using the
# same settings for both.
dataset_kwargs = dict(kmer_length=kmer_length, max_length=max_length)
train_dataset = framework.SHMoofDataset(train_df, **dataset_kwargs)
val_dataset = framework.SHMoofDataset(val_df, **dataset_kwargs)

# Choose the compute device, then move both datasets onto it.
device = pick_device()
train_dataset.to(device)
val_dataset.to(device)

n_train = len(train_dataset)
n_val = len(val_dataset)
print(f"we have {n_train} training examples and {n_val} validation examples")

Using Metal Performance Shaders
we have 26592 training examples and 22424 validation examples


In [4]:
class CNN1merModel(models.CNNModel):
    """
    CNN that consumes individual bases (1-mers) with a fixed, untrained embedding.

    After the parent constructor runs, the weight of ``self.kmer_embedding`` is
    replaced by a frozen 5x5 identity matrix, so the embedding layer is never
    updated during training.
    """

    def __init__(self, dataset, num_filters, kernel_size, dropout_prob=0.1):
        # The embedding dimension is fixed at 5 to match the identity matrix below.
        embedding_dim = 5
        super().__init__(dataset, embedding_dim, num_filters, kernel_size, dropout_prob)
        # This model is only meaningful for single-base input.
        assert dataset.kmer_length == 1
        # requires_grad=False keeps the identity weights out of gradient updates.
        frozen_identity = nn.Parameter(torch.eye(embedding_dim), requires_grad=False)
        self.kmer_embedding.weight = frozen_identity

In [5]:
# --- Search space handed to the Optuna optimization ---------------------------
# Categorical choices.
cat_params = dict(
    kernel_size=[7, 9, 11, 13, 15],
    dropout=[0.0, 0.1, 0.2, 0.3, 0.4],
)
# Integer-valued parameters; the tuple is presumably (low, high) -- confirm
# against HyperBurrito.optuna_optimize.
int_params = dict(num_filters=(7, 20))
# No float or log-scaled float parameters are searched in this run.
float_params = dict()
log_float_params = dict()

# --- Fixed hyperparameters ------------------------------------------------------
# Note: when a name appears both in a search space above and in this dict, the
# search-space entry takes priority.
fixed_hyperparams = dict(
    num_filters=12,
    kernel_size=13,
    batch_size=1024,
    learning_rate=0.1,
    min_learning_rate=1e-3,  # early stopping!
    dropout=0.1,
    l2_regularization_coeff=1e-6,
    max_parameter_count=1024,
)

# --- Run budget -------------------------------------------------------------------
epochs = 100
optuna_steps = 45

hyper_burrito = framework.HyperBurrito(
    pick_device(),
    train_dataset,
    val_dataset,
    CNN1merModel,
    epochs=epochs,
)

hyper_burrito.optuna_optimize(
    optuna_steps,
    cat_params,
    int_params,
    float_params,
    log_float_params,
    fixed_hyperparams,
)

[I 2023-11-25 04:34:56,806] A new study created in memory with name: no-name-e016a84d-860d-4e47-b421-4f15498681ac
[I 2023-11-25 04:34:56,811] Trial 0 finished with value: 1000000000.0 and parameters: {'kernel_size': 13, 'dropout': 0.3, 'num_filters': 17}. Best is trial 0 with value: 1000000000.0.


Using Metal Performance Shaders
Trial rejected because model has 1140 > 1024 parameters.


Epoch:  45%|████▌     | 45/100 [03:13<03:56,  4.29s/it, loss_diff=3.884e-07, lr=3.2e-5]  
[I 2023-11-25 04:38:13,995] Trial 1 finished with value: 0.06341478138593236 and parameters: {'kernel_size': 13, 'dropout': 0.3, 'num_filters': 13}. Best is trial 1 with value: 0.06341478138593236.
Epoch:  33%|███▎      | 33/100 [02:47<05:39,  5.06s/it, loss_diff=-3.976e-07, lr=3.2e-5] 
[I 2023-11-25 04:41:04,881] Trial 2 finished with value: 0.06391726726486008 and parameters: {'kernel_size': 15, 'dropout': 0.1, 'num_filters': 7}. Best is trial 1 with value: 0.06341478138593236.
Epoch:  33%|███▎      | 33/100 [02:44<05:34,  4.99s/it, loss_diff=-4.698e-07, lr=3.2e-5] 
[I 2023-11-25 04:43:54,340] Trial 3 finished with value: 0.061632560173337306 and parameters: {'kernel_size': 9, 'dropout': 0.1, 'num_filters': 11}. Best is trial 3 with value: 0.061632560173337306.
Epoch:  31%|███       | 31/100 [02:41<05:58,  5.20s/it, loss_diff=5.162e-07, lr=3.2e-5]  
[I 2023-11-25 04:46:39,834] Trial 4 finished w

Trial rejected because model has 1084 > 1024 parameters.


Epoch:  51%|█████     | 51/100 [04:41<04:29,  5.51s/it, loss_diff=1.957e-07, lr=3.2e-5]  
[I 2023-11-25 05:33:59,021] Trial 18 finished with value: 0.061798196785680916 and parameters: {'kernel_size': 9, 'dropout': 0.1, 'num_filters': 15}. Best is trial 15 with value: 0.061616363411199776.
Epoch:  42%|████▏     | 42/100 [04:09<05:43,  5.93s/it, loss_diff=4.242e-07, lr=3.2e-5]  
[I 2023-11-25 05:38:13,253] Trial 19 finished with value: 0.06311965162413613 and parameters: {'kernel_size': 7, 'dropout': 0.0, 'num_filters': 11}. Best is trial 15 with value: 0.061616363411199776.
[I 2023-11-25 05:38:13,259] Trial 20 finished with value: 1000000000.0 and parameters: {'kernel_size': 15, 'dropout': 0.4, 'num_filters': 17}. Best is trial 15 with value: 0.061616363411199776.


Trial rejected because model has 1310 > 1024 parameters.


Epoch:  38%|███▊      | 38/100 [03:44<06:07,  5.92s/it, loss_diff=6.913e-07, lr=3.2e-5]  
[I 2023-11-25 05:42:03,839] Trial 21 finished with value: 0.061433445097419395 and parameters: {'kernel_size': 9, 'dropout': 0.1, 'num_filters': 15}. Best is trial 21 with value: 0.061433445097419395.
Epoch:  37%|███▋      | 37/100 [03:25<05:49,  5.55s/it, loss_diff=6.887e-07, lr=3.2e-5]  
[I 2023-11-25 05:45:33,829] Trial 22 finished with value: 0.06180555637908586 and parameters: {'kernel_size': 9, 'dropout': 0.1, 'num_filters': 14}. Best is trial 21 with value: 0.061433445097419395.
Epoch:  46%|████▌     | 46/100 [04:33<05:20,  5.94s/it, loss_diff=9.289e-08, lr=3.2e-5]  
[I 2023-11-25 05:50:11,888] Trial 23 finished with value: 0.06199855483002506 and parameters: {'kernel_size': 9, 'dropout': 0.1, 'num_filters': 15}. Best is trial 21 with value: 0.061433445097419395.
Epoch:  26%|██▌       | 26/100 [02:40<07:36,  6.16s/it, loss_diff=1.871e-09, lr=3.2e-5]  
[I 2023-11-25 05:52:57,568] Trial 24 fi

Trial rejected because model has 1207 > 1024 parameters.


Epoch:  38%|███▊      | 38/100 [03:43<06:04,  5.88s/it, loss_diff=6.887e-07, lr=3.2e-5]  
[I 2023-11-25 06:14:51,909] Trial 30 finished with value: 0.06212655679614171 and parameters: {'kernel_size': 7, 'dropout': 0.0, 'num_filters': 17}. Best is trial 21 with value: 0.061433445097419395.
Epoch:  32%|███▏      | 32/100 [03:17<06:59,  6.17s/it, loss_diff=6.216e-07, lr=3.2e-5]  
[I 2023-11-25 06:18:14,785] Trial 31 finished with value: 0.06390798771823682 and parameters: {'kernel_size': 11, 'dropout': 0.2, 'num_filters': 14}. Best is trial 21 with value: 0.061433445097419395.
Epoch:  37%|███▋      | 37/100 [03:28<05:55,  5.64s/it, loss_diff=-2.616e-08, lr=3.2e-5] 
[I 2023-11-25 06:21:48,283] Trial 32 finished with value: 0.06260972822106953 and parameters: {'kernel_size': 11, 'dropout': 0.2, 'num_filters': 13}. Best is trial 21 with value: 0.061433445097419395.
Epoch:  31%|███       | 31/100 [03:01<06:43,  5.84s/it, loss_diff=-5.877e-08, lr=3.2e-5] 
[I 2023-11-25 06:24:54,275] Trial 33 f

Trial rejected because model has 1027 > 1024 parameters.
Best Hyperparameters: {'kernel_size': 11, 'dropout': 0.2, 'num_filters': 17}
Best Validation Loss: 0.06142035313017063
