In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F

from netam import framework, models
from netam.common import pick_device, print_parameter_count

In [2]:
# Path to the SHMoof CSV that is split into training and validation frames.
# Set the SHMOOF_DATA_PATH environment variable to point at a copy of the data
# on another machine; when it is unset, the original hard-coded location is used.
# Alternative dataset that was used previously (kept for reference):
#   /Users/matsen/data/shmoof_edges_11-Jan-2023_NoNode0_iqtree_K80+R_masked.csv
shmoof_data_path = os.environ.get(
    "SHMOOF_DATA_PATH",
    "/Users/matsen/data/shmoof_pcp_2023-11-30_MASKED.csv",
)

# NOTE(review): val_nickname="51" presumably names the sample held out for
# validation -- confirm against framework.load_shmoof_dataframes.
# A `sample_count=5000` argument was previously passed here and is currently disabled.
train_df, val_df = framework.load_shmoof_dataframes(shmoof_data_path, val_nickname="51")

In [3]:
# Settings shared by the training and validation datasets.
kmer_length = 3
site_count = 500

# Both splits are wrapped in a SHMoofDataset built from the same keyword arguments.
dataset_kwargs = {"kmer_length": kmer_length, "site_count": site_count}
train_dataset = framework.SHMoofDataset(train_df, **dataset_kwargs)
val_dataset = framework.SHMoofDataset(val_df, **dataset_kwargs)

# Pick the compute device once, then call .to(device) on each dataset in turn.
device = pick_device()
for split_dataset in (train_dataset, val_dataset):
    split_dataset.to(device)

print(f"we have {len(train_dataset)} training examples and {len(val_dataset)} validation examples")

Using Metal Performance Shaders
we have 26592 training examples and 22424 validation examples


In [4]:
# Log line from an earlier run; its kernel_size / embedding_dim / filter_count
# values match the ones fixed below.
# [I 2023-12-03 13:38:46,435] Trial 37 finished with value: 0.0603752824113363 and parameters: {'kernel_size': 19, 'dropout': 0.0, 'embedding_dim': 12, 'filter_count': 12}. Best is trial 37 with value: 0.0603752824113363.

# Hyperparameters that are searched, grouped by how their values are proposed.
# Only dropout is searched in this run; the other groups are intentionally empty.
cat_params = {
    "dropout": [0.0, 0.1, 0.2, 0.3],
}
int_params = {}
float_params = {}
log_float_params = {}

# Hyperparameters held constant across all trials.
# Note that if a name appears both here and in a search dict above, the search
# dict gets priority. "dropout" is searched above, so it is deliberately NOT
# repeated here: a fixed value would never be used and would only mislead.
fixed_hyperparams = {
    "filter_count": 12,
    "embedding_dim": 12,
    "kernel_size": 19,
    "kmer_length": kmer_length,
    "batch_size": 1024,
    "learning_rate": 0.1,
    "min_learning_rate": 1e-3,  # early stopping!
    "l2_regularization_coeff": 1e-6,
    "min_parameter_count": 2048,
    "max_parameter_count": 4096,
}
epochs = 1000
# NOTE(review): only 4 distinct configurations are searchable above, so 50
# steps will revisit the same dropout values -- confirm that this is intended
# (e.g. to gauge run-to-run variation) rather than a leftover from a wider search.
optuna_steps = 50

# Reuse the `device` the datasets were already moved to instead of calling
# pick_device() a second time, so the model and the data are guaranteed to
# share one device (and the device banner is not printed again).
hyper_burrito = framework.HyperBurrito(device, train_dataset, val_dataset, models.CNNModel, epochs=epochs)

hyper_burrito.optuna_optimize(optuna_steps, cat_params, int_params, float_params, log_float_params, fixed_hyperparams)

[I 2023-12-03 15:29:46,715] A new study created in memory with name: no-name-3a88dc25-8f75-454a-9cd6-c8925d38491a


Using Metal Performance Shaders


Epoch:   5%|▌         | 54/1000 [06:12<1:48:37,  6.89s/it, loss_diff=7.577e-07, lr=3.2e-5, val_loss=0.06045]  
[I 2023-12-03 15:36:04,476] Trial 0 finished with value: 0.06043745133702856 and parameters: {'dropout': 0.2}. Best is trial 0 with value: 0.06043745133702856.
Epoch:   5%|▍         | 47/1000 [06:00<2:01:55,  7.68s/it, loss_diff=1.709e-06, lr=3.2e-5, val_loss=0.06048]  
[I 2023-12-03 15:42:11,047] Trial 1 finished with value: 0.060472374750733845 and parameters: {'dropout': 0.3}. Best is trial 0 with value: 0.06043745133702856.
Epoch:   5%|▌         | 54/1000 [06:41<1:57:17,  7.44s/it, loss_diff=1.483e-06, lr=3.2e-5, val_loss=0.06046]  
[I 2023-12-03 15:48:57,720] Trial 2 finished with value: 0.060418437370286855 and parameters: {'dropout': 0.0}. Best is trial 2 with value: 0.060418437370286855.
Epoch:  18%|█▊        | 177/1000 [22:42<1:45:36,  7.70s/it, loss_diff=3.567e-08, lr=3.2e-5, val_loss=0.06152]  
[I 2023-12-03 16:11:46,741] Trial 3 finished with value: 0.0615047374982

KeyboardInterrupt: 