In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F

from netam import framework, models
from netam.common import pick_device, print_parameter_count

In [2]:
# Previously used input file, kept for reference:
# shmoof_data_path = "/Users/matsen/data/shmoof_edges_11-Jan-2023_NoNode0_iqtree_K80+R_masked.csv"

# Location of the shmoof CSV. Set the SHMOOF_DATA_PATH environment variable to run this
# notebook on another machine; when it is unset or empty, the original local path is used,
# so existing behaviour is unchanged.
shmoof_data_path = (
    os.environ.get("SHMOOF_DATA_PATH")
    or "/Users/matsen/data/shmoof_pcp_2023-11-30_MASKED.csv"
)

# Load the training and validation dataframes from the CSV.
# NOTE(review): val_nickname="51" presumably names the sample held out for validation, and the
# commented-out sample_count=5000 presumably subsamples for quick runs — confirm in netam.framework.
train_df, val_df = framework.load_shmoof_dataframes(shmoof_data_path, val_nickname="51") # , sample_count=5000)

In [3]:
# Settings shared by the training and validation datasets (kmer_length is also
# reused by later cells).
kmer_length = 3
site_count = 500

# Build both datasets with identical settings; the generator yields the training
# dataset first and the validation dataset second, matching the unpacking order.
train_dataset, val_dataset = (
    framework.SHMoofDataset(source_df, kmer_length=kmer_length, site_count=site_count)
    for source_df in (train_df, val_df)
)

# Pick the compute device once and move both datasets onto it.
# NOTE(review): the return value of `.to` is discarded, so this relies on the move
# happening in place — confirm against SHMoofDataset.to.
device = pick_device()
train_dataset.to(device)
val_dataset.to(device)

# Report dataset sizes as a quick sanity check.
print(f"we have {len(train_dataset)} training examples and {len(val_dataset)} validation examples")

Using Metal Performance Shaders
we have 26592 training examples and 22424 validation examples


In [4]:
# Result of an earlier Optuna search, kept for reference. Its reported parameters
# (kernel_size, embedding_dim, filter_count) differ from the fixed values used below.
# [I 2023-12-03 13:38:46,435] Trial 37 finished with value: 0.0603752824113363 and parameters: {'kernel_size': 19, 'dropout': 0.0, 'embedding_dim': 12, 'filter_count': 12}. Best is trial 37 with value: 0.0603752824113363.

# Search space, split by parameter kind. Only the L2 regularization coefficient is
# searched in this run; the remaining search-space dictionaries are empty.
cat_params = {
    "l2_regularization_coeff": [1e-6, 1e-5, 1e-4],
}
int_params = {}
float_params = {}
log_float_params = {}

# Hyperparameters held constant across trials.
# Note that if anything appears below and above, the above gets priority.
# ("l2_regularization_coeff" appears in both, so per that note the searched value is used.)
fixed_hyperparams = {
    "kmer_length": kmer_length,
    "embedding_dim": 14,
    "filter_count": 25,
    "kernel_size": 15,
    "dropout_prob": 0.1,
    "batch_size": 1024,
    "learning_rate": 0.1,
    "min_learning_rate": 1e-3, # early stopping!
    "l2_regularization_coeff": 1e-6,
    "min_parameter_count": 1000,
    "max_parameter_count": 10000,
}
epochs = 30
# NOTE(review): the search space above has only three candidate values, so 50 trials
# will revisit the same configuration many times (the logged trials 0 and 1 both
# sampled 1e-05). Consider lowering this or using an exhaustive sampler — confirm
# what sampler optuna_optimize uses.
optuna_steps = 50

# Reuse the `device` that the datasets were already moved to, rather than calling
# pick_device() a second time: this keeps model and data on the same device by
# construction and avoids repeating the device-selection message.
hyper_burrito = framework.HyperBurrito(device, train_dataset, val_dataset, models.CNNModel, epochs=epochs)

# Run the search; arguments are passed positionally in the order the original call used.
hyper_burrito.optuna_optimize(optuna_steps, cat_params, int_params, float_params, log_float_params, fixed_hyperparams)

[I 2023-12-08 11:26:16,972] A new study created in memory with name: no-name-76a13ef0-a0eb-4935-9cd2-79d445b58da5


Using Metal Performance Shaders


Epoch: 100%|██████████| 30/30 [02:45<00:00,  5.53s/it, loss_diff=-6.115e-05, lr=0.1, val_loss=0.06358]
[I 2023-12-08 11:29:08,105] Trial 0 finished with value: 0.0635777899608032 and parameters: {'l2_regularization_coeff': 1e-05}. Best is trial 0 with value: 0.0635777899608032.
Epoch: 100%|██████████| 30/30 [02:52<00:00,  5.76s/it, loss_diff=-3.052e-06, lr=0.1, val_loss=0.0657] 
[I 2023-12-08 11:32:06,382] Trial 1 finished with value: 0.0656961277525977 and parameters: {'l2_regularization_coeff': 1e-05}. Best is trial 0 with value: 0.0635777899608032.
Epoch: 100%|██████████| 30/30 [03:52<00:00,  7.73s/it, loss_diff=-2.809e-06, lr=0.004, val_loss=0.06046]
[I 2023-12-08 11:36:04,774] Trial 2 finished with value: 0.0604385759999103 and parameters: {'l2_regularization_coeff': 0.0001}. Best is trial 2 with value: 0.0604385759999103.
Epoch: 100%|██████████| 30/30 [03:29<00:00,  6.99s/it, loss_diff=-1.051e-05, lr=0.1, val_loss=0.06558]
[I 2023-12-08 11:39:40,589] Trial 3 finished with value: 

KeyboardInterrupt: 