  # Train a simple fully connected NN on (1482) profiles to classify TPSA

# Setup

In [2]:
# Reload edited project modules automatically before each cell executes.
%load_ext autoreload  
%autoreload 2
from IPython.display import display, HTML, Image
from IPython.core.interactiveshell import InteractiveShell
# Widen the notebook container to 98% of the browser window.
display(HTML("<style>.container { width:98% !important; }</style>"))
# Display the value of every expression statement in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [3]:
import os
import sys
import random
from functools import partial
from typing import List, Tuple
from types import SimpleNamespace
import yaml
import pprint
import logging
from datetime import datetime
# Put the project source folders at the front of the import search path.
# Each folder is inserted at position 0, so the last one listed ends up first.
for extra_path in ('./src', '../..'):
    if extra_path in sys.path:
        continue  # already registered (e.g. the cell was re-run)
    print(f"insert {extra_path}")
    sys.path.insert(0, extra_path)
print(sys.path)

import tqdm
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
import scipy
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt 
from torchinfo import summary

# ---- Printing / display options -------------------------------------------
# Only the line width is changed for tensor printing; every other option is left at its default.
torch.set_printoptions(linewidth=180)
np.set_printoptions(linewidth=150, edgeitems=3, infstr='inf', nanstr='nan')
pd.options.display.width = 132
pp = pprint.PrettyPrinter(indent=4)

# ---- Reproducibility --------------------------------------------------------
torch.manual_seed(42)  # seed the torch RNG

# ---- Hardware ---------------------------------------------------------------
# os.environ["WANDB_NOTEBOOK_NAME"] = "6.3_Profiles_NN.ipynb"
# os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
# GPUs exposed to this process; this has to be in place before CUDA is first initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2"

# Cap the number of CPU threads torch uses, then show the value actually in effect.
torch.set_num_threads(4)
torch.get_num_threads()

insert ./src
insert ../..
['../..', './src', '/home/kevin/WSL-shared/cellpainting/cj-datasets', '/home/kevin/miniforge3/envs/cp311/lib/python311.zip', '/home/kevin/miniforge3/envs/cp311/lib/python3.11', '/home/kevin/miniforge3/envs/cp311/lib/python3.11/lib-dynload', '', '/home/kevin/miniforge3/envs/cp311/lib/python3.11/site-packages', '/home/kevin/miniforge3/envs/cp311/lib/python3.11/site-packages/huggingface_hub-0.20.3-py3.8.egg']


<torch._C.Generator at 0x7fda5e2fadd0>

4

In [4]:
import KevinsRoutines.utils as myutils
from KevinsRoutines.utils.utils_general import list_namespace, save_to_pickle, load_from_pickle
# import snnl.utils as utils
# from pt-snnl.snnl.utils.utils_ptsnnl import display_cellpainting_batch
from utils.utils_cellpainting import label_counts, balance_datasets,save_checkpoint, load_checkpoint
from utils.dataloader import CellpaintingDataset, InfiniteDataLoader, custom_collate_fn, dynamic_collate_fn
from utils.utils_notebooks import plot_cls_metrics, compute_classification_metrics, run_model_on_test_data,\
                                train, validation, accuracy_fn, fit, build_model



In [5]:
# Wall-clock time at which this cell was executed.
timestamp = datetime.now().strftime('%Y_%m_%d_%H:%M:%S')
logger = logging.getLogger(__name__)

# Logging verbosity is taken from the LOG_LEVEL environment variable (default: INFO).
logLevel = os.environ.get('LOG_LEVEL', 'INFO').upper()
if not isinstance(logging.getLevelName(logLevel), int):
    # Unknown level name: fall back to INFO instead of letting basicConfig raise ValueError.
    logLevel = 'INFO'

FORMAT = '%(asctime)s - %(name)s - %(levelname)s: - %(message)s'
# Apply the requested level (it was previously read but never used).
logging.basicConfig(level=logLevel, format=FORMAT)

# Record the library versions this run was produced with.
logger.info(f" Pytorch version  : {torch.__version__}")
logger.info(f" Scipy version    : {scipy.__version__}  \t\t Numpy version : {np.__version__}")
logger.info(f" Pandas version   : {pd.__version__}  ")

2024-10-16 17:37:50,395 - __main__ - INFO: -  Pytorch version  : 2.2.0
2024-10-16 17:37:50,395 - __main__ - INFO: -  Scipy version    : 1.11.4  		 Numpy version : 1.26.2
2024-10-16 17:37:50,396 - __main__ - INFO: -  Pandas version   : 2.2.0  


In [6]:
# ----------------------------------------------
#  Choose the GPU to run on and seed the RNGs
# ----------------------------------------------
# os.environ["CUDA_VISIBLE_DEVICES"] = '0'
myutils.set_device(2)                        # make device index 2 the current device
device = myutils.get_device(verbose=True)    # verbose=True also prints the per-device memory table
myutils.set_global_seed(1111)
print(device)

 Switched to: "cuda:2"   Device Name: NVIDIA TITAN Xp               


'cuda:2'

Dev Id   Device Name                    Total Memory                     InUse                            Free Memory 
   0     Quadro GV100                   34,069,872,640 B/ (31.73 GB)  	 1,287,651,328 B / (1.20 GB)  	 32,782,221,312 B / (30.53 GB)  
   1     Quadro GV100                   34,069,872,640 B/ (31.73 GB)  	 1,287,651,328 B / (1.20 GB)  	 32,782,221,312 B / (30.53 GB)  
   2     NVIDIA TITAN Xp                12,774,539,264 B/ (11.90 GB)  	 826,408,960 B / (0.77 GB)  	 11,948,130,304 B / (11.13 GB)   *** CURRENT DEVICE *** 

 Current CUDA Device is:  "cuda:2"  Device Name: NVIDIA TITAN Xp
 Seed value set to 1111 in random, np.random, and torch
cuda:2


In [7]:
# Discard any model left over from an earlier run of this notebook so the
# model-building cell starts from a clean slate. On a fresh kernel `model`
# does not exist yet; that NameError is the only failure ignored here.
try:
    del model
except NameError:
    pass

# main(args)

In [8]:
# ---------------------------------------------------------------
#  Experiment configuration
# ---------------------------------------------------------------
LATENT_DIM = 1471              # width of the feature vector fed to the network
HIDDEN_1 = 512                 # NOTE(review): build_model below receives n_hidden_1, not this value - confirm it is still needed
COMPOUNDS_PER_BATCH = 600

# Architecture selector handed to build_model(); other values used in earlier runs:
#   'batch_norm', 'relu'
MODEL_TYPE = 'single_layer'

# Layer widths: input size followed by the three hidden-layer sizes.
n_input = LATENT_DIM
n_hidden_1, n_hidden_2, n_hidden_3 = 256, 256, 128

# metadata_cols = ['Metadata_Source', 'Metadata_Batch', 'Metadata_Plate', 'Metadata_Well', 'Metadata_JCP2022', 'Metadata_Hash', 'Metadata_Bin', 'Metadata_TPSA', 'Metadata_lnTPSA', 'Metadata_log10TPSA', 'Metadata_Permiation']
# FEATURE_COLS  = 1482 - len(metadata_cols)
# METADATA_COLS += [f'Feature_{x:03d}' for x in range(LATENT_DIM)]

# Non-feature columns that accompany every profile row.
METADATA_COLS = [
    'Metadata_Source',
    'Metadata_Batch',
    'Metadata_Plate',
    'Metadata_Well',
    'Metadata_JCP2022',
    'Metadata_Hash',
    'Metadata_Bin',
    'Metadata_TPSA',
    'Metadata_lnTPSA',
    'Metadata_log10TPSA',
    'Metadata_Permiation',
]

# Total column count of an input row: features plus metadata.
input_cols = len(METADATA_COLS) + LATENT_DIM
print(len(METADATA_COLS))
print(input_cols)

# Locations of the input profiles and of the saved model checkpoints.
INPUT_PATH = "/home/kevin/WSL-shared/cellpainting/cj-datasets/output_11102023/3_sample_profiles/"
CKPT_PATH = "./saved_models/profile_models"

11
1482


In [9]:
# Identifier of the training run; it becomes part of the checkpoint file name.
# To start a brand-new run use:  RUN_DATETIME = datetime.now().strftime('%Y%m%d_%H%M')
#
# Identifiers used so far (all CellProfiles, CPB 600):
#   '20240916_1800'   Single layer 256
#   '20240916_1801'   Single layer 256
#   '20240916_1802'   Single layer 256
#   '20240916_1803'   Single layer 256      <-- selected below
#   '20240921_0600'   Single layer 512
#   '20240912_0400'   ReLU 256/256/128
#   '20240912_0401'   ReLU 256/256/128
#   '20240913_0700'   Batch Norm 256/256/128
#   '20241004_1600'   Batch Norm 512/512/128
RUN_DATETIME = "20240916_1803"
print(RUN_DATETIME)

20240916_1803


In [10]:
# SNNL AUTOENCODERS 
# AE_RUNMODE = "snnl"
# AE_DATETIME = "20240718_1956"
# AE_DATETIME = "20240906_2201"     # Autoencoder training - SNNL, CPB = 600, Latent 150, WD = 0.001, SNN Factor 3
# AE_DATETIME = "20240917_2004"     # Autoencoder training - SNNL, CPB = 600, Latent 250, WD = 0.001, SNN Factor 3

## BASELINE AUTOENCODERS 
# AE_RUNMODE = 'base'
# AE_DATETIME = "20240923_1943"     # Autoencoder training - Baseline, CPB = 600, Latent 150, WD = 0.001 (SNN Factor 0)
# AE_DATETIME = "20240917_2017"     # Autoencoder training - Baseline, CPB = 600, Latent 250, WD = 0.001 (SNN Factor 0)

# AE_CKPTTYPE = "BEST"
# AE_CKPTTYPE = "LAST"

In [11]:
# Checkpoint file-name template. The literal "{ep}" placeholder is kept in the
# resulting string so the epoch number can be filled in later.
CKPT_FILE = "NN_1482profiles_cpb{cpb}_{run}_ep_{{ep}}".format(
    cpb=COMPOUNDS_PER_BATCH,
    run=RUN_DATETIME,
)
print(CKPT_FILE)

NN_1482profiles_cpb600_20240916_1803_ep_{ep}


# Input Dataloader

In [12]:
# File names of the three data splits.
TRAIN_INPUT_FILE = "3sample_profiles_1482_HashOrder_training.csv"
VAL_INPUT_FILE = "3sample_profiles_1482_HashOrder_training_sub_val.csv"
TEST_INPUT_FILE = "3sample_profiles_1482_HashOrder_training_sub_test.csv"
# TEST_INPUT_FILE  = f"3sample_profiles_1482_HashOrder_test.csv"
# ALL_INPUT_FILE   = f"3sample_profiles_1482_HashOrder_all.csv"

# Full paths: each file name joined onto INPUT_PATH.
TRAIN_INPUT, VAL_INPUT, TEST_INPUT = (
    os.path.join(INPUT_PATH, file_name)
    for file_name in (TRAIN_INPUT_FILE, VAL_INPUT_FILE, TEST_INPUT_FILE)
)

# Echo the bare file names first, then the resolved paths.
for shown_name in (TRAIN_INPUT_FILE, VAL_INPUT_FILE, TEST_INPUT_FILE):
    print(f" {shown_name}")

for label, full_path in (
    ("TRAIN_INPUT:", TRAIN_INPUT),
    ("VAL_INPUT  :", VAL_INPUT),
    ("TEST_INPUT :", TEST_INPUT),
):
    print(f" {label}  {full_path}")

 3sample_profiles_1482_HashOrder_training.csv
 3sample_profiles_1482_HashOrder_training_sub_val.csv
 3sample_profiles_1482_HashOrder_training_sub_test.csv
 TRAIN_INPUT:  /home/kevin/WSL-shared/cellpainting/cj-datasets/output_11102023/3_sample_profiles/3sample_profiles_1482_HashOrder_training.csv
 VAL_INPUT  :  /home/kevin/WSL-shared/cellpainting/cj-datasets/output_11102023/3_sample_profiles/3sample_profiles_1482_HashOrder_training_sub_val.csv
 TEST_INPUT :  /home/kevin/WSL-shared/cellpainting/cj-datasets/output_11102023/3_sample_profiles/3sample_profiles_1482_HashOrder_training_sub_test.csv


In [13]:
# Row budget (total rows = 346,542; 3sample_profiles_1482_HashOrder_training.csv holds 312,000):
#     Train      : 277,200     (312,000 - (21,600 + 12,600 + 600))
#     Validation :  21,600
#     Test       :  12,600
#     left over  :     600
#
# Keyword arguments shared by all three CellpaintingDataset instances; each
# split has its own file path and its own [start, end) row window.
cellpainting_args = dict(
    compounds_per_batch=COMPOUNDS_PER_BATCH,
    training_path=TRAIN_INPUT,
    validation_path=VAL_INPUT,
    test_path=TEST_INPUT,
    train_start=0,
    train_end=277_200,     # was 276,000
    val_start=0,
    val_end=21_600,        # was 300,000
    test_start=0,
    test_end=12_600,       # 34_542
    tpsa_threshold=100,
)

In [14]:
cellpainting_args

{'compounds_per_batch': 600,
 'training_path': '/home/kevin/WSL-shared/cellpainting/cj-datasets/output_11102023/3_sample_profiles/3sample_profiles_1482_HashOrder_training.csv',
 'validation_path': '/home/kevin/WSL-shared/cellpainting/cj-datasets/output_11102023/3_sample_profiles/3sample_profiles_1482_HashOrder_training_sub_val.csv',
 'test_path': '/home/kevin/WSL-shared/cellpainting/cj-datasets/output_11102023/3_sample_profiles/3sample_profiles_1482_HashOrder_training_sub_test.csv',
 'train_start': 0,
 'train_end': 277200,
 'val_start': 0,
 'val_end': 21600,
 'test_start': 0,
 'test_end': 12600,
 'tpsa_threshold': 100}

In [15]:
#### Load CellPainting Dataset
# `dataset` and `data_loader` are both keyed by split name: 'train', 'val', 'test'.
dataset = dict()
data_loader = dict()

for datatype in ['train', 'val', 'test']:
    # Report which split is being loaded.
    print(f" load {datatype}")
    dataset[datatype] = CellpaintingDataset(type = datatype, **cellpainting_args)
    # The collate function is bound to the TPSA threshold stored on the dataset itself.
    # NOTE(review): batch_size=1 presumably means one loader item is already a full
    # chunk of rows (the log reports 1800 rows per batch) - confirm in utils.dataloader.
    data_loader[datatype] = InfiniteDataLoader(dataset = dataset[datatype], batch_size=1, shuffle = False, num_workers = 0,
                                               collate_fn = partial(dynamic_collate_fn, tpsa_threshold = dataset[datatype].tpsa_threshold) )

2024-10-16 17:37:51,639 - utils.dataloader - INFO: -  Building CellPantingDataset for train
2024-10-16 17:37:51,640 - utils.dataloader - INFO: -  filename:  /home/kevin/WSL-shared/cellpainting/cj-datasets/output_11102023/3_sample_profiles/3sample_profiles_1482_HashOrder_training.csv
2024-10-16 17:37:51,641 - utils.dataloader - INFO: -  type    :  train
2024-10-16 17:37:51,642 - utils.dataloader - INFO: -  start   :  0
2024-10-16 17:37:51,642 - utils.dataloader - INFO: -  end     :  277200
2024-10-16 17:37:51,643 - utils.dataloader - INFO: -  numrows :  277200
2024-10-16 17:37:51,644 - utils.dataloader - INFO: -  names   :  None     usecols :  None
2024-10-16 17:37:51,644 - utils.dataloader - INFO: -  batch_size  :  1
2024-10-16 17:37:51,645 - utils.dataloader - INFO: -  sample_size :  3
2024-10-16 17:37:51,645 - utils.dataloader - INFO: -  compounds_per_batch :  600
2024-10-16 17:37:51,646 - utils.dataloader - INFO: -  rows per batch (chunksize) :  1800
2024-10-16 17:37:51,647 - utils.

 load {}
 Dataset size: 277200   rows per batch: 1800  tpsa_threshold: 100
 Dataset size: 21600   rows per batch: 1800  tpsa_threshold: 100


2024-10-16 17:37:51,661 - utils.dataloader - INFO: -  batch_size  :  1
2024-10-16 17:37:51,661 - utils.dataloader - INFO: -  sample_size :  3
2024-10-16 17:37:51,662 - utils.dataloader - INFO: -  compounds_per_batch :  600
2024-10-16 17:37:51,662 - utils.dataloader - INFO: -  rows per batch (chunksize) :  1800
2024-10-16 17:37:51,663 - utils.dataloader - INFO: -  TPSA threshold :  100
2024-10-16 17:37:51,663 - utils.dataloader - INFO: -  Each mini-batch contains 600.0 compounds with 3 samples per compound : total 1800 rows
2024-10-16 17:37:51,663 - utils.dataloader - INFO: -  Number of 1800 row full size batches per epoch: 7
2024-10-16 17:37:51,664 - utils.dataloader - INFO: -  Rows covered by 7 full size batches (1800 rows) per epoch:  12600
2024-10-16 17:37:51,664 - utils.dataloader - INFO: -  Last partial batch contains : 0 rows
2024-10-16 17:37:51,665 - utils.dataloader - INFO: -  


 Dataset size: 12600   rows per batch: 1800  tpsa_threshold: 100


### Misc code


In [16]:
## total rows = 346,542
## Rows on 3sample_profiles_1482_HashOrder_training.csv: 312,000
## Split to:
##     Train      : 277,200     (312,000 - (21,600+12,600+600))
##     Validation :  21,600
##     Test       :  12,600
##     left over  :     600
# cellpainting_args = {'sample_size'    : 3,
#                      'batch_size'     : 1,
#                      'compounds_per_batch': 600,
#                      'training_path'  : TRAIN_INPUT,
#                      'validation_path': TRAIN_INPUT,
#                      'test_path'      : TRAIN_INPUT,
#                      'train_start'    : 0,
#                      'train_end'      : 277_200,  # was 276,000
#                      'val_start'      : 277_200,
#                      'val_end'        : 298_800,  # was 300,000 
#                      'test_start'     : 298_800,
#                      'test_end'       : 311_400,  # 34_542
#                     }

# cellpainting_args = {'sample_size': 3,
#                      'batch_size': 1,
#                      'compounds_per_batch': 200,
#                      'training_path'  : TRAIN_INPUT,
#                      'validation_path': TEST_INPUT,
#                      'test_path'      : TEST_INPUT,
#                      'train_start'    : 0,
#                      'train_end'      : 312_000,
#                      'val_start'      : 0,
#                      'val_end'        : 24_000,
#                      'test_start'     : 24_000,
#                      'test_end'       : 34_200,   ## 34_542 }

In [17]:
# %%timeit
# for dataset in ['train', 'val', 'test']:
#     for idx, batch in enumerate(data_loader[dataset]):
#         print(batch[0].shape[0], batch[1].sum())
#         # display_cellpainting_batch(idx, batch)
#         if idx == 1:
#             break

In [18]:
# # -----------------------------------------
# #  Count pos/neg labels in each dataset
# # -----------------------------------------
# for datatype in ['train', 'val', 'test']:
#     MINIBATCH_SIZE = data_loader[datatype].dataset.sample_size * data_loader[datatype].dataset.compounds_per_batch
#     print(f" {datatype.capitalize()} Minibatch size : {MINIBATCH_SIZE}") 
# print()

# for datatype in ['train', 'val', 'test']:
#     # for datatype in ['val', 'test']:
#     minibatches = len(data_loader[datatype]) // MINIBATCH_SIZE
#     ttl_rows = 0
#     ttl_pos_labels = 0 
#     with tqdm.tqdm(enumerate(data_loader[datatype]), initial=0, total = minibatches, position=0, file=sys.stdout,
#                    leave= False, desc=f" Count labels ") as t_warmup:
#         for batch_count, (batch_features, batch_labels, _, _, _, _, _) in t_warmup:
#             ttl_rows += batch_labels.shape[0]
#             ttl_pos_labels += batch_labels.sum()
#     ttl_neg_labels = ttl_rows - ttl_pos_labels
#     ttl = f"\n Dataset: {datatype} -  len of {datatype} data loader: {len(data_loader[datatype])}   number of batches: {minibatches}"
#     print(ttl)
#     print('-'*len(ttl))
#     print(f" total rows     : {ttl_rows:7d}")
#     print(f" total pos rows : {ttl_pos_labels:7.0f} - {ttl_pos_labels*100.0/ttl_rows:5.2f}%")
#     print(f" total neg rows : {ttl_neg_labels:7.0f} - {ttl_neg_labels*100.0/ttl_rows:5.2f}%")
#     print()

     Minibatch size : 1800 

     Dataset: train -  len of train data loader: 277200   number of batches: 154
    ------------------------------
     total rows     :  277200
     total pos rows :   33129 - 11.95%
     total neg rows :  244071 - 88.05%
      
     Dataset: val - len of val data loader: 21600   number of batches: 12
    ------------------------------
     total rows     :   21600
     total pos rows :    2532 - 11.72%
     total neg rows :   19068 - 88.28%

     Dataset: test - len of test data loader: 12600   number of batches: 7
    ------------------------------
     total rows     :   12600
     total pos rows :    1431 - 11.36%
     total neg rows :   11169 - 88.64%


# Define Neural Net Model 


 - 1 hidden layer model (`MODEL_TYPE = 'single_layer'`) :
 
     - **20240916_1800** : Run on 1 FC layer model - Input --> 256 --> Tanh --> 1
     
     - **20240921_0600** : Run on 1 FC layer model - Input --> 512 --> Tanh --> 1     


  - 4 layer model :

    Input --> Hidden1 --> (BN/NL) ---> Hidden2 ---> (BN/NL) ---> Hidden3 --->  (BN/NL) ---> 1

     - **20240912_0400** : Run on 4 FC layer model - Relu non linearities (NO Batch Norm)
     - **20240912_0401** : Run on 4 FC layer model - Relu non linearities (NO Batch Norm)
     
     - **20240913_0700** : Run on 4 FC layer model - 256/256/128 with BATCH NORM and tanh non linearities
     
     - **20241004_1600** : Run on 4 FC layer model - 512/512/128 with BATCH NORM and tanh non linearities


In [19]:

# Build the network selected by MODEL_TYPE with the layer widths from the
# configuration cell, placed on the chosen device.
model = build_model(
    MODEL_TYPE,
    input=n_input,
    hidden_1=n_hidden_1,
    hidden_2=n_hidden_2,
    hidden_3=n_hidden_3,
    device=device,
)
 

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #                   Param %                   Mult-Adds                 Trainable
Sequential                               [30, 1471]                [30, 1]                   --                             --                   --                        True
├─Linear: 1-1                            [30, 1471]                [30, 256]                 376,832                    99.93%                   11,304,960                True
│    └─weight                                                                                ├─376,576
│    └─bias                                                                                  └─256
├─Tanh: 1-2                              [30, 256]                 [30, 256]                 --                             --                   --                        --
├─Linear: 1-3                            [30, 256]                 [30, 1]                 

In [20]:
# Per-epoch history (loss and accuracy for the training and validation splits);
# it is handed to fit() and replaced by the value fit() returns.
metrics = {name: [] for name in ('loss_trn', 'acc_trn', 'loss_val', 'acc_val')}

# Nothing has been trained yet, so the epoch window starts empty.
start_epoch = end_epoch = 0
init_LR = 1.0e-3
# curr_LR = init_LR

# Optimizer over all model parameters.
optimizer = torch.optim.AdamW(model.parameters(), lr=init_LR)

# Scheduler alternatives tried earlier:
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = step_size, gamma=0.1, last_epoch =-1)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode = 'min', factor = 0.3 , patience=20, cooldown=10,)
#
# Halve the learning rate once the monitored value has stopped decreasing for
# `patience` scheduler steps, then wait `cooldown` steps before counting again.
# NOTE(review): the monitored value is supplied where scheduler.step() is called,
# presumably inside fit() - confirm which loss that is.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    threshold=1.0e-06,
    patience=50,
    cooldown=10,
)

### Read checkpoint

In [21]:
# # loaded_epoch
# optimizer.state_dict()
# scheduler.state_dict()

In [22]:
# from utils.utils_cellpainting import load_checkpoint
# model, optimizer, scheduler, end_epoch = load_checkpoint(model, optimizer, scheduler, f"NN_1482profiles_20240916_1800_ep_200.pt", ckpt_path = CKPT_PATH)
# model = model.to(device)

In [23]:
# end_epoch
# optimizer.state_dict()
# scheduler.state_dict()

# Run Training 

In [24]:
# Resume where the previous training segment ended and schedule 400 more epochs.
# Re-running this cell moves the window forward again.
# To restart from scratch instead:  start_epoch, end_epoch = 0, 100
start_epoch, end_epoch = end_epoch, end_epoch + 400
print(start_epoch, end_epoch)

# Switch the model to training mode; the return value is assigned to `_` so it is not displayed.
_ = model.train()

0 400


In [25]:

# Train for epochs [start_epoch, end_epoch); `metrics` is passed in and replaced
# by the value fit() returns. Checkpoint names are derived from CKPT_FILE and
# written under CKPT_PATH.
metrics = fit(
    model,
    optimizer,
    scheduler,
    data_loader,
    metrics,
    start_epoch,
    end_epoch,
    device,
    CKPT_FILE,
    CKPT_PATH,
)


 17:43:04 | Ep:   1/ 400 | Trn loss:  0.478415 - Acc: 80.6732 | Val loss:  0.392574 - Acc: 87.5231 | last_lr: 1.00000e-03  bad_ep: 0  cdwn: 0                              
 17:45:06 | Ep:   2/ 400 | Trn loss:  0.375259 - Acc: 87.8056 | Val loss:  0.370061 - Acc: 88.1944 | last_lr: 1.00000e-03  bad_ep: 0  cdwn: 0                              
 17:47:06 | Ep:   3/ 400 | Trn loss:  0.362996 - Acc: 88.0018 | Val loss:  0.364823 - Acc: 88.1944 | last_lr: 1.00000e-03  bad_ep: 0  cdwn: 0                              
 17:49:08 | Ep:   4/ 400 | Trn loss:  0.356570 - Acc: 88.0480 | Val loss:  0.364262 - Acc: 88.1343 | last_lr: 1.00000e-03  bad_ep: 0  cdwn: 0                              
 17:51:09 | Ep:   5/ 400 | Trn loss:  0.351066 - Acc: 88.0819 | Val loss:  0.364078 - Acc: 88.1296 | last_lr: 1.00000e-03  bad_ep: 0  cdwn: 0                              
 17:53:11 | Ep:   6/ 400 | Trn loss:  0.345767 - Acc: 88.1100 | Val loss:  0.364731 - Acc: 88.1528 | last_lr: 1.00000e-03  bad_ep: 1  cdwn: 

2024-10-16 21:00:13,491 - utils.utils_cellpainting - INFO: -  Model exported to NN_1482profiles_cpb600_20240916_1803_ep_100.pt - epoch: 100


 21:00:13 | Ep: 100/ 400 | Trn loss:  0.034851 - Acc: 99.2897 | Val loss:  1.008564 - Acc: 82.0370 | last_lr: 5.00000e-04  bad_ep: 34  cdwn: 0 
 21:02:11 | Ep: 101/ 400 | Trn loss:  0.040158 - Acc: 98.9939 | Val loss:  1.022726 - Acc: 81.7963 | last_lr: 5.00000e-04  bad_ep: 35  cdwn: 0                             
 21:04:10 | Ep: 102/ 400 | Trn loss:  0.037964 - Acc: 99.0981 | Val loss:  1.062303 - Acc: 81.3889 | last_lr: 5.00000e-04  bad_ep: 36  cdwn: 0                             
 21:06:08 | Ep: 103/ 400 | Trn loss:  0.036127 - Acc: 99.1944 | Val loss:  1.035695 - Acc: 81.0278 | last_lr: 5.00000e-04  bad_ep: 37  cdwn: 0                             
 21:08:06 | Ep: 104/ 400 | Trn loss:  0.037322 - Acc: 99.1392 | Val loss:  1.029338 - Acc: 81.0185 | last_lr: 5.00000e-04  bad_ep: 38  cdwn: 0                             
 21:10:02 | Ep: 105/ 400 | Trn loss:  0.035709 - Acc: 99.2038 | Val loss:  1.043734 - Acc: 80.7500 | last_lr: 5.00000e-04  bad_ep: 39  cdwn: 0                          

2024-10-17 00:15:23,605 - utils.utils_cellpainting - INFO: -  Model exported to NN_1482profiles_cpb600_20240916_1803_ep_200.pt - epoch: 200


 00:15:23 | Ep: 200/ 400 | Trn loss:  0.004038 - Acc: 99.9978 | Val loss:  1.328818 - Acc: 81.4769 | last_lr: 1.25000e-04  bad_ep: 12  cdwn: 0 
 00:17:19 | Ep: 201/ 400 | Trn loss:  0.003966 - Acc: 99.9978 | Val loss:  1.330642 - Acc: 81.4907 | last_lr: 1.25000e-04  bad_ep: 13  cdwn: 0                             
 00:19:16 | Ep: 202/ 400 | Trn loss:  0.003896 - Acc: 99.9978 | Val loss:  1.332528 - Acc: 81.4861 | last_lr: 1.25000e-04  bad_ep: 14  cdwn: 0                             
 00:21:14 | Ep: 203/ 400 | Trn loss:  0.003827 - Acc: 99.9978 | Val loss:  1.334495 - Acc: 81.4815 | last_lr: 1.25000e-04  bad_ep: 15  cdwn: 0                             
 00:23:12 | Ep: 204/ 400 | Trn loss:  0.003760 - Acc: 99.9978 | Val loss:  1.336535 - Acc: 81.4491 | last_lr: 1.25000e-04  bad_ep: 16  cdwn: 0                             
 00:25:09 | Ep: 205/ 400 | Trn loss:  0.003695 - Acc: 99.9978 | Val loss:  1.338647 - Acc: 81.4491 | last_lr: 1.25000e-04  bad_ep: 17  cdwn: 0                          

2024-10-17 03:31:35,673 - utils.utils_cellpainting - INFO: -  Model exported to NN_1482profiles_cpb600_20240916_1803_ep_300.pt - epoch: 300


 03:31:35 | Ep: 300/ 400 | Trn loss:  0.001097 - Acc: 99.9989 | Val loss:  1.537122 - Acc: 81.6065 | last_lr: 3.12500e-05  bad_ep: 0  cdwn: 10 
 03:33:33 | Ep: 301/ 400 | Trn loss:  0.001171 - Acc: 99.9989 | Val loss:  1.538434 - Acc: 81.5972 | last_lr: 3.12500e-05  bad_ep: 0  cdwn: 9                              
 03:35:31 | Ep: 302/ 400 | Trn loss:  0.001186 - Acc: 99.9989 | Val loss:  1.538758 - Acc: 81.5926 | last_lr: 3.12500e-05  bad_ep: 0  cdwn: 8                              
 03:37:29 | Ep: 303/ 400 | Trn loss:  0.001199 - Acc: 99.9989 | Val loss:  1.539960 - Acc: 81.6481 | last_lr: 3.12500e-05  bad_ep: 0  cdwn: 7                              
 03:39:24 | Ep: 304/ 400 | Trn loss:  0.001212 - Acc: 99.9989 | Val loss:  1.541808 - Acc: 81.6528 | last_lr: 3.12500e-05  bad_ep: 0  cdwn: 6                              
 03:41:20 | Ep: 305/ 400 | Trn loss:  0.001224 - Acc: 99.9989 | Val loss:  1.543846 - Acc: 81.6713 | last_lr: 3.12500e-05  bad_ep: 0  cdwn: 5                           

2024-10-17 06:46:32,263 - utils.utils_cellpainting - INFO: -  Model exported to NN_1482profiles_cpb600_20240916_1803_ep_400.pt - epoch: 400


 06:46:32 | Ep: 400/ 400 | Trn loss:  0.000745 - Acc: 99.9996 | Val loss:  1.602056 - Acc: 81.8426 | last_lr: 1.56250e-05  bad_ep: 29  cdwn: 0 


In [None]:
# if (epoch+1) % 100 == 0:
# epoch = 299
# filename = f"NN_1482profiles_{ae_datetime}_{ae_ckpttype}_{RUN_DATETIME}_ep_{epoch+1}"
# print(filename)
# save_checkpoint(epoch+1, model, optimizer, scheduler, filename = filename, ckpt_path = CKPT_PATH, verbose = False)

    * 20240916_1800 - CellProfiles (1482) ---> Single Layer 256
    
        21:44:39 | Ep: 100/ 100 | Trn loss:  0.047957 - Acc: 99.1926 | Val loss:  0.813064 - Acc: 81.9306 | last_lr: 1.25000e-04  bad_ep: 2  cdwn: 0 
        04:27:48 | Ep: 200/ 200 | Trn loss:  0.022507 - Acc: 99.8802 | Val loss:  0.929760 - Acc: 81.6898 | last_lr: 1.56250e-05  bad_ep: 9  cdwn: 0 
    
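    * 20240921_0600 - CellProfiles (1482) ---> Single Layer 512   (NOTE: the two log lines below are identical to the 20240916_1800 entry above - verify they belong to this run)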
    
        21:44:39 | Ep: 100/ 100 | Trn loss:  0.047957 - Acc: 99.1926 | Val loss:  0.813064 - Acc: 81.9306 | last_lr: 1.25000e-04  bad_ep: 2  cdwn: 0 
        04:27:48 | Ep: 200/ 200 | Trn loss:  0.022507 - Acc: 99.8802 | Val loss:  0.929760 - Acc: 81.6898 | last_lr: 1.56250e-05  bad_ep: 9  cdwn: 0 
    

    
    * 20240912_0400 - CellProfiles (1482) --->  4 layer FCN with ReLU 256/256/128

         08:01:23 | Ep:  99/ 100 | Trn loss:  0.264891 -  0.316413  Acc: 89.7615 | Val loss:  0.487438  -  0.401803  Acc: 87.9365 | last_lr: 5.00000e-03  bad_ep: 34  cdwn: 0 
         08:29:23 | Ep: 100/ 200 | Trn loss:  0.267485 -  0.267485  Acc: 89.6919 | Val loss:  0.487438  -  0.487438  Acc: 87.9365 | last_lr: 5.00000e-03  bad_ep: 35  cdwn: 0
         12:22:02 | Ep: 199/ 200 | Trn loss:  0.267485 -  0.267485  Acc: 89.6919 | Val loss:  0.487438  -  0.487438  Acc: 87.9365 | last_lr: 1.25000e-03  bad_ep: 12  cdwn: 0 
    
    
    * 20240913_0700 - CellProfiles (1482) --->  4 layer FCN Batch Norm/Tanh 256/256/128
    
        18:10:13 | Ep: 100/ 200 | Trn loss:  0.013115 -  0.164907  Acc: 99.7359 | Val loss:  1.854780  -  0.885383  Acc: 81.6296 | last_lr: 1.25000e-03  bad_ep: 2  cdwn: 0  
        22:01:08 | Ep: 199/ 200 | Trn loss:  0.000011 -  0.084685  Acc: 100.0000 | Val loss:  3.905227  -  1.915857  Acc: 81.0093 | last_lr: 1.56250e-04  bad_ep: 8  cdwn: 0
        02:09:45 | Ep: 299/ 300 | Trn loss:  0.000000 -  0.056457  Acc: 100.0000 | Val loss:  5.124249  -  2.825850  Acc: 80.8565 | last_lr: 1.95313e-05  bad_ep: 15  cdwn: 0  


    * 20241004_1600 - CellProfiles (1482) --->  4 layer FCN Batch Norm/Tanh 512/512/128
    




In [None]:
# end_epoch = 200
# filename = f"NN_snnl_embd600_150Ltnt_512_{ae_datetime}_{ae_ckpttype}_{RUN_DATETIME}_ep_{end_epoch}"
# print(filename)
start_epoch, end_epoch    # epoch window before the shift
# Move the window forward: start where the last segment ended, add 100 epochs.
start_epoch, end_epoch = end_epoch, end_epoch + 100
start_epoch, end_epoch    # epoch window after the shift

# save_checkpoint(end_epoch, model, optimizer, scheduler, filename = filename, ckpt_path = "model_ckpts", verbose = False)

In [None]:
# Replay the history stored in `metrics`, one line per recorded epoch.
#  - Epochs are numbered from 1 so the lines match the progress log printed during training.
#  - The leading timestamp is the time of printing, not the time the epoch was trained.
history = zip(metrics['loss_trn'], metrics['acc_trn'], metrics['loss_val'], metrics['acc_val'])
for epoch_no, (trn_loss, trn_acc, val_loss, val_acc) in enumerate(history, start=1):
    print(f" {datetime.now().strftime('%X')} | Ep: {epoch_no:3d}/{end_epoch:4d} | Trn loss: {trn_loss:9.6f} - Acc: {trn_acc:.4f} |"
          f" Val loss: {val_loss:9.6f} - Acc: {val_acc:.4f} | ")