## Training Study: Assay Model Architecture Exploration

This notebook runs model training experiments on the dataset of assay 1806 to study the effect of convolutional layer depth (for several kernel sizes) and of the number and width of dense layers.
It prepares the dataset once and then systematically trains each architectural configuration, producing metrics for comparison.

In [None]:
from ipynb.fs.full.data_training import *  # Custom module for training models
from activity_constants import *           # Constants such as paths, filenames, etc.

In [None]:
########################################################################
# Clears the current Keras backend session to avoid clutter from old models.
# NOTE(review): `K` is not defined in this file; presumably it is the Keras
# backend module made available by one of the star imports - confirm.
########################################################################
K.clear_session()

In [None]:
########################################################################
# get_dataset_layer_study: Builds a model name and an output folder from
# the given architecture, prints both together with a trace of the
# training call, and then runs train_assay_model with those settings.
#
# The generated model name has the form
#   model_<id>#<id>...#<filters>_<kernel>...##<units>_<dropout>...
# i.e. the assay IDs joined by '#', one '#a_b' segment per convolutional
# layer, a bare '#' separator, then one '#a_b' segment per dense layer.
#
# Inputs:
#   - p_assay: List of assay IDs (each one is converted with str()).
#   - p_output_subfolder: Subfolder of model_folder for saving results;
#     an empty string or None saves directly into model_folder.
#   - dataset: Train/validation/test data, forwarded unchanged to
#     train_assay_model.
#   - p_epocs: Number of training epochs.
#   - p_conv_extr: List of convolutional layers [filters, kernel_size].
#   - p_dense: List of dense layers [units, dropout].
#   - p_metrics: Evaluation metrics. When omitted (None), a fresh
#     ['acc', precision, recall, f1] list is built for this call.
#
# Returns: None.
########################################################################
def get_dataset_layer_study(p_assay, p_output_subfolder, dataset, p_epocs,
                            p_conv_extr, p_dense, p_metrics=None):
    # The default list is created per call rather than in the signature so
    # that no single list object is shared between calls (it is handed to
    # train_assay_model, whose treatment of it is not visible here).
    if p_metrics is None:
        p_metrics = ['acc', precision, recall, f1]

    assay_codes = '#'.join(str(assay) for assay in p_assay)
    conv_part = ''.join('#{0}_{1}'.format(conv_extr[0], conv_extr[1])
                        for conv_extr in p_conv_extr)
    dense_part = ''.join('#{0}_{1}'.format(dense[0], dense[1])
                         for dense in p_dense)
    # The lone '#' between the two parts yields the '##' that separates
    # the convolutional segments from the dense segments in the name.
    assay_item_name = 'model_' + assay_codes + conv_part + '#' + dense_part

    print('Creating model {0}'.format(assay_item_name))

    output_folder = model_folder
    if p_output_subfolder:
        output_folder = output_folder + '/' + p_output_subfolder

    print('Output folder {0}'.format(output_folder))

    # Trace of the call made below. `dataset` is left out of the printed
    # argument list (presumably because of its size).
    print('''train_assay_model({}
                    , {}
                    , {}
                    , {}
                    , {}
                    , {}
                    , {}
                    , {}
                    , {}
                     )'''.format(data_set_folder, p_assay, output_folder, 1,
                                 p_epocs, p_metrics, assay_item_name,
                                 p_conv_extr, p_dense))

    # The literal 1 is the fifth positional argument of train_assay_model;
    # its meaning is defined there and is not visible in this file.
    train_assay_model(data_set_folder,
                      p_assay,
                      output_folder,
                      dataset,
                      1,
                      p_epochs=p_epocs,
                      p_metrix_list=p_metrics,
                      p_assay_item_name=assay_item_name,
                      p_conv_extr=p_conv_extr,
                      p_dense=p_dense)
    


In [None]:
########################################################################
# get_dataset_layer_study_standard: Trains four models that differ only
# in convolutional depth: 1 to 4 layers taken in order from the filter
# counts 16, 32, 64, 128. Every conv layer uses the same kernel size and
# every model uses the same single dense layer [128, 0].
#
# Inputs:
#   - assay: List of assay IDs, forwarded as p_assay.
#   - p_output_subfolder: Subfolder name for output.
#   - dataset: Training/validation/test data, forwarded unchanged.
#   - p_epocs: Number of training epochs.
#   - p_filter_size: Kernel size shared by all conv layers.
########################################################################
def get_dataset_layer_study_standard(assay, p_output_subfolder, dataset, p_epocs, p_filter_size):
    filter_counts = (16, 32, 64, 128)
    # Shallowest model first: each run adds the next filter count.
    for depth in range(1, len(filter_counts) + 1):
        conv_layers = [[filters, p_filter_size]
                       for filters in filter_counts[:depth]]
        get_dataset_layer_study(assay, p_output_subfolder, dataset, p_epocs,
                                p_conv_extr=conv_layers,
                                p_dense=[[128, 0]])
    

In [None]:
########################################################################
# Dataset loading and preparation: resolve the dataset folder for assay
# 1806, read the six train/validation/test arrays from it and bundle them
# into the `dataset` list that is handed to the study functions.
########################################################################
dataset_folder = get_assay_folder_dataset_name(
    data_set_folder,
    prefix_dataset_folder,
    [1806],
)
# NOTE(review): the trailing `1, None` arguments are passed positionally;
# their meaning is defined by get_data_from_files - confirm there.
(x_train, y_train,
 x_val, y_val,
 x_test, y_test) = get_data_from_files(
    dataset_folder,
    dataRegionEspFromX,
    dataRegionEspFromY,
    dataRegionEspNumColumns,
    dataRegionEspNumRows,
    1,
    None,
)
dataset = [x_train, y_train, x_val, y_val, x_test, y_test]

In [None]:
########################################################################
# Convolutional layer depth study (study_layer): select the study's
# subfolder under model_folder and pass it to
# create_global_overview_file before any model is trained.
########################################################################
study_name = 'study_layer'
global_overview_folder = '/'.join((model_folder, study_name))
create_global_overview_file(global_overview_folder)

In [None]:
# Train the conv-depth variants once per kernel size. The output subfolder
# is whatever `study_name` is bound to at module level when this cell runs
# ('study_layer' when the cells are executed top to bottom).
for filter_size in (3, 5, 7, 11, 13, 15):
    get_dataset_layer_study_standard(
        assay=[1806],
        p_output_subfolder=study_name,
        dataset=dataset,
        p_epocs=default_data_training_num_epocs,
        p_filter_size=filter_size,
    )

In [None]:
########################################################################
# get_dataset_layer_study_dense: Trains three models that share the
# convolutional structure [[16, 7], [32, 7]] and differ only in the
# number of dense layers (two, three, then one), each dense layer being
# [p_neurons, 0].
#
# Inputs:
#   - assay: List of assay IDs, forwarded as p_assay.
#   - p_output_subfolder: Output folder name.
#   - dataset: Full dataset, forwarded unchanged.
#   - p_epocs: Number of training epochs.
#   - p_neurons: Number of neurons in every dense layer.
########################################################################
def get_dataset_layer_study_dense(assay, p_output_subfolder, dataset, p_epocs, p_neurons):
    # Runs are launched in this exact order: 2, 3, then 1 dense layers.
    for dense_depth in (2, 3, 1):
        dense_layers = [[p_neurons, 0] for _ in range(dense_depth)]
        get_dataset_layer_study(assay, p_output_subfolder, dataset, p_epocs,
                                p_conv_extr=[[16, 7], [32, 7]],
                                p_dense=dense_layers)


In [None]:
########################################################################
# Dense layer study (study_dense): rebind study_name to this study's
# subfolder under model_folder and pass the resulting path to
# create_global_overview_file before any model is trained.
########################################################################
study_name = 'study_dense'
global_overview_folder = '/'.join((model_folder, study_name))
create_global_overview_file(global_overview_folder)

In [None]:
# Train the dense-layer variants once per neuron count. The output
# subfolder is whatever `study_name` is bound to at module level when this
# cell runs ('study_dense' when the cells are executed top to bottom).
for neurons in (16, 32, 64, 128, 256, 512):
    get_dataset_layer_study_dense(
        assay=[1806],
        p_output_subfolder=study_name,
        dataset=dataset,
        p_epocs=default_data_training_num_epocs,
        p_neurons=neurons,
    )