# Automated Brute-Force Training and Evaluation Pipeline for Datasets and Models - by Selman Tabet @ https://selman.io/

In [1]:
import os
import time
import socket

TEMP_DIR = "tmp"

### Environment Setup

In [2]:
print("Hostname: ", socket.gethostname())
try: # for CUDA enviroment
    os.system("nvidia-smi")
except:
    pass

Hostname:  Chaos


### Importing Libraries

In [3]:
# Data processing libraries
import numpy as np
from itertools import combinations # For brute force combinatoric search
import json # For saving and loading training results
import argparse # For command line arguments

# Tensorflow-Keras ML libraries
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, GlobalAveragePooling2D, Input
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model # To plot model architecture

from IPython import get_ipython # To check if code is running in Jupyter notebook
import importlib.util # To import config module from str
from pprint import pprint # To show config

# Custom helper libraries
from notebook_cfg import * # Default parameters
from utils.img_processing import enforce_image_params
from utils.dataset_processors import * # Dataset and generator processing functions
from utils.plot_functions import * # Plotting functions
from utils.evaluator import * # Complete evaluation program

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### Device check

In [4]:
cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
print(f"CUDA_VISIBLE_DEVICES: {cuda_visible_devices}")
print(tf.config.get_visible_devices())
print(tf.config.list_physical_devices('GPU'))

CUDA_VISIBLE_DEVICES: None
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]
[]


### Parse arguments from command line

In [5]:
# Detect if running in a Jupyter notebook
# Generated using GPT-4o. Prompt: "Detect if running in a Jupyter notebook"
def in_notebook():
    try:
        shell = get_ipython().__class__.__name__
        if shell == 'ZMQInteractiveShell':
            return True   # Jupyter notebook or qtconsole
        else:
            return False  # Other type (terminal, etc.)
    except NameError:
        return False      # Probably standard Python interpreter
    
from_py = False
parser = argparse.ArgumentParser(
    description="Parse command line arguments")
parser.add_argument('--from-py-cfg', type=str,
                    help='Path to the config Python file')
if not in_notebook():
    args = parser.parse_args()
    config_file_path = args.from_py_cfg
    print(f"Python Config Path: {config_file_path}")
else:
    config_file_path = False

if config_file_path:
    spec = importlib.util.spec_from_file_location("config_module", config_file_path)
    config_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config_module)
    config = config_module.cfg
    print("Loaded config from Python file:")
    pprint(config)
    # Datasets, models, and hyperparameters are mandatory and must be processed now.
    training_datasets = config.get('datasets', {})
    full_test_dir = config.get('test')
    base_models = config.get('keras_models', [])
    custom_models = config.get('custom_models', [])
    hyperparameters = config.get('hyperparameters')
    default_hyperparameters = default_cfg.get('hyperparameters', {})
    if hyperparameters is None or len(hyperparameters) == 0:
        print("No training hyperparameters defined in config, using defaults.")
        hyperparameters = default_hyperparameters
    else:
        for key, value in default_hyperparameters.items():
            if key not in hyperparameters:
                print(f"Missing hyperparameter - falling back to default {key}:{default_hyperparameters[key]}")
                hyperparameters[key] = default_hyperparameters[key]
    from_py = True # Successfully completed the import
else:
    print("No Python config file specified, using default (notebook) config.")
    config = default_cfg
    training_datasets = config.get('datasets', {})
    base_models = config.get('keras_models', [])
    custom_models = config.get('custom_models', [])
    hyperparameters = config.get('hyperparameters', {"epochs": 50, "batch_size": 32})
    full_test_dir = config.get('test')

if training_datasets is None or len(training_datasets) == 0:
    raise ValueError("No train datasets defined in config.")

if base_models is None or len(base_models) == 0:
    if custom_models is None or len(custom_models) == 0:
        raise ValueError("No models defined in config.")

No Python config file specified, using default (notebook) config.


### Parsing parameters

In [6]:
train_dirs = [training_datasets[ds].get('train') for ds in training_datasets]
test_dirs = [training_datasets[ds].get('test') for ds in training_datasets]
val_dirs = [training_datasets[ds].get('val') for ds in training_datasets]

all_dirs = train_dirs + test_dirs + val_dirs + [full_test_dir]
all_dirs = [d for d in all_dirs if d is not None] # Remove None values

# Combine base_models and custom_models
all_models = base_models + custom_models
# Create a list to keep track of which models are custom
is_custom_model = [False] * len(base_models) + [True] * len(custom_models)

### Setting parameters

In [7]:
if from_py:
    epochs = hyperparameters.get('epochs') # Guaranteed to be present
    batch_size = hyperparameters.get('batch_size') # Guaranteed to be present
    img_height = config.get('image_height', default_cfg.get('image_height'))
    img_width = config.get('image_width', default_cfg.get('image_width'))
    optimizer_fn = config.get('optimizer', default_cfg.get('optimizer'))
    loss_fn = config.get('loss', default_cfg.get('loss'))
    callbacks_list = config.get('callbacks', default_cfg.get('callbacks'))
    metrics_list = config.get('metrics', default_cfg.get('metrics'))
    enforce_image_size = config.get('enforce_image_settings', default_cfg.get('enforce_image_settings'))
    val_size = config.get('val_size', default_cfg.get('val_size'))
else:
    epochs = hyperparameters.get('epochs', 50)
    batch_size = hyperparameters.get('batch_size', 32)
    img_height = default_cfg.get('image_height', 224)
    img_width = default_cfg.get('image_width', 224)
    optimizer_fn = default_cfg.get('optimizer', 'adam')
    loss_fn = default_cfg.get('loss', 'binary_crossentropy')
    callbacks_list = default_cfg.get('callbacks', [])
    metrics_list = default_cfg.get('metrics', ['accuracy'])
    enforce_image_size = default_cfg.get('enforce_image_settings', False)
    val_size = default_cfg.get('val_size', 0.2)


### Enforce defined resolution and colour mode

In [8]:
if enforce_image_size:
    for directory in all_dirs:
        print(f"Adjusting image properties in {directory}")
        enforce_image_params(directory, target_size=(img_width, img_height))

Adjusting image properties in datasets\dataset_1\train
Adjusting image properties in datasets\dataset_2\Training
Adjusting image properties in datasets\dataset_1\test
Adjusting image properties in datasets\dataset_2\Testing
Adjusting image properties in datasets\dataset_1\val
Adjusting image properties in datasets\dataset_3


### Generate training and validation datasets

In [9]:
dataset_names = []
train_datasets = [] # [ (dataset_1_train, dataset_2_train), ... ]
train_sizes = [] # [ (dataset_1_train_size, dataset_2_train_size), ... ]
val_datasets = [] # [ (dataset_1_val, dataset_2_val), ... ]
val_sizes = [] # [ (dataset_1_val_size, dataset_2_val_size), ... ]
train_counts = [] # [ (dataset_1_train_counts, dataset_2_train_counts), ... ]
val_counts = [] # [ (dataset_1_val_counts, dataset_2_val_counts), ... ]

for d in training_datasets:
    print(f"Processing: {d}")
    train_dir = training_datasets[d].get('train')
    augment = training_datasets[d].get('augment', True)
    print("Augmenting" if augment else "Not augmenting", d)
    # Apply original and augmented data generators for training
    print("Creating generators for training")
    train_generator, augmented_train_generator = create_generators(train_dir, batch_size=batch_size, augment=augment, img_width=img_width, img_height=img_height)
    train_samples = samples_from_generators([train_generator, augmented_train_generator])
    train_count_dict = class_counts_from_generators(train_generator, augmented_train_generator)
    train_dataset = generators_to_dataset([train_generator, augmented_train_generator], batch_size=batch_size, img_height=img_height, img_width=img_width)
    # Apply original and augmented data generators for validation
    if "val" in training_datasets[d]:
        val_dir = training_datasets[d]['val']
        print("Creating generators for validation")
        val_generator, augmented_val_generator = create_generators(val_dir, batch_size=batch_size, augment=augment, shuffle=False, img_width=img_width, img_height=img_height)
        val_samples = samples_from_generators([val_generator, augmented_val_generator])
        val_count_dict = class_counts_from_generators(val_generator, augmented_val_generator)
        val_dataset = generators_to_dataset([train_generator, augmented_train_generator], batch_size=batch_size, img_height=img_height, img_width=img_width)
    else:
        print("No validation set, splitting training set.")
        train_dataset, val_dataset, train_samples, val_samples = val_split(train_dataset, train_samples, val_size=val_size)
        val_generator, augmented_val_generator = None, None
        val_count_dict = {k: 0 for k in train_count_dict.keys()}
    
    # Calculate the number of samples for training and validation
    train_sizes.append(train_samples)
    val_sizes.append(val_samples)
    
    train_counts.append(train_count_dict)
    val_counts.append(val_count_dict)
    train_datasets.append(train_dataset)
    val_datasets.append(val_dataset)
    dataset_names.append(d)
    
# Ensure that the lengths are consistent across the board before continuing
assert len(train_sizes) == len(train_datasets) == len(val_sizes) == len(val_datasets) == len(val_counts) == len(train_counts) == len(dataset_names), "Dataset lengths are inconsistent."


Processing: The Wildfire Dataset
Augmenting The Wildfire Dataset
Creating generators for training
Found 1887 images belonging to 2 classes.
Found 1887 images belonging to 2 classes.
--------------------
Number of samples in generator: 1887
Number of classes: 2
--------------------
Class indices: {'fire': 0, 'nofire': 1}
Class names: ['fire', 'nofire']
Dataset Class Counts:
fire: 730
nofire: 1157

Augmented Dataset Class Counts:
fire: 730
nofire: 1157


Combined Dataset Class Counts:
fire: 1460
nofire: 2314
--------------------
Creating generators for validation
Found 402 images belonging to 2 classes.
Found 402 images belonging to 2 classes.
--------------------
Number of samples in generator: 402
Number of classes: 2
--------------------
Class indices: {'fire': 0, 'nofire': 1}
Class names: ['fire', 'nofire']
Dataset Class Counts:
fire: 156
nofire: 246

Augmented Dataset Class Counts:
fire: 156
nofire: 246


Combined Dataset Class Counts:
fire: 312
nofire: 492
--------------------
Proc

### Brute Force Combinatorial Search

In [10]:
dataset_combos = [] # [(0,), (1,), (0, 1), ...] where 0, 1 are the indices of the datasets within their respective lists
for r in range(1, len(dataset_names) + 1):
    dataset_combos.extend(combinations(range(len(dataset_names)), r))
    
combined_training_datasets = []
combined_val_datasets = []
combined_dataset_names = []
steps_per_epoch_list = []
validation_steps_list = []
train_counts_list = []
val_counts_list = []

for combo in dataset_combos:
    training_dataset = None
    val_dataset = None
    train_size = None
    val_size = None
    train_count = None
    val_count = None
    for idx in combo:
        if training_dataset is None:
            training_dataset = train_datasets[idx]
            val_dataset = val_datasets[idx]
            train_size = train_sizes[idx]
            val_size = val_sizes[idx]
            train_count = train_counts[idx]
            val_count = val_counts[idx]
        else:
            training_dataset = training_dataset.concatenate(train_datasets[idx])
            val_dataset = val_dataset.concatenate(val_datasets[idx])
            train_size += train_sizes[idx]
            val_size += val_sizes[idx]
            train_count = {k: train_count.get(k, 0) + train_counts[idx].get(k, 0) for k in set(train_count) | set(train_counts[idx])}
            val_count = {k: val_count.get(k, 0) + val_counts[idx].get(k, 0) for k in set(val_count) | set(val_counts[idx])}
        train_count = {k: int(v) for k, v in train_count.items()}
        val_count = {k: int(v) for k, v in val_count.items()}

    combined_dataset_names.append("_".join([dataset_names[idx] for idx in combo]))
    combined_training_datasets.append(training_dataset)
    combined_val_datasets.append(val_dataset)
    steps_per_epoch_list.append(train_size // batch_size)
    validation_steps_list.append(val_size // batch_size)
    train_counts_list.append(train_count)
    val_counts_list.append(val_count)

    training_params = zip(combined_dataset_names, combined_training_datasets, combined_val_datasets, steps_per_epoch_list, validation_steps_list, train_counts_list, val_counts_list)

### Generate the test dataset

In [11]:
if full_test_dir is None:
    test_generators = []
    print("No target test directory provided, merging all tests from provided datasets if available.")
    for d in test_dirs:
        if d is not None:
            test_generators.append(create_generators(d, batch_size=batch_size, augment=False, shuffle=False, img_height=img_height, img_width=img_width)[0]) # No augmentation/shuffle for testing
    if len(test_generators) == 0:
        raise ValueError("No tests found in the provided datasets.")
    true_labels = np.concatenate([gen.classes for gen in test_generators])
    test_dataset = generators_to_dataset(test_generators, batch_size=batch_size)
    test_steps = sum([gen.samples for gen in test_generators]) // batch_size
    print("Test Dataset Class Counts:")
    for gen in test_generators:
        print("Class indices:", gen.class_indices)
        for class_name, class_index in gen.class_indices.items():
            print(f"{class_name}: {sum(gen.classes == class_index)}")
    print("\n")
    
else:
    test_generator, augmented_test_generator = create_generators(full_test_dir, batch_size=batch_size, augment=False, shuffle=False, img_height=img_height, img_width=img_width) # No augmentation/shuffle for testing
    test_dataset = create_dataset(test_generator, batch_size=batch_size, img_height=img_height, img_width=img_width)
    test_steps = test_generator.samples // batch_size
    true_labels = test_generator.classes
    print("Class indices:", test_generator.class_indices)
    print("\n")
    print("Test Dataset Class Counts:")
    for class_name, class_index in test_generator.class_indices.items():
        print(f"{class_name}: {sum(test_generator.classes == class_index)}")
    print("\n")

true_labels = true_labels[: (len(true_labels) // batch_size) * batch_size] # Ensure that the true labels are divisible by the batch size to avoid size mismatch with predictions.

Found 999 images belonging to 2 classes.
Class indices: {'fire': 0, 'nofire': 1}


Test Dataset Class Counts:
fire: 755
nofire: 244




### Model Preparation

In [12]:
def generate_model(bm, custom=False, to_dir=TEMP_DIR):
    if custom:
        model = bm
        model.compile(optimizer=optimizer_fn, loss=loss_fn, metrics=metrics_list)
        os.makedirs(os.path.join(to_dir, model.name), exist_ok=True)
        model.save_weights(os.path.join(to_dir, model.name, f"{model.name}_initial.weights.h5"))
        return model
    
    base_model = bm(
        include_top=False,
        weights='imagenet',
        input_shape=(img_height, img_width, 3)
    )
    base_model.trainable = False

    # Create the model
    inputs = Input(shape=(img_height, img_width, 3))
    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    outputs = Dense(1, activation='sigmoid')(x)

    model = Model(inputs, outputs, name=bm.__name__)
    model.compile(optimizer=optimizer_fn, loss=loss_fn, metrics=metrics_list)
    os.makedirs(os.path.join(to_dir, model.name), exist_ok=True)
    model.save_weights(os.path.join(to_dir, model.name, f"{model.name}_initial.weights.h5"))
    return model

### Training and evaluating the models and combinations

In [13]:
run_number = len([d for d in os.listdir("runs") if os.path.isdir(os.path.join("runs", d)) and d.startswith('run_')]) + 1
run_dir = os.path.join("runs", f"run_{run_number}")
os.makedirs(run_dir, exist_ok=True)

In [14]:
run_config = {
    "datasets": training_datasets,
    "val_size": val_size,
    "hyperparameters": hyperparameters,
    "test_dirs": test_dirs,
    "full_test": full_test_dir,
}

with open(os.path.join(run_dir, "run_config.json"), "w") as f:
    json.dump(run_config, f, indent=4)

In [15]:
training_results = {}
results_file = os.path.join(run_dir, 'training_results.json')

for base_model, custom_bool in zip(all_models, is_custom_model):
    model = generate_model(base_model, custom=custom_bool, to_dir=run_dir) # To display the model summary
    model.summary()
    model_dir = os.path.join(run_dir, model.name)
    training_results[model.name] = {}
    plot_model(model, show_shapes=True, show_layer_names=True, to_file=os.path.join(model_dir, f"{model.name}_architecture.png"))
    for dataset_id, train_dataset, val_dataset, steps_per_epoch, validation_steps, train_counts_dict, val_counts_dict in training_params:
        model.load_weights(os.path.join(run_dir, model.name, f"{model.name}_initial.weights.h5"))
        print(f"Training model: {model.name} on dataset: {dataset_id}")
        
        # Record the start time
        start_time = time.time()

        # Initial training of the model
        history = model.fit(
            train_dataset,
            epochs=epochs,
            steps_per_epoch=steps_per_epoch,
            validation_data=val_dataset,
            validation_steps=validation_steps,
            callbacks=callbacks_list
        )

        # Record the end time
        end_time = time.time()
        # Calculate the training time
        training_time = end_time - start_time
        print(f"Training time: {training_time:.2f} seconds")

        model_ds_dir = os.path.join(model_dir, dataset_id)
        os.makedirs(model_ds_dir, exist_ok=True)
        # Save the model
        model.save(os.path.join(model_ds_dir, f"{model.name}_{dataset_id}.keras"))

        ### Evaluation stage ###
        optimal_threshold = full_eval(model_ds_dir, history, model, dataset_id, test_dataset, true_labels, test_steps)
        
        training_results[model.name][dataset_id] = {
            'history': history.history,
            'training_time': training_time,
            'optimal_threshold': float(optimal_threshold),
            'train_dataset_size': steps_per_epoch * batch_size, # Includes augmented data (2x)
            'val_dataset_size': validation_steps * batch_size, # Includes augmented data (2x)
            'train_counts': train_counts_dict,
            'val_counts': val_counts_dict,
            'train_counts_total': sum(train_counts_dict.values()),
            'val_counts_total': sum(val_counts_dict.values()),
            "evaluation": model.evaluate(test_dataset, return_dict=True, steps=test_steps)
        }
        print("Training results:")
        pprint(training_results[model.name][dataset_id])
        # Save the training results to a file after each iteration
        with open(results_file, 'w') as f:
            json.dump(training_results, f, indent=4)
        
        model.compile(optimizer=optimizer_fn, loss=loss_fn, metrics=metrics_list) # Reset the model for the next iteration

Training model: VGG19 on dataset: The Wildfire Dataset
Epoch 1/80
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m280s[0m 2s/step - accuracy: 0.6217 - auc: 0.6734 - f1_score: 0.6672 - loss: 0.7895 - precision: 0.7198 - recall: 0.6285 - val_accuracy: 0.7475 - val_auc: 0.8410 - val_f1_score: 0.7787 - val_loss: 0.5965 - val_precision: 0.8190 - val_recall: 0.7556 - learning_rate: 0.0010
Epoch 2/80
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m306s[0m 3s/step - accuracy: 0.7568 - auc: 0.8269 - f1_score: 0.7988 - loss: 0.5315 - precision: 0.8006 - recall: 0.8002 - val_accuracy: 0.8225 - val_auc: 0.8984 - val_f1_score: 0.8476 - val_loss: 0.5177 - val_precision: 0.8699 - val_recall: 0.8344 - learning_rate: 0.0010
Epoch 3/80
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m310s[0m 3s/step - accuracy: 0.7590 - auc: 0.8396 - f1_score: 0.8043 - loss: 0.4943 - precision: 0.8015 - recall: 0.8121 - val_accuracy: 0.8250 - val_auc: 0.9241 - val_f1_score: 0.8433 - va

KeyboardInterrupt: 

In [15]:
print("Brute force loop completed!")
print(f"All models are now available at: {run_dir}")

Brute force loop completed!
All models and evaluations are available at: runs\run_11


In [None]:
eval_dir = os.path.join(run_dir, "evaluations")
os.makedirs(eval_dir, exist_ok=True)
rows = extract_evaluation_data(training_results)
df = pd.DataFrame(rows)
df.to_csv(os.path.join(eval_dir, "training_data.csv"), index=False)

In [None]:
plot_metric_chart(df, "Evaluation F1 Score", eval_dir)
plot_metric_chart(df, "Evaluation Accuracy", eval_dir)
plot_metric_chart(df, "Evaluation Precision", eval_dir)
plot_metric_chart(df, "Evaluation Recall", eval_dir)
plot_metric_chart(df, "Evaluation AUC", eval_dir)
plot_metric_chart(df, "Training Time", eval_dir)
plot_metric_chart(df, "Train Size", eval_dir)
plot_metric_chart(df, "Val Size", eval_dir)

plot_time_extrapolation(df, eval_dir)

print("All evaluations completed!")
print(f"Results are available at: {eval_dir}")