# Automated Brute-Force Training and Evaluation Pipeline for Datasets and Models - by Selman Tabet @ https://selman.io/

### Importing Libraries

In [1]:
# Data processing libraries
import numpy as np
from itertools import combinations # For brute force combinatoric search
import json # For saving and loading training results
import argparse # For command line arguments
import os
import time

# Tensorflow-Keras ML libraries
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, GlobalAveragePooling2D, Input
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model # To plot model architecture

from IPython import get_ipython # To check if code is running in Jupyter notebook
import importlib.util # To import config module from str
from pprint import pprint # To show config

# Custom helper libraries
from utils.img_processing import enforce_image_params
from utils.dataset_processors import * # Dataset and generator processing functions
from utils.plot_functions import * # Plotting functions
from utils.evaluator import * # Complete evaluation program
from utils.initializer import * # Set temp path and other initializations

Hostname:  Chaos
CUDA_VISIBLE_DEVICES: None
TF All Devices:  [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]
TF GPUs: []


## Specify pipeline parameters here
The configuration object is the most important part of the pipeline. It contains all the parameters that the pipeline will use to train and evaluate the models. In this notebook the configuration object is a dictionary that MUST be named ***'default_cfg'***; in an external Python config file passed with `--from-py-cfg`, the dictionary MUST be named ***'cfg'***. The configuration object must have the following structure:
```python
{
    'datasets': {
        "dataset_name1": {
            'train': path str to the training dataset ***(required)***,
            'val': path str to the validation dataset (optional, recommended),
            'test': path str to the test dataset (optional),
            'augment': bool, whether to augment this dataset's training images (optional, default=True),
        },
        "dataset_name2": {}, # Keep adding datasets as needed
        "dataset_name3": {},
        ...
    },
    'test': path str to the test dataset (optional, recommended),
    'val_size': float fraction of a training dataset to hold out for validation when that dataset has no 'val' path (optional, default=0.2),
    'keras_models': [list of keras.applications base model constructors, e.g. MobileNetV2 (not instances; the pipeline instantiates them)],
    'custom_models': [list of instances of custom models, to be compiled without modification],
    'hyperparameters': {
        'batch_size': ***(required)*** int,
        'epochs': ***(required)*** int,
    },
    'optimizer': instance of keras.optimizers.Optimizer or str ***(required)***,
    'loss': instance of keras.losses.Loss or str ***(required)***,
    'image_width': int ***(required)***,
    'image_height': int ***(required)***,
    'metrics': [list of metric functions] ***(required)***,
    'callbacks': [list of instances of keras.callbacks.Callback] (optional),
    'enforce_image_settings': bool to force RGB color mode and image sizes according to above specs (optional)
}

```
The "test" configuration key is optional and is used to specify a test dataset that will be used to evaluate all models after training. If the "test" key is not provided, the pipeline will take the "test" paths provided under each dataset, then combine them to form a consolidated test set to evaluate all models with.

**Note: Ensure that the dataset classes are in separate folders and that the folder names are the class names. The pipeline will automatically detect the classes from the dataset paths.**

In [3]:
from keras.metrics import Precision, Recall, AUC
from tensorflow.keras.applications import MobileNetV3Small, MobileNetV2, VGG19, ResNet50V2, Xception, DenseNet121
from custom_metrics import f1_score
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
# WildfireNet model, for comparison to other SOTA models in dissertation.
from wildfirenet import create_wildfire_model

def _dataset_path(*parts):
    """Return a path located below the local "datasets" directory."""
    return os.path.join("datasets", *parts)


# Training corpora keyed by display name. Every entry needs a "train" path;
# "val" and "test" are optional and handled by the cells further down.
DATASETS = {
    "The Wildfire Dataset": {
        "train": _dataset_path("dataset_1", "train"),
        "test": _dataset_path("dataset_1", "test"),
        "val": _dataset_path("dataset_1", "val"),
    },
    "DeepFire": {
        "train": _dataset_path("dataset_2", "Training"),
        "test": _dataset_path("dataset_2", "Testing"),
    },
    "FIRE": {
        "train": _dataset_path("dataset_3"),
    },
}

# Custom models to use.
# NOTE(review): 224x224 is repeated here and in image_width/image_height
# below - keep the two in sync when changing the input resolution.
_custom_models = [create_wildfire_model(224, 224)]

# Metrics functions, directly handed to model.compile
_metrics = [
    'accuracy',
    Precision(name="precision"),
    Recall(name="recall"),
    AUC(name="auc"),
    f1_score,
]

# Callback functions, directly handed to model.fit
_callbacks = [
    EarlyStopping(monitor='val_loss', patience=5,
                  restore_best_weights=True),
    ModelCheckpoint(filepath=os.path.join("tmp", 'temp_model.keras'),
                    monitor='val_loss', save_best_only=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                      patience=3, verbose=1),
]

default_cfg = dict(
    datasets=DATASETS,  # The datasets to use
    # This overrides the test datasets stored under "datasets"
    test=_dataset_path("d4_test"),
    # The size of the validation dataset if splitting is needed
    val_size=0.2,
    keras_models=[MobileNetV3Small, MobileNetV2, VGG19, ResNet50V2, Xception, DenseNet121],
    custom_models=_custom_models,
    hyperparameters=dict(batch_size=32, epochs=80),
    optimizer="adam",
    loss="binary_crossentropy",
    image_width=224,
    image_height=224,
    metrics=_metrics,
    callbacks=_callbacks,
    # If True, the image sizes and RGB colour mode will be enforced on all images
    enforce_image_settings=True,
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### Parse arguments from command line

In [4]:
# Detect if script is running in a Jupyter notebook
# Generated using GPT-4o. Prompt: "Detect if running in a Jupyter notebook"
def in_notebook():
    """Report whether the code is running inside a Jupyter kernel.

    Returns:
        bool: True only when the active IPython shell class is named
        'ZMQInteractiveShell' (Jupyter notebook or qtconsole). Any other
        shell, or `get_ipython` not being defined at all, yields False.
    """
    try:
        shell_name = get_ipython().__class__.__name__
    except NameError:
        # get_ipython is not defined: probably a standard Python interpreter
        return False
    # Any other shell type (terminal, etc.) is not treated as a notebook
    return shell_name == 'ZMQInteractiveShell'
    
# Resolve which configuration drives the run: an external Python file given
# with --from-py-cfg (command line only), or the notebook's default_cfg.
from_py = False # Flag to check if config was loaded from provided Python file
parser = argparse.ArgumentParser(
    description="Parse command line arguments")
parser.add_argument('--from-py-cfg', type=str,
                    help='Path to the config Python file')
if not in_notebook():
    args = parser.parse_args()
    config_file_path = args.from_py_cfg
    print(f"Python Config Path: {config_file_path}")
else:
    # Inside a notebook the command line is not parsed; default_cfg is used.
    config_file_path = False

if config_file_path: # Load config from Python file
    spec = importlib.util.spec_from_file_location("config_module", config_file_path)
    if spec is None or spec.loader is None:
        # No loader could be determined for this path (e.g. not a Python source file)
        raise ValueError(f"Cannot load a Python config module from: {config_file_path}")
    config_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config_module)
    if not hasattr(config_module, "cfg"):
        raise ValueError(f"Config file {config_file_path} must define a dictionary named 'cfg'.")
    config = config_module.cfg
    print("Loaded config from Python file:")
    pprint(config)
    # Hyperparameters are mandatory and must be processed now.
    hyperparameters = config.get('hyperparameters')
    default_hyperparameters = default_cfg.get('hyperparameters', {})
    if hyperparameters is None or len(hyperparameters) == 0:
        print("No training hyperparameters defined in config, using defaults defined above.")
        hyperparameters = default_hyperparameters # Use default hyperparameters
    else: # Check for missing hyperparameters and fall back to defaults
        for key, value in default_hyperparameters.items():
            if key not in hyperparameters:
                print(f"Missing hyperparameter - falling back to default {key}:{default_hyperparameters[key]}")
                hyperparameters[key] = value
    from_py = True # Successfully completed the config import
else:
    print("No Python config file specified, using default (notebook) config.")
    config = default_cfg
    hyperparameters = config.get('hyperparameters', {"epochs": 50, "batch_size": 32})

# Datasets and models are mandatory and are read the same way for both sources.
training_datasets = config.get('datasets', {})
full_test_dir = config.get('test')
# Normalise to lists: an explicit None (or a tuple) would otherwise pass the
# check below and then break the later `base_models + custom_models`.
base_models = list(config.get('keras_models') or [])
custom_models = list(config.get('custom_models') or [])

# Check if training datasets are defined
if training_datasets is None or len(training_datasets) == 0:
    raise ValueError("No train datasets defined in config.")
# Check if either base_models or custom_models are defined
if len(base_models) == 0 and len(custom_models) == 0:
    raise ValueError("No models defined in config.")

No Python config file specified, using default (notebook) config.


### Parsing parameters

In [5]:
# One entry per configured dataset, in config order; a missing key yields None
dataset_entries = list(training_datasets.values())
train_dirs = [entry.get('train') for entry in dataset_entries]
test_dirs = [entry.get('test') for entry in dataset_entries]
val_dirs = [entry.get('val') for entry in dataset_entries]

# Every directory that actually exists in the config, for image params enforcement
all_dirs = [
    directory
    for directory in (*train_dirs, *test_dirs, *val_dirs, full_test_dir)
    if directory is not None
]

# Keras base models first, custom models after, to be looped over in training
all_models = base_models + custom_models
# Parallel flags: True for the positions that hold custom models
is_custom_model = [position >= len(base_models) for position in range(len(all_models))]

### Setting parameters

In [6]:
def _setting(key, literal_fallback):
    """Look up one pipeline setting.

    With an external Python config, the value comes from `config` and falls
    back to `default_cfg`. Otherwise it comes from `default_cfg` and falls
    back to `literal_fallback`.
    """
    if from_py:
        return config.get(key, default_cfg.get(key))
    return default_cfg.get(key, literal_fallback)


if from_py:
    # Both keys were back-filled from the defaults while loading the config
    epochs = hyperparameters.get('epochs')
    batch_size = hyperparameters.get('batch_size')
else:
    epochs = hyperparameters.get('epochs', 50)
    batch_size = hyperparameters.get('batch_size', 32)

img_height = _setting('image_height', 224)
img_width = _setting('image_width', 224)
optimizer_fn = _setting('optimizer', 'adam')
loss_fn = _setting('loss', 'binary_crossentropy')
callbacks_list = _setting('callbacks', [])
metrics_list = _setting('metrics', ['accuracy'])
enforce_image_size = _setting('enforce_image_settings', False)
val_size = _setting('val_size', 0.2)


### Enforce defined resolution and colour mode

In [None]:
# Enforce image size and RGB colour mode for all images, but only when the
# config asks for it; otherwise there is nothing to iterate over.
dirs_to_adjust = all_dirs if enforce_image_size else []
for directory in dirs_to_adjust:
    print(f"Adjusting image properties in {directory}")
    enforce_image_params(directory, target_size=(img_width, img_height))

Adjusting image properties in datasets\dataset_1\train
Adjusting image properties in datasets\dataset_2\Training
Adjusting image properties in datasets\dataset_3
Adjusting image properties in datasets\dataset_1\test
Adjusting image properties in datasets\dataset_2\Testing
Adjusting image properties in datasets\dataset_1\val
Adjusting image properties in datasets\d4_test


### Create training and validation generators

In [None]:
# Build one training and one validation generator per configured dataset and
# record their sizes and per-class counts in parallel lists (same index = same dataset).
dataset_names = [] # [ "dataset_1", "dataset_2", ... ]
train_generators = [] # [ (dataset_1_train, dataset_2_train), ... ]
train_sizes = [] # [ (dataset_1_train_size, dataset_2_train_size), ... ]
val_generators = [] # [ (dataset_1_val, dataset_2_val), ... ]
val_sizes = [] # [ (dataset_1_val_size, dataset_2_val_size), ... ]
train_counts = [] # [ (dataset_1_train_counts, dataset_2_train_counts), ... ]
val_counts = [] # [ (dataset_1_val_counts, dataset_2_val_counts), ... ]

# Class name -> index mapping shared by every dataset; fixed by the first one
class_indices = None
# Arguments common to every generator created below
common_kwargs = dict(batch_size=batch_size, img_width=img_width, img_height=img_height)

for d in training_datasets:
    print(f"Processing: {d}")
    train_dir = training_datasets[d].get('train')
    val_dir = training_datasets[d].get('val')
    if train_dir is None:
        raise ValueError(f"Dataset '{d}' has no 'train' path defined.")
    augment = training_datasets[d].get('augment', True)
    print("Augmenting" if augment else "Not augmenting", d)
    # Apply original and augmented data generators for training
    print("Creating generators for training")
    if val_dir is not None: # Use separate validation dataset
        train_generator = create_generator(train_dir, augment=augment, **common_kwargs)
        val_generator = create_generator(val_dir, augment=False, shuffle=False, **common_kwargs)
    else: # Split the training dataset into training and validation
        train_generator, val_generator = create_split_generators(train_dir, val_size=val_size, augment=augment, **common_kwargs)

    # Datasets are merged and class-weighted later using a single mapping, so
    # every train/val generator must agree on class names and their indices.
    if class_indices is None:
        class_indices = train_generator.class_indices
    for split_name, generator in (("train", train_generator), ("val", val_generator)):
        if generator.class_indices != class_indices:
            raise ValueError(
                f"Class indices of the {split_name} split of '{d}' ({generator.class_indices}) "
                f"do not match those of the first dataset ({class_indices}); "
                "check the class folder names."
            )

    train_samples = train_generator.samples
    train_count_dict = class_counts_from_generator(train_generator)

    val_samples = val_generator.samples
    val_count_dict = class_counts_from_generator(val_generator)

    # Record the number of samples for training and validation
    train_sizes.append(train_samples)
    val_sizes.append(val_samples)

    train_counts.append(train_count_dict)
    val_counts.append(val_count_dict)
    train_generators.append(train_generator)
    val_generators.append(val_generator)
    dataset_names.append(d)

# Ensure that the lengths are consistent across the board before continuing
assert len(train_sizes) == len(train_generators) == len(val_sizes) == len(val_generators) == len(val_counts) == len(train_counts) == len(dataset_names), "Dataset lengths are inconsistent."


Processing: The Wildfire Dataset
Augmenting The Wildfire Dataset
Creating generators for training
Found 1887 images belonging to 2 classes.
Found 402 images belonging to 2 classes.
--------------------
Number of samples in generator: 1887
Number of classes: 2
--------------------
Class indices: {'fire': 0, 'nofire': 1}
Class names: ['fire', 'nofire']
Dataset Class Counts:
fire: 730
nofire: 1157
--------------------
--------------------
Number of samples in generator: 402
Number of classes: 2
--------------------
Class indices: {'fire': 0, 'nofire': 1}
Class names: ['fire', 'nofire']
Dataset Class Counts:
fire: 156
nofire: 246
--------------------
Processing: DeepFire
Augmenting DeepFire
Creating generators for training
Found 1216 images belonging to 2 classes.
Found 304 images belonging to 2 classes.
--------------------
Number of samples in generator: 1216
Number of classes: 2
--------------------
Class indices: {'fire': 0, 'nofire': 1}
Class names: ['fire', 'nofire']
Dataset Class Co

### Brute Force Combinatorial Search Space Definition

In [9]:
# Every non-empty subset of the datasets, smallest subsets first
dataset_combos = [] # [(0,), (1,), (0, 1), ...] where 0, 1 are the indices of the datasets within their respective lists
for r in range(1, len(dataset_names) + 1): # For all combination sizes
    # Generate all possible combinations of datasets for each size
    dataset_combos.extend(combinations(range(len(dataset_names)), r))


def _sum_class_counts(count_dicts):
    """Add per-class sample counts across datasets, returning plain Python ints."""
    totals = {}
    for counts in count_dicts:
        for class_name, count in counts.items():
            # int() strips NumPy integer types so the result is JSON-serialisable
            totals[class_name] = totals.get(class_name, 0) + int(count)
    return totals


combined_training_datasets = []
combined_val_datasets = []
combined_dataset_names = []
steps_per_epoch_list = []
validation_steps_list = []
train_counts_list = []
val_counts_list = []

for combo in dataset_combos:
    # Gather the members of this combination.
    # The totals use combo_* names on purpose: the global `val_size` is the
    # validation split fraction (read by the generator cell and saved in
    # run_config) and must not be overwritten with a sample count here.
    train_generators_list = [train_generators[idx] for idx in combo]
    val_generators_list = [val_generators[idx] for idx in combo]
    combo_train_size = sum(train_sizes[idx] for idx in combo)
    combo_val_size = sum(val_sizes[idx] for idx in combo)
    train_count = _sum_class_counts(train_counts[idx] for idx in combo)
    val_count = _sum_class_counts(val_counts[idx] for idx in combo)

    # Combine all accumulated generators for this combination into a single dataset
    training_dataset = generators_to_dataset(train_generators_list, batch_size=batch_size, img_height=img_height, img_width=img_width)
    val_dataset = generators_to_dataset(val_generators_list, batch_size=batch_size, img_height=img_height, img_width=img_width)
    # Append the combined datasets and other relevant parameters to their respective lists
    combined_dataset_names.append("_".join([dataset_names[idx] for idx in combo]))
    combined_training_datasets.append(training_dataset)
    combined_val_datasets.append(val_dataset)
    # Whole batches only: a trailing partial batch is not counted as a step
    steps_per_epoch_list.append(combo_train_size // batch_size)
    validation_steps_list.append(combo_val_size // batch_size)
    train_counts_list.append(train_count)
    val_counts_list.append(val_count)

# Zip all the lists together (once, after the loop) for easier unpacking in the training loop
training_params = list(zip(combined_dataset_names, combined_training_datasets, combined_val_datasets, steps_per_epoch_list, validation_steps_list, train_counts_list, val_counts_list))

### Generate the test dataset

In [10]:
# Build the test dataset used to evaluate every trained model: either the
# dedicated "test" directory from the config, or the merged per-dataset tests.
if full_test_dir is None:
    print("No target test directory provided, merging all tests from provided datasets if available.")
    test_generators = [
        # No augmentation/shuffle for testing
        create_generator(d, batch_size=batch_size, augment=False, shuffle=False, img_height=img_height, img_width=img_width)
        for d in test_dirs
        if d is not None
    ]
    if len(test_generators) == 0:
        raise ValueError("No tests found in the provided datasets.")

    test_steps = sum(gen.samples for gen in test_generators) // batch_size
    test_dataset = generators_to_dataset(test_generators, batch_size=batch_size, img_height=img_height, img_width=img_width)
else:
    test_generators = [create_generator(full_test_dir, batch_size=batch_size, augment=False, shuffle=False, img_height=img_height, img_width=img_width)] # No augmentation/shuffle for testing
    test_steps = test_generators[0].samples // batch_size
    test_dataset = create_dataset(test_generators[0], batch_size=batch_size, img_height=img_height, img_width=img_width)

# At this point test_generators is guaranteed to be non-empty. Every test
# generator (not just the first) must use the same class mapping as training.
for gen in test_generators:
    assert gen.class_indices == train_generators[0].class_indices, "Test and training class indices do not match, check the provided directories and their class names."

print("Test Dataset Class Counts:")
for gen in test_generators:
    print("Class indices:", gen.class_indices)
    for class_name, class_index in gen.class_indices.items():
        # Vectorised count of the samples labelled with this class index
        print(f"{class_name}: {np.count_nonzero(gen.classes == class_index)}")
print("\n")

Found 400 images belonging to 2 classes.
Test Dataset Class Counts:
Class indices: {'fire': 0, 'nofire': 1}
fire: 200
nofire: 200




### Model Preparation

In [11]:
def generate_model(bm, custom=False, to_dir=TEMP_DIR):
    """Build (or adopt), compile and snapshot a model for the training loop.

    Args:
        bm: When `custom` is False, a callable that builds a base model
            (called with include_top, weights and input_shape). When `custom`
            is True, an already constructed model that is used as is.
        custom: Whether `bm` is a ready-made custom model.
        to_dir: Directory under which `<model.name>/<model.name>_initial.weights.h5`
            is written.

    Returns:
        The compiled model, whose initial weights have been saved to disk.
    """
    if custom:
        # Custom models are compiled and saved as is
        model = bm
    else:
        input_shape = (img_height, img_width, 3)
        backbone = bm(
            include_top=False,
            weights='imagenet', # Use pre-trained weights for transfer learning
            input_shape=input_shape
        )
        backbone.trainable = False # Freeze the base model weights for transfer learning
        # NOTE(review): no backbone-specific preprocessing is applied here -
        # confirm the generators scale inputs the way each backbone expects.

        # Classification head on top of the frozen backbone
        inputs = Input(shape=input_shape)
        features = backbone(inputs, training=False)
        pooled = GlobalAveragePooling2D()(features) # Pooling layer for dimensionality reduction
        pooled = BatchNormalization()(pooled) # Batch normalization layer for stability
        pooled = Dropout(0.5)(pooled) # Regularization layer
        hidden = Dense(256, activation='relu')(pooled) # Trainable layer for this application
        hidden = BatchNormalization()(hidden) # Batch normalization layer
        hidden = Dropout(0.5)(hidden) # Regularization layer
        outputs = Dense(1, activation='sigmoid')(hidden) # Binary classification output

        model = Model(inputs, outputs, name=bm.__name__)

    # Shared tail: compile, then store the untrained weights so the caller can
    # restore them later.
    model.compile(optimizer=optimizer_fn, loss=loss_fn, metrics=metrics_list)
    weights_dir = os.path.join(to_dir, model.name)
    os.makedirs(weights_dir, exist_ok=True)
    model.save_weights(os.path.join(weights_dir, f"{model.name}_initial.weights.h5"))
    return model

### Training and evaluating the models and combinations

In [12]:
# Pick the next free run number and create its output directory.
# The root is created first so a fresh checkout does not fail on listdir.
os.makedirs("runs", exist_ok=True)
# Number past the highest existing run_<N> instead of counting directories:
# counting would reuse (and overwrite) an existing run if an earlier one was deleted.
existing_run_numbers = [
    int(d[len("run_"):])
    for d in os.listdir("runs")
    if d.startswith("run_") and d[len("run_"):].isdecimal() and os.path.isdir(os.path.join("runs", d))
]
run_number = max(existing_run_numbers, default=0) + 1
run_dir = os.path.join("runs", f"run_{run_number}")
os.makedirs(run_dir, exist_ok=True)

In [13]:
# Snapshot of the basic run information, written next to the results for reference.
# NOTE(review): val_size is read from the notebook namespace at this point -
# confirm no earlier cell has rebound it to something other than the split fraction.
run_config = dict(
    datasets=training_datasets,
    val_size=val_size,
    hyperparameters=hyperparameters,
    test_dirs=test_dirs,
    full_test=full_test_dir,
    number_of_models=len(all_models),
)

run_config_path = os.path.join(run_dir, "run_config.json")
with open(run_config_path, "w") as f:
    json.dump(run_config, f, indent=4)

In [None]:
# Train every model on every dataset combination, evaluate it on the test
# dataset, and persist the accumulated results to JSON after each run.
# Result layout: training_results[model name][dataset combination id] -> dict
training_results = {}
results_file = os.path.join(run_dir, 'training_results.json')

for base_model, custom_bool in zip(all_models, is_custom_model):
    model = generate_model(base_model, custom=custom_bool, to_dir=run_dir) # Generate the model and its initial weights
    model.summary()
    # generate_model has already created this directory for the initial weights
    model_dir = os.path.join(run_dir, model.name)
    training_results[model.name] = {} # Initialize the model results dictionary
    plot_model(model, show_shapes=True, show_layer_names=True, to_file=os.path.join(model_dir, f"{model.name}_architecture.png"))
    # Main training and evaluation loop - unpack the training parameters for each combination
    for dataset_id, train_dataset, val_dataset, steps_per_epoch, validation_steps, train_counts_dict, val_counts_dict in training_params:
        # Reload the initial weights for each dataset
        model.load_weights(os.path.join(run_dir, model.name, f"{model.name}_initial.weights.h5"))
        print(f"Training model: {model.name} on dataset: {dataset_id}")
        # Calculate class weights of the current dataset for class-weighted training
        # (class_indices is the notebook-level mapping set in the generator-creation cell)
        class_weights = class_weights_from_counts(train_counts_dict, class_indices=class_indices)
        print("Class weights:", class_weights)
        # Record the start time
        start_time = time.time()

        # Initial training of the model
        # NOTE(review): the same callbacks_list instances are passed to every fit
        # call - confirm each callback resets its internal state between runs.
        history = model.fit(
            train_dataset,
            epochs=epochs,
            steps_per_epoch=steps_per_epoch,
            validation_data=val_dataset,
            validation_steps=validation_steps,
            callbacks=callbacks_list,
            class_weight=class_weights
        )

        # Record the end time
        end_time = time.time()
        # Calculate the training time
        training_time = end_time - start_time
        print(f"Training time: {training_time:.2f} seconds")

        # One sub-directory per (model, dataset combination) pair
        model_ds_dir = os.path.join(model_dir, dataset_id)
        os.makedirs(model_ds_dir, exist_ok=True)
        # Save the model
        model.save(os.path.join(model_ds_dir, f"{model.name}_{dataset_id}.keras"))

        ###### Evaluation stage ######
        # full_evaluation is not defined in this notebook (presumably it comes from
        # the utils.evaluator wildcard import); its return value is cast to float below.
        optimal_threshold = full_evaluation(model_ds_dir, history, model, dataset_id, test_generators)
        # test_steps was floor-divided by batch_size when the test dataset was built
        evaluation = model.evaluate(test_dataset, return_dict=True, steps=test_steps)

        training_results[model.name][dataset_id] = {
            'history': history.history,
            'training_time': training_time,
            'optimal_threshold': float(optimal_threshold),
            # Sizes are rebuilt from the step counts, i.e. rounded down to whole batches;
            # the exact totals are the *_counts_total entries below
            'train_dataset_size': steps_per_epoch * batch_size,
            'val_dataset_size': validation_steps * batch_size,
            'train_counts': train_counts_dict,
            'val_counts': val_counts_dict,
            'train_counts_total': sum(train_counts_dict.values()),
            'val_counts_total': sum(val_counts_dict.values()),
            'class_weights': {k: float(v) for k, v in class_weights.items()}, # np.float64 to float typecast
            "evaluation": evaluation
        }
        print(f"Training results for {model.name} on {dataset_id}:")
        pprint(training_results[model.name][dataset_id]) # Print the results for this model and dataset
        # Save the training results to a file after each iteration
        # (the whole file is rewritten, so it always holds every result so far)
        with open(results_file, 'w') as f:
            json.dump(training_results, f, indent=4)
        
        model.compile(optimizer=optimizer_fn, loss=loss_fn, metrics=metrics_list) # Reset the model for the next iteration

Training model: MobileNetV3Small on dataset: The Wildfire Dataset
Class weights: {0: 1.2924657534246575, 1: 0.8154710458081245}
Epoch 1/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 246ms/step - accuracy: 0.5747 - auc: 0.6162 - f1_score: 0.6061 - loss: 0.8566 - precision: 0.7019 - recall: 0.5414 - val_accuracy: 0.4062 - val_auc: 0.6685 - val_f1_score: 0.0000e+00 - val_loss: 0.7055 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 0.0010
Epoch 2/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 203ms/step - accuracy: 0.6262 - auc: 0.6787 - f1_score: 0.6573 - loss: 0.8008 - precision: 0.7255 - recall: 0.6064 - val_accuracy: 0.6406 - val_auc: 0.7412 - val_f1_score: 0.6619 - val_loss: 0.6732 - val_precision: 0.6406 - val_recall: 1.0000 - learning_rate: 0.0010
Epoch 3/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 217ms/step - accuracy: 0.6639 - auc: 0.7186 - f1_score: 0.6939 - loss: 0.6924 - precision: 0.7692 

  f1_scores = 2 * (precision * recall) / (precision + recall)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 73ms/step - accuracy: 0.6812 - auc: 0.4946 - f1_score: 0.2414 - loss: 0.7166 - precision: 0.3628 - recall: 0.5286         
Training results:
{'class_weights': {0: 1.1595665171898355, 1: 0.8790368271954674},
 'evaluation': {'accuracy': 0.7369791865348816,
                'auc': 0.783953845500946,
                'f1_score': 0.5025008320808411,
                'loss': 0.5909507870674133,
                'precision': 0.7106382846832275,
                'recall': 0.8349999785423279},
 'history': {'accuracy': [0.6149088740348816,
                          0.6497395634651184,
                          0.6588541865348816,
                          0.6910807490348816,
                          0.6956380009651184,
     

  f1_scores = 2 * (precision * recall) / (precision + recall)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 72ms/step - accuracy: 0.2442 - auc: 0.5389 - f1_score: 0.2538 - loss: 0.7467 - precision: 0.2442 - recall: 0.6154             
Training results:
{'class_weights': {0: 0.8316831683168316, 1: 1.2537313432835822},
 'evaluation': {'accuracy': 0.5208333134651184,
                'auc': 0.8603940606117249,
                'f1_score': 0.5436508059501648,
                'loss': 0.6802977919578552,
                'precision': 0.5208333134651184,
                'recall': 1.0},
 'history': {'accuracy': [0.685019850730896,
                          0.75942462682724,
                          0.7698412537574768,
                          0.7708333134651184,
                          0.7678571343421936,
                  

Training model: MobileNetV2 on dataset: The Wildfire Dataset
Class weights: {0: 1.2924657534246575, 1: 0.8154710458081245}
Epoch 1/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 376ms/step - accuracy: 0.6694 - auc: 0.7518 - f1_score: 0.6981 - loss: 0.7704 - precision: 0.7560 - recall: 0.7244 - val_accuracy: 0.8047 - val_auc: 0.9057 - val_f1_score: 0.6216 - val_loss: 0.4090 - val_precision: 0.7703 - val_recall: 0.9561 - learning_rate: 0.0010
Epoch 2/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 324ms/step - accuracy: 0.7502 - auc: 0.8415 - f1_score: 0.7829 - loss: 0.5759 - precision: 0.8456 - recall: 0.7292 - val_accuracy: 0.8464 - val_auc: 0.9278 - val_f1_score: 0.6152 - val_loss: 0.3386 - val_precision: 0.8536 - val_recall: 0.8947 - learning_rate: 0.0010
Epoch 3/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 338ms/step - accuracy: 0.7843 - auc: 0.8650 - f1_score: 0.8124 - loss: 0.5155 - precision: 0.8420 - recall: 0.7871 

  f1_scores = 2 * (precision * recall) / (precision + recall)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 194ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 218ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 216ms/step
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 202ms/step - accuracy: 0.6591 - auc: 0.5254 - f1_score: 0.2506 - loss: 0.7159 - precision: 0.3646 - recall: 0.5556  
Training results:
{'class_weights': {0: 1.2924657534246575, 1: 0.8154710458081245},
 'evaluation': {'accuracy': 0.7682291865348816,
                'auc': 0.8524048924446106,
                'f1_score': 0.5395670533180237,
                'loss': 0.5180451273918152,
                'precision': 0.7211155295372009,
                'recall': 0.9049999713897705},
 'history': {'accuracy': [0.727909505367279,
                          0.7623922228813171,
                          0.7920258641242981,
                          0.7990301847457886,
                          0.8135775923728943,
        

Training model: VGG19 on dataset: The Wildfire Dataset
Class weights: {0: 1.2924657534246575, 1: 0.8154710458081245}
Epoch 1/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m330s[0m 6s/step - accuracy: 0.5407 - auc: 0.7001 - f1_score: 0.5812 - loss: 0.9877 - precision: 0.6889 - recall: 0.6575 - val_accuracy: 0.4661 - val_auc: 0.7379 - val_f1_score: 0.1097 - val_loss: 0.7053 - val_precision: 0.9600 - val_recall: 0.1053 - learning_rate: 0.0010
Epoch 2/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m327s[0m 6s/step - accuracy: 0.6456 - auc: 0.7102 - f1_score: 0.6777 - loss: 0.7485 - precision: 0.7611 - recall: 0.6181 - val_accuracy: 0.6979 - val_auc: 0.7823 - val_f1_score: 0.5957 - val_loss: 0.5984 - val_precision: 0.6867 - val_recall: 0.9035 - learning_rate: 0.0010
Epoch 3/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m318s[0m 6s/step - accuracy: 0.6709 - auc: 0.7386 - f1_score: 0.7050 - loss: 0.6897 - precision: 0.7753 - recall: 0.6542 - val_accura

Training model: ResNet50V2 on dataset: The Wildfire Dataset
Class weights: {0: 1.2924657534246575, 1: 0.8154710458081245}
Epoch 1/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 756ms/step - accuracy: 0.6326 - auc: 0.7236 - f1_score: 0.6681 - loss: 0.7981 - precision: 0.7293 - recall: 0.7146 - val_accuracy: 0.8411 - val_auc: 0.9255 - val_f1_score: 0.5816 - val_loss: 0.3621 - val_precision: 0.8848 - val_recall: 0.8421 - learning_rate: 0.0010
Epoch 2/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 718ms/step - accuracy: 0.7658 - auc: 0.8470 - f1_score: 0.7946 - loss: 0.5797 - precision: 0.8467 - recall: 0.7499 - val_accuracy: 0.8620 - val_auc: 0.9240 - val_f1_score: 0.6054 - val_loss: 0.3494 - val_precision: 0.8855 - val_recall: 0.8816 - learning_rate: 0.0010
Epoch 3/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 725ms/step - accuracy: 0.7770 - auc: 0.8620 - f1_score: 0.8073 - loss: 0.5352 - precision: 0.8615 - recall: 0.7645 -

Training model: Xception on dataset: The Wildfire Dataset
Class weights: {0: 1.2924657534246575, 1: 0.8154710458081245}
Epoch 1/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 1s/step - accuracy: 0.6665 - auc: 0.7975 - f1_score: 0.6996 - loss: 0.7899 - precision: 0.7881 - recall: 0.7319 - val_accuracy: 0.8151 - val_auc: 0.8890 - val_f1_score: 0.6227 - val_loss: 0.4319 - val_precision: 0.7875 - val_recall: 0.9430 - learning_rate: 0.0010
Epoch 2/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 1s/step - accuracy: 0.7335 - auc: 0.8187 - f1_score: 0.7727 - loss: 0.6381 - precision: 0.8159 - recall: 0.7351 - val_accuracy: 0.8281 - val_auc: 0.9049 - val_f1_score: 0.6093 - val_loss: 0.4197 - val_precision: 0.8403 - val_recall: 0.8772 - learning_rate: 0.0010
Epoch 3/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 1s/step - accuracy: 0.7779 - auc: 0.8598 - f1_score: 0.8087 - loss: 0.5066 - precision: 0.8496 - recall: 0.7750 - val_accura

Training model: DenseNet121 on dataset: The Wildfire Dataset
Class weights: {0: 1.2924657534246575, 1: 0.8154710458081245}
Epoch 1/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 954ms/step - accuracy: 0.6486 - auc: 0.7973 - f1_score: 0.6878 - loss: 0.7502 - precision: 0.7780 - recall: 0.7236 - val_accuracy: 0.8516 - val_auc: 0.9300 - val_f1_score: 0.6085 - val_loss: 0.3598 - val_precision: 0.8670 - val_recall: 0.8860 - learning_rate: 0.0010
Epoch 2/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 893ms/step - accuracy: 0.7716 - auc: 0.8549 - f1_score: 0.8005 - loss: 0.5317 - precision: 0.8511 - recall: 0.7604 - val_accuracy: 0.8620 - val_auc: 0.9325 - val_f1_score: 0.6144 - val_loss: 0.3391 - val_precision: 0.8723 - val_recall: 0.8991 - learning_rate: 0.0010
Epoch 3/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 891ms/step - accuracy: 0.7887 - auc: 0.8802 - f1_score: 0.8212 - loss: 0.4725 - precision: 0.8801 - recall: 0.7715 

Training model: WildfireNet on dataset: The Wildfire Dataset
Class weights: {0: 1.2924657534246575, 1: 0.8154710458081245}
Epoch 1/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 1s/step - accuracy: 0.5473 - auc: 0.7449 - f1_score: 0.5904 - loss: 0.9668 - precision: 0.7347 - recall: 0.6594 - val_accuracy: 0.5182 - val_auc: 0.5396 - val_f1_score: 0.3367 - val_loss: 0.7399 - val_precision: 0.6667 - val_recall: 0.3772 - learning_rate: 0.0010
Epoch 2/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 777ms/step - accuracy: 0.6772 - auc: 0.7374 - f1_score: 0.7183 - loss: 0.6723 - precision: 0.7758 - recall: 0.6714 - val_accuracy: 0.5469 - val_auc: 0.5846 - val_f1_score: 0.5189 - val_loss: 0.7066 - val_precision: 0.6007 - val_recall: 0.7061 - learning_rate: 0.0010
Epoch 3/80
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 803ms/step - accuracy: 0.6684 - auc: 0.7347 - f1_score: 0.7013 - loss: 0.6850 - precision: 0.7784 - recall: 0.6422 - v

  f1_scores = 2 * (precision * recall) / (precision + recall)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 141ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 134ms/step - accuracy: 0.2323 - auc: 0.1555 - f1_score: 0.2266 - loss: 1.4041 - precision: 0.2246 - recall: 0.5207  
Training results:
{'class_weights': {0: 1.2924657534246575, 1: 0.8154710458081245},
 'evaluation': {'accuracy': 0.4427083432674408,
                'auc': 0.25038042664527893,
                'f1_score': 0.48101910948753357,
                'loss': 1.0483750104904175,
                'precision': 0.4792899489402771,
                'recall': 0.8100000023841858},
 'history': {'accuracy': [0.6056034564971924,
                          0.6885775923728943,
                          0.6788793206214905,
                          0.7160560488700867,
                          0.7219827771186829,
     

In [15]:
# Announce the end of the brute-force loop and where the run's artifacts live.
print(
    "Brute force loop completed!",
    f"All models are now available at: {run_dir}",
    sep="\n",
)

Brute force loop completed!
All models are now available at: runs\run_30


### Aggregation Stage

In [None]:
# All aggregated artifacts for this run go under <run_dir>/evaluations.
eval_dir = os.path.join(run_dir, "evaluations")
os.makedirs(name=eval_dir, exist_ok=True)

# Turn the collected training results into a table and persist it as CSV
# (without the index column) next to the evaluation plots.
rows = extract_evaluation_data(training_results)
df = pd.DataFrame(data=rows)
training_csv_path = os.path.join(eval_dir, "training_data.csv")
df.to_csv(training_csv_path, index=False)

In [3]:
# Training-time chart (drawn with highlight_max disabled) and dataset-size chart.
plot_metric_chart(df, "Training Time", eval_dir, highlight_max=False)
plot_dataset_sizes(df, eval_dir)

# One chart per evaluation metric; loss is skipped because it is not useful
# in this context.
# NOTE(review): `evaluation` is not defined in this cell - presumably it is
# left over from the training loop; confirm it survives Restart & Run All.
for metric in evaluation:
    if metric != "loss":
        plot_metric_chart(df, metric, eval_dir)

plot_time_extrapolation(df, eval_dir)

print(
    "All evaluations completed!",
    f"Results are available at: {eval_dir}",
    sep="\n",
)


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  plot = sns.barplot(

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  plot = sns.barplot(


Average Training Time: 1515.473234994071
Number of Distinct Models: 7
Number of Singular Datasets: 3
All evaluations completed!
Results are available at: runs\run_30\evaluations
