# Model Generation

In [None]:
# Load IPython's autoreload extension and use mode 2 so that imported project
# modules are re-imported automatically before each cell executes.
%load_ext autoreload
%autoreload 2

In [1]:
import pynattas as pnas
from pynattas import classes, functions
import pytorch_lightning as pl
import torch
from torch import nn
from torch.utils.data import DataLoader, random_split, TensorDataset
from torchvision.models import resnet18
from datasets.RawClassifier.loader import RawClassifierDataModule, RawClassifierDataset
import configparser
import numpy as np
import pandas as pd

import pickle
pd.set_option('display.max_colwidth', None)  # never truncate long cell contents in DataFrame output

# Define dataset module
# NOTE(review): absolute, machine-specific path; consider moving it to config.ini.
root_dir = '/Data_large/marine/PythonProjects/OtherProjects/lpl-PyNas/data/RawClassifier'
# NOTE(review): batch_size/num_workers are literals here and are NOT taken from
# the `bs` / `num_workers` values read from config.ini further down.
dm = RawClassifierDataModule(root_dir, batch_size=4, num_workers=2, transform=None)


config = configparser.ConfigParser()
# ConfigParser.read() returns the list of files it managed to parse and silently
# skips missing ones. Fail here with a clear message instead of letting the
# lookups below raise a confusing NoSectionError / KeyError.
if not config.read('config.ini'):
    raise FileNotFoundError("config.ini was not found or could not be read.")

# Model parameters
max_layers = config.getint('NAS', 'max_layers')
max_iter = config.getint('GA', 'max_iterations')
# GA parameters
n_individuals = config.getint('GA', 'population_size')
mating_pool_cutoff = config.getfloat('GA', 'mating_pool_cutoff')
mutation_probability = config.getfloat('GA', 'mutation_probability')
# Logging
logs_directory = config.get('GA', 'logs_dir_GA')

# Torch stuff
seed = config.getint(section='Computation', option='seed')
pl.seed_everything(seed=seed, workers=True)  # For reproducibility
torch.set_float32_matmul_precision("medium")  # to make lightning happy
num_workers = config.getint(section='Computation', option='num_workers')
accelerator = config.get(section='Computation', option='accelerator')

# Optional manual overrides; when left as None the defaults from the
# 'Search Space' section of config.ini are used.
log_learning_rate = None
batch_size = None
# Get model parameters
log_lr = log_learning_rate if log_learning_rate is not None else config.getfloat(section='Search Space', option='default_log_lr')

lr = 10**log_lr  # the config stores log10 of the learning rate
bs = batch_size if batch_size is not None else config.getint(section='Search Space', option='default_bs')
print(f"-----------The batch size of the data to be loaded in the model is: {bs}-----------")
def initialize_logging(max_iter):
    """Create the bookkeeping dictionary for per-iteration statistics.

    Parameters:
        max_iter (int): Number of iterations; every vector gets
            ``max_iter + 1`` zero-initialised slots.

    Returns:
        dict: Six independent float vectors (mean/median/best fitness, iou,
        fps, model size), the running "historical best" scalars initialised to
        the worst possible value for each quantity, and ``best_individual``
        set to ``None``.
    """
    slots = max_iter + 1
    vector_names = (
        "mean_fitness_vector",
        "median_fitness_vector",
        "best_fitness_vector",
        "iou_vector",
        "fps_vector",
        "model_size_vector",
    )

    # Each entry gets its own array so the vectors never alias one another.
    log_state = {name: np.zeros(shape=(slots)) for name in vector_names}

    # Fitness, IoU and FPS start at -inf; model size starts at +inf.
    log_state["historical_best_fitness"] = float('-inf')
    log_state["historical_best_iou"] = float('-inf')
    log_state["historical_best_fps"] = float('-inf')
    log_state["historical_best_model_size"] = float('inf')

    # No best individual is known before the first evaluation.
    log_state["best_individual"] = None
    return log_state

Seed set to 42


-----------The batch size of the data to be loaded in the model is: 4-----------


### Configuration

In [2]:
class ModelConstructor:
    """Attach a classification head to an encoder and sanity-check the result.

    The head (global average pooling -> flatten -> linear) is sized by pushing
    a random dummy batch through the encoder and reading the channel dimension
    of its output. After construction:

    * ``self.model`` is ``nn.Sequential(encoder, head)``
    * ``self.valid_model`` is ``True`` when a dummy forward pass through the
      full model succeeds and preserves the batch dimension, ``False`` otherwise.

    Parameters:
        encoder: Module exposing ``parameters()`` and callable on a tensor.
        dm: Object with ``num_classes`` and ``input_shape`` attributes.
            ``input_shape`` must be a tuple of length 3 (a batch dimension of 1
            is prepended) or 4.
        verbose (bool): Print shapes and diagnostics while building.

    Raises:
        ValueError: ``dm`` lacks required attributes, the encoder has no
            parameters / does not follow the expected API, or ``input_shape``
            has an unsupported length.
        TypeError: ``input_shape`` is not a tuple, or the encoder output is not
            a tensor.
        RuntimeError: The encoder fails on the dummy input.
    """

    def __init__(self, encoder, dm, verbose=False):
        # Validate that dm has the necessary attributes.
        if not hasattr(dm, "num_classes") or not hasattr(dm, "input_shape"):
            raise ValueError("dm must have 'num_classes' and 'input_shape' attributes.")

        self.encoder = encoder
        self.num_classes = dm.num_classes
        self.input_shape = dm.input_shape
        self.verbose = verbose
        if self.verbose:
            print(f"Input shape: {self.input_shape}")

        # Verify that encoder has parameters.
        try:
            next(self.encoder.parameters())
        except StopIteration:
            # The StopIteration itself carries no useful context for the caller.
            raise ValueError("Encoder appears to have no parameters.") from None
        except Exception as e:
            raise ValueError("Provided encoder does not follow expected API.") from e

        # Validate input_shape is a tuple and properly dimensioned.
        if not isinstance(self.input_shape, tuple):
            raise TypeError("input_shape must be a tuple.")
        if len(self.input_shape) == 3:
            if self.verbose:
                print("Adding channel dimension to input shape.")
                print(f"Original input shape: {self.input_shape}")
            # Prepend a leading dimension of size 1 so the dummy input is 4-D.
            self.input_shape = (1,) + self.input_shape
            if self.verbose:
                print(f"Updated input shape: {self.input_shape}")
        elif len(self.input_shape) != 4:
            raise ValueError("input_shape must be of length 3 or 4.")

        self.head_layer = self.build_head(input_shape=self.input_shape)

        self.model = nn.Sequential(
            self.encoder,
            self.head_layer
        )

        self.valid_model = self.dummy_test()

    def _encoder_device(self):
        """Return the device of the encoder's first parameter."""
        try:
            return next(self.encoder.parameters()).device
        except Exception as e:
            raise ValueError("Unable to determine device from encoder parameters.") from e

    def build_head(self, input_shape=(1, 2, 256, 256)):
        """Build the pooling + linear head matching the encoder's output channels.

        Parameters:
            input_shape (tuple): Shape of the random dummy batch fed to the encoder.

        Returns:
            nn.Sequential: ``AdaptiveAvgPool2d((1, 1)) -> Flatten -> Linear``,
            placed on the same device as the encoder.

        Raises:
            ValueError: The encoder device cannot be determined, or the encoder
                output has fewer than 2 dimensions.
            RuntimeError: The encoder raises on the dummy input.
            TypeError: The encoder output is not a ``torch.Tensor``.
        """
        device = self._encoder_device()

        # Run a dummy input through the encoder to get the feature shape.
        dummy = torch.randn(*input_shape).float().to(device)
        try:
            # Only the output shape is needed; skip autograd bookkeeping.
            with torch.no_grad():
                features = self.encoder(dummy)
        except Exception as e:
            raise RuntimeError("Error when running dummy input through encoder.") from e

        if not isinstance(features, torch.Tensor):
            raise TypeError("Encoder output should be a torch.Tensor.")

        if self.verbose:
            print("Feature map shape from the feature extractor:", features.shape)

        # Check that the features tensor has at least 2 dimensions.
        if features.dim() < 2:
            raise ValueError("Encoded features should have at least 2 dimensions.")

        # Determine the number of channels from the dummy output.
        feature_channels = features.shape[1]

        # Build the head layer on the encoder's device; a CPU head behind an
        # encoder on another device would make every forward pass fail.
        head_layer = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(feature_channels, self.num_classes)
        ).to(device)
        if self.verbose:
            print("Constructed head layer:", head_layer)
        return head_layer

    def dummy_test(self):
        """Run one random batch through the full model.

        Returns:
            bool: ``True`` if the forward pass succeeds, yields a tensor and
            keeps the batch dimension; ``False`` on any failure (the error is
            printed only when ``verbose`` is set).
        """
        try:
            device = self._encoder_device()
            dummy = torch.randn(*self.input_shape).float().to(device)
            with torch.no_grad():
                output = self.model(dummy)

            # Check the type before touching ``output.shape``.
            if not isinstance(output, torch.Tensor):
                raise TypeError("Output of the model should be a torch.Tensor.")

            if self.verbose:
                print("Network test passed. Output shape from the model:", output.shape)

            if output.shape[0] != dummy.shape[0]:
                raise ValueError("Batch size mismatch between input and output.")

            return True
        except Exception as e:
            if self.verbose:
                print("An error occurred during dummy_test:", e)
            return False

    def forward(self, x):
        """Apply the assembled model to ``x``.

        Raises:
            TypeError: ``x`` is not a ``torch.Tensor``.
        """
        if not isinstance(x, torch.Tensor):
            raise TypeError("Input must be a torch.Tensor.")
        return self.model(x)

In [3]:
from mutation import gene_mutation
from crossover import single_point_crossover

class Population:
    """A population of candidate architectures evolved with a genetic algorithm.

    Attributes:
        dm: Data module providing ``input_shape`` and ``num_classes``.
        n_individuals (int): Target population size.
        max_layers (int): Passed to ``classes.Individual`` when sampling.
        max_parameters (int): Exclusive upper bound on a model's parameter count.
        generation (int): Current generation index (0 for the initial poll).
        population (list): Current individuals (empty until ``initial_poll``).
        topModels (list): Elite copies kept by the most recent ``evolve`` call.
        df (pd.DataFrame | None): Summary table built by ``_update_df``.
    """

    def __init__(self, n_individuals, max_layers, dm, max_parameters=100000):
        self.dm = dm  # Data module for model creation

        self.n_individuals = n_individuals
        self.max_layers = max_layers
        self.generation = 0
        self.max_parameters = max_parameters

        # Defined up front so len()/iteration work before the first poll.
        self.population = []
        self.topModels = []
        self.df = None

    def initial_poll(self):
        """Generate the initial population, refresh the DataFrame and save both."""
        self.population = self.create_population()
        self._update_df()
        self.save_dataframe()
        self.save_population()

    def create_random_individual(self):
        """Create a random individual with a random number of layers."""
        return classes.Individual(max_layers=self.max_layers)

    def sort_population(self):
        """Sort the population by descending fitness and checkpoint it."""
        self.population = sorted(self.population, key=lambda individual: individual.fitness, reverse=True)
        self.checkpoint()

    def checkpoint(self):
        """Refresh the DataFrame and save the current population and DataFrame."""
        self._update_df()
        self.save_population()
        self.save_dataframe()

    def check_individual(self, individual):
        """Check that ``individual`` builds into a valid, size-bounded model.

        As a side effect ``individual.model_size`` is set to the parameter
        count whenever the model builds and passes the dummy forward test.

        Returns:
            bool: ``True`` when the model is valid and
            ``0 < model_size < max_parameters``; ``False`` otherwise. Failures
            are printed, never raised.
        """
        try:
            model_representation, is_valid = self.build_model(individual.parsed_layers)
            if not is_valid:
                return False
            model_size = self.evaluate_parameters(model_representation)
            individual.model_size = model_size
        except Exception as e:
            print(f"Error encountered when checking individual: {e}")
            return False

        # Explicit checks rather than ``assert``: asserts vanish under ``python -O``.
        if model_size <= 0:
            print(f"Error encountered when checking individual: Model size must be greater then zero: {model_size} Parameters")
            return False
        if model_size >= self.max_parameters:
            print(f"Error encountered when checking individual: Model size is too big: {model_size} Parameters")
            return False
        return True  # Individual is valid

    @staticmethod
    def _architecture_key(individual):
        """Return the hashable identity used to detect duplicate architectures."""
        arch = getattr(individual, 'architecture', None)
        if arch is None:
            # If no architecture attribute, use parsed_layers as unique identifier.
            arch = str(individual.parsed_layers)
        return arch

    def create_population(self):
        """Sample random individuals until ``n_individuals`` valid, unique ones exist.

        Duplicates are rejected before validation, so every returned individual
        has passed ``check_individual`` and carries a ``model_size``.

        Returns:
            list: The new population.
        """
        population = []
        seen_architectures = set()
        while len(population) < self.n_individuals:
            candidate = self.create_random_individual()  # Create a random individual
            key = self._architecture_key(candidate)
            if key in seen_architectures:
                continue  # duplicate architecture: skip the (costly) model build
            if self.check_individual(candidate):
                seen_architectures.add(key)
                population.append(candidate)
        return population

    def elite_models(self, k_best=1):
        """Return copies of the ``k_best`` fittest individuals.

        If ``k_best`` exceeds the population size, all individuals are returned.
        """
        ranked = sorted(self.population, key=lambda individual: individual.fitness, reverse=True)
        return [individual.copy() for individual in ranked[:max(k_best, 0)]]

    def evolve(self, mating_pool_cutoff=0.5, mutation_probability=0.85, k_best=1, n_random=3):
        """Replace the population with the next generation.

        The new generation is made of:
          1. offspring (crossover + mutation of parents drawn from the mating
             pool) filling ``n_individuals - n_random - k_best`` slots,
          2. freshly sampled random individuals filling the remaining slots up
             to ``n_individuals - k_best``,
          3. copies of the ``k_best`` fittest individuals of the current
             generation.

        Parameters:
            mating_pool_cutoff   : Fraction of ``n_individuals`` (taken from the top of
                                   the fitness ranking) that forms the mating pool.
            mutation_probability : Probability forwarded to ``gene_mutation``.
            k_best               : Number of elite individuals carried over.
            n_random             : Number of random individuals to inject.

        Returns:
            None. Updates ``self.population``, ``self.generation``,
            ``self.topModels`` and ``self.df``, and saves the population and
            DataFrame to disk.

        Raises:
            AssertionError: The mating pool is empty, or the final population
                size differs from ``n_individuals``.
            ValueError: Offspring are required but the mating pool does not
                contain two different architectures (parent selection could
                never succeed).
        """
        # 1. Create the mating pool based on the cutoff from the sorted population
        sorted_pop = sorted(self.population, key=lambda individual: individual.fitness, reverse=True)
        mating_pool = sorted_pop[:int(np.floor(mating_pool_cutoff * self.n_individuals))]
        assert len(mating_pool) > 0, "Mating pool is empty."

        n_offspring = self.n_individuals - n_random - k_best
        reference_layers = mating_pool[0].parsed_layers
        has_distinct_parents = any(
            candidate.parsed_layers != reference_layers for candidate in mating_pool[1:]
        )
        if n_offspring > 0 and not has_distinct_parents:
            # Parents must differ; without two distinct architectures the
            # selection loop below would spin forever.
            raise ValueError(
                "Mating pool needs at least two individuals with different architectures to produce offspring."
            )

        new_population = []
        self.generation += 1
        self.topModels = self.elite_models(k_best=k_best)

        # 2. Generate offspring until the offspring quota is reached
        while len(new_population) < n_offspring:
            parent1 = np.random.choice(mating_pool)
            parent2 = np.random.choice(mating_pool)
            if parent1.parsed_layers == parent2.parsed_layers:
                continue  # identical parents: draw again

            # a) Crossover:
            children = single_point_crossover([parent1, parent2])
            # b) Mutation:
            mutated_children = gene_mutation(children, mutation_probability)
            # c) Keep every valid child, but never exceed the quota (crossover
            #    yields several children per call).
            for kid in mutated_children:
                if len(new_population) >= n_offspring:
                    break
                kid.reset()
                if self.check_individual(kid):
                    new_population.append(kid)

        # 3. Add random individuals to the new population
        while len(new_population) < self.n_individuals - k_best:
            individual = self.create_random_individual()
            if self.check_individual(individual):
                new_population.append(individual)

        # 4. Add the best individuals from the previous generation
        new_population.extend(self.topModels)

        assert len(new_population) == self.n_individuals, f"Population size is {len(new_population)}, expected {self.n_individuals}"
        self.population = new_population
        self._update_df()
        self.save_dataframe()
        self.save_population()

    def __getitem__(self, index):
        return self.population[index]

    def remove_duplicates(self, population):
        """
        Remove duplicates from the given population by replacing duplicates with newly generated unique individuals.

        Note: replacement individuals are sampled directly and are NOT passed
        through ``check_individual``; callers that need validated individuals
        must validate them afterwards.

        Parameters:
            population (list): A list of individuals in the population.

        Returns:
            list: The updated population with duplicates removed.
        """
        unique_architectures = set()
        updated_population = []

        for individual in population:
            arch = self._architecture_key(individual)

            if arch not in unique_architectures:
                unique_architectures.add(arch)
                updated_population.append(individual)
                continue

            # Try to generate a unique individual up to 50 times
            for _ in range(50):
                new_individual = classes.Individual(max_layers=self.max_layers)
                new_arch = self._architecture_key(new_individual)

                if new_arch not in unique_architectures:
                    unique_architectures.add(new_arch)
                    updated_population.append(new_individual)
                    break
            else:
                # After 50 attempts, keep the original duplicate as a fallback.
                updated_population.append(individual)
        return updated_population

    def build_model(self, parsed_layers):
        """
        Build a model based on the provided parsed layers.

        An encoder is created from ``parsed_layers`` and combined with a head
        layer via ModelConstructor, using this population's data module
        (``self.dm``) for the input shape and number of classes.

        Parameters:
            parsed_layers: The parsed architecture configuration used by the encoder to build the network.

        Returns:
            tuple: ``(model, is_valid)`` where ``model`` is the encoder + head
            and ``is_valid`` is the result of ModelConstructor's dummy forward test.
        """
        encoder = classes.generic_network.GenericNetwork(
                parsed_layers,
                input_channels=self.dm.input_shape[0],
                input_height=self.dm.input_shape[1],
                input_width=self.dm.input_shape[2],
                num_classes=self.dm.num_classes,
        )
        # Build once and read both results from the same constructor.
        constructor = ModelConstructor(encoder, self.dm)
        return constructor.model, constructor.valid_model

    def evaluate_parameters(self, model):
        """Return the total number of elements across ``model.parameters()``."""
        num_params = sum(p.numel() for p in model.parameters())
        return num_params

    def _update_df(self):
        """
        Rebuild ``self.df`` from the current population.

        One row per individual with columns Generation, Layers, Fitness,
        Metric (the individual's ``iou``), FPS and Params, sorted by
        descending Fitness with a fresh integer index.
        """
        columns = ["Generation", "Layers", "Fitness", "Metric", "FPS", "Params"]
        data = [
            [
                self.generation,
                individual.parsed_layers,
                individual.fitness,
                individual.iou,
                individual.fps,
                individual.model_size,
            ]
            for individual in self.population
        ]

        df = pd.DataFrame(data, columns=columns).sort_values(by="Fitness", ascending=False)
        df.reset_index(drop=True, inplace=True)

        self.df = df

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will contain ``path`` if it does not exist."""
        import os  # local import: the notebook's import cell does not provide os

        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    def save_dataframe(self):
        """Pickle ``self.df`` for the current generation (best effort; errors are printed)."""
        path = f'./models_traced/src/df_population_{self.generation}.pkl'
        try:
            self._ensure_parent_dir(path)
            self.df.to_pickle(path)
            print(f"DataFrame saved to {path}")
        except Exception as e:
            print(f"Error saving DataFrame to {path}: {e}")

    def load_dataframe(self, generation):
        """Load the pickled DataFrame of ``generation``; returns ``None`` on failure."""
        path = f'./models_traced/src/df_population_{generation}.pkl'
        try:
            # SECURITY: unpickling executes arbitrary code; only load trusted files.
            df = pd.read_pickle(path)
            return df
        except Exception as e:
            print(f"Error loading DataFrame from {path}: {e}")
            return None

    def save_population(self):
        """Pickle ``self.population`` for the current generation (best effort; errors are printed)."""
        path = f'./models_traced/src/population_{self.generation}.pkl'
        try:
            self._ensure_parent_dir(path)
            with open(path, 'wb') as f:
                pickle.dump(self.population, f)
            print(f"Population saved to {path}")
        except Exception as e:
            print(f"Error saving population to {path}: {e}")

    def load_population(self, generation):
        """Load the pickled population of ``generation``; returns ``None`` on failure."""
        path = f'./models_traced/src/population_{generation}.pkl'
        try:
            with open(path, 'rb') as f:
                # SECURITY: pickle.load executes arbitrary code; only load trusted files.
                population = pickle.load(f)
            return population
        except Exception as e:
            print(f"Error loading population from {path}: {e}")
            return None

    def __len__(self):
        return len(self.population)



In [4]:
# NOTE(review): this overrides the max_layers value read from config.ini in the
# setup cell at the top of the notebook.
max_layers = 5
# Population of 15 individuals, built against the data module `dm`.
pop = Population(15, max_layers, dm=dm)
# Generate the initial individuals, build pop.df and save both to disk.
pop.initial_poll()

Skipping architecture, total parameters: 249812568 exceed the threshold of 100000000
Error encountered when checking individual: Encoder appears to have no parameters.
Architecture is valid, total parameters: 700
Skipping architecture, total parameters: 382326788 exceed the threshold of 100000000
Error encountered when checking individual: Encoder appears to have no parameters.
Architecture is valid, total parameters: 1095
Skipping architecture, total parameters: 129939754 exceed the threshold of 100000000
Error encountered when checking individual: Encoder appears to have no parameters.
Architecture is valid, total parameters: 73992
Architecture is valid, total parameters: 92848
Architecture is valid, total parameters: 590654
Error encountered when checking individual: Model size is too big: 592673 Parameters
Architecture is valid, total parameters: 124
Architecture is valid, total parameters: 302300
Error encountered when checking individual: Model size is too big: 303743 Parameters


In [5]:
print(pop.max_layers)

# Assign a random fitness to every individual as a stand-in for real training.
for individual in pop:
    individual.fitness = np.random.rand() # simulate training
# Rebuild the summary DataFrame so it reflects the new fitness values, then show it.
pop._update_df()
pop.df

5


Unnamed: 0,Generation,Layers,Fitness,Metric,FPS,Params
0,0,"[{'layer_type': 'ConvAct', 'out_channels_coefficient': 5, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'GELU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'DenseNetBlock', 'out_channels_coefficient': 5, 'activation': 'GELU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'Dropout', 'dropout_rate': 0.37}, {'layer_type': 'MaxPool'}, {'layer_type': 'Dropout', 'dropout_rate': 0.35}, {'layer_type': 'MaxPool'}, {'layer_type': 'Dropout', 'dropout_rate': 0.46}, {'layer_type': 'MaxPool'}]",0.96991,,,4943
1,0,"[{'layer_type': 'MBConvNoRes', 'expansion_factor': '6', 'activation': 'GELU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'DenseNetBlock', 'out_channels_coefficient': 9, 'activation': 'ReLU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'ResNetBlock', 'reduction_factor': '4', 'activation': 'GELU'}, {'layer_type': 'AvgPool'}]",0.950714,,,1158
2,0,"[{'layer_type': 'ConvBnAct', 'out_channels_coefficient': 10, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'ReLU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'MBConvNoRes', 'expansion_factor': '5', 'activation': 'ReLU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'ConvAct', 'out_channels_coefficient': 7, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'ReLU'}, {'layer_type': 'MaxPool'}]",0.866176,,,31743
3,0,"[{'layer_type': 'MBConv', 'expansion_factor': '4', 'activation': 'GELU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'DenseNetBlock', 'out_channels_coefficient': 8, 'activation': 'GELU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'MBConv', 'expansion_factor': '4', 'activation': 'GELU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'MBConvNoRes', 'expansion_factor': '3', 'activation': 'ReLU'}, {'layer_type': 'AvgPool'}]",0.832443,,,7057
4,0,"[{'layer_type': 'Dropout', 'dropout_rate': 0.1}, {'layer_type': 'MaxPool'}, {'layer_type': 'ConvSE', 'out_channels_coefficient': 12, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'ReLU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'ConvSE', 'out_channels_coefficient': 12, 'kernel_size': '3', 'stride': '1', 'padding': '2', 'activation': 'ReLU'}, {'layer_type': 'AvgPool'}]",0.731994,,,74859
5,0,"[{'layer_type': 'ResNetBlock', 'reduction_factor': '2', 'activation': 'ReLU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'ResNetBlock', 'reduction_factor': '2', 'activation': 'GELU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'ConvBnAct', 'out_channels_coefficient': 8, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'GELU'}, {'layer_type': 'MaxPool'}]",0.708073,,,511
6,0,"[{'layer_type': 'ResNetBlock', 'reduction_factor': '4', 'activation': 'GELU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'ConvBnAct', 'out_channels_coefficient': 8, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'ReLU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'MBConvNoRes', 'expansion_factor': '5', 'activation': 'GELU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'ResNetBlock', 'reduction_factor': '4', 'activation': 'ReLU'}, {'layer_type': 'MaxPool'}]",0.601115,,,4601
7,0,"[{'layer_type': 'MBConv', 'expansion_factor': '4', 'activation': 'ReLU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'ConvBnAct', 'out_channels_coefficient': 5, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'ReLU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'ConvSE', 'out_channels_coefficient': 8, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'GELU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'MBConvNoRes', 'expansion_factor': '6', 'activation': 'GELU'}, {'layer_type': 'AvgPool'}]",0.598658,,,93091
8,0,"[{'layer_type': 'MBConvNoRes', 'expansion_factor': '5', 'activation': 'GELU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'Dropout', 'dropout_rate': 0.31}, {'layer_type': 'MaxPool'}, {'layer_type': 'ConvBnAct', 'out_channels_coefficient': 12, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'ReLU'}, {'layer_type': 'AvgPool'}]",0.37454,,,775
9,0,"[{'layer_type': 'ResNetBlock', 'reduction_factor': '4', 'activation': 'ReLU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'ConvBnAct', 'out_channels_coefficient': 10, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'GELU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'DenseNetBlock', 'out_channels_coefficient': 10, 'activation': 'GELU'}, {'layer_type': 'AvgPool'}]",0.212339,,,37385


In [6]:
# Advance one generation: offspring from the top half of the population,
# 3 random newcomers and the single best individual carried over.
pop.evolve(mating_pool_cutoff=0.5, mutation_probability=0.85, k_best=1, n_random=3)
pop.df


Architecture is valid, total parameters: 194
Architecture is valid, total parameters: 52310798
Error encountered when checking individual: Model size is too big: 52315553 Parameters
Architecture is valid, total parameters: 229834
Error encountered when checking individual: Model size is too big: 230365 Parameters
Architecture is valid, total parameters: 384
Architecture is valid, total parameters: 542
Architecture is valid, total parameters: 416
Architecture is valid, total parameters: 876
Architecture is valid, total parameters: 57635
Architecture is valid, total parameters: 308
Architecture is valid, total parameters: 3704
Architecture is valid, total parameters: 366288
Error encountered when checking individual: Model size is too big: 366885 Parameters
Architecture is valid, total parameters: 32964
Architecture is valid, total parameters: 472012
Error encountered when checking individual: Model size is too big: 472447 Parameters
Architecture is valid, total parameters: 30728
Archite

Unnamed: 0,Generation,Layers,Fitness,Metric,FPS,Params
0,1,"[{'layer_type': 'ConvAct', 'out_channels_coefficient': 10, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'ReLU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'ConvSE', 'out_channels_coefficient': 4, 'kernel_size': '3', 'stride': '1', 'padding': '2', 'activation': 'ReLU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'MBConvNoRes', 'expansion_factor': '5', 'activation': 'GELU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'ResNetBlock', 'reduction_factor': '3', 'activation': 'ReLU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'ConvAct', 'out_channels_coefficient': 12, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'ReLU'}, {'layer_type': 'AvgPool'}]",0.96991,,,4943
1,1,"[{'layer_type': 'Dropout', 'dropout_rate': 0.14}, {'layer_type': 'AvgPool'}, {'layer_type': 'Dropout', 'dropout_rate': 0.38}, {'layer_type': 'MaxPool'}, {'layer_type': 'Dropout', 'dropout_rate': 0.29}, {'layer_type': 'MaxPool'}, {'layer_type': 'DenseNetBlock', 'out_channels_coefficient': 5, 'activation': 'GELU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'Dropout', 'dropout_rate': 0.33}, {'layer_type': 'MaxPool'}]",0.0,,,233
2,1,"[{'layer_type': 'Dropout', 'dropout_rate': 0.22}, {'layer_type': 'MaxPool'}, {'layer_type': 'Dropout', 'dropout_rate': 0.13}, {'layer_type': 'MaxPool'}, {'layer_type': 'DenseNetBlock', 'out_channels_coefficient': 10, 'activation': 'ReLU'}, {'layer_type': 'AvgPool'}]",0.0,,,453
3,1,"[{'layer_type': 'ResNetBlock', 'reduction_factor': '4', 'activation': 'GELU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'ResNetBlock', 'reduction_factor': '3', 'activation': 'GELU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'Dropout', 'dropout_rate': 0.22}, {'layer_type': 'MaxPool'}, {'layer_type': 'ConvAct', 'out_channels_coefficient': 11, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'GELU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'Dropout', 'dropout_rate': 0.46}, {'layer_type': 'AvgPool'}]",0.0,,,611
4,1,"[{'layer_type': 'MBConv', 'expansion_factor': '5', 'activation': 'GELU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'Dropout', 'dropout_rate': 0.24}, {'layer_type': 'MaxPool'}, {'layer_type': 'MBConvNoRes', 'expansion_factor': '4', 'activation': 'ReLU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'Dropout', 'dropout_rate': 0.35}, {'layer_type': 'MaxPool'}, {'layer_type': 'ResNetBlock', 'reduction_factor': '3', 'activation': 'ReLU'}, {'layer_type': 'AvgPool'}]",0.0,,,425
5,1,"[{'layer_type': 'ResNetBlock', 'reduction_factor': '4', 'activation': 'ReLU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'MBConv', 'expansion_factor': '6', 'activation': 'GELU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'Dropout', 'dropout_rate': 0.37}, {'layer_type': 'AvgPool'}, {'layer_type': 'MBConvNoRes', 'expansion_factor': '5', 'activation': 'GELU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'DenseNetBlock', 'out_channels_coefficient': 10, 'activation': 'GELU'}, {'layer_type': 'MaxPool'}]",0.0,,,945
6,1,"[{'layer_type': 'ConvAct', 'out_channels_coefficient': 5, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'GELU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'DenseNetBlock', 'out_channels_coefficient': 5, 'activation': 'GELU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'Dropout', 'dropout_rate': 0.37}, {'layer_type': 'MaxPool'}, {'layer_type': 'ResNetBlock', 'reduction_factor': '2', 'activation': 'GELU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'MBConv', 'expansion_factor': '6', 'activation': 'GELU'}, {'layer_type': 'AvgPool'}]",0.0,,,57818
7,1,"[{'layer_type': 'ConvAct', 'out_channels_coefficient': 5, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'GELU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'ResNetBlock', 'reduction_factor': '4', 'activation': 'ReLU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'Dropout', 'dropout_rate': 0.35}, {'layer_type': 'MaxPool'}]",0.0,,,341
8,1,"[{'layer_type': 'ConvAct', 'out_channels_coefficient': 8, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'ReLU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'MBConvNoRes', 'expansion_factor': '4', 'activation': 'GELU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'ResNetBlock', 'reduction_factor': '4', 'activation': 'ReLU'}, {'layer_type': 'MaxPool'}]",0.0,,,3755
9,1,"[{'layer_type': 'MBConvNoRes', 'expansion_factor': '5', 'activation': 'GELU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'ConvBnAct', 'out_channels_coefficient': 8, 'kernel_size': '3', 'stride': '1', 'padding': '1', 'activation': 'ReLU'}, {'layer_type': 'AvgPool'}, {'layer_type': 'MBConv', 'expansion_factor': '6', 'activation': 'GELU'}, {'layer_type': 'MaxPool'}, {'layer_type': 'DenseNetBlock', 'out_channels_coefficient': 12, 'activation': 'ReLU'}, {'layer_type': 'AvgPool'}]",0.0,,,33591


In [7]:
# Fitness of the first elite individual kept by the last evolve() call.
pop.topModels[0].fitness

0.9699098521619943

Train the freshly created model.

In [None]:
class NASTrainer:
    """Build, train, test and export one individual of a NAS population.

    The individual at ``population[idx]`` is turned into a model through
    ``population.build_model`` and wrapped in
    ``classes.GenericLightningNetwork``.

    Args:
        population: Object that can be indexed with ``idx``, provides
            ``build_model(layers)`` returning ``(model, is_valid)`` and a
            ``generation`` attribute (read by ``save_model``).
        idx: Index of the individual to train.
        dm: Data module handed to the Lightning trainer; must expose
            ``num_classes`` and ``input_shape``.
        lr: Learning rate forwarded to the Lightning module.
        max_epochs: Maximum number of training epochs.

    Raises:
        ValueError: If ``build_model`` reports the model as not valid.
    """

    def __init__(self, population, idx, dm, lr, max_epochs=10):
        self.population = population
        self.idx = idx
        self.dm = dm
        self.lr = lr
        self.max_epochs = max_epochs

        # Filled in by train(); defined here so save_model() can detect a
        # trainer that has not been run yet.
        self.trainer = None
        self.results = None

        # Build the model from the selected individual.
        layers = self.population[self.idx].parsed_layers
        self.constructed_model, is_valid = self.population.build_model(layers)
        if not is_valid:
            raise ValueError("Constructed model is not valid.")

        self.LM = classes.GenericLightningNetwork(
            model=self.constructed_model,
            num_classes=self.dm.num_classes,
            learning_rate=self.lr,
        )

    def train(self):
        """Fit the Lightning module on ``dm`` and store the test results.

        The list returned by ``Trainer.test`` is kept in ``self.results``.
        """
        self.trainer = pl.Trainer(
            max_epochs=self.max_epochs,
            accelerator="gpu" if torch.cuda.is_available() else "cpu"
        )
        # Train the lightning model
        self.trainer.fit(self.LM, self.dm)
        self.results = self.trainer.test(self.LM, self.dm)

    def save_model(self, save_torchscript=True,
                   ts_save_path=None,
                   save_standard=True,
                   std_save_path=None):
        """Write the test results and the trained model to disk.

        The results of ``train`` are always written to
        ``models_traced/generation_<gen>/results_model_<idx>.txt``. Missing
        output directories are created.

        Args:
            save_torchscript: If true, trace ``self.LM.model`` with a random
                input and save the traced module to ``ts_save_path``.
            ts_save_path: Target of the traced model; defaults to
                ``models_traced/generation_<gen>/model_and_architecture_<idx>.pt``.
            save_standard: If true, ``torch.save`` a dict holding the model's
                ``state_dict`` (plus ``architecture_code`` when the individual
                has a non-``None`` ``architecture``) to ``std_save_path``.
            std_save_path: Target of the state-dict checkpoint; defaults to
                ``models_traced/generation_<gen>/model_<idx>.pth``.

        Raises:
            RuntimeError: If no test results are available, i.e. ``train``
                has not been run.
        """
        # Local import: the notebook's import cell lies outside this block.
        import os

        results = getattr(self, "results", None)
        if not results:
            raise RuntimeError(
                "No test results available: call train() before save_model()."
            )

        # Use generation attribute from the Population object.
        gen = self.population.generation
        output_dir = f"models_traced/generation_{gen}"

        if ts_save_path is None:
            ts_save_path = f"{output_dir}/model_and_architecture_{self.idx}.pt"
        if std_save_path is None:
            std_save_path = f"{output_dir}/model_{self.idx}.pth"
        results_path = f"{output_dir}/results_model_{self.idx}.txt"

        # Neither open() nor the torch savers create missing directories, so
        # make sure every target that will be written has a parent directory.
        targets = [results_path]
        if save_torchscript:
            targets.append(ts_save_path)
        if save_standard:
            targets.append(std_save_path)
        for target in targets:
            parent = os.path.dirname(target)
            if parent:
                os.makedirs(parent, exist_ok=True)

        # Save the results to a text file.
        with open(results_path, "w") as f:
            f.write("Test Results:\n")
            for key, value in results[0].items():
                f.write(f"{key}: {value}\n")

        self.LM.eval()  # set the model to evaluation mode

        if save_torchscript:
            # Dummy input for tracing, built from dm.input_shape; a
            # 3-element shape gets a leading batch dimension of 1.
            input_shape = tuple(self.dm.input_shape)
            if len(input_shape) == 3:
                input_shape = (1,) + input_shape
            device = next(self.LM.parameters()).device
            example_input = torch.randn(*input_shape).to(device)

            traced_model = torch.jit.trace(self.LM.model, example_input)
            traced_model.save(ts_save_path)
            print(f"Scripted (TorchScript) model saved at {ts_save_path}")

        if save_standard:
            # Retrieve architecture code from the individual.
            arch_code = self.population[self.idx].architecture
            save_dict = {"state_dict": self.LM.model.state_dict()}
            if arch_code is not None:
                save_dict["architecture_code"] = arch_code
            torch.save(save_dict, std_save_path)
            print(f"Standard model saved at {std_save_path}")


from myFit import FitnessEvaluator
evaluator = FitnessEvaluator()

# Train, test and export every individual, then record its scores.
for idx in range(len(pop)):
    nt = NASTrainer(population=pop, idx=idx, dm=dm, lr=1e-3, max_epochs=2)
    nt.train()
    nt.save_model()

    # First (and only used) entry of the test results.
    test_metrics = nt.results[0]
    fps = test_metrics['fps']
    metric = test_metrics['test_mcc']

    # Store the raw scores and the combined fitness on the individual.
    # Note: the `iou` attribute receives the 'test_mcc' value.
    individual = pop[idx]
    individual.iou = metric
    individual.fps = fps
    individual.fitness = evaluator.weighted_sum_exponential(fps, metric)

    # Mirror the same values into the population DataFrame.
    for column, value in (
        ('Fitness', individual.fitness),
        ('Metric', individual.iou),
        ('FPS', individual.fps),
    ):
        pop.df.loc[idx, column] = value

    # Persist after every individual.
    pop.save_dataframe()
    pop.save_population()

        

In [None]:
# Display the population DataFrame; the training loop writes each
# individual's 'Fitness', 'Metric' and 'FPS' values into it.
pop.df

When creating a new generation, keep two things in mind:

- Check that each new child's model size is below the threshold; otherwise discard it and try again.
- Retain the best K models at each generation.

In [None]:


# Example usage:
# new_pop = generate_new_population(pop, mating_pool_cutoff, mutation_probability, k_best=1)

In [None]:
# Inspect the parsed layers of the individual at index 2 of `new_population`.
# NOTE(review): in the visible cells `new_population` is only assigned in a
# later cell — confirm this cell works after Restart & Run All.
new_population[2].parsed_layers

In [None]:
# Inspect the parsed layers of the individual at index 2 of `pop`.
pop[2].parsed_layers

In [None]:
print("\n" * 20)
print(f"*** GENERATION {t} ***")
new_population = []

# Create a mating pool: the leading `mating_pool_cutoff` fraction of the
# current population plus freshly created individuals.
n_survivors = int(np.floor(mating_pool_cutoff * len(population)))
mating_pool = population[:n_survivors].copy()

# Top the pool up to the population size. Computing the count as a difference
# avoids floating-point rounding in ceil((1 - cutoff) * n), which can come out
# one too high (e.g. cutoff=0.7, n=10 gives 4) and leave the pool with one
# individual more than the population.
n_newcomers = len(population) - n_survivors
for _ in range(n_newcomers):
    mating_pool.append(classes.Individual(max_layers=max_layers))

# Coupling and mating: consecutive pairs produce children via crossover
# followed by mutation. The range stops before a trailing unpaired individual
# so that `couple_i + 1` is always a valid index.
for couple_i in range(0, len(mating_pool) - 1, 2):
    parents = [mating_pool[couple_i], mating_pool[couple_i + 1]]
    children = single_point_crossover(parents=parents)
    children = mutation(children=children, mutation_probability=mutation_probability)
    new_population.extend(children)

# With an odd-sized pool the last individual has no partner; carry it over
# unchanged instead of failing with an IndexError.
if len(mating_pool) % 2 == 1:
    new_population.append(mating_pool[-1])

# Update the population
population = new_population.copy()
for individual in population:
    individual.architecture = individual.chromosome2architecture(individual.chromosome)
population = remove_duplicates(population=population, max_layers=max_layers)

# Inference

In [None]:
# Load the saved TorchScript model and test with a dummy input.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): NASTrainer.save_model writes by default to
# "models_traced/generation_<gen>/model_and_architecture_<idx>.pt"; confirm
# that this file exists or point `save_path` at one of those files.
save_path = "model_and_architecture.pt"
loaded_model = torch.jit.load(save_path, map_location=device)
loaded_model.eval()

# Build the dummy input the same way NASTrainer.save_model does for tracing:
# only a 3-element shape gets a leading batch dimension of 1, so a shape that
# already includes the batch dimension is not extended a second time.
input_shape = tuple(dm.input_shape)
if len(input_shape) == 3:
    input_shape = (1,) + input_shape
example_input = torch.randn(*input_shape).to(device)

with torch.no_grad():
    output = loaded_model(example_input)
print("Output from the loaded model:", output)