In [1]:
import gc
import pandas as pd
import numpy as np
import os
import seaborn as sns
import warnings
import torch
import json
from imutils import paths

In [2]:
%run utils.ipynb
%run DatasetManager.ipynb
%run TransformManager.ipynb
%run ValidationManager.ipynb
%run DataLoaderManager.ipynb
%run ModelManager.ipynb



In [3]:
class SegmentationManager():
    """
    Manager to use the ModelManager class.
    """
    # Registry of the supported libraries: maps the library identifier to the
    # ModelManager class that is asked (through is_buildable) whether it can
    # build a given architecture / backbone / weights combination.
    LIBRARIES = {
        "smp": ModelManagerSMP,
        "semtorch": ModelManagerSemtorch,
        "segmentron": ModelManagerSegmenTron,
        "mmsegmentation": ModelManagerMMSegmentation
    }
    
    def __init__(self, dataset_manager,
                 transform_manager = None,
                 validation_manager = None,
                 metrics = [DiceMulti()],
                 metrics_order = [(1, True)],
                 gpu_device = 0
                ):
        """
        Description:
        Stores the managers and the metrics used by the trainings and selects the GPU.

        Parameters:
        dataset_manager (DatasetManager | str): the DatasetManager, or the root directory of the data (a default DatasetManager is then built from it).
        transform_manager (TransformManager, None): the TransformManager. If it is falsy, the default one is built.
        validation_manager (ValidationManager, None): the ValidationManager. If it is falsy, the default one is built.
        metrics (List[function]): the metrics used to evaluate the models. train_loss and valid_loss occupy the indexes 0 and 1 of the logged metrics.
        metrics_order (List[Tuple(int, boolean)], [(1, True)]): the metrics (by index) used to decide when a model is better than another one, and whether each one must be minimized. valid_loss (index 1), minimized, by default.
        gpu_device (int, 0): the index of the CUDA device handed to torch.cuda.set_device.

        Returns:
        None.
        """
        # a plain string is understood as the root directory of the data
        if type(dataset_manager) is str:
            dataset_manager = SegmentationManager.build_default_dataset(dataset_manager)
        self.dataset_manager_ = dataset_manager

        # falsy managers are replaced by the default ones
        self.transform_manager_ = transform_manager or SegmentationManager.build_default_transformation()
        self.validation_manager_ = validation_manager or SegmentationManager.build_default_validation()
        self.metrics_ = metrics

        # translates the metric indexes into summary indexes
        # (the summary file has 3 columns before the first metric)
        self.metrics_order_ = [(metric_index + 3, minimize) for metric_index, minimize in metrics_order]

        # file name of the best checkpoint saved so far (none yet)
        self.best_model_ = None

        # selects the gpu
        device = torch.device(f"cuda:{gpu_device}")
        torch.cuda.set_device(device)
        self.gpu_device_ = gpu_device
    
    def get_codes_template():
        """
        Description:
        Returns the template of the codes file of the dataset, as given by DatasetManager.get_codes_template().

        Parameters:
        None.

        Returns:
        d (dict): the template.
        """
        template = DatasetManager.get_codes_template()
        return template
    
    def build_default_dataset(root_dir, img_prefix = "", mask_prefix = ""):
        """
        Description:
        Builds a DatasetManager with the default configuration used by the SegmentationManager.

        Parameters:
        root_dir (str): the root path to the data. It must contain the "images" and "masks" directories and the "codes.json" file. To see the codes.json template, type SegmentationManager.get_codes_template().
        img_prefix (str, ""): the prefix used in the images.
        mask_prefix (str, ""): the prefix used in the masks.

        Returns:
        dm (DatasetManager): the built DatasetManager.
        """
        # default configuration; only the prefixes can be chosen by the caller
        options = {
            "img_prefix": img_prefix,
            "mask_prefix": mask_prefix,
            "img_suffix": "",
            "mask_suffix": "",
            "delete_prefixes": False,
            "img_map": None,
            "mask_map": None,
            "check_maps": True,
            "check_map_fails": None,
            "convert_masks": True,
            "noise_class": 0,
        }
        return DatasetManager(root_dir, **options)
    
    def build_default_transformation(transformations = []):
        """
        Description:
        Builds a TransformManager from a list of transformations.

        Parameters:
        transformations (list[albumentations]): a list of albumentations. By default: RandomRotate90 (50%), Flip (50%), RandomBrightnessContrast (50%). If you want to use this default configuration and something more, use a list like ["default", YourTransformation1, YourTransformation2, ...].

        Returns:
        tm (TransformManager): the built TransformManager.
        """
        manager = TransformManager(transformations = transformations)
        return manager
    
    def build_default_validation(mode = "kfold", random_state = None):
        """
        Description:
        Builds the default validation mode.

        Parameters:
        mode (str, "kfold"): the mode used to validate. "traintest" or "kfold".
        random_state (int, None): random seed used to shuffle the data.

        Returns:
        v (ValidationManager): the built validator.

        Raises:
        ValueError: if the mode is neither "traintest" nor "kfold".
        """
        if mode == "traintest":
            warnings.warn("Using the method traintest may cause data leakage in combination with other features like callbacks.", UserWarning)
            return ValidationManagerTrainValTest(train_size = 0.75, val_size = 0.15, test_size = 0.1,
                                                 shuffle = True, random_state = random_state)

        if mode == "kfold":
            return ValidationManagerKFold(train_size = 0.9, test_size = 0.1, n_splits = 5,
                                          shuffle = True, random_state = random_state)

        # an unknown mode used to fail later with an UnboundLocalError; fail with a clear message instead
        raise ValueError(f"Unknown validation mode {mode!r}: expected 'traintest' or 'kfold'.")
    
    @AOP.excepter(AttributeError)
    def __get_library__(architecture, backbone, weights = WEIGHTS.NONE, library = None):
        """
        Description:
        Looks for the first library that is able to build the model.

        Parameters:
        architecture (str): the model's architecture.
        backbone (str): the model's backbone.
        weights (str): the model's weights.
        library (str, None): if given, only this library is checked. It must be a key of SegmentationManager.LIBRARIES.

        Returns:
        t (tuple | None): the library name followed by the values returned by its is_buildable, or None if no checked library can build the model.
        """
        registry = SegmentationManager.LIBRARIES
        if type(library) is str and library not in registry:
            raise AttributeError(f"{library} not in SegmentationManager.LIBRARIES")

        # restricts the search to the requested library, if any
        candidates = {library : registry[library]} if library else registry

        for candidate, manager in candidates.items():
            configuration = manager.is_buildable(architecture, backbone, weights)
            if configuration:
                return (candidate, *configuration)

        return None
    
    def __start_fastai__(self, batch_size):
        """
        Description:
        Builds the dataloaders needed to train a fastai learner.

        Parameters:
        batch_size (int): the size of each batch.

        Returns:
        folds (dict): the dataloaders of each fold, without the "validation" entry.
        val_dls (DataLoader): the dataloaders stored under the "validation" key, used for the model validation.
        """
        # from generic to fastai albumentations
        fastai_transforms = TransformManagerFastai.from_transform_manager(self.transform_manager_)

        # builds the manager and asks it for the dataloaders
        loader_manager = DataLoaderManager(self.dataset_manager_,
                                           transforms = fastai_transforms,
                                           validation = self.validation_manager_,
                                           batch_size = batch_size
                                          )
        folds = loader_manager.get_dataloaders()

        # three entries mean a traintest evaluation: the only fold takes the place
        # of the test entry so the entire model is not trained twice
        if len(folds) == 3:
            folds["test"] = folds.pop("f_1")

        # the validation dataloaders are handed back separately
        val_dls = folds.pop("validation")

        return folds, val_dls
    
    def __save_process__(self, model_name, fold):
        """
        Description:
        Moves the content of history.csv into summary.csv (tagged with the model and the fold), deletes history.csv
        and, for the test fold, checks whether this model is the best one logged so far.

        Parameters:
        model_name (str): the model's name.
        fold (str): the fold's name.

        Returns:
        b (boolean): True if the fold is "test" and the best test row of the summary belongs to this model. False otherwise (also when there is no history file to save).
        """
        root_dir = self.dataset_manager_.root_dir_
        history_file = osp.join(root_dir, "history.csv")
        summary_file = osp.join(root_dir, "summary.csv")

        # nothing was logged, so there is nothing to save
        if not osp.isfile(history_file):
            return False

        # loads the history file without the time column (the last one)
        df_new = pd.read_csv(history_file).iloc[:, :-1].copy()

        # creates the model_name and fold columns and places them first
        df_new["model_name"] = model_name
        df_new["fold"] = fold
        df_new = df_new[["model_name", "fold", *df_new.columns[:-2]]]

        # loads the summary (or creates an empty one with the same layout)
        if osp.isfile(summary_file):
            df = pd.read_csv(summary_file)
        else:
            df = pd.DataFrame(columns = ["model_name", "fold", "epoch", "train_loss", "valid_loss", *df_new.columns[5:]])

        df = pd.concat([df, df_new])
        df.to_csv(summary_file, index = False)

        # deletes the history file
        os.remove(history_file)

        if fold != "test":
            return False

        # every metric is paired with its own direction, in the priority order given by metrics_order_
        metrics_names = [df.columns[index] for index, _ in self.metrics_order_]
        metrics_sorting = [must_minimize for _, must_minimize in self.metrics_order_]

        # sorts by the metrics and keeps only the testing folds
        df_sorted = df.sort_values(metrics_names, ascending = metrics_sorting)
        df_test = df_sorted[df_sorted.fold == "test"]

        # without test rows there is nothing to compare against
        best = model_name if df_test.empty else df_test.iloc[0, 0]

        return model_name == best
    
    def __fix_validation__(self):
        """
        Description:
        Rewrites history.csv after a validation: every column from the third one onwards takes the values
        of the column on its left, and the epoch and train_loss columns are emptied.

        Parameters:
        None.

        Returns:
        None.
        """
        history_file = osp.join(self.dataset_manager_.root_dir_, "history.csv")
        if not osp.isfile(history_file):
            return

        history = pd.read_csv(history_file)

        # shifts the values one column to the right, walking from the last column down to the third one
        for position in range(len(history.columns) - 1, 1, -1):
            history.iloc[:, position] = history.iloc[:, position - 1]

        # a validation row has neither epoch nor training loss
        history["epoch"] = np.nan
        history["train_loss"] = np.nan

        history.to_csv(history_file, index = False)
            
    def __fix_mmsegmentation__(self):
        """
        Description:
        Converts the mmsegmentation log file (None.log.json) into a history.csv-like file and deletes the log.

        Parameters:
        None.

        Returns:
        None.
        """
        root_dir = self.dataset_manager_.root_dir_
        log_file = osp.join(root_dir, "None.log.json")

        with open(log_file, "r") as f:
            # the first line is not a training record (presumably the run metadata - TODO confirm)
            rows = f.read().splitlines()[1:]

        # one JSON document per line; blank lines are ignored so a missing trailing
        # newline does not drop the last record
        data = [json.loads(row) for row in rows if row.strip()]
        df = pd.DataFrame(data)

        # averages the numeric values per epoch; numeric_only avoids the error raised
        # by recent pandas versions when the log has non-numeric columns
        df = df.groupby("epoch").mean(numeric_only = True).reset_index()
        df["train_loss"] = None
        df["epoch"] -= 1
        df = df[["epoch", "train_loss", "loss", "mIoU", "mAcc"]]
        df.columns = ["epoch", "train_loss", "valid_loss", "mIoU", "mAcc"]

        os.remove(log_file)
        df.to_csv(osp.join(root_dir, "history.csv"), index = False)
            
    def __clear_memory__():
        """
        Description:
        Frees the memory that is not referenced any more: runs the Python garbage collector and then
        empties the CUDA cache of torch.
        
        Parameters:
        None.
        
        Returns:
        None.
        """
        # collects first, so the memory of the objects released by the collector can be returned by empty_cache
        gc.collect()
        torch.cuda.empty_cache()
    
    def __build_model__(self, library, name, architecture, backbone, weights, dls, learning_rate):
        """
        Description:
        Creates the ModelManager of the given library.

        Parameters:
        library (str): the library. "semtorch", "smp", "segmentron" or "mmsegmentation".
        name (str): the model's name.
        architecture (str): the model's architecture.
        backbone (str): the model's backbone.
        weights (str): the model's weights.
        dls (DataLoader | dict): the data distribution used to train. For mmsegmentation, a dict with the keys "batch_size", "train_pipeline", "test_pipeline" and "data_split".
        learning_rate (float): the learning rate value.

        Returns:
        model (ModelManager | None): the built manager, or None if the library is unknown.
        """
        if library in ("semtorch", "smp", "segmentron"):
            # configuration shared by the three fastai based managers
            shared = {
                "name": name,
                "architecture": architecture,
                "backbone": backbone,
                "dls": dls,
                "root_dir": self.dataset_manager_.root_dir_,
                "checkpoints_dir": "checkpoint",
                "num_classes": len(self.dataset_manager_.class_names_),
                "loss_func": None,
                "opt_func": fastai.optimizer.Adam,
                "lr": learning_rate,
                "metrics": self.metrics_,
                "moms": (0.95, 0.85, 0.95),
                "cbs": None,
            }

            if library == "semtorch":
                return ModelManagerSemtorch(image_size = None, **shared)
            if library == "smp":
                return ModelManagerSMP(weights = weights, **shared)
            return ModelManagerSegmenTron(**shared)

        if library == "mmsegmentation":
            # this manager takes its name from the dataset manager and does not receive a backbone
            return ModelManagerMMSegmentation(name = self.dataset_manager_.name_,
                                              architecture = architecture,
                                              backbone = None,
                                              weights = weights,
                                              root_dir = self.dataset_manager_.root_dir_,
                                              batch_size = dls["batch_size"],
                                              num_classes = len(self.dataset_manager_.class_names_),
                                              train_pipeline = dls["train_pipeline"],
                                              test_pipeline = dls["test_pipeline"],
                                              data_split = dls["data_split"],
                                              gpu_device = self.gpu_device_
                                              )

        return None
    
    @AOP.logger("Training the fold FOLD.", when = "before")
    def __train_fold_fastai__(self, name, library, architecture, backbone, weights, fold, dls, val_dls,
                       mode = "fine_tune", n_epochs = 10, n_freeze_epochs = 2, learning_rate = "best"):
        """
        Description:
        Trains one fold of a fastai based model, logs it into the summary and keeps the checkpoint if the model is the best one.

        Parameters:
        name (str): the model's name.
        library (str): the model's library.
        architecture (str): the model's architecture.
        backbone (str): the model's backbone.
        weights (str): the model's weights.
        fold (str): the fold which is being trained.
        dls (DataLoader): the data distribution used to train.
        val_dls (DataLoader): the data used for validation (only used in the "test" fold).
        mode (str, "fine_tune"): the type of training. fit, fit_one_cycle or fine_tune.
        n_epochs (int, 10): the number of epochs to train.
        n_freeze_epochs (int, 2): if the mode of training is fine_tune, the number of freeze epochs.
        learning_rate (float | str, "best"): the learning rate value. With "best", the lr_ attribute of the built model is returned instead.

        Returns:
        lr (float): the value of the learning rate.
        """
        # builds the model
        learner = self.__build_model__(library, name, architecture, backbone, weights, dls, learning_rate)

        # "best" is replaced by the learning rate stored in the model
        if learning_rate == "best":
            learning_rate = learner.lr_

        # trains it (an unknown mode trains nothing)
        if mode == "fit":
            learner.fit(n_epochs = n_epochs)
        elif mode == "fit_one_cycle":
            learner.fit_one_cycle(n_epochs = n_epochs)
        elif mode == "fine_tune":
            learner.fine_tune(n_epochs = n_epochs, n_freeze_epochs = n_freeze_epochs)

        # in the test fold the training log is saved first, then the model is
        # validated and the validation log is fixed so it can be saved as well
        best_after_training = False
        if fold == "test":
            best_after_training = self.__save_process__(name, fold)
            learner.validate(val_dls)
            self.__fix_validation__()

        # saves the pending log
        best_after_saving = self.__save_process__(name, fold)

        if best_after_saving or best_after_training:
            # removes the previous best model
            if self.best_model_:
                previous = osp.join(self.dataset_manager_.root_dir_, "checkpoint", self.best_model_)
                os.remove(previous)

            # saves this one
            learner.save(name = name)
            self.best_model_ = name + ".pth"

        return learning_rate
    
    @AOP.logger("Training the fold FOLD.", when = "before")
    def __train_fold_mmsegmentation__(self, name, arch_back, weights, batch_size, train_pipeline, test_pipeline,
                                      data_split, fold, mode = "fine_tune", n_epochs = 10):
        """
        Description:
        Trains one fold of a mmsegmentation model, logs it into the summary and moves the trained
        model to the checkpoint/mmsegmentation directory.

        Parameters:
        name (str): the model's name.
        arch_back (str): the model's architecture and backbone.
        weights (str): the model's weights.
        batch_size (int): the batch size.
        train_pipeline (list): the pipeline used for training.
        test_pipeline (list): the pipeline used for testing.
        data_split (dict): the directory structure used for the training.
        fold (str): the fold which is being trained.
        mode (str, "fine_tune"): the type of training. fit, fit_one_cycle or fine_tune.
        n_epochs (int, 10): the number of epochs to train.

        Returns:
        None.
        """
        # builds the model
        dls = {
            "batch_size": batch_size,
            "train_pipeline": train_pipeline,
            "test_pipeline": test_pipeline,
            "data_split": data_split
        }
        model = self.__build_model__("mmsegmentation", name, arch_back, None, weights, dls, None)

        # trains it
        if mode == "fit":
            model.fit(n_epochs = n_epochs)
        elif mode == "fit_one_cycle":
            model.fit_one_cycle(n_epochs = n_epochs)
        elif mode == "fine_tune":
            model.fine_tune(n_epochs = n_epochs, n_freeze_epochs = None)

        # saves the process
        self.__fix_mmsegmentation__()
        self.__save_process__(name, fold)

        # creates the checkpoints dirs (both levels at once)
        root_dir = self.dataset_manager_.root_dir_
        checkpoint = osp.join(root_dir, "checkpoint")
        checkpoint_mmsegmentation = osp.join(checkpoint, "mmsegmentation")
        os.makedirs(checkpoint_mmsegmentation, exist_ok = True)

        # list_files walks root_dir recursively, so the models already stored under
        # the checkpoint directory must be left out or they would be deleted below.
        # The list is built before touching the disk.
        checkpoint_abs = os.path.abspath(checkpoint)
        models_files = [
            file for file in paths.list_files(root_dir, validExts = ".pth")
            if os.path.commonpath([checkpoint_abs, os.path.abspath(file)]) != checkpoint_abs
        ]

        # moves the trained model to the checkpoints dir and deletes the other ones
        for file in models_files:
            if osp.basename(file) == "latest.pth":
                # os.replace overwrites the model saved by a previous fold on every platform
                os.replace(file, osp.join(checkpoint_mmsegmentation, name + ".pth"))
            else:
                os.remove(file)
                
    @AOP.logger("Training the model MODEL.", when = "before")
    @AOP.excepter(LibraryNotFound, ignore = True)
    def __train_model__(self, model, batch_size = 4, mode = "fine_tune", n_epochs = 10, n_freeze_epochs = 2):
        """
        Description:
        Trains every fold of a model with the first library that is able to build it.

        Parameters:
        model (tuple): the metamodel description: (name, architecture[, backbone[, weights[, learning_rate[, library]]]]).
        batch_size (int, 4): the number of images showed to the model by batch.
        mode (str, "fine_tune"): the type of training. fit, fit_one_cycle or fine_tune.
        n_epochs (int, 10): the number of epochs to train.
        n_freeze_epochs (int, 2): if the mode of training is fine_tune, the number of freeze epochs.

        Returns:
        None.

        Raises:
        LibraryNotFound: if no library can build the configuration (handled by the AOP.excepter decorator).
        """
        # gets the params: the missing optional ones take their defaults
        name, architecture, *extras = model
        defaults = (BACKBONE.NONE, WEIGHTS.NONE, "best", None)
        given = extras[:4]
        backbone, weights, learning_rate, library = (*given, *defaults[len(given):])
        if len(extras) >= 4:
            print(f"[LOGGER]: Forced to use {library}.")

        # gets the library which can build this model
        found = SegmentationManager.__get_library__(architecture, backbone, weights, library = library)
        if found:
            library, architecture, backbone, weights = found
        else:
            library = None

        if library in ("semtorch", "smp", "segmentron"):
            # training for fastai: gets the dataloaders of every fold
            folds, val_dls = self.__start_fastai__(batch_size = batch_size)

            # trains the folds; the learning rate returned by a fold is reused in the next one
            for fold, fold_dls in folds.items():
                learning_rate = self.__train_fold_fastai__(name = name, library = library, architecture = architecture,
                                                           backbone = backbone, weights = weights, fold = fold,
                                                           dls = fold_dls, val_dls = val_dls, mode = mode,
                                                           n_epochs = n_epochs, n_freeze_epochs = n_freeze_epochs,
                                                           learning_rate = learning_rate)
                SegmentationManager.__clear_memory__()

        elif library == "mmsegmentation":
            # mmsegmentation needs its own dataset manager, named after the model
            if self.dataset_manager_.__class__.__name__ != "DatasetManagerMMSegmentation":
                self.dataset_manager_ = DatasetManagerMMSegmentation.from_dataset_manager(self.dataset_manager_, name)
            else:
                self.dataset_manager_.name_ = name

            self.dataset_manager_.build_dataset()
            transformations = TransformManagerMMSegmentation.from_transform_manager(self.transform_manager_).get_pipeline()
            validation = self.validation_manager_.split(self.dataset_manager_)

            # gets the splits: every f_<n> entry is paired with the shared test entry
            splits = []
            for index in range(1, len(validation)):
                fold_split = validation[f"f_{index}"]
                splits.append({"train": fold_split["train"], "val": fold_split["val"], "test": validation["test"]})

            # trains the model
            for number, data_split in enumerate(splits, start = 1):
                self.__train_fold_mmsegmentation__(name = name, arch_back = architecture, weights = weights,
                                                   batch_size = batch_size, train_pipeline = transformations[0],
                                                   test_pipeline = transformations[1], data_split = data_split,
                                                   fold = f"f_{number}", mode = mode, n_epochs = n_epochs)
                SegmentationManager.__clear_memory__()

        else:
            raise LibraryNotFound("The model configuration does not correspond to any library configuration.")
    
    @AOP.logger("Starting the multiple training.", when = "before")
    @AOP.logger("Multiple training ended.")
    @AOP.excepter(TypeError)
    def multiple_train(self, models, batch_size = 4,
                       mode = "fine_tune", n_epochs = 10, n_freeze_epochs = 2):
        """
        Description:
        Trains several models, one after another, with the same training configuration.

        Parameters:
        models (list[tuple]): a list with the metadata of every model to train. The first element of each tuple is the model's name, which must be unique.
        batch_size (int, 4): the number of images showed to the model by batch.
        mode (str, "fine_tune"): the type of training. fit, fit_one_cycle or fine_tune.
        n_epochs (int, 10): the number of epochs to train.
        n_freeze_epochs (int, 2): if the mode of training is fine_tune, the number of freeze epochs.

        Returns:
        None.

        Raises:
        TypeError: if two models share the same name (handled by the AOP.excepter decorator).
        """
        # the name identifies the model in the summary, so it cannot be repeated
        unique_names = {model[0] for model in models}
        if len(unique_names) != len(models):
            raise TypeError("The models names must be unique.")

        training_options = {
            "batch_size": batch_size,
            "mode": mode,
            "n_epochs": n_epochs,
            "n_freeze_epochs": n_freeze_epochs,
        }
        for model in models:
            self.__train_model__(model = model, **training_options)
            SegmentationManager.__clear_memory__()
        
        
    def summary(self, models = "all", folds = "test", epoch = "test", limit = None, **metrics):
        """
        Description:
        Filters and sorts the summary log (summary.csv) to show the results.

        Parameters:
        models (str | list[str], "all"): the models to show. Every string is looked for inside the model names. A single string can be given instead of a list.
        folds (str | list[str], "test"): the folds to show. Same as the models param.
        epoch (int | str, "test"): the max. epoch to show (all the epochs up to this one are shown). If -1, shows all the epochs; if "test", shows only the rows without epoch.
        limit (int, None): the amount of rows to be shown. All by default.
        metrics (metric_name = str | boolean; ...): the order used for each column: "asc", "desc" or a boolean (True means ascending). The order of the arguments is the priority of the sort. "model_name", "fold", "epoch", "train_loss" and "valid_loss" can be used as well.

        Returns:
        df (DataFrame | None): the filtered DataFrame of results, or None if there is no summary file.
        """
        summary_file = osp.join(self.dataset_manager_.root_dir_, "summary.csv")
        if not osp.isfile(summary_file):
            return None

        df = pd.read_csv(summary_file)

        # filter by model: one selection per pattern, concatenated
        if models != "all":
            model_patterns = [models] if type(models) is str else models
            selections = [df[[pattern in model_name for model_name in df.model_name]] for pattern in model_patterns]
            df = pd.concat(selections)

        # filter by fold: same procedure
        if folds != "all":
            fold_patterns = [folds] if type(folds) is str else folds
            selections = [df[[pattern in fold for fold in df.fold]] for pattern in fold_patterns]
            df = pd.concat(selections)

        # filter by epoch
        if epoch == "test":
            df = df[df["epoch"].isna()]
        elif epoch != -1:
            df = df[df.epoch <= epoch]

        # order by metrics: booleans are used as they are, strings are ascending only if "asc"
        if metrics:
            sort_columns = list(metrics)
            sort_ascending = [order if type(order) is bool else bool(order == "asc") for order in metrics.values()]
            df.sort_values(sort_columns, ascending = sort_ascending, inplace = True)

        # a row matched by several patterns is shown only once
        unique_rows = df.drop_duplicates()
        return unique_rows.reset_index(drop = True).head(limit)
    
    def plot_train_valid(self, models = "all", metric = "valid_loss"):
        """
        Description:
        Shows a boxplot, per model, of a metric logged in the summary for the non-test folds.

        Parameters:
        models (str | list, "all"): the models to add to the plot. Every string is looked for inside the model names.
        metric (str, "valid_loss"): the metric to plot.

        Returns:
        None.
        """
        summary_file = osp.join(self.dataset_manager_.root_dir_, "summary.csv")
        if osp.isfile(summary_file):
            df = pd.read_csv(summary_file)

            # filter by model
            if models != "all":
                if isinstance(models, str):
                    models = [models]
                _dfs = [df[[model in model_name for model_name in df.model_name]] for model in models]
                df = pd.concat(_dfs)

            # drops the test rows (query returns a new DataFrame, so it must be assigned)
            df = df.query("fold != 'test'")

            if not df.empty:
                sns.boxplot(x = "model_name", y = metric, data = df)