## Generate_distill_submission.py

In [None]:
import  pytorch_lightning  as  pl
from  pytorch_lightning . callbacks  import  EarlyStopping

# Third party libraries
import  torch
from  scipy . special  import  softmax
from  torch . utils . data  import  DataLoader
from  tqdm  import  tqdm

# User defined libraries
from  dataset  import  generate_transforms , PlantDataset
from  train  import  CoolSystem
from  utils  import  init_hparams , init_logger , seed_reproducer , load_data


if __name__ == "__main__":
    # Entry point: load the five per-fold checkpoints, run repeated inference
    # over the test set with each of them, average the softmax probabilities
    # and write the result to "submission_distill.csv".

    # Make the experiment reproducible.
    seed_reproducer(2020)

    # Initialise hyperparameters.
    hparams = init_hparams()

    # Initialise the logger.
    logger = init_logger("kun_out", log_dir=hparams.log_dir)

    # Load data.
    data, test_data = load_data(logger)

    # Build the transforms (this is where data augmentation is configured).
    transforms = generate_transforms(hparams.image_size)

    early_stop_callback = EarlyStopping(monitor="val_roc_auc", patience=10, mode="max", verbose=True)

    # Instantiate the model and the trainer.
    # NOTE(review): `trainer` is never used below; it is kept because its
    # construction may have side effects -- confirm whether it can be dropped.
    model = CoolSystem(hparams)
    trainer = pl.Trainer(
        gpus=hparams.gpus,
        min_epochs=70,
        max_epochs=hparams.max_epochs,
        early_stop_callback=early_stop_callback,
        progress_bar_refresh_rate=0,
        precision=hparams.precision,
        num_sanity_val_steps=0,
        profiler=False,
        weights_summary=None,
        use_dp=True,
        gradient_clip_val=hparams.gradient_clip_val,
    )

    submission = []
    PATH = [
        "logs_submit_distill/fold=0-epoch=59-val_loss=0.7352-val_roc_auc=0.9928.ckpt",
        "logs_submit_distill/fold=1-epoch=28-val_loss=0.8069-val_roc_auc=0.9918.ckpt",
        "logs_submit_distill/fold=2-epoch=28-val_loss=0.7605-val_roc_auc=0.9959.ckpt",
        "logs_submit_distill/fold=3-epoch=66-val_loss=0.7628-val_roc_auc=0.9850.ckpt",
        "logs_submit_distill/fold=4-epoch=32-val_loss=0.7845-val_roc_auc=0.9915.ckpt",
    ]

    # ==========================================================================
    # Test Submit
    # ==========================================================================

    # Build the test set.
    # NOTE(review): the *train* transforms are applied to the test data and
    # inference is repeated several times per checkpoint -- presumably
    # test-time augmentation; confirm that "train_transforms" is stochastic.
    test_dataset = PlantDataset(
        test_data,
        transforms=transforms["train_transforms"],
        soft_labels_filename=hparams.soft_labels_filename,
    )
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=64,
        shuffle=False,
        num_workers=hparams.num_workers,
        pin_memory=True,
        drop_last=False,
    )

    # Number of inference passes over the test set per checkpoint.
    n_passes = 8

    for path in PATH:
        # Load the trained weights for this fold.
        model.load_state_dict(torch.load(path)["state_dict"])
        model.to("cuda")
        model.eval()

        # Predict the test set; every pass contributes one entry to `submission`.
        for _ in range(n_passes):
            test_preds = []
            with torch.no_grad():
                # Each batch is (image, label, load_time); only the image is needed here.
                for image, _label, _times in tqdm(test_dataloader):
                    test_preds.append(model(image.to("cuda")))

                submission.append(torch.cat(test_preds).cpu().numpy())

    # Average the per-pass softmax probabilities (each pass weighted equally).
    submission_ensembled = sum(softmax(sub, axis=1) / len(submission) for sub in submission)

    # Overwrite every column after the first with the ensembled probabilities.
    test_data.iloc[:, 1:] = submission_ensembled
    test_data.to_csv("submission_distill.csv", index=False)

### Model.py

In [None]:
import torch
import torch.nn as nn
import pretrainedmodels

# Normalisation function used by the classification head
def l2_norm(input, axis=1, eps=1e-12):
    """Scale ``input`` to unit L2 norm along ``axis``.

    Args:
        input: Tensor to normalise.
        axis: Dimension along which the norm is computed.
        eps: Lower bound applied to the norm before dividing, so that an
            all-zero slice yields zeros instead of NaN.

    Returns:
        A tensor with the same shape as ``input``.
    """
    # keepdim=True so the norm broadcasts against ``input`` in the division.
    norm = torch.norm(input, 2, axis, True)
    # Guard against division by zero for all-zero slices.
    output = torch.div(input, norm.clamp(min=eps))
    return output

class BinaryHead(nn.Module):
    """Linear classification head applied to L2-normalised embeddings.

    Args:
        num_class: Number of output logits.
        emb_size: Size of the incoming embedding vector.
        s: Multiplier applied to the logits.
    """

    def __init__(self, num_class=4, emb_size=2048, s=16.0):
        super().__init__()
        self.s = s
        # Wrapped in nn.Sequential so the parameters are registered as
        # "fc.0.weight" / "fc.0.bias".
        classifier = nn.Linear(emb_size, num_class)
        self.fc = nn.Sequential(classifier)

    def forward(self, fea):
        """Return the scaled logits for the embedding batch ``fea``."""
        unit_fea = l2_norm(fea)
        scaled_logit = self.fc(unit_fea) * self.s
        return scaled_logit


# Definition of the large model (knowledge distillation)
class se_resnext50_32x4d(nn.Module):
    """SE-ResNeXt50 (32x4d) feature extractor followed by a 4-way ``BinaryHead``.

    The ImageNet-pretrained network from ``pretrainedmodels`` is truncated by
    dropping its last two child modules; adaptive average pooling, a
    ``BatchNorm1d`` over the 2048 pooled features and a ``BinaryHead`` follow.
    """

    def __init__(self):
        super().__init__()
        # Load the pretrained network and keep all but its last two children.
        pretrained = pretrainedmodels.__dict__["se_resnext50_32x4d"](
            num_classes=1000, pretrained="imagenet"
        )
        backbone_layers = list(pretrained.children())[:-2]
        self.model_ft = nn.Sequential(*backbone_layers)
        # Pool every feature map down to a single value per channel.
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.model_ft.last_linear = None
        self.fea_bn = nn.BatchNorm1d(2048)
        # The batch-norm bias is excluded from gradient updates.
        self.fea_bn.bias.requires_grad_(False)
        self.binary_head = BinaryHead(4, emb_size=2048, s=1)
        # Defined but not applied in forward().
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return the class logits for the image batch ``x``."""
        feature_map = self.model_ft(x)  # pretrained backbone
        pooled = self.avg_pool(feature_map)
        # Flatten everything after the batch dimension.
        flat = pooled.view(pooled.size(0), -1)
        normed = self.fea_bn(flat)  # first normalisation (batch norm)
        # The head applies the second normalisation (L2) before its linear layer.
        return self.binary_head(normed)

### Loss_function.py

In [None]:
# Definition of the loss function
class CrossEntropyLossOneHot(nn.Module):
    """Cross-entropy between logits and dense (one-hot or soft) label vectors."""

    def __init__(self):
        super().__init__()
        # log(softmax(x)) over the last dimension.
        self.log_softmax = nn.LogSoftmax(dim=-1)

    def forward(self, preds, labels):
        """Return the mean over samples of ``sum(-labels * log_softmax(preds))``."""
        log_probs = self.log_softmax(preds)
        per_sample = (-labels * log_probs).sum(-1)
        return per_sample.mean()

### train.py

In [None]:
# Definition of the small model
class CoolSystem(pl.LightningModule):
    """Lightning module bundling the SE-ResNeXt50 classifier, its loss and optimiser.

    Args:
        hparams: Hyperparameter namespace; this class reads ``seed``,
            ``log_dir`` and ``fold_i`` from it.
    """

    def __init__(self, hparams):
        super().__init__()
        self.hparams = hparams

        # Re-seed before building the model so weight initialisation is
        # identical every time the module is constructed.
        seed_reproducer(self.hparams.seed)

        self.model = se_resnext50_32x4d()  # pretrained backbone + head
        self.criterion = CrossEntropyLossOneHot()  # loss
        self.logger_kun = init_logger("kun_in", hparams.log_dir)  # log file

    def forward(self, x):
        """Run the wrapped model on the batch ``x``."""
        return self.model(x)

    def configure_optimizers(self):
        """Create the Adam optimiser and its warm-restart scheduler.

        Returns:
            A pair ``([optimizer], [scheduler])``.
        """
        self.optimizer = torch.optim.Adam(
            self.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0
        )
        self.scheduler = WarmRestart(self.optimizer, T_max=10, T_mult=1, eta_min=1e-5)
        return [self.optimizer], [self.scheduler]

    def training_step(self, batch, batch_idx):
        """Compute the training loss and timing figures for one batch.

        Args:
            batch: Tuple ``(images, labels, data_load_time)``.
            batch_idx: Index of the batch (unused).

        Returns:
            Dict with the keys ``"loss"``, ``"data_load_time"`` and
            ``"batch_run_time"``; all values are tensors.
        """
        step_start_time = time()
        images, labels, data_load_time = batch

        scores = self(images)
        loss = self.criterion(scores, labels)
        # The third batch element is summed into one scalar
        # (presumably per-sample loading times -- confirm against the dataset).
        data_load_time = torch.sum(data_load_time)

        return {
            "loss": loss,
            "data_load_time": data_load_time,
            # Time spent in this step plus the data loading time, kept on the
            # same device as ``data_load_time``.
            "batch_run_time": torch.Tensor([time() - step_start_time + data_load_time]).to(
                data_load_time.device
            ),
        }

    def training_epoch_end(self, outputs):
        """Aggregate the per-batch training outputs at the end of an epoch.

        Args:
            outputs: List of the dicts returned by ``training_step``.

        Returns:
            Dict with the mean training loss under ``"train_loss"``.
        """
        train_loss_mean = torch.stack([output["loss"] for output in outputs]).mean()
        self.data_load_times = torch.stack([output["data_load_time"] for output in outputs]).sum()
        self.batch_run_times = torch.stack([output["batch_run_time"] for output in outputs]).sum()

        self.current_epoch += 1

        # Restart the scheduler every epoch except during the last few epochs.
        if self.current_epoch < (self.trainer.max_epochs - 4):
            self.scheduler = warm_restart(self.scheduler, T_mult=2)

        return {"train_loss": train_loss_mean}

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss, scores and timing figures for one batch.

        Args:
            batch: Tuple ``(images, labels, data_load_time)``.
            batch_idx: Index of the batch (unused).

        Returns:
            Dict with the keys ``"val_loss"``, ``"scores"``, ``"labels"``,
            ``"data_load_time"`` and ``"batch_run_time"``.
        """
        step_start_time = time()
        images, labels, data_load_time = batch
        data_load_time = torch.sum(data_load_time)
        scores = self(images)
        loss = self.criterion(scores, labels)

        return {
            "val_loss": loss,
            "scores": scores,
            "labels": labels,
            "data_load_time": data_load_time,
            "batch_run_time": torch.Tensor([time() - step_start_time + data_load_time]).to(
                data_load_time.device
            ),
        }

    def validation_epoch_end(self, outputs):
        """Aggregate validation outputs, compute ROC AUC and log a summary line.

        Args:
            outputs: List of the dicts returned by ``validation_step``.

        Returns:
            Dict with the keys ``"val_loss"`` and ``"val_roc_auc"``.
        """
        # Mean loss and total timings over the epoch.
        val_loss_mean = torch.stack([output["val_loss"] for output in outputs]).mean()
        self.data_load_times = torch.stack([output["data_load_time"] for output in outputs]).sum()
        self.batch_run_times = torch.stack([output["batch_run_time"] for output in outputs]).sum()

        # ROC AUC over all validation samples; labels are rounded first
        # (presumably to turn soft labels into hard 0/1 targets -- confirm).
        scores_all = torch.cat([output["scores"] for output in outputs]).cpu()
        labels_all = torch.round(torch.cat([output["labels"] for output in outputs]).cpu())
        val_roc_auc = roc_auc_score(labels_all, scores_all)

        # One summary line per validation epoch.
        self.logger_kun.info(
            f" {self.hparams.fold_i} - {self.current_epoch} | "
            f"lr: {self.scheduler.get_lr()[0]:.6f} | "
            f"val_loss: {val_loss_mean:.4f} | "
            f"val_roc_auc: {val_roc_auc:.4f} | "
            f"data_load_times: {self.data_load_times:.2f} | "
            f"batch_run_times: {self.batch_run_times:.2f} "
        )
        return {"val_loss": val_loss_mean, "val_roc_auc": val_roc_auc}