In [1]:
import os
import gc
import time
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from pylab import rcParams
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.cuda.amp as amp
from torch.utils.data import  Dataset

%matplotlib inline
# Default figure size (width, height) for every matplotlib plot in this notebook.
rcParams['figure.figsize'] = 20, 8
# Everything below is moved to this device; the notebook assumes a CUDA GPU is present.
device = torch.device('cuda')
# Let cuDNN benchmark its kernels and pick the fastest for the observed input shapes.
torch.backends.cudnn.benchmark = True

# Config

In [2]:

# Number of cross-validation folds.
n_folds = 5

# NOTE(review): the two sequence lengths below are not referenced in the visible
# part of this notebook; presumably they describe the upstream stage that
# produced the logits -- confirm before relying on them.
len_seq_bowl = 128 # all abdomen
len_seq_lsk = 24 # liver/spleen/kidney


# Initial learning rate handed to the optimizer.
init_lr = 1e-2

# Lower bound of the cosine learning-rate schedule.
eta_min = 0
# Patients per mini-batch (the dataset yields one item per patient).
batch_size = 64

# Directories holding the per-fold `model{fold}/logits.csv` files read by the dataset.
data_dir_bowel = './logits2b'
data_dir_lsk = './logits2lstm/'
# DataLoader worker processes.
num_workers = 6
# Training epochs per fold; also used as the first restart period of the cosine schedule.
n_epochs = 5

# Output locations for the text log and model checkpoints; created if missing.
log_dir = './logs'
model_dir = './models_stage3_all'
os.makedirs(log_dir, exist_ok=True)
os.makedirs(model_dir, exist_ok=True)

# DataFrame

In [3]:
from sklearn.model_selection import KFold

# Index -> target name, in the same order as the per-series `label` list
# that is assembled further down in this cell.
organ_dict = {
    0: "liver",
    1: "spleen",
    2: 'kidney',
    3: "bowel",
    4: 'extravasation'
}

# Patient-level label table (injury columns are read from it below).
df_p = pd.read_csv('/kaggle/input/rsna-2023-abdominal-trauma-detection/train.csv')

# Series-level metadata (series_id and aortic_hu are read from it below).
df_s = pd.read_csv('/kaggle/input/rsna-2023-abdominal-trauma-detection/train_series_meta.csv')

# Assign every patient to exactly one validation fold. The split is made on the
# patient table, so all series of a patient end up in the same fold.
# The fold count comes from the config cell (`n_folds`) instead of a second
# hard-coded literal, so the two can no longer drift apart.
df_p['fold'] = -1
kf = KFold(n_folds, shuffle=True, random_state=42)
for fold, (train_idx, valid_idx) in enumerate(kf.split(df_p)):
    # KFold yields positional indices; they are valid as labels here because
    # df_p still carries the default RangeIndex from read_csv.
    df_p.loc[valid_idx, 'fold'] = fold

# One row per (patient, series); `count` is the number of series of that patient.
df = df_p.merge(df_s, on="patient_id")
df["count"] = df.groupby(["patient_id"])["patient_id"].transform("count")


# Collapse the merged patient/series table into one compact record per series:
# three-way organ targets become a single class index (0 = healthy, 1 = low,
# 2 = high) and the five targets are additionally packed into a `label` list.
records = []
for row in df.to_dict(orient="records"):
    lab_liver, lab_spleen, lab_kidney = (
        np.argmax([row[f"{organ}_healthy"], row[f"{organ}_low"], row[f"{organ}_high"]])
        for organ in ("liver", "spleen", "kidney")
    )
    lab_bowel = row["bowel_injury"]
    lab_extravasation = row["extravasation_injury"]

    records.append({
        'patient_id': row["patient_id"],
        'series_id': row["series_id"],
        'hu': row["aortic_hu"],
        'label': [lab_liver, lab_spleen, lab_kidney, lab_bowel, lab_extravasation],
        "liver": lab_liver,
        "spleen": lab_spleen,
        "kidney": lab_kidney,
        'bowel': lab_bowel,
        "extravasation": lab_extravasation,
        "any_injury": row["any_injury"],
        'fold': row["fold"],
        'counts': row["count"],
    })

df = pd.DataFrame(records)
# Keep the id columns while also exposing them as a (pid, sid) MultiIndex,
# then order the series of each patient by ascending aortic HU.
df = df.set_index(['patient_id', 'series_id'], drop=False)
df.index.names = ["pid", "sid"]
df = df.sort_values(by=["patient_id", "hu"])



In [4]:

df


Unnamed: 0_level_0,Unnamed: 1_level_0,patient_id,series_id,hu,label,liver,spleen,kidney,bowel,extravasation,any_injury,fold,counts
pid,sid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
19,14374,19,14374,174.00,"[0, 0, 0, 0, 0]",0,0,0,0,0,0,3,1
26,41997,26,41997,101.00,"[0, 0, 0, 0, 0]",0,0,0,0,0,0,2,2
26,18881,26,18881,255.00,"[0, 0, 0, 0, 0]",0,0,0,0,0,0,2,2
33,55570,33,55570,105.00,"[1, 0, 0, 1, 1]",1,0,0,1,1,1,1,1
43,36714,43,36714,54.75,"[0, 0, 0, 1, 1]",0,0,0,1,1,1,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
65456,40781,65456,40781,162.00,"[0, 2, 0, 1, 0]",0,2,0,1,0,1,2,1
65495,8371,65495,8371,248.00,"[0, 0, 0, 0, 0]",0,0,0,0,0,0,1,1
65504,55928,65504,55928,144.00,"[1, 2, 0, 0, 0]",1,2,0,0,0,1,2,2
65504,50212,65504,50212,276.00,"[1, 2, 0, 0, 0]",1,2,0,0,0,1,2,2


# Dataset

In [5]:
from ast import literal_eval

def extract_list(df):
    """Parse the ``logits`` column of ``df`` in place from text to tuples of floats.

    Each cell is expected to hold the printed form of a 1-D numeric array,
    e.g. ``"[ -3.0672  -0.9340]"``. Brackets are discarded and the remaining
    text is split on any whitespace (commas are accepted as separators too).

    The parsing is done with plain Python string methods, so it does not
    depend on the pandas version's default for ``Series.str.replace(regex=...)``;
    a pattern such as ``"[ "`` is not a valid regular expression. It also copes
    with single-element arrays, line breaks and arbitrarily long runs of spaces.

    Args:
        df: DataFrame with a string column named ``logits``. Modified in place.

    Returns:
        None.

    Raises:
        ValueError: if a cell is not a string or contains a non-numeric token.
    """
    def _parse(text):
        if not isinstance(text, str):
            raise ValueError(f"malformed logits entry: {text!r}")
        cleaned = text.replace("[", " ").replace("]", " ").replace(",", " ")
        # str.split() with no argument collapses every run of whitespace.
        return tuple(float(token) for token in cleaned.split())

    df["logits"] = df.logits.apply(_parse)


class CLS3Dataset(Dataset):
    """Patient-level dataset built on pre-computed per-series logits.

    One item corresponds to one patient. For every series of that patient the
    stored bowel logits and the liver/spleen/kidney logits are looked up and
    the series' ``hu`` value is appended as an extra trailing feature. A
    patient with a single series has that series duplicated, so every item
    carries two rows per organ.

    Args:
        df: frame indexed by (patient, series) that also holds ``patient_id``,
            ``series_id``, ``hu`` and the label columns.
        mode: ``'test'`` makes ``__getitem__`` return features only; any other
            value returns ``(features, labels)``.
        fold: selects the ``model{fold}/logits.csv`` files to read.
    """

    # Organ name -> value of the `organ` index level in the liver/spleen/kidney logits file.
    _ORGAN_CODES = (("liver", 0), ("spleen", 1), ("kidney", 2))
    _LABEL_KEYS = ("liver", "spleen", "kidney", "bowel", "extravasation", "any_injury")

    def __init__(self, df, mode, fold):

        self.df = df
        self.mode = mode
        self.fold = fold
        # Unique patients are resolved once here instead of on every
        # __getitem__ call; __len__ uses the same list so both always agree.
        self.patient_ids = self.df.index.unique(level=0)
        self.logits_bowel = pd.read_csv(data_dir_bowel+"/"+f"model{self.fold}/logits.csv")
        self.logits_lsk = pd.read_csv(data_dir_lsk+"/"+f"model{self.fold}/logits.csv")
        self.logits_bowel.set_index(["patient_id", "series_id"], inplace=True)
        self.logits_lsk.set_index(["patient_id", "series_id", "organ"], inplace=True)
        # Turn the textual `logits` column of both tables into numeric sequences.
        extract_list(self.logits_bowel)
        extract_list(self.logits_lsk)

    def __len__(self):
        return len(self.patient_ids)

    def __getitem__(self, index):
        # All rows (series) of one patient; the notebook expects one or two.
        rows = self.df.loc[[self.patient_ids[index]]]

        per_series = {"bowel": [], "liver": [], "spleen": [], "kidney": [], "hu": []}
        for i in range(len(rows)):
            row = rows.iloc[i]
            pid = row.patient_id
            sid = row.series_id
            hu = np.array([row.hu])

            feats = {"bowel": self.logits_bowel.loc[(pid, sid)].values[0]}
            for organ, code in self._ORGAN_CODES:
                feats[organ] = self.logits_lsk.loc[(pid, sid, code)].values[0]

            # Append the series' HU value as the last feature of every vector.
            for name, logits in feats.items():
                per_series[name].append(np.concatenate((np.array(logits), hu)))
            per_series["hu"].append((row.hu,))

        if len(rows) == 1:
            # Duplicate the only series so the per-patient shape is fixed.
            for series_list in per_series.values():
                series_list.append(series_list[0])

        data = {
            name: torch.tensor(np.stack(series_list, axis=0)).float()
            for name, series_list in per_series.items()
        }

        if self.mode != 'test':
            # Labels are read from the last series row of the patient.
            # NOTE(review): this assumes labels are identical across a
            # patient's series -- confirm against how `df` is built.
            labels = {k: torch.tensor(row[k]).float() for k in self._LABEL_KEYS}
            return data, labels
        else:
            return data

In [6]:
rcParams['figure.figsize'] = 20,8

# Sanity check on the full table, using the logits of fold 0.
df_show = df
dataset_show = CLS3Dataset(df_show, 'train', fold=0)
# len(df) counts series rows, len(dataset_show) counts unique patients.
print(len(df),len(dataset_show))
loader_show = torch.utils.data.DataLoader(dataset_show, batch_size=batch_size, shuffle=True, num_workers=num_workers,pin_memory=True)

4711 3147


In [7]:
# Print the first few items (features dict and labels dict) for visual inspection.
for idx in range(6):
    
    data, lbl = dataset_show[idx]
    print(data)
    print(lbl)

{'bowel': tensor([[ -3.0672,  -0.9340, 174.0000],
        [ -3.0672,  -0.9340, 174.0000]]), 'liver': tensor([[  2.3705,  -0.1778,  -2.9909, 174.0000],
        [  2.3705,  -0.1778,  -2.9909, 174.0000]]), 'spleen': tensor([[  2.3503,  -0.5607,  -2.7402, 174.0000],
        [  2.3503,  -0.5607,  -2.7402, 174.0000]]), 'kidney': tensor([[ 2.4844e+00, -1.0590e-01, -3.2330e+00,  1.7400e+02],
        [ 2.4844e+00, -1.0590e-01, -3.2330e+00,  1.7400e+02]]), 'hu': tensor([[174.],
        [174.]])}
{'liver': tensor(0.), 'spleen': tensor(0.), 'kidney': tensor(0.), 'bowel': tensor(0.), 'extravasation': tensor(0.), 'any_injury': tensor(0.)}
{'bowel': tensor([[ -3.1386,  -1.0246, 101.0000],
        [ -3.1604,  -1.0222, 255.0000]]), 'liver': tensor([[ 3.6539e+00, -1.1777e-01, -4.3981e+00,  1.0100e+02],
        [ 3.7400e+00, -3.1130e-01, -4.3147e+00,  2.5500e+02]]), 'spleen': tensor([[ 3.2679e+00,  3.2151e-01, -4.7434e+00,  1.0100e+02],
        [ 3.1059e+00, -2.3283e-01, -4.2778e+00,  2.5500e+02]]), 'kid

In [8]:
# Pull one collated batch; batch[0] is the features dict, so this prints the
# stacked per-patient bowel features.
batch=next(iter(loader_show)) 
print(batch[0]["bowel"])

tensor([[[ -3.1519,  -0.9257, 240.0000],
         [ -3.1519,  -0.9257, 240.0000]],

        [[ -3.2237,  -0.9982, 393.0000],
         [ -3.2237,  -0.9982, 393.0000]],

        [[ -3.2912,  -1.0137, 248.2500],
         [ -3.2912,  -1.0137, 248.2500]],

        [[ -3.2043,  -1.0156, 190.0000],
         [ -3.2253,  -1.0093, 330.0000]],

        [[ -3.2450,  -1.0428, 226.0000],
         [ -3.3861,  -1.0642, 364.0000]],

        [[ -3.2770,  -0.9674, 233.0000],
         [ -3.2662,  -1.0042, 465.0000]],

        [[ -3.1396,  -0.9280, 389.0000],
         [ -3.1396,  -0.9280, 389.0000]],

        [[ -3.2520,  -1.0462, 180.0000],
         [ -3.2138,  -0.9972, 323.0000]],

        [[ -3.3178,  -0.9919, 118.0000],
         [ -3.3178,  -0.9919, 118.0000]],

        [[ -3.1575,  -1.0000, 155.0000],
         [ -3.0468,  -0.9196, 370.0000]],

        [[ -3.2322,  -0.9443, 281.0000],
         [ -3.2322,  -0.9443, 281.0000]],

        [[ -3.2996,  -1.0566, 205.0000],
         [ -3.2996,  -1.0566, 205.0

In [9]:
class AverageMeter(object):
    """Tracks a running weighted average together with the latest value.

    Attributes (all reset to 0 by ``reset``):
        val:    the most recent ``vals`` passed to ``update``.
        count:  number of individual values seen so far.
        sum:    running sum of ``value * weight``.
        weight: running sum of the weights.
        avg:    ``sum / weight`` (left unchanged while the total weight is 0).
    """
    def __init__(self):
        self.reset()

    def reset(self):
        # `val` is initialised as well so it can be read before the first update.
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.weight = 0
        self.count = 0

    def update(self, vals, w=1):
        """Add one value or an array of values, with optional weights.

        Args:
            vals: scalar or array-like of values.
            w: scalar or array-like of weights, broadcast against ``vals``.
               A scalar weight applies to every element of ``vals``, so the
               default ``w=1`` yields the plain mean of all values seen.
        """
        values, weights = np.broadcast_arrays(
            np.asarray(vals, dtype=float), np.asarray(w, dtype=float)
        )
        self.val = vals
        self.count += int(np.size(vals))
        self.sum += float(np.sum(values * weights))
        self.weight += float(np.sum(weights))
        # Avoid a division by zero when only zero-weight values were added.
        if self.weight != 0:
            self.avg = self.sum / self.weight


# Model

In [10]:


class PENet(nn.Module):
    """Combines the per-series logits of one patient and adds learnable biases.

    The input is the dict produced by the dataset: ``x["bowel"]`` is indexed
    as (N, series, 3) holding [bowel logit, extravasation logit, hu], and
    ``x["liver"]`` / ``x["spleen"]`` / ``x["kidney"]`` are (N, series, k+1)
    whose last feature (the appended hu) is dropped.

    Args:
        choose: how the series axis is reduced.
            * ``"mean"``: organs and bowel are averaged; extravasation is a
              convex combination of the two series, weighted by a small MLP
              on the two hu values.
            * ``"max"``: element-wise maximum over the series.
            * an int: bowel and organs take that series index, while
              extravasation always takes series index 1.
            Defaults to 0, the value that was previously hard-coded.

    Raises:
        ValueError: if ``choose`` is a string other than "mean" or "max".
    """

    def __init__(self, choose=0):
        super().__init__()
        if isinstance(choose, str) and choose not in ("mean", "max"):
            raise ValueError(f"choose must be 'mean', 'max' or a series index, got {choose!r}")
        self.choose = choose

        # Maps the pair of hu values of a patient to a weight in (0, 1);
        # only evaluated in "mean" mode.
        self.hu_weight = nn.Sequential(nn.Linear(2, 16), nn.ReLU(), nn.Linear(16, 1), nn.Sigmoid())
        # [bowel, extravasation, liver low/high, spleen low/high, kidney low/high]
        self.inj_bias = nn.Parameter(torch.zeros(8))

    def forward(self, x):  # N, series, logits
        hu = x["bowel"][:, :, 2]
        bowel = x["bowel"][:, :, 0]
        extravasation = x["bowel"][:, :, 1]
        liver = x["liver"][:, :, :-1]
        spleen = x["spleen"][:, :, :-1]
        kidney = x["kidney"][:, :, :-1]

        choose = self.choose
        if choose == "mean":
            # squeeze(-1) keeps the batch axis even for a batch of one;
            # a bare squeeze() would drop it and break the stack below.
            hu_weight = self.hu_weight(hu).squeeze(-1)  # (N,)
            extravasation = extravasation * torch.stack((hu_weight, 1 - hu_weight), dim=1)
            extravasation = extravasation.sum(dim=1)
            bowel = bowel.mean(dim=1)
            liver = liver.mean(dim=1)
            spleen = spleen.mean(dim=1)
            kidney = kidney.mean(dim=1)

        elif choose == "max":
            extravasation = extravasation.max(dim=1)[0]
            bowel = bowel.max(dim=1)[0]
            liver = liver.max(dim=1)[0]
            spleen = spleen.max(dim=1)[0]
            kidney = kidney.max(dim=1)[0]

        else:
            # Original author's note: extravasation may be slightly better from
            # the late arterial phase (higher hu), hence the fixed index 1.
            extravasation = extravasation[:, 1]
            bowel = bowel[:, choose]
            liver = liver[:, choose, :]
            spleen = spleen[:, choose, :]
            kidney = kidney[:, choose, :]

        inj_bias = self.inj_bias

        bowel = bowel + inj_bias[0]
        extravasation = extravasation + inj_bias[1]
        # The first class of each organ gets no bias: left-pad the two learned values with 0.
        liver = liver + F.pad(inj_bias[2:4], (1, 0))
        spleen = spleen + F.pad(inj_bias[4:6], (1, 0))
        kidney = kidney + F.pad(inj_bias[6:8], (1, 0))

        logits = {"liver": liver, "spleen": spleen, "kidney": kidney, "bowel": bowel, "extravasation": extravasation}
        return logits

In [11]:
from torchinfo import summary
# Build one model instance on the GPU just to print its parameter summary.
model = PENet()
model = model.cuda()
print(summary(model))


Layer (type:depth-idx)                   Param #
PENet                                    8
├─Sequential: 1-1                        --
│    └─Linear: 2-1                       48
│    └─ReLU: 2-2                         --
│    └─Linear: 2-3                       17
│    └─Sigmoid: 2-4                      --
Total params: 73
Trainable params: 73
Non-trainable params: 0


# Scoring

In [12]:
import sklearn

class ParticipantVisibleError(Exception):
    """Raised by the scoring code when a solution/submission frame fails validation."""
    pass

def normalize_probabilities_to_one(df: pd.DataFrame, group_columns: list) -> pd.DataFrame:
    """Rescale ``group_columns`` so that they sum to 1 within every row.

    Examples: (0.75, 0.75) -> (0.5, 0.5) and (0.1, 0.1) -> (0.5, 0.5).

    The frame is modified in place and also returned.

    Raises:
        ParticipantVisibleError: if the smallest row total is exactly zero.
    """
    totals = df[group_columns].sum(axis=1)
    if totals.min() == 0:
        raise ParticipantVisibleError('All rows must contain at least one non-zero prediction')
    for name in group_columns:
        df[name] /= totals
    return df


def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> tuple:
    '''
    Competition metric: mean of the sample-weighted log losses of all label groups.

    Pseudocode:
    1. For every label group (liver, bowel, etc):
        - Normalize the sum of each row's probabilities to 100%.
        - Calculate the sample weighted log loss.
    2. Derive a new any_injury label by taking the max of 1 - p(healthy) for each label group
    3. Calculate the sample weighted log loss for the new label group
    4. Return the average of all of the label group log losses as the final score.

    Args:
        solution: ground-truth frame with the one-hot label columns and a
            ``<group>_weight`` column per label group (plus ``any_injury_weight``).
        submission: predicted probabilities with the same label columns.
        row_id_column_name: id column that is dropped from both frames before scoring.

    Returns:
        ``(mean_loss, group_losses)`` where ``group_losses`` maps each label
        group (including "any_injury") to its log loss.

    Raises:
        ParticipantVisibleError: on non-numeric, non-finite or negative values,
            or when a required submission column is missing.

    Rows are compared by position, so both frames must be in the same order.
    The caller's frames are left untouched; all work happens on copies.
    '''
    # Imported here because a bare `import sklearn` does not by itself
    # guarantee that the `sklearn.metrics` submodule has been loaded.
    from sklearn.metrics import log_loss

    # Work on private copies: the id column is removed and the probability
    # columns are normalised in place further down.
    solution = solution.copy()
    submission = submission.copy()
    del solution[row_id_column_name]
    del submission[row_id_column_name]

    # Run basic QC checks on the inputs
    if not pd.api.types.is_numeric_dtype(submission.values):
        raise ParticipantVisibleError('All submission values must be numeric')

    if not np.isfinite(submission.values).all():
        raise ParticipantVisibleError('All submission values must be finite')

    if solution.min().min() < 0:
        raise ParticipantVisibleError('All labels must be at least zero')
    if submission.min().min() < 0:
        raise ParticipantVisibleError('All predictions must be at least zero')

    # Calculate the label group log losses
    binary_targets = ['bowel', 'extravasation']
    triple_level_targets = ['kidney', 'liver', 'spleen']
    all_target_categories = binary_targets + triple_level_targets

    label_group_losses = []
    for category in all_target_categories:
        if category in binary_targets:
            col_group = [f'{category}_healthy', f'{category}_injury']
        else:
            col_group = [f'{category}_healthy', f'{category}_low', f'{category}_high']

        solution = normalize_probabilities_to_one(solution, col_group)

        for col in col_group:
            if col not in submission.columns:
                raise ParticipantVisibleError(f'Missing submission column {col}')
        submission = normalize_probabilities_to_one(submission, col_group)
        label_group_losses.append(
            log_loss(
                y_true=solution[col_group].values,
                y_pred=submission[col_group].values,
                sample_weight=solution[f'{category}_weight'].values
            )
        )

    # Derive a new any_injury label by taking the max of 1 - p(healthy) for each label group
    healthy_cols = [x + '_healthy' for x in all_target_categories]
    any_injury_labels = (1 - solution[healthy_cols]).max(axis=1)
    any_injury_predictions = (1 - submission[healthy_cols]).max(axis=1)
    any_injury_loss = log_loss(
        y_true=any_injury_labels.values,
        y_pred=any_injury_predictions.values,
        sample_weight=solution['any_injury_weight'].values
    )

    label_group_losses.append(any_injury_loss)
    all_target_categories.append("any_injury")

    group_losses = dict(zip(all_target_categories, label_group_losses))
    return np.mean(label_group_losses), group_losses


In [13]:
# Assign the appropriate weights to each category
def create_training_solution(y_train):
    """Return a copy of ``y_train`` extended with the per-sample weight columns.

    Weights by class:
        bowel          healthy|injury   = 1|2
        extravasation  healthy|injury   = 1|6
        kidney/liver/spleen healthy|low|high = 1|2|4
        any_injury     healthy|injury   = 1|6
    """
    sol_train = y_train.copy()

    for target, injury_weight in (('bowel', 2), ('extravasation', 6)):
        sol_train[f'{target}_weight'] = np.where(sol_train[f'{target}_injury'] == 1, injury_weight, 1)

    for organ in ('kidney', 'liver', 'spleen'):
        is_low = sol_train[f'{organ}_low'] == 1
        is_high = sol_train[f'{organ}_high'] == 1
        # "low" takes precedence over "high" when both flags are set.
        sol_train[f'{organ}_weight'] = np.where(is_low, 2, np.where(is_high, 4, 1))

    sol_train['any_injury_weight'] = np.where(sol_train['any_injury'] == 1, 6, 1)
    return sol_train


In [14]:
# Per-sample losses (reduction='none') so the training loop can apply its own class weights.
# criterion:   cross-entropy for the three-class organ targets.
criterion = nn.CrossEntropyLoss(reduction='none').cuda()
# criterion2:  binary cross-entropy on logits for bowel / extravasation.
criterion2 = nn.BCEWithLogitsLoss(reduction='none').cuda()
# criterion2a: binary cross-entropy on probabilities, used for the derived any_injury score.
criterion2a=nn.BCELoss(reduction="none").cuda()

# Train/Valid functions

In [15]:



def train_func(model, loader_train, optimizer, scaler,total_weight,debug=False):
    """Run one training epoch and return ``(mean_loss, per_group_mean_loss)``.

    For every batch the per-sample losses of the five targets plus the derived
    ``any_injury`` target are multiplied by their class weights, averaged,
    divided by ``total_weight[target]`` and finally averaged over the targets.

    Args:
        model: network returning a dict of logits per target.
        loader_train: yields ``(data, targets)`` dicts of tensors.
        optimizer: optimizer stepped through ``scaler``.
        scaler: AMP gradient scaler.
        total_weight: dict target name -> normalisation constant.
        debug: when True the backward pass and optimizer step are skipped.
    """
    organ_keys = ("liver", "spleen", "kidney")
    binary_keys = ("bowel", "extravasation")

    model.train()
    train_loss = []
    group_loss = {k: [] for k in ["liver","spleen","kidney","bowel","extravasation","any_injury"]}
    bar = tqdm(loader_train)
    for data, targets in bar:
        optimizer.zero_grad()
        data = {k: v.cuda() for k, v in data.items()}
        targets = {k: v.cuda() for k, v in targets.items()}
        # Organ targets are class indices; the loss and weights use one-hot floats.
        for k in organ_keys:
            targets[k] = F.one_hot(targets[k].long(), num_classes=3).float()

        with amp.autocast():
            logits = model(data)

            dloss = {k: criterion2(logits[k], targets[k]) for k in binary_keys}
            for k in organ_keys:
                dloss[k] = criterion(logits[k], targets[k])

            weights = {
                "bowel": 1 + targets["bowel"],
                "extravasation": 1 + targets["extravasation"] * 5,
            }
            for k in organ_keys:
                one_hot = targets[k]
                weights[k] = one_hot[:, 0] + one_hot[:, 1] * 2 + one_hot[:, 2] * 4

        # The any_injury term is evaluated outside autocast, on float32 probabilities.
        prob_injury = {k: 1 - F.softmax(logits[k].float(), dim=1)[:, 0] for k in organ_keys}
        for k in binary_keys:
            prob_injury[k] = torch.sigmoid(logits[k].float())
        any_injury_prob = torch.stack(list(prob_injury.values()), dim=1).max(dim=1)[0]

        dloss["any_injury"] = criterion2a(any_injury_prob, targets["any_injury"])
        weights["any_injury"] = 1 + targets["any_injury"] * 5

        for k in dloss:
            dloss[k] = torch.mean(dloss[k] * weights[k]) / total_weight[k]

        loss = sum(dloss.values()) / len(dloss)

        if not debug:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        train_loss.append(loss.item())
        bar.set_description(f'smth:{np.mean(train_loss[-100:]):.4f}')
        for k, value in dloss.items():
            group_loss[k].append(value.item())

    group_loss = {k: np.mean(v) for k, v in group_loss.items()}
    return np.mean(train_loss), group_loss


def valid_func(model, loader_valid,solution):
    """Predict on the validation loader and score the result with ``score``.

    Args:
        model: network returning a dict of logits per target.
        loader_valid: yields ``(data, targets)``; only ``data`` is used.
            Predictions are matched to ``solution`` by position, so the loader
            must not shuffle and must iterate patients in the row order of
            ``solution``.
        solution: ground-truth frame that includes a ``patient_id`` column.

    Returns:
        ``(valid_loss, group_losses)`` as returned by ``score``.
    """
    model.eval()
    bar = tqdm(loader_valid)
    probs = {"liver": [], "spleen": [], "kidney": [], "bowel": [], "extravasation": []}
    with torch.no_grad():
        # Targets are not needed for prediction, so they are not copied to the GPU.
        for data, _ in bar:
            data = {k: v.cuda() for k, v in data.items()}

            with amp.autocast():
                logits = model(data)

            for k in ["bowel", "extravasation"]:
                probs[k].append(torch.sigmoid(logits[k].float()))
            for k in ["liver", "spleen", "kidney"]:
                probs[k].append(F.softmax(logits[k].float(), dim=1))

    probs = {k: torch.cat(v, dim=0).numpy(force=True) for k, v in probs.items()}

    submission = {}
    for k in ["liver", "spleen", "kidney"]:
        submission[f"{k}_healthy"] = probs[k][:, 0]
        submission[f"{k}_low"] = probs[k][:, 1]
        submission[f"{k}_high"] = probs[k][:, 2]
    for k in ["bowel", "extravasation"]:
        submission[f"{k}_healthy"] = 1 - probs[k]
        submission[f"{k}_injury"] = probs[k]

    submission["patient_id"] = solution["patient_id"]
    submission = pd.DataFrame(submission)
    valid_loss, group_losses = score(solution.copy(), submission, 'patient_id')
    return valid_loss, group_losses
            

# Training

In [16]:
from torch.utils.data import WeightedRandomSampler
# Run tag used as the prefix of the log file and checkpoint file names.
# NOTE(review): the trailing space ends up inside those file names -- confirm it is intentional.
kernel_type="lstm "
def total_weights(df):
    """Mean per-patient sample weight of every target.

    Rows are first reduced to one per ``patient_id``. The weights follow the
    training scheme: bowel 1|2, extravasation and any_injury 1|6, and organs
    1|2|4 for grades 0|1|2 (any other grade contributes 0).

    Returns:
        dict with keys bowel, extravasation, any_injury, liver, spleen, kidney.
    """
    patients = df.drop_duplicates("patient_id")
    result = {
        "bowel": np.mean(1 + patients.bowel),
        "extravasation": np.mean(1 + 5 * patients.extravasation),
        "any_injury": np.mean(1 + 5 * patients.any_injury),
    }
    for organ in ("liver", "spleen", "kidney"):
        grade = patients[organ].to_numpy()
        per_patient = np.select([grade == 0, grade == 1, grade == 2], [1, 2, 4], default=0)
        result[organ] = np.mean(per_patient)
    return result


def run(fold):
    """Train and validate one cross-validation fold.

    Rows of the global ``df`` whose ``fold`` equals the argument form the
    validation set; all other rows are used for training. After every epoch
    the model is validated, a line is appended to the log file, the best
    weights are saved to ``*_best.pth`` and a full checkpoint to ``*_last.pth``.

    Args:
        fold: index of the validation fold.

    Returns:
        The lowest validation metric reached during the epochs.
    """

    log_file = os.path.join(log_dir, f'{kernel_type}.txt')
    model_file = os.path.join(model_dir, f'{kernel_type}_fold{fold}_best.pth')

    train_ = df[(df['fold'] != fold)]
    valid_ = df[(df['fold'] == fold)]
    # Normalisation constants for the weighted training losses.
    total_weights_train=total_weights(train_)
    print("total weights",total_weights_train)
    # NOTE(review): computed but not used anywhere in this function.
    total_weights_valid=total_weights(valid_)
    #print(train_bowel_weight,train_extravasation_weight)
    
    dataset_train = CLS3Dataset(train_, 'train',fold=fold)
    dataset_valid = CLS3Dataset(valid_, 'valid',fold=fold)

    
    loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True,pin_memory=True)

    # No shuffling here: valid_func pairs predictions with `solution` by position.
    loader_valid = torch.utils.data.DataLoader(dataset_valid, batch_size=batch_size, shuffle=False, num_workers=num_workers,pin_memory=True)

    model = PENet()
    model = model.to(device)

    optimizer = optim.AdamW(model.parameters(), lr=init_lr,weight_decay=1e-4)
    scaler = torch.cuda.amp.GradScaler() 

    metric_best = np.inf

    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, n_epochs, eta_min=eta_min)

    print("#train", len(dataset_train), "#valid",len(dataset_valid))
    # Ground truth for scoring: one df_p row per validation patient, in the
    # order in which the patients appear in valid_.
    pvalid=valid_.drop_duplicates(subset=["patient_id"])
    solution=create_training_solution(df_p.set_index("patient_id",drop=False).loc[pvalid.patient_id])
    for epoch in range(1, n_epochs+1):
        print(time.ctime(), 'Epoch:', epoch)
        train_loss,train_group_loss = train_func(model, loader_train, optimizer, scaler,total_weights_train)
        #train_loss=0
        # NOTE(review): the scheduler is stepped to position `epoch-1` only after
        # this epoch's training pass, so the learning rate set here (and printed
        # in the log line below) is the one the NEXT epoch trains with -- confirm
        # that this one-epoch lag is intended.
        scheduler_cosine.step(epoch-1)   
        valid_loss,valid_group_loss = valid_func(model, loader_valid,solution)
        metric = valid_loss

        content = time.ctime() + ' ' + f'Fold {fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {train_loss:.5f}, valid loss: {valid_loss:.5f}, metric: {(metric):.6f}.'
        print(content)
        with open(log_file, 'a') as appender:
            appender.write(content + '\n')

        # Lower is better: keep the weights of the best epoch so far.
        if metric < metric_best:
            print(f'metric_best ({metric_best:.6f} --> {metric:.6f}). Saving model ...')
            torch.save(model.state_dict(), model_file)
            metric_best = metric


        # Save Last
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scaler_state_dict': scaler.state_dict() if scaler else None,
                'score_best': metric_best,
            },
            model_file.replace('_best', '_last')
        )
    # These two prints describe the model after the LAST epoch, which is not
    # necessarily the epoch that produced metric_best.
    print(valid_group_loss)
    print(model.inj_bias)
    # Release the model before the next fold starts.
    del model
    torch.cuda.empty_cache()
    gc.collect()
    return metric_best

# Run

In [17]:
# Train every fold and collect the best validation metric of each.
cv = []
TRAINING = True
if TRAINING:
    # Use the configured fold count so this loop stays in sync with the split.
    for i in range(n_folds):
        metric = run(i)
        cv.append(metric)

print(cv)
# Only average when at least one fold was run; np.mean([]) would yield nan
# together with a RuntimeWarning.
if cv:
    print(np.mean(cv))
else:
    print("no folds were run")

total weights {'bowel': 1.0218514104092173, 'extravasation': 1.313865713150576, 'any_injury': 2.3369090186730235, 'liver': 1.1386571315057608, 'spleen': 1.2042113627334128, 'kidney': 1.0985299960270163}
#train 2517 #valid 630
Mon Sep 18 01:35:12 2023 Epoch: 1


smth:0.3669: 100%|██████████| 39/39 [00:01<00:00, 31.46it/s]
100%|██████████| 10/10 [00:00<00:00, 27.91it/s]


Mon Sep 18 01:35:13 2023 Fold 0, Epoch 1, lr: 0.0100000, train loss: 0.36685, valid loss: 0.41267, metric: 0.412670.
metric_best (inf --> 0.412670). Saving model ...
Mon Sep 18 01:35:13 2023 Epoch: 2


smth:0.3663: 100%|██████████| 39/39 [00:00<00:00, 40.21it/s]
100%|██████████| 10/10 [00:00<00:00, 26.57it/s]


Mon Sep 18 01:35:15 2023 Fold 0, Epoch 2, lr: 0.0090451, train loss: 0.36633, valid loss: 0.41005, metric: 0.410047.
metric_best (0.412670 --> 0.410047). Saving model ...
Mon Sep 18 01:35:15 2023 Epoch: 3


smth:0.3647: 100%|██████████| 39/39 [00:01<00:00, 37.93it/s]
100%|██████████| 10/10 [00:00<00:00, 27.63it/s]


Mon Sep 18 01:35:16 2023 Fold 0, Epoch 3, lr: 0.0065451, train loss: 0.36472, valid loss: 0.40916, metric: 0.409163.
metric_best (0.410047 --> 0.409163). Saving model ...
Mon Sep 18 01:35:16 2023 Epoch: 4


smth:0.3651: 100%|██████████| 39/39 [00:01<00:00, 38.32it/s]
100%|██████████| 10/10 [00:00<00:00, 28.72it/s]


Mon Sep 18 01:35:17 2023 Fold 0, Epoch 4, lr: 0.0034549, train loss: 0.36510, valid loss: 0.40836, metric: 0.408365.
metric_best (0.409163 --> 0.408365). Saving model ...
Mon Sep 18 01:35:17 2023 Epoch: 5


smth:0.3634: 100%|██████████| 39/39 [00:00<00:00, 41.80it/s]
100%|██████████| 10/10 [00:00<00:00, 28.96it/s]


Mon Sep 18 01:35:19 2023 Fold 0, Epoch 5, lr: 0.0009549, train loss: 0.36344, valid loss: 0.40847, metric: 0.408469.
{'bowel': 0.1294899045157354, 'extravasation': 0.6071779593881198, 'kidney': 0.33200714522931385, 'liver': 0.41204828261798726, 'spleen': 0.4363798071314709, 'any_injury': 0.5337088902260794}
Parameter containing:
tensor([ 0.0757,  0.1456,  0.2172,  0.5118,  0.2041,  0.5824, -0.2866,  0.1898],
       device='cuda:0', requires_grad=True)
total weights {'bowel': 1.0214541120381406, 'extravasation': 1.3019467620182756, 'any_injury': 2.360746920937624, 'liver': 1.1394517282479142, 'spleen': 1.2256654747715534, 'kidney': 1.1017083829956298}
#train 2517 #valid 630
Mon Sep 18 01:35:19 2023 Epoch: 1


smth:0.3503: 100%|██████████| 39/39 [00:01<00:00, 38.58it/s]
100%|██████████| 10/10 [00:00<00:00, 28.80it/s]


Mon Sep 18 01:35:21 2023 Fold 1, Epoch 1, lr: 0.0100000, train loss: 0.35033, valid loss: 0.38951, metric: 0.389513.
metric_best (inf --> 0.389513). Saving model ...
Mon Sep 18 01:35:21 2023 Epoch: 2


smth:0.3463: 100%|██████████| 39/39 [00:01<00:00, 38.35it/s]
100%|██████████| 10/10 [00:00<00:00, 27.36it/s]


Mon Sep 18 01:35:22 2023 Fold 1, Epoch 2, lr: 0.0090451, train loss: 0.34626, valid loss: 0.38721, metric: 0.387214.
metric_best (0.389513 --> 0.387214). Saving model ...
Mon Sep 18 01:35:22 2023 Epoch: 3


smth:0.3452: 100%|██████████| 39/39 [00:01<00:00, 38.17it/s]
100%|██████████| 10/10 [00:00<00:00, 25.05it/s]


Mon Sep 18 01:35:23 2023 Fold 1, Epoch 3, lr: 0.0065451, train loss: 0.34520, valid loss: 0.38620, metric: 0.386197.
metric_best (0.387214 --> 0.386197). Saving model ...
Mon Sep 18 01:35:23 2023 Epoch: 4


smth:0.3435: 100%|██████████| 39/39 [00:01<00:00, 38.80it/s]
100%|██████████| 10/10 [00:00<00:00, 28.02it/s]


Mon Sep 18 01:35:25 2023 Fold 1, Epoch 4, lr: 0.0034549, train loss: 0.34354, valid loss: 0.38602, metric: 0.386021.
metric_best (0.386197 --> 0.386021). Saving model ...
Mon Sep 18 01:35:25 2023 Epoch: 5


smth:0.3439: 100%|██████████| 39/39 [00:01<00:00, 36.84it/s]
100%|██████████| 10/10 [00:00<00:00, 27.36it/s]


Mon Sep 18 01:35:26 2023 Fold 1, Epoch 5, lr: 0.0009549, train loss: 0.34388, valid loss: 0.38597, metric: 0.385967.
metric_best (0.386021 --> 0.385967). Saving model ...
{'bowel': 0.13871470929586457, 'extravasation': 0.6281924809889661, 'kidney': 0.2353815658035925, 'liver': 0.4036648730033521, 'spleen': 0.36126871914640135, 'any_injury': 0.5485795755023781}
Parameter containing:
tensor([ 0.4446, -0.1165,  0.5925,  0.4670,  0.5383,  0.6102,  0.0449,  0.1822],
       device='cuda:0', requires_grad=True)
total weights {'bowel': 1.0206513105639397, 'extravasation': 1.3157267672756157, 'any_injury': 2.340349483717236, 'liver': 1.1389992057188245, 'spleen': 1.1977760127084989, 'kidney': 1.096902303415409}
#train 2518 #valid 629
Mon Sep 18 01:35:27 2023 Epoch: 1


smth:0.3440: 100%|██████████| 39/39 [00:01<00:00, 34.98it/s]
100%|██████████| 10/10 [00:00<00:00, 28.83it/s]


Mon Sep 18 01:35:28 2023 Fold 2, Epoch 1, lr: 0.0100000, train loss: 0.34398, valid loss: 0.40050, metric: 0.400500.
metric_best (inf --> 0.400500). Saving model ...
Mon Sep 18 01:35:28 2023 Epoch: 2


smth:0.3377: 100%|██████████| 39/39 [00:00<00:00, 39.43it/s]
100%|██████████| 10/10 [00:00<00:00, 26.21it/s]


Mon Sep 18 01:35:30 2023 Fold 2, Epoch 2, lr: 0.0090451, train loss: 0.33774, valid loss: 0.39565, metric: 0.395648.
metric_best (0.400500 --> 0.395648). Saving model ...
Mon Sep 18 01:35:30 2023 Epoch: 3


smth:0.3369: 100%|██████████| 39/39 [00:01<00:00, 37.33it/s]
100%|██████████| 10/10 [00:00<00:00, 27.67it/s]


Mon Sep 18 01:35:31 2023 Fold 2, Epoch 3, lr: 0.0065451, train loss: 0.33686, valid loss: 0.39335, metric: 0.393352.
metric_best (0.395648 --> 0.393352). Saving model ...
Mon Sep 18 01:35:31 2023 Epoch: 4


smth:0.3370: 100%|██████████| 39/39 [00:01<00:00, 38.76it/s]
100%|██████████| 10/10 [00:00<00:00, 28.21it/s]


Mon Sep 18 01:35:33 2023 Fold 2, Epoch 4, lr: 0.0034549, train loss: 0.33696, valid loss: 0.39227, metric: 0.392274.
metric_best (0.393352 --> 0.392274). Saving model ...
Mon Sep 18 01:35:33 2023 Epoch: 5


smth:0.3362: 100%|██████████| 39/39 [00:01<00:00, 38.97it/s]
100%|██████████| 10/10 [00:00<00:00, 27.43it/s]


Mon Sep 18 01:35:34 2023 Fold 2, Epoch 5, lr: 0.0009549, train loss: 0.33619, valid loss: 0.39182, metric: 0.391817.
metric_best (0.392274 --> 0.391817). Saving model ...
{'bowel': 0.1576599504395457, 'extravasation': 0.5939098765016895, 'kidney': 0.25858719469414554, 'liver': 0.4330129905521882, 'spleen': 0.42263388159644166, 'any_injury': 0.48509923540630817}
Parameter containing:
tensor([-0.4472,  0.2144,  0.3135,  0.5197,  0.5614,  0.8203,  0.1594,  0.2904],
       device='cuda:0', requires_grad=True)
total weights {'bowel': 1.0198570293884035, 'extravasation': 1.3196981731532962, 'any_injury': 2.382049245432883, 'liver': 1.1445591739475773, 'spleen': 1.220810166799047, 'kidney': 1.108022239872915}
#train 2518 #valid 629
Mon Sep 18 01:35:34 2023 Epoch: 1


smth:0.3520: 100%|██████████| 39/39 [00:01<00:00, 36.89it/s]
100%|██████████| 10/10 [00:00<00:00, 26.98it/s]


Mon Sep 18 01:35:36 2023 Fold 3, Epoch 1, lr: 0.0100000, train loss: 0.35196, valid loss: 0.39122, metric: 0.391219.
metric_best (inf --> 0.391219). Saving model ...
Mon Sep 18 01:35:36 2023 Epoch: 2


smth:0.3482: 100%|██████████| 39/39 [00:01<00:00, 36.71it/s]
100%|██████████| 10/10 [00:00<00:00, 26.74it/s]


Mon Sep 18 01:35:37 2023 Fold 3, Epoch 2, lr: 0.0090451, train loss: 0.34820, valid loss: 0.38774, metric: 0.387735.
metric_best (0.391219 --> 0.387735). Saving model ...
Mon Sep 18 01:35:37 2023 Epoch: 3


smth:0.3428: 100%|██████████| 39/39 [00:01<00:00, 37.62it/s]
100%|██████████| 10/10 [00:00<00:00, 27.26it/s]


Mon Sep 18 01:35:39 2023 Fold 3, Epoch 3, lr: 0.0065451, train loss: 0.34278, valid loss: 0.38693, metric: 0.386933.
metric_best (0.387735 --> 0.386933). Saving model ...
Mon Sep 18 01:35:39 2023 Epoch: 4


smth:0.3432: 100%|██████████| 39/39 [00:01<00:00, 37.29it/s]
100%|██████████| 10/10 [00:00<00:00, 27.68it/s]


Mon Sep 18 01:35:40 2023 Fold 3, Epoch 4, lr: 0.0034549, train loss: 0.34323, valid loss: 0.38649, metric: 0.386492.
metric_best (0.386933 --> 0.386492). Saving model ...
Mon Sep 18 01:35:40 2023 Epoch: 5


smth:0.3407: 100%|██████████| 39/39 [00:01<00:00, 38.19it/s]
100%|██████████| 10/10 [00:00<00:00, 24.79it/s]


Mon Sep 18 01:35:42 2023 Fold 3, Epoch 5, lr: 0.0009549, train loss: 0.34070, valid loss: 0.38642, metric: 0.386417.
metric_best (0.386492 --> 0.386417). Saving model ...
{'bowel': 0.17991307771897386, 'extravasation': 0.5926008591987522, 'kidney': 0.2616155422390059, 'liver': 0.3708352839109541, 'spleen': 0.39096899732696433, 'any_injury': 0.522570614655764}
Parameter containing:
tensor([0.3925, 0.3675, 0.6165, 0.6138, 0.5974, 0.7722, 0.0571, 0.1274],
       device='cuda:0', requires_grad=True)
total weights {'bowel': 1.0178713264495631, 'extravasation': 1.3375694996028593, 'any_injury': 2.3721207307386813, 'liver': 1.1453534551231135, 'spleen': 1.2065131056393963, 'kidney': 1.096902303415409}
#train 2518 #valid 629
Mon Sep 18 01:35:42 2023 Epoch: 1


smth:0.3995: 100%|██████████| 39/39 [00:01<00:00, 37.88it/s]
100%|██████████| 10/10 [00:00<00:00, 28.64it/s]


Mon Sep 18 01:35:44 2023 Fold 4, Epoch 1, lr: 0.0100000, train loss: 0.39945, valid loss: 0.41893, metric: 0.418930.
metric_best (inf --> 0.418930). Saving model ...
Mon Sep 18 01:35:44 2023 Epoch: 2


smth:0.3929: 100%|██████████| 39/39 [00:01<00:00, 37.19it/s]
100%|██████████| 10/10 [00:00<00:00, 25.84it/s]


Mon Sep 18 01:35:45 2023 Fold 4, Epoch 2, lr: 0.0090451, train loss: 0.39290, valid loss: 0.41598, metric: 0.415983.
metric_best (0.418930 --> 0.415983). Saving model ...
Mon Sep 18 01:35:45 2023 Epoch: 3


smth:0.3887: 100%|██████████| 39/39 [00:01<00:00, 38.52it/s]
100%|██████████| 10/10 [00:00<00:00, 26.99it/s]


Mon Sep 18 01:35:46 2023 Fold 4, Epoch 3, lr: 0.0065451, train loss: 0.38869, valid loss: 0.41574, metric: 0.415739.
metric_best (0.415983 --> 0.415739). Saving model ...
Mon Sep 18 01:35:46 2023 Epoch: 4


smth:0.3879: 100%|██████████| 39/39 [00:01<00:00, 37.42it/s]
100%|██████████| 10/10 [00:00<00:00, 27.21it/s]


Mon Sep 18 01:35:48 2023 Fold 4, Epoch 4, lr: 0.0034549, train loss: 0.38791, valid loss: 0.41580, metric: 0.415795.
Mon Sep 18 01:35:48 2023 Epoch: 5


smth:0.3878: 100%|██████████| 39/39 [00:01<00:00, 36.42it/s]
100%|██████████| 10/10 [00:00<00:00, 25.95it/s]


Mon Sep 18 01:35:49 2023 Fold 4, Epoch 5, lr: 0.0009549, train loss: 0.38778, valid loss: 0.41591, metric: 0.415907.
{'bowel': 0.22847702085689542, 'extravasation': 0.5576121675061338, 'kidney': 0.342878644692093, 'liver': 0.4156813197739945, 'spleen': 0.4191367771954269, 'any_injury': 0.531656926500148}
Parameter containing:
tensor([-0.0340,  0.6226,  0.7507, -0.2363,  0.4255,  0.1653,  0.1902,  0.2776],
       device='cuda:0', requires_grad=True)
[0.4083648067066881, 0.38596698729009243, 0.39181718819838646, 0.3864173958417358, 0.415738657964205]
0.3976610072002216
