 Train model for Study Level 

In [None]:
# Import the latest version of wandb
# !pip install -q --upgrade wandb

In [None]:
!/opt/conda/bin/python3.7 -m pip install --upgrade pip
! pip install -q efficientnet

In [None]:
!nvidia-smi

In [None]:
import tensorflow as tf
print(tf.__version__)
from tensorflow.keras import layers
from tensorflow.keras import models
import tensorflow_addons as tfa
from tensorflow.keras import mixed_precision



import os
import gc
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import StratifiedKFold
from sklearn.utils.class_weight import compute_class_weight

# Imports for augmentations. 
from albumentations import (Compose, RandomResizedCrop, Cutout, Rotate, HorizontalFlip, 
                            VerticalFlip, RandomBrightnessContrast, ShiftScaleRotate, 
                            CenterCrop, Resize)

# GPU Run Access

In [None]:
# Increase GPU memory as per the need.
# Turns on memory growth for every GPU TensorFlow reports, then prints how many
# physical and logical GPUs are visible. Does nothing when no GPU is found.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    # The error is only printed, so the notebook keeps running with the
    # default memory configuration.
    print(e)

In [None]:
Train_path='../input/siim-covid19-resized-to-256px-jpg/train/'  # for 256 img_size

#Train_path='../input/siim-covid19-resized-to-512px-png/train/'   # for 512 img_size


In [None]:
# read df_train csv file

df=pd.read_csv('../input/df-train/df_train.csv')

In [None]:
# read study_df from original dataset

train_study_df=pd.read_csv("../input/siim-covid19-detection/train_study_level.csv")

In [None]:
train_study_df.head()

In [None]:
# Take the study_id from train_study_df because the original files don't have it,
# and delete all columns that are not usable for training this model.

In [None]:
# Remove the four one-hot label columns from train_study_df before it is merged into df.
# Later cells still read these column names from the merged frame, so df presumably
# already carries them from df_train.csv — TODO confirm.
train_study_df= train_study_df.drop(['Negative for Pneumonia','Typical Appearance','Indeterminate Appearance','Atypical Appearance'],axis=1)

In [None]:
# Derive the bare study UID by removing the '_study' marker from each id.
train_study_df['StudyInstanceUID'] = [
    study_id.replace('_study', '') for study_id in train_study_df['id']
]

In [None]:

train_study_df=train_study_df.rename(columns={'id':'study_id'})
train_study_df.head()

# Hyperparameter

In [None]:
df=df.merge(train_study_df,on='StudyInstanceUID')


In [None]:
df.head()

In [None]:
# Build all image paths with one vectorised string concatenation instead of a
# row-wise apply(axis=1), which builds a Series object for every row.
df['path'] = Train_path + df['id'] + '.jpg'  # for 256 img_size

#df['path'] = Train_path + df['id'] + '.png'   # for 512 img_size

In [None]:
df['path'][0]

In [None]:
print('000a312787f2.jpg' in os.listdir(Train_path))

In [None]:
# Turn the four one-hot study-level columns into one integer class id per row:
# the id is the position of the largest value, in the column order listed here.
labels = df[[
    'Negative for Pneumonia',
    'Typical Appearance',
    'Indeterminate Appearance',
    'Atypical Appearance',
]].to_numpy().argmax(axis=1)
df['study_level'] = labels

In [None]:
df=df.drop(['StudyInstanceUID_count'],axis=1)

In [None]:
df.columns

In [None]:
TRAIN_PATH = '../input/siim-covid19-resized-to-256px-jpg/train/'

TEST_PATH = '../input/siim-covid19-resized-to-256px-jpg/test/'



In [None]:
# for 512 images

#TRAIN_PATH='../input/siim-covid19-resized-to-512px-png/train/'
#TEST_PATH ='../input/siim-covid19-resized-to-512px-png/test/'

In [None]:
train_df=df.copy()

In [None]:
study_df=train_df[['study_id',
                  'Negative for Pneumonia','Typical Appearance',
                  'Indeterminate Appearance','Atypical Appearance']]





In [None]:
study_df.head()

In [None]:
# Strip train_df down to the columns still needed for training: the listed
# bounding-box / metadata columns, study_id and the four one-hot label columns
# are removed (the latter five were copied into study_df beforehand).
# With pandas' default errors='raise', a missing column name raises KeyError.
train_df= train_df.drop(['Unnamed: 0','boxes','label','index','0','Path', 'w', 'h', 'class','x_max', 'x_min', 'y_min',
               'label_int','OpacityCount', 'y_max','study_id','Negative for Pneumonia',
               'Typical Appearance','Indeterminate Appearance','Atypical Appearance'],axis=1)

In [None]:
train_df.head()

In [None]:
AUTO = tf.data.experimental.AUTOTUNE

In [None]:
# Global settings read by the input pipeline and the model builders.
NUM_CLASSES = 4          # width of the softmax output / one-hot label vectors
HEIGHT,WIDTH = 256,256   # size every image is resized to after augmentation
#HEIGHT,WIDTH = 512,512

CHANNELS = 3             # channel count requested from the image decoder
BATCH_SIZE = 8           # batch size used by get_dataset; reassigned to 16 by a later cell
SEED = 143               # random_state passed to StratifiedKFold

In [None]:
def process_img(filepath,label):
    """Load one image file and pair it with its label.

    The file at `filepath` is read, decoded with the JPEG decoder into
    CHANNELS channels and converted to float32 with
    `tf.image.convert_image_dtype`. No resizing happens here.

    Returns:
        Tuple `(image, label)`; `label` is passed through unchanged.
    """
    raw_bytes = tf.io.read_file(filepath)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=CHANNELS)
    return tf.image.convert_image_dtype(decoded, tf.float32), label


def data_augment(image, label):
    """Apply a random chain of flips, rotations, colour jitter and crops.

    Six independent uniform draws from [0, 1) decide which optional steps run.
    Whatever path is taken, the image is resized to (HEIGHT, WIDTH) at the end.

    Args:
        image: image tensor; assumed to be a float image with CHANNELS channels
            as produced by `process_img` — TODO confirm for other callers.
        label: passed through unchanged.

    Returns:
        Tuple `(augmented_image, label)`.
    """
    p_spatial = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_rotate = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_pixel_1 = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_pixel_2 = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_pixel_3 = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_crop = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
            
    # Flips
    # Both random flips are always called; the transpose only runs when
    # p_spatial exceeds 0.75.
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    if p_spatial > .75:
        image = tf.image.transpose(image)
        
    # Rotates
    # p_rotate selects k=3, k=2, k=1 or no rot90 call at all, one quarter of
    # the [0, 1) range each.
    if p_rotate > .75:
        image = tf.image.rot90(image, k=3) 
    elif p_rotate > .5:
        image = tf.image.rot90(image, k=2) 
    elif p_rotate > .25:
        image = tf.image.rot90(image, k=1) 
        
    
    # Pixel-level jitter: each of the three ops runs when its own draw is >= 0.4.
    # No clipping is applied afterwards.
    if p_pixel_1 >= .4:
        image = tf.image.random_saturation(image, lower=.7, upper=1.3)
    if p_pixel_2 >= .4:
        image = tf.image.random_contrast(image, lower=.8, upper=1.2)
    if p_pixel_3 >= .4:
        image = tf.image.random_brightness(image, max_delta=.1)
        
    
    # Cropping, selected by p_crop:
    #   > 0.9       central crop keeping 70 %
    #   (0.8, 0.9]  central crop keeping 80 %
    #   (0.7, 0.8]  central crop keeping 90 %
    #   (0.4, 0.7]  random square crop with side drawn from [int(HEIGHT*.8), HEIGHT)
    #   otherwise   no crop
    if p_crop > .7:
        if p_crop > .9:
            image = tf.image.central_crop(image, central_fraction=.7)
        elif p_crop > .8:
            image = tf.image.central_crop(image, central_fraction=.8)
        else:
            image = tf.image.central_crop(image, central_fraction=.9)
    elif p_crop > .4:
        # assumes the incoming image is at least crop_size in both dimensions — TODO confirm
        crop_size = tf.random.uniform([], int(HEIGHT*.8), HEIGHT, dtype=tf.int32)
        image = tf.image.random_crop(image, size=[crop_size, crop_size, CHANNELS])
    
    # Bring every variant back to one common size so samples can be batched.
    image = tf.image.resize(image, [HEIGHT,WIDTH])
    return image,label

def get_dataset(filenames,labels, training=True):
    """Build the batched, endlessly repeating tf.data pipeline for one split.

    Args:
        filenames: sequence of image file paths.
        labels: per-file labels, aligned with `filenames`.
        training: when True, samples are shuffled and passed through
            `data_augment`. When False they are only resized to
            (HEIGHT, WIDTH), so validation data is deterministic and
            un-augmented.

    Returns:
        A `tf.data.Dataset` yielding `(image, label)` batches of BATCH_SIZE.
        The dataset repeats forever, so callers must bound it with
        `steps_per_epoch` / `validation_steps`.
    """
    dataset = tf.data.Dataset.from_tensor_slices((filenames,labels))
    dataset = dataset.map(process_img,num_parallel_calls=AUTO)
    # Cache the decoded images BEFORE any random op. Caching after the
    # augmentation would freeze the first epoch's random transforms and
    # replay the same "augmented" images in every later epoch.
    dataset = dataset.cache()
    if training:
        # Shuffle before repeat so every epoch is a fresh permutation and
        # samples from different epochs are not mixed together.
        dataset = dataset.shuffle(2048)
        dataset = dataset.map(data_augment,num_parallel_calls=AUTO)
    else:
        # Validation images must not be randomly flipped or cropped. They
        # still go through the resize that data_augment ends with, so both
        # splits deliver the same image size.
        dataset = dataset.map(
            lambda image, label: (tf.image.resize(image, [HEIGHT,WIDTH]), label),
            num_parallel_calls=AUTO)
    dataset = dataset.repeat()
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(AUTO)
    return dataset

In [None]:
import efficientnet.tfkeras as efn

#def create_model():
    
#    pretrained = efn.EfficientNetB4(include_top=False, weights="imagenet",input_shape=[HEIGHT,WIDTH, 3])
            
#    x = pretrained.output
#    x = tf.keras.layers.GlobalAveragePooling2D() (x)
#    outputs = tf.keras.layers.Dense(NUM_CLASSES,activation="softmax", dtype='float32')(x)
        
#    model = tf.keras.Model(pretrained.input, outputs)
#    return model

#model = create_model()
#model.summary()


In [None]:
def create_model():
    """Build an EfficientNetB0 classifier with a NUM_CLASSES-way softmax head.

    The ImageNet-pretrained backbone (no top) is fully trainable. Its output
    goes through global average pooling, dropout (0.5), an L2-regularised
    Dense layer and a separate float32 softmax activation named
    'predictions'.

    Returns:
        An uncompiled Keras model taking (HEIGHT, WIDTH, 3) images.
    """
    base_model = efn.EfficientNetB0(include_top=False, weights='imagenet')
    # Was misspelled `trainabe`, which only created an unused attribute.
    base_model.trainable = True

    inputs = layers.Input((HEIGHT,WIDTH, 3))
    # No hard-coded `training=True` here: that would keep BatchNorm and Dropout
    # of the backbone in training mode during validation and prediction.
    # Without it the layers follow the phase Keras sets in fit/evaluate/predict.
    x = base_model(inputs)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.5)(x)

    outputs = layers.Dense(NUM_CLASSES, kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    # Softmax is a separate layer pinned to float32.
    outputs = layers.Activation('softmax', dtype='float32', name='predictions')(outputs)

    return models.Model(inputs, outputs)

tf.keras.backend.clear_session()
#model = create_model()
#model.summary()

In [None]:
import tensorflow_addons as tfa

def compile_model(model, lr=0.001):
    """Compile `model` for one-hot multi-class training and return it.

    Args:
        model: Keras model to compile (compiled in place).
        lr: learning rate for the Adam optimizer.

    Returns:
        The same model instance, compiled with categorical cross-entropy and
        two metrics: macro F1 ('f1_score') and categorical accuracy ('acc').
    """
    # `lr=` is a deprecated alias that newer Keras versions reject;
    # `learning_rate=` is the supported keyword.
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    loss = tf.keras.losses.CategoricalCrossentropy()

    metrics = [
       tfa.metrics.F1Score(num_classes = NUM_CLASSES,average = "macro", name = "f1_score"),
       tf.keras.metrics.CategoricalAccuracy(name='acc')
    ]

    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    return model

In [None]:
METRIC = "val_acc"
# METRIC="val_auc"
def create_callbacks(kfold,metric = METRIC):
    """Return the training callbacks for one cross-validation fold.

    Args:
        kfold: fold identifier, embedded in the checkpoint file name.
        metric: name of the logged quantity all three callbacks monitor;
            larger values are treated as better.

    Returns:
        A list `[ModelCheckpoint, ReduceLROnPlateau, EarlyStopping]`.
    """
    # Settings shared by all three callbacks.
    watch = dict(monitor=metric, mode='max')

    return [
        # Keep only the best weights of this fold on disk.
        tf.keras.callbacks.ModelCheckpoint(
            filepath=f'./best_model_{kfold}.h5',
            save_best_only=True,
            verbose=1,
            **watch,
        ),
        # Multiply the learning rate by 0.1 after 3 epochs without improvement.
        tf.keras.callbacks.ReduceLROnPlateau(
            factor=0.1,
            patience=3,
            verbose=0,
            **watch,
        ),
        # Stop the fold after 10 epochs without improvement.
        tf.keras.callbacks.EarlyStopping(
            patience=10,
            verbose=1,
            **watch,
        ),
    ]

In [None]:
train_df.head(2)
train_df.to_csv('train_df.csv')

In [None]:
study_df.head(3)
study_df.to_csv('study_df.csv')

In [None]:
from tqdm import tqdm
files_ls= train_df['path']

files_df = pd.DataFrame(list(files_ls), columns = ["filepath"])


def get_id(filepath):
    """Return the image id encoded in `filepath`.

    Takes the last path component, cuts it at the first '.', and keeps the
    part after the last '_' (the whole stem when there is no '_').
    """
    tmp = filepath.split("/")[-1]
    tmp = tmp.split(".")[0]
    tmp = tmp.split("_")[-1]
    return tmp

# Build the id -> class lookup once, instead of filtering the whole DataFrame
# for every file, which made the loop quadratic in the number of images.
# It also avoids assigning a one-element Series into a scalar array slot and
# indexing files_ls by label. An id that has no entry raises KeyError.
id_to_level = dict(zip(train_df["id"], train_df["study_level"]))
label_ids = np.array([id_to_level[get_id(path)] for path in files_ls], dtype=int)

# tmp_labels: class id per file (float array, as before), used for stratification.
tmp_labels = label_ids.astype(float)

# labels: one-hot matrix of shape (n_files, NUM_CLASSES).
labels = np.zeros((len(files_ls),NUM_CLASSES))
labels[np.arange(len(label_ids)), label_ids] = 1

print("Labels shape: ",labels.shape)
print(files_ls)

In [None]:
# Stratified K-fold training using create_model / compile_model.
# One fresh model is trained per fold; the Keras History of each fold is kept
# in `history`, keyed by the zero-based fold index.
EPOCHS = 30
VERBOSE = 1
N_SPLITS = 5

kfold = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
history = {}


# Folds are stratified on the integer class ids in tmp_labels; tID / vID are
# positional indices into files_df and labels.
for fold,(tID,vID) in enumerate(kfold.split(files_ls,tmp_labels)):
    tFiles, tLabels = list(files_df.iloc[tID]["filepath"]) , labels[tID]
    vFiles, vLabels = list(files_df.iloc[vID]["filepath"]) , labels[vID]
    print("Number of Training Images: ",len(tID))
    print("Number of Validation Images: ",len(vID))
    
    # get_dataset repeats forever, so the epoch length has to be given
    # explicitly; floor division drops the final partial batch.
    STEPS_PER_EPOCH  = len(tID)//BATCH_SIZE
    VALID_STEPS = len(vID)//BATCH_SIZE
    
    # Reset Keras global state before building this fold's model.
    tf.keras.backend.clear_session()
    
    train_ds = get_dataset(tFiles,tLabels, training = True)
    val_ds = get_dataset(vFiles, vLabels, training = False)
    
    

    model = create_model()
    #model = chxnet
    model = compile_model(model, lr=0.00001)
    # Callbacks use the default monitor (METRIC) and checkpoint to
    # ./best_model_{fold}.h5.
    callbacks = create_callbacks(kfold = fold)

    print("------------------Fold - ",fold+1," --------------------------")
    history[fold] = model.fit(
                        train_ds,
                        epochs=EPOCHS,
                        callbacks=callbacks,
                        validation_data = val_ds,
                        verbose=VERBOSE,
                        steps_per_epoch = STEPS_PER_EPOCH,
                        validation_steps=VALID_STEPS
                       )

In [None]:
# One row of three panels (accuracy, F1, loss) per fold, each showing the
# training and validation curve from that fold's History.
plt.figure(figsize=(8*N_SPLITS,24))

for i in range(N_SPLITS):
    fold_log = history[i].history
    epochs_range = range(len(fold_log['val_loss']))

    # (train key, val key, train label, val label, legend position, title text)
    panels = (
        ('acc', 'val_acc', 'Training Accuracy', 'Validation  Accuracy',
         'lower right', 'Training and Validation  Accuracy'),
        ('f1_score', 'val_f1_score', 'Training F1 score', 'Validation  F1 score',
         'lower right', 'Training and Validation  F1 score'),
        ('loss', 'val_loss', 'Training Loss', 'Validation Loss',
         'upper right', 'Training and Validation Loss'),
    )

    for column, (train_key, val_key, train_label, val_label, legend_loc, title_text) in enumerate(panels, start=1):
        plt.subplot(N_SPLITS, 3, i*3+column)
        plt.plot(epochs_range, fold_log[train_key], label=train_label)
        plt.plot(epochs_range, fold_log[val_key], label=val_label)
        plt.legend(loc=legend_loc)
        plt.title(f'FOLD:{str(i)} {title_text}')

plt.show()

In [None]:

# Second experiment: stratified K-fold training of an EfficientNetB5 classifier
# that is evaluated with multi-label AUC.
# get_dataset reads BATCH_SIZE when it is called, so this value applies to the
# datasets built below.
BATCH_SIZE = 16

EPOCHS = 30
VERBOSE = 1
N_SPLITS = 5

kfold = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
history = {}


for fold,(tID,vID) in enumerate(kfold.split(files_ls,tmp_labels)):
    tFiles, tLabels = list(files_df.iloc[tID]["filepath"]) , labels[tID]
    vFiles, vLabels = list(files_df.iloc[vID]["filepath"]) , labels[vID]
    print("Number of Training Images: ",len(tID))
    print("Number of Validation Images: ",len(vID))

    # The datasets repeat forever, so the epoch length is given explicitly.
    STEPS_PER_EPOCH  = len(tID)//BATCH_SIZE
    VALID_STEPS = len(vID)//BATCH_SIZE

    tf.keras.backend.clear_session()

    train_ds = get_dataset(tFiles,tLabels, training = True)
    val_ds = get_dataset(vFiles, vLabels, training = False)

    model = tf.keras.Sequential([
        efn.EfficientNetB5(
            input_shape=(HEIGHT, WIDTH, 3),
            weights='imagenet',
            include_top=False),
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')
    ])
    # The metric is named explicitly so the logged key is always 'val_auc',
    # independent of Keras' automatic name numbering.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss='categorical_crossentropy',
        metrics=[tf.keras.metrics.AUC(multi_label=True, name='auc')])
    # This model logs no accuracy, so the default 'val_acc' monitor would never
    # be found: no checkpoint would be saved and LR reduction / early stopping
    # would never trigger. Monitor the AUC that is actually logged.
    # NOTE(review): checkpoints go to ./best_model_{fold}.h5, the same files an
    # earlier K-fold run in this notebook writes, and will replace them.
    callbacks = create_callbacks(kfold = fold, metric = "val_auc")
    print("------------------Fold - ",fold+1," --------------------------")
    history[fold] = model.fit(
                        train_ds,
                        epochs=EPOCHS,
                        callbacks=callbacks,
                        validation_data = val_ds,
                        verbose=VERBOSE,
                        steps_per_epoch = STEPS_PER_EPOCH,
                        validation_steps=VALID_STEPS
                       )