In [1]:
import ast
import json
import os
import shutil

# import efficientnet.tfkeras as efn
import albumentations
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydicom as dicom
import tensorflow as tf
import tensorflow_addons as tfa
import torch
from IPython.core.interactiveshell import InteractiveShell
from numba import cuda
from PIL import Image
from sklearn.model_selection import StratifiedKFold
from tensorflow import keras
from tqdm import tqdm

from src.augment import Aug, Aug_Crop, Aug_No_transform, Flip_Aug
from src.generator import Generator, GetModel

InteractiveShell.ast_node_interactivity = "all"

In [2]:
df = pd.read_csv("/app/_data/predicted_crop_v1.csv")
list_wrong = df[(df["class"] != "negative") & (df["label"] == "none 1 0 0 1 1")][
    "id_image"
].tolist()
df = df.query("id_image not in @list_wrong").reset_index(drop=True)

with open("/app/_data/base_config.json", "r") as f:
    base_config = json.load(f)
base_config["EFFB7"]["SEED"] = 1480
base_config

{'EFFB7': {'IMG_SIZE': 600,
  'BATCH_SIZE': 4,
  'SEED': 1480,
  'WEIGHTS': '/app/_data/noisy-student-efficientnet-b7/efficientnetb7_notop.h5'},
 'CLASS': {'negative': 0, 'typical': 1, 'indeterminate': 2, 'atypical': 3},
 'EFFB4': {'IMG_SIZE': 380,
  'BATCH_SIZE': 50,
  'SEED': 42,
  'WEIGHTS': '/app/_data/efficientnet-b4_noisy-student_notop.h5'},
 'EFFB0': {'IMG_SIZE': 224,
  'BATCH_SIZE': 120,
  'SEED': 42,
  'WEIGHTS': 'imagenet'},
 'EFFB6': {'IMG_SIZE': 528,
  'BATCH_SIZE': 8,
  'SEED': 42,
  'WEIGHTS': '/app/_data/noisy-student-efficientnet-b6/efficientnetb6_notop.h5'}}

# train

In [3]:
policy = keras.mixed_precision.experimental.Policy("mixed_float16")
keras.mixed_precision.experimental.set_policy(policy)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3090, compute capability 8.6
Instructions for updating:
Use tf.keras.mixed_precision.LossScaleOptimizer instead. LossScaleOptimizer now has all the functionality of DynamicLossScale


In [4]:
skf = StratifiedKFold(
    n_splits=6, random_state=base_config["EFFB7"]["SEED"], shuffle=True
)
train_ids = []
val_ids = []
for train_index, valid_index in skf.split(df, df["class"]):
    train_ids.append(train_index)
    val_ids.append(valid_index)

In [5]:
# shutil.rmtree('/app/_data/models/EffB7_cropped/backup')

In [6]:
class Aug_Crop:
    def augment_image(img):
        h = img.shape[0]
        w = img.shape[1]
        transform = albumentations.Compose(
            [
#                 albumentations.OneOf(
#                     [
#                         albumentations.MotionBlur((3, 5)),
#                         albumentations.MedianBlur(blur_limit=5),
#                         albumentations.GaussianBlur(blur_limit=(3, 5), sigma_limit=0),
#                         albumentations.Blur(blur_limit=(3, 5)),
#                     ],
#                     p=0.1,
#                 ),
#                 albumentations.OneOf(
#                     [
#                         albumentations.GaussNoise(var_limit=[10, 20], mean=1),
#                         albumentations.ImageCompression(
#                             quality_lower=85, quality_upper=100, compression_type=1
#                         ),
#                         albumentations.MultiplicativeNoise(
#                             multiplier=(0.95, 1.05), per_channel=False, elementwise=True
#                         ),
#                         albumentations.Downscale(
#                             scale_min=0.85,
#                             scale_max=0.99,
#                             interpolation=cv2.INTER_LINEAR,
#                         ),
#                     ],
#                     p=0.1,
#                 ),
                albumentations.OneOf(
                    [
                        albumentations.RandomBrightnessContrast(
                            brightness_limit=(-0.1, 0.2),
                            contrast_limit=(-0.1, 0.2),
                            brightness_by_max=True,
                        ),
#                         albumentations.augmentations.transforms.Sharpen(
#                             alpha=(0.05, 0.1), lightness=(0.5, 1.0)
#                         ),
                        albumentations.augmentations.transforms.RandomToneCurve(
                            scale=0.05
                        ),
                    ],
                    p=0.1,
                ),
#                 albumentations.OneOf(
#                     [
#                         albumentations.OpticalDistortion(
#                             distort_limit=0.1,
#                             shift_limit=0.1,
#                             border_mode=1,
#                         ),
#                         albumentations.ElasticTransform(
#                             alpha=2.0,
#                             sigma=2.0,
#                             alpha_affine=2.0,
#                             interpolation=cv2.INTER_LINEAR,
#                             border_mode=1,
#                         ),
#                         albumentations.GridDistortion(
#                             num_steps=5,
#                             distort_limit=0.1,
#                             interpolation=cv2.INTER_LINEAR,
#                             border_mode=1,
#                         ),
#                     ],
#                     p=0.1,
#                 ),
                albumentations.OneOf(
                    [
                        albumentations.augmentations.crops.transforms.CropAndPad(
                            px=None,
                            percent=(-0.01, 0.05),
                            pad_mode=1,
                            pad_cval=0,
                            pad_cval_mask=0,
                            keep_size=True,
                            sample_independently=True,
                            interpolation=cv2.INTER_LINEAR,
                        ),
                        albumentations.RandomSizedCrop(
                            min_max_height=(0.95 * h, 0.95 * w),
                            height=h,
                            width=w,
                            w2h_ratio=1.0,
                            interpolation=cv2.INTER_LINEAR,
                        ),
                        albumentations.HorizontalFlip(),
#                         albumentations.RandomRotate90(),
                    ],
                    p=0.1,
                ),
                albumentations.CoarseDropout(
                    max_holes=10,
                    max_height=60,
                    max_width=60,
                    min_holes=1,
                    min_height=6,
                    min_width=6,
                    fill_value=0,
                    p=0.2
                ),
            ]
        )

        return transform(image=img)["image"]

In [7]:
base_config["EFFB7"]["BATCH_SIZE"] = 4

In [8]:
for n in range(0, 6):
    print("\n iter #" + str(n) + "\n")
    val = df.loc[val_ids[n]].sample(frac=1, random_state=base_config["EFFB7"]["SEED"])
    train = df.loc[train_ids[n]].sample(
        frac=1, random_state=base_config["EFFB7"]["SEED"]
    )

    gen_train = Generator(
        df=train,
        batch_size=base_config["EFFB7"]["BATCH_SIZE"],
        seed=base_config["EFFB7"]["SEED"],
        img_size=600,
        cache_img_path="/app/_data/crop_npy_600/",
        shuffle=True,
        label_columns=[
            "Negative for Pneumonia",
            "Typical Appearance",
            "Indeterminate Appearance",
            "Atypical Appearance",
        ],
        augment_fn=Aug_Crop,
        crop=True,
    )
    gen_valid = Generator(
        df=val,
        batch_size=base_config["EFFB7"]["BATCH_SIZE"],
        seed=base_config["EFFB7"]["SEED"],
        img_size=600,
        cache_img_path="/app/_data/crop_npy_600/",
        shuffle=False,
        label_columns=[
            "Negative for Pneumonia",
            "Typical Appearance",
            "Indeterminate Appearance",
            "Atypical Appearance",
        ],
        augment_fn=None,
        crop=True,
    )

    get_m = GetModel(
        model_name="EFFB7",
        lr=0.0005,
        activation_func="softmax",
        weights=base_config["EFFB7"]["WEIGHTS"],
        n_classes=4,
        top_dropout_rate=0.5,
        loss=['categorical_crossentropy'],
        metrics=["acc", keras.metrics.AUC(multi_label=True)],
    )
    model = get_m.get_model()

    callbacks = get_m.make_callback(
        model_path="/app/_data/models/EffB7_cropped_600_3/",
        model_name="EffB7_" + str(n) + ".h5",
        tensorboard_path="/app/.tensorboard/EffB7_cropped_32_" + str(n),
        patience_ES=5,
        patience_RLR=2,
        factor_LR=0.5,
        metric_for_monitor="val_loss",
        metric_mode="min",
    )

    history = model.fit(
        gen_train,
        validation_data=gen_valid,
        epochs=50,
        steps_per_epoch=len(train) // base_config["EFFB7"]["BATCH_SIZE"]//2,
        validation_steps=len(val) // base_config["EFFB7"]["BATCH_SIZE"],
        verbose=1,
        workers=20,
        max_queue_size=500,
        callbacks=callbacks,
    )
    keras.backend.clear_session()


 iter #0

  opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(opt)

Epoch 1/50

Epoch 00001: val_loss improved from inf to 1.40189, saving model to /app/_data/models/EffB7_cropped_600_3/EffB7_0.h5
Epoch 2/50

Epoch 00002: val_loss improved from 1.40189 to 0.98448, saving model to /app/_data/models/EffB7_cropped_600_3/EffB7_0.h5
Epoch 3/50

Epoch 00003: val_loss improved from 0.98448 to 0.92490, saving model to /app/_data/models/EffB7_cropped_600_3/EffB7_0.h5
Epoch 4/50

Epoch 00004: val_loss did not improve from 0.92490
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.92490

Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 6/50

Epoch 00006: val_loss did not improve from 0.92490
Epoch 7/50

Epoch 00007: val_loss did not improve from 0.92490

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 8/50

Epoch 00008: val_loss improved from 0.92490 to 0.90899, saving model to /app/_data/models/EffB7_c