In [1]:
import ast
import json
import os
import shutil

import albumentations
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydicom as dicom
import tensorflow as tf
import tensorflow_addons as tfa
import torch
from IPython.core.interactiveshell import InteractiveShell
from numba import cuda
from PIL import Image
from sklearn.model_selection import StratifiedKFold
from tensorflow import keras
from tqdm import tqdm

from src.generator import Generator, GetModel
InteractiveShell.ast_node_interactivity = "all"

In [2]:
df = pd.read_csv("/app/_data/train.csv")
list_wrong = df[(df["class"] != "negative") & (df["label"] == "none 1 0 0 1 1")][
    "id_image"
].tolist()
df = df.query("id_image not in @list_wrong").reset_index(drop=True)

with open("/app/_data/base_config.json", "r") as f:
    base_config = json.load(f)
base_config["EFFB7"]["SEED"] = 42
base_config

{'EFFB7': {'IMG_SIZE': 600,
  'BATCH_SIZE': 4,
  'SEED': 42,
  'WEIGHTS': '/app/_data/noisy-student-efficientnet-b7/efficientnetb7_notop.h5'},
 'CLASS': {'negative': 0, 'typical': 1, 'indeterminate': 2, 'atypical': 3},
 'EFFB4': {'IMG_SIZE': 380,
  'BATCH_SIZE': 50,
  'SEED': 42,
  'WEIGHTS': '/app/_data/efficientnet-b4_noisy-student_notop.h5'},
 'EFFB0': {'IMG_SIZE': 224,
  'BATCH_SIZE': 120,
  'SEED': 42,
  'WEIGHTS': 'imagenet'},
 'EFFB6': {'IMG_SIZE': 528,
  'BATCH_SIZE': 8,
  'SEED': 42,
  'WEIGHTS': '/app/_data/noisy-student-efficientnet-b6/efficientnetb6_notop.h5'}}

In [3]:
df['modality']

0       DX
1       CR
2       DX
3       CR
4       DX
        ..
6329    CR
6330    DX
6331    DX
6332    DX
6333    CR
Name: modality, Length: 6334, dtype: object

In [4]:
weight_for_negative = (1 / sum(df["class"] == "negative")) * (df.shape[0] / 4.0)
weight_for_typical = (1 / sum(df["class"] == "typical")) * (df.shape[0] / 4.0)
weight_for_indeterminate = (1 / sum(df["class"] == "indeterminate")) * (
    df.shape[0] / 4.0
)
weight_for_atypical = (1 / sum(df["class"] == "atypical")) * (df.shape[0] / 4.0)
non_negative = (1 / sum(df["class"] != "negative")) * (df.shape[0] / 2.0)
negative = (1 / sum(df["class"] == "negative")) * (df.shape[0] / 2.0)
class_weights = {
    "output1": [
        weight_for_negative,
        weight_for_typical,
        weight_for_indeterminate,
        weight_for_atypical,
    ],
    "output2": [negative, non_negative],
}
class_weights


{'output1': [0.9121543778801844,
  0.5266045892916528,
  1.4291516245487366,
  3.278467908902692],
 'output2': [1.8243087557603688, 0.6887777294475859]}

# train

In [5]:
df_dx = df.query('modality=="DX"').reset_index(drop=True)
df_cr = df.query('modality=="CR"').reset_index(drop=True)

## DX modality

In [6]:
skf = StratifiedKFold(
    n_splits=5, random_state=base_config["EFFB7"]["SEED"], shuffle=True
)
train_ids = []
val_ids = []
for train_index, valid_index in skf.split(df_dx, df_dx["class"]):
    train_ids.append(train_index)
    val_ids.append(valid_index)

In [7]:
policy = keras.mixed_precision.experimental.Policy("mixed_float16")
keras.mixed_precision.experimental.set_policy(policy)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3090, compute capability 8.6
Instructions for updating:
Use tf.keras.mixed_precision.LossScaleOptimizer instead. LossScaleOptimizer now has all the functionality of DynamicLossScale


In [8]:
for n in range(2,5):
    print("\n epoch #" + str(n) + "\n")
    val = df_dx.loc[val_ids[n]].sample(frac=1, random_state=base_config["EFFB7"]["SEED"])
    train = df_dx.loc[train_ids[n]].sample(
        frac=1, random_state=base_config["EFFB7"]["SEED"]
    )

    gen_train = Generator(
        df=train,
        batch_size=base_config["EFFB7"]["BATCH_SIZE"],
        seed=base_config["EFFB7"]["SEED"],
        img_size=base_config["EFFB7"]["IMG_SIZE"],
        prepared_img_path="/app/_data/train_npy_600/",
        shuffle=True,
        augment=False,
        hard_augment=True,
        trans_aug=False,
        flip_aug=False,
        n_inputs=1,
        n_classes=4,
        jpg=False,
        png=False,
        from_dicom=True,
    )
    gen_valid = Generator(
        df=val,
        batch_size=base_config["EFFB7"]["BATCH_SIZE"],
        seed=base_config["EFFB7"]["SEED"],
        img_size=base_config["EFFB7"]["IMG_SIZE"],
        prepared_img_path="/app/_data/train_npy_600/",
        shuffle=False,
        augment=False,
        hard_augment=False,
        flip_aug=False,
        n_inputs=1,
        n_classes=4,
        jpg=False,
        png=False,
        from_dicom=True,
    )

    get_m = GetModel(
        model_name="EFFB7",
        n_inputs=1,
        lr=0.0006,
        activation_func="softmax",
        weights=base_config["EFFB7"]["WEIGHTS"],
        n_classes=4,
        top_dropout_rate=None,
        auc=None
    )
    model = get_m.get_model()

    callbacks = get_m.make_callback(
        model_path="/app/_data/models/DX_CR/EffB7_HA_1/",
        model_name="EffB7_" + str(n) + ".h5",
        tensorboard_path="/app/.tensorboard/EffB7_HA_1" + str(n),
        patience_ES=12,
        patience_RLR=2,
        factor_LR=0.9,
        metric_for_monitor="val_acc",
        metric_mode="max",
    )

    history = model.fit(
        gen_train,
        validation_data=gen_valid,
        epochs=50,
        steps_per_epoch=len(train) // base_config["EFFB7"]["BATCH_SIZE"],
        validation_steps=len(val) // base_config["EFFB7"]["BATCH_SIZE"],
        verbose=1,
        workers=20,
        max_queue_size=500,
        callbacks=callbacks,
    )
    keras.backend.clear_session()


 epoch #2

  opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(opt)

Epoch 9/50





Epoch 00009: val_acc improved from -inf to 0.29412, saving model to /app/_data/models/DX_CR/EffB7_HA_1/EffB7_2.h5
Epoch 10/50

Epoch 00010: val_acc improved from 0.29412 to 0.46078, saving model to /app/_data/models/DX_CR/EffB7_HA_1/EffB7_2.h5
Epoch 11/50

Epoch 00011: val_acc improved from 0.46078 to 0.58170, saving model to /app/_data/models/DX_CR/EffB7_HA_1/EffB7_2.h5
Epoch 12/50

Epoch 00012: val_acc did not improve from 0.58170
Epoch 13/50

Epoch 00013: val_acc did not improve from 0.58170

Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.00043740003020502626.
Epoch 14/50

Epoch 00014: val_acc did not improve from 0.58170
Epoch 15/50

Epoch 00015: val_acc did not improve from 0.58170

Epoch 00015: ReduceLROnPlateau reducing learning rate to 0.0003936600376619026.
Epoch 16/50

Epoch 00016: val_acc did not improve from 0.58170
Epoch 17/50

Epoch 00017: val_acc did not improve from 0.58170

Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0003542940365150571.
Epo