In [1]:
import ast
import json
import os
import shutil

import albumentations
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydicom as dicom
import tensorflow as tf
import tensorflow_addons as tfa
import torch
from IPython.core.interactiveshell import InteractiveShell
from numba import cuda
from PIL import Image
from sklearn.model_selection import StratifiedKFold
from tensorflow import keras
from tqdm import tqdm

from src.augment import Aug, Aug_Crop, Aug_No_transform, Flip_Aug
from src.generator import Generator, GetModel

InteractiveShell.ast_node_interactivity = "all"

In [2]:
df = pd.read_csv("/app/_data/predicted_crop_v1.csv")
list_wrong = df[(df["class"] != "negative") & (df["label"] == "none 1 0 0 1 1")][
    "id_image"
].tolist()
df = df.query("id_image not in @list_wrong").reset_index(drop=True)

with open("/app/_data/base_config.json", "r") as f:
    base_config = json.load(f)
base_config["EFFB7"]["SEED"] = 42
base_config
MOD='EFFB6'

{'EFFB7': {'IMG_SIZE': 600,
  'BATCH_SIZE': 4,
  'SEED': 42,
  'WEIGHTS': '/app/_data/noisy-student-efficientnet-b7/efficientnetb7_notop.h5'},
 'CLASS': {'negative': 0, 'typical': 1, 'indeterminate': 2, 'atypical': 3},
 'EFFB4': {'IMG_SIZE': 380,
  'BATCH_SIZE': 50,
  'SEED': 42,
  'WEIGHTS': '/app/_data/efficientnet-b4_noisy-student_notop.h5'},
 'EFFB0': {'IMG_SIZE': 224,
  'BATCH_SIZE': 120,
  'SEED': 42,
  'WEIGHTS': 'imagenet'},
 'EFFB6': {'IMG_SIZE': 528,
  'BATCH_SIZE': 8,
  'SEED': 42,
  'WEIGHTS': '/app/_data/noisy-student-efficientnet-b6/efficientnetb6_notop.h5'}}

# train

In [3]:
# policy = keras.mixed_precision.experimental.Policy("mixed_float16")
# keras.mixed_precision.experimental.set_policy(policy)

In [4]:
skf = StratifiedKFold(
    n_splits=5, random_state=base_config[MOD]["SEED"], shuffle=True
)
train_ids = []
val_ids = []
for train_index, valid_index in skf.split(df, df["class"]):
    train_ids.append(train_index)
    val_ids.append(valid_index)

In [5]:
for n in range(0, 5):
    print("\n epoch #" + str(n) + "\n")
    val = df.loc[val_ids[n]].sample(frac=1, random_state=base_config[MOD]["SEED"])
    train = df.loc[train_ids[n]].sample(
        frac=1, random_state=base_config[MOD]["SEED"]
    )

    gen_train = Generator(
        df=train,
        batch_size=base_config[MOD]["BATCH_SIZE"],
        seed=base_config[MOD]["SEED"],
        img_size=base_config[MOD]["IMG_SIZE"],
        cache_img_path="/app/_data/crop_npy_528/",
        shuffle=True,
        label_columns=[
            "Negative for Pneumonia",
            "Typical Appearance",
            "Indeterminate Appearance",
            "Atypical Appearance",
        ],
        augment_fn=Flip_Aug,
        crop=True,
    )
    gen_valid = Generator(
        df=val,
        batch_size=base_config[MOD]["BATCH_SIZE"],
        seed=base_config[MOD]["SEED"],
        img_size=base_config[MOD]["IMG_SIZE"],
        cache_img_path="/app/_data/crop_npy_528/",
        shuffle=False,
        label_columns=[
            "Negative for Pneumonia",
            "Typical Appearance",
            "Indeterminate Appearance",
            "Atypical Appearance",
        ],
        augment_fn=None,
        crop=True,
    )

    get_m = GetModel(
        model_name=MOD,
        lr=0.0005,
        activation_func="softmax",
        weights='imagenet',
        n_classes=4,
        top_dropout_rate=None,
        loss=['bce', keras.losses.CategoricalCrossentropy()],
        metrics = ["acc", keras.metrics.AUC(multi_label=True)]
    )
    model = get_m.get_model()

    callbacks = get_m.make_callback(
        model_path="/app/_data/models/EffB6_cropped/",
        model_name="EffB6_1_" + str(n) + ".h5",
        tensorboard_path="/app/.tensorboard/EffB6_cr1_" + str(n),
        patience_ES=12,
        patience_RLR=2,
        factor_LR=0.7,
        metric_for_monitor="val_loss",
        metric_mode="min",
    )

    history = model.fit(
        gen_train,
        validation_data=gen_valid,
        epochs=50,
        steps_per_epoch=len(train) // base_config[MOD]["BATCH_SIZE"],
        validation_steps=len(val) // base_config[MOD]["BATCH_SIZE"],
        verbose=1,
        workers=20,
        max_queue_size=500,
        callbacks=callbacks,
    )
    keras.backend.clear_session()


 epoch #0

Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.44972, saving model to /app/_data/models/EffB6_cropped/EffB6_1_0.h5
Epoch 2/50

Epoch 00002: val_loss improved from 0.44972 to 0.38247, saving model to /app/_data/models/EffB6_cropped/EffB6_1_0.h5
Epoch 3/50

Epoch 00003: val_loss improved from 0.38247 to 0.37663, saving model to /app/_data/models/EffB6_cropped/EffB6_1_0.h5
Epoch 4/50

Epoch 00004: val_loss did not improve from 0.37663
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.37663

Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0007000000332482159.
Epoch 6/50

Epoch 00006: val_loss did not improve from 0.37663
Epoch 7/50

Epoch 00007: val_loss did not improve from 0.37663

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0004900000232737511.
Epoch 8/50

Epoch 00008: val_loss did not improve from 0.37663
Epoch 9/50

Epoch 00009: val_loss did not improve from 0.37663

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.00034

KeyboardInterrupt: 

In [None]:
from sklearn.metrics import average_precision_score
score = 0
for x in range(4): score += average_precision_score(tst_lbls[:, x], tst_outs[:, x]) / 4 * 2/3

# evaluating

In [None]:
gen = Generator(
    df=df,
    batch_size=base_config["EFFB7"]["BATCH_SIZE"],
    seed=base_config["EFFB7"]["SEED"],
    img_size=base_config["EFFB7"]["IMG_SIZE"],
    prepared_img_path="/app/_data/train_jpg_600/",
    shuffle=False,
    augment=False,
    hard_augment=False,
    n_inputs=2,
    n_classes=4,
)

In [None]:
mod_path = "/app/_data/models/EffB7_init/"
for file in os.listdir(mod_path):
    if ".h5" in file:
        print(file)

In [5]:
mod_path = "/app/_data/models/EffB7_init/"
for file in os.listdir(mod_path):
    if ".h5" in file:
        model = keras.models.load_model(mod_path + file)
        model.evaluate(gen)
        keras.backend.clear_session()



In [5]:
mod_path = "/app/_data/models/EffB7_2/"
for file in os.listdir(mod_path):
    if ".h5" in file:
        print(file)
        model = keras.models.load_model(mod_path + file)
        model.evaluate(gen)
        keras.backend.clear_session()

EffB7_3.h5
EffB7_0.h5
EffB7_1.h5
EffB7_2.h5
EffB7_4.h5


In [7]:
mod_path = "/app/_data/models/EffB7_comp_data_0_softmax/"
for file in os.listdir(mod_path):
    if ".h5" in file:
        print(file)
        model = keras.models.load_model(mod_path + file)
        model.evaluate(gen)
        keras.backend.clear_session()

EffB7_c_1.h5
INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3090, compute capability 8.6
EffB7_c_2.h5
EffB7_c_3.h5
EffB7_c_4.h5
EffB7_c_0_067.h5
EffB7_c_0.h5


In [8]:
mod_path = "/app/_data/models/EffB7_3/"
for file in os.listdir(mod_path):
    if ".h5" in file:
        print(file)
        model = keras.models.load_model(mod_path + file)
        model.evaluate(gen)
        keras.backend.clear_session()

EffB7_3.h5
EffB7_0.h5
EffB7_1.h5
EffB7_2.h5

KeyboardInterrupt: 

## evaluation 2 class

In [9]:
gen2 = Generator(
    df=df,
    batch_size=base_config["EFFB7"]["BATCH_SIZE"],
    seed=base_config["EFFB7"]["SEED"],
    img_size=base_config["EFFB7"]["IMG_SIZE"],
    prepared_img_path="/app/_data/train_jpg_600/",
    shuffle=False,
    augment=False,
    hard_augment=False,
    n_inputs=2,
    n_classes=2,
)

In [10]:
mod_path = "/app/_data/models/EffB7_2class_1/"
for file in os.listdir(mod_path):
    if ".h5" in file:
        print(file)
        model = keras.models.load_model(mod_path + file)
        model.evaluate(gen2)
        keras.backend.clear_session()

EffB7_2cl_2.h5
EffB7_2cl_1.h5
EffB7_2cl_0.h5
EffB7_2cl_4.h5
EffB7_2cl_3.h5
