In [None]:
import ast
import json
import os
import shutil

import albumentations
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydicom as dicom
import tensorflow as tf
import tensorflow_addons as tfa
import torch
from PIL import Image
from sklearn.model_selection import StratifiedKFold
from tensorflow import keras
from tqdm import tqdm
from numba import cuda
from src.generator import Generator, GetModel
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
df = pd.read_csv("/app/_data/train.csv")
list_wrong = df[(df["class"] != "negative") & (df["label"] == "none 1 0 0 1 1")][
    "id_image"
].tolist()
# df = df.query("id_image not in @list_wrong").reset_index(drop=True)

with open("/app/_data/base_config.json", "r") as f:
    base_config = json.load(f)
base_config["EFFB4"]["IMG_SIZE"] = 380
base_config["EFFB4"]["BATCH_SIZE"] = 24
base_config

{'EFFB7': {'IMG_SIZE': 600,
  'BATCH_SIZE': 4,
  'SEED': 42,
  'WEIGHTS': '/app/_data/noisy-student-efficientnet-b7/efficientnetb7_notop.h5'},
 'CLASS': {'negative': 0, 'typical': 1, 'indeterminate': 2, 'atypical': 3},
 'EFFB4': {'IMG_SIZE': 380,
  'BATCH_SIZE': 24,
  'SEED': 42,
  'WEIGHTS': '/app/_data/efficientnet-b4_noisy-student_notop.h5'}}

In [None]:
weight_for_negative = (1 / sum(df['class']=='negative')) * (df.shape[0] / 4.0)
weight_for_typical = (1 / sum(df['class']=='typical')) * (df.shape[0] / 4.0)
weight_for_indeterminate = (1 / sum(df['class']=='indeterminate')) * (df.shape[0] / 4.0)
weight_for_atypical = (1 / sum(df['class']=='atypical')) * (df.shape[0] / 4.0)
non_negative = (1 / sum(df['class']!='negative')) * (df.shape[0] / 2.0)
negative = (1 / sum(df['class']=='negative')) * (df.shape[0] / 2.0)
class_weights={'output1':[weight_for_negative,weight_for_typical,weight_for_indeterminate,weight_for_atypical],'output2':[negative,non_negative]}
class_weights
loss_weights={'output1':1,'output2':0.1}

{'output1': [0.9121543778801844,
  0.5266045892916528,
  1.4291516245487366,
  3.278467908902692],
 'output2': [1.8243087557603688, 0.6887777294475859]}

# train

In [4]:
skf = StratifiedKFold(n_splits=5, random_state=base_config["EFFB7"]["SEED"], shuffle=True)
train_ids = []
val_ids = []
for train_index, valid_index in skf.split(df, df["class"]):
    train_ids.append(train_index)
    val_ids.append(valid_index)

In [None]:
for n in range(5):
    print("\n epoch #" + str(n) + "\n")
    val = df.loc[val_ids[n]].sample(frac=1, random_state=base_config["EFFB4"]["SEED"])
    train = df.loc[train_ids[n]].sample(frac=1, random_state=base_config["EFFB4"]["SEED"])


    gen_train = Generator(
        df=train,
        batch_size=base_config["EFFB4"]["BATCH_SIZE"],
        seed=base_config["EFFB4"]["SEED"],
        img_size=base_config["EFFB4"]["IMG_SIZE"],
        prepared_img_path="/app/_data/train_npy_380/",
        shuffle=True,
        augment=False,
        hard_augment=False,
        trans_aug = False,
        flip_aug = False,
        n_inputs=1,
        n_classes=4,
        jpg = False,
        png=False,
        from_dicom=True
    )
    gen_valid = Generator(
        df=val,
        batch_size=base_config["EFFB4"]["BATCH_SIZE"],
        seed=base_config["EFFB4"]["SEED"],
        img_size=base_config["EFFB4"]["IMG_SIZE"],
        prepared_img_path="/app/_data/train_npy_380/",
        shuffle=False,
        augment=False,
        hard_augment=False,
        flip_aug = False,
        n_inputs=1,
        n_classes=4,
        jpg = False,
        png=False,
        from_dicom=True
    )

    get_m = GetModel(
    model_name="EFFB4",
    n_inputs=1,
    lr=0.001,
    activation_func="softmax",
#     weights=base_config["EFFB4"]["WEIGHTS"],
    weights='imagenet',
    n_classes=4,
    top_dropout_rate=None,

)
    model = get_m.get_model()

    callbacks = get_m.make_callback(
        model_path="/app/_data/models/EffB4_1in_380/",
        model_name="EffB4_" + str(n) + ".h5",
        tensorboard_path="/app/.tensorboard/EffB4_1in_380" + str(n),
        patience_ES=20,
        patience_RLR=2,
        factor_LR=0.7,
        metric_for_monitor="val_loss",
        metric_mode="min",
    )

    history = model.fit(
        gen_train,
        validation_data=gen_valid,
        epochs=50,
        steps_per_epoch=len(train) // base_config["EFFB4"]["BATCH_SIZE"],
        validation_steps=len(val) // base_config["EFFB4"]["BATCH_SIZE"],
        verbose=1,
        workers=20,
        max_queue_size=500,
        callbacks=callbacks,
    )
    keras.backend.clear_session()


 epoch #0

Epoch 1/50




 15/211 [=>............................] - ETA: 5:29 - loss: 2.3445 - acc: 0.3919 - auc: 0.5988

# evaluating

In [None]:
gen = Generator(
    df=df,
    batch_size=base_config["EFFB7"]["BATCH_SIZE"],
    seed=base_config["EFFB7"]["SEED"],
    img_size=base_config["EFFB7"]["IMG_SIZE"],
    prepared_img_path="/app/_data/train_jpg_600/",
    shuffle=False,
    augment=False,
    hard_augment=False,
    n_inputs=2,
    n_classes=4,
)

In [None]:
mod_path = '/app/_data/models/EffB7_init/'
for file in os.listdir(mod_path):
    if '.h5' in file:
        print(file)

In [5]:
mod_path = '/app/_data/models/EffB7_init/'
for file in os.listdir(mod_path):
    if '.h5' in file:
        model = keras.models.load_model(mod_path+file)
        model.evaluate(gen)
        keras.backend.clear_session()



In [5]:
mod_path = '/app/_data/models/EffB7_2/'
for file in os.listdir(mod_path):
    if '.h5' in file:
        print(file)
        model = keras.models.load_model(mod_path+file)
        model.evaluate(gen)
        keras.backend.clear_session()

EffB7_3.h5
EffB7_0.h5
EffB7_1.h5
EffB7_2.h5
EffB7_4.h5


In [7]:
mod_path = '/app/_data/models/EffB7_comp_data_0_softmax/'
for file in os.listdir(mod_path):
    if '.h5' in file:
        print(file)
        model = keras.models.load_model(mod_path+file)
        model.evaluate(gen)
        keras.backend.clear_session()

EffB7_c_1.h5
INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3090, compute capability 8.6
EffB7_c_2.h5
EffB7_c_3.h5
EffB7_c_4.h5
EffB7_c_0_067.h5
EffB7_c_0.h5


In [8]:
mod_path = '/app/_data/models/EffB7_3/'
for file in os.listdir(mod_path):
    if '.h5' in file:
        print(file)
        model = keras.models.load_model(mod_path+file)
        model.evaluate(gen)
        keras.backend.clear_session()

EffB7_3.h5
EffB7_0.h5
EffB7_1.h5
EffB7_2.h5

KeyboardInterrupt: 

## evaluation 2 class

In [9]:
gen2 = Generator(
    df=df,
    batch_size=base_config["EFFB7"]["BATCH_SIZE"],
    seed=base_config["EFFB7"]["SEED"],
    img_size=base_config["EFFB7"]["IMG_SIZE"],
    prepared_img_path="/app/_data/train_jpg_600/",
    shuffle=False,
    augment=False,
    hard_augment=False,
    n_inputs=2,
    n_classes=2,
)

In [10]:
mod_path = '/app/_data/models/EffB7_2class_1/'
for file in os.listdir(mod_path):
    if '.h5' in file:
        print(file)
        model = keras.models.load_model(mod_path+file)
        model.evaluate(gen2)
        keras.backend.clear_session()

EffB7_2cl_2.h5
EffB7_2cl_1.h5
EffB7_2cl_0.h5
EffB7_2cl_4.h5
EffB7_2cl_3.h5
