In [1]:
import ast
import json
import os
import shutil

import albumentations
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydicom as dicom
import tensorflow as tf
import tensorflow_addons as tfa
import torch
from PIL import Image
from sklearn.model_selection import StratifiedKFold
from tensorflow import keras
from tqdm import tqdm

from src.generator import Generator, GetModel

In [2]:
# Load the training table and drop every image that has a non-"negative" class
# but carries the label string "none 1 0 0 1 1" (collected in `list_wrong`;
# presumably these are annotation errors -- confirm with the dataset owner).
df = pd.read_csv("/app/_data/train.csv")
is_suspect = df["class"].ne("negative") & df["label"].eq("none 1 0 0 1 1")
list_wrong = df.loc[is_suspect, "id_image"].tolist()
# Re-number the rows 0..N-1 after filtering.
df = df[~df["id_image"].isin(list_wrong)].reset_index(drop=True)

In [3]:
# Read the shared JSON configuration, override the EFFB7 batch size for this
# notebook, and display the EFFB7 seed as the cell output.
with open("/app/_data/base_config.json", "r") as config_file:
    base_config = json.loads(config_file.read())
effb7_settings = base_config["EFFB7"]  # same dict object, so the override lands in base_config
effb7_settings["BATCH_SIZE"] = 4
effb7_settings["SEED"]

42

In [4]:
# Build the pool of extra samples from the additional dataset: keep only rows
# with Target == 0, derive the DICOM file name from the patient id, tag every
# row with the "negative" class and keep the columns used for training.
labels = (
    pd.read_csv("/app/_data/additional_dataset/labels_full.csv")
    .loc[lambda frame: frame["Target"] == 0]
    .reset_index(drop=True)
    .assign(
        image=lambda frame: frame["patientId"] + ".dcm",
        **{"class": "negative"},  # "class" is a Python keyword, hence the dict unpacking
    )
    .loc[:, ["image", "class", "modality", "PatientSex", "BodyPartExamined"]]
)

# Validation folds are drawn only from `df`; the additional dataset (`labels`) is used for training only

In [5]:
# Pre-compute the five shuffled, class-stratified train/validation splits of df.
# train_ids[k] / val_ids[k] hold the index arrays produced for fold k.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
train_ids, val_ids = map(list, zip(*skf.split(df, df["class"])))

In [17]:
# 5-fold cross-validation training of the EFFB7 model.
#
# For every fold:
#   * validation data comes from df only;
#   * training data is the fold's df rows plus 3000 rows sampled from `labels`;
#   * a fresh model is built, trained, and the Keras session is cleared.
effb7_cfg = base_config["EFFB7"]
fold_seed = effb7_cfg["SEED"]
fold_batch_size = effb7_cfg["BATCH_SIZE"]
train_columns = ["image", "class", "modality", "PatientSex", "BodyPartExamined"]

for n in range(5):
    # `n` is the fold number; epochs are reported by model.fit itself.
    print("\n fold #" + str(n) + "\n")

    # skf.split yields positional indices, so select rows with .iloc.
    val = df.iloc[val_ids[n]].sample(frac=1, random_state=fold_seed)
    train = pd.concat(
        [
            df.iloc[train_ids[n]][train_columns],
            # Seeded so a re-run reproduces the same extra rows; the offset by
            # `n` keeps a different draw for every fold.
            labels.sample(3000, random_state=fold_seed + n),
        ],
        ignore_index=True,
        axis=0,
    ).sample(frac=1, random_state=fold_seed)

    gen_train = Generator(
        df=train,
        batch_size=fold_batch_size,
        seed=fold_seed,
        img_size=effb7_cfg["IMG_SIZE"],
        prepared_img_path="/app/_data/train_jpg_600/",
        shuffle=True,
        augment=False,
        hard_augment=True,
        n_inputs=2,
        n_classes=4,
    )
    gen_valid = Generator(
        df=val,
        batch_size=fold_batch_size,
        seed=fold_seed,
        img_size=effb7_cfg["IMG_SIZE"],
        prepared_img_path="/app/_data/train_jpg_600/",
        shuffle=False,
        augment=False,
        hard_augment=False,
        n_inputs=2,
        n_classes=4,
    )

    # The dtype policy has to be active BEFORE the model's layers are created;
    # setting it afterwards left the first fold's model on the default policy.
    policy = keras.mixed_precision.experimental.Policy("mixed_float16")
    keras.mixed_precision.experimental.set_policy(policy)

    model = GetModel(
        model_name="EFFB7", n_inputs=2, lr=0.0007, activation_func="softmax", weights=None
    ).get_model()

    callbacks = GetModel("EFFB7").make_callback(
        model_path="/app/_data/models/EffB7_4/",
        model_name="EffB7_" + str(n) + ".h5",
        tensorboard_path="/app/.tensorboard/EffB74_" + str(n),
        patience_ES=10,
        patience_RLR=3,
        factor_LR=0.8,
        metric_for_monitor="val_acc",
        metric_mode="max",
    )

    # Step counts are derived from the CURRENT fold's df rows. The `// 3` makes
    # one Keras epoch cover roughly a third of those rows; the 3000 extra rows
    # are not counted. NOTE(review): confirm this shortened epoch is intended.
    history = model.fit(
        gen_train,
        validation_data=gen_valid,
        epochs=50,
        steps_per_epoch=len(train_ids[n]) // fold_batch_size // 3,
        validation_steps=len(val_ids[n]) // fold_batch_size // 3,
        verbose=1,
        workers=20,
        max_queue_size=500,
        callbacks=callbacks,
    )
    # Reset Keras global state before the next fold's model is built.
    keras.backend.clear_session()