In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from single_dataset import read_train_csv, DATA_PATH
from single_train import N_FOLDS, SEED, set_random_seed
from single_test import calculate_scores, calculate_condition_metrics
from genetic_algorithm import optimize

Model name: densenet201
result dir None


In [2]:
db_models_path = "/home/sadid-dl/PycharmProjects/rsna24-lsdc/rsna24-data/models_db/{}/submission.csv"
models_path = "/home/sadid-dl/PycharmProjects/rsna24-lsdc/rsna24-data/models/{}/submission.csv"
value_column = ["normal_mild", "moderate", "severe"]

In [3]:
set_random_seed(SEED)

In [4]:
df, solution, _ = read_train_csv(DATA_PATH)

In [5]:
skf = KFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)
train_desc = pd.read_csv(DATA_PATH / "train_series_descriptions.csv")

study_ids = np.array(df.study_id.unique())
scores = []
val_study_id = []

for fold, (trn_idx, val_idx) in enumerate(skf.split(range(len(study_ids)))):
    if fold != 0:
        continue

    print(f"Test fold {fold}")
    print("train size", len(trn_idx), "test size", len(val_idx))
    val_study_id = study_ids[val_idx]

Test fold 0
train size 1833 test size 141


In [6]:
fold_sol = solution.loc[solution.study_id.isin(val_study_id)].sort_values(by="row_id").reset_index(drop=True)
spinal_canal = fold_sol.row_id.str.contains("spinal_canal")
neural_foraminal = fold_sol.row_id.str.contains("neural_foraminal")
subarticular = fold_sol.row_id.str.contains("subarticular")
fold_sol = fold_sol[["row_id", "normal_mild", "moderate", "severe", "sample_weight"]]
print("total spinal", spinal_canal.sum(), "total sub", subarticular.sum(), "total neural", neural_foraminal.sum())
print("total normal_mild", fold_sol["normal_mild"].sum(), "total moderate", fold_sol["moderate"].sum(), "total severe", fold_sol["severe"].sum())
fold_sol.shape

total spinal 705 total sub 1410 total neural 1410
total normal_mild 2646.0 total moderate 598.0 total severe 281.0


(3525, 5)

In [7]:
sub_densenet201_db = pd.read_csv(db_models_path.format("densenet201-DB-c3p1b16e20f14"))
accuracy, precision, cm, s = calculate_scores(sub_densenet201_db, fold_sol)
print(accuracy, precision, s)
cm

0.6473758865248227 0.5986632130352175 0.7795870031833588


array([[1908,  611,  127],
       [ 238,  279,   81],
       [  70,  116,   95]])

In [10]:
optimize(fold_sol, sub_densenet201_db, "spinal_canal", value_column, population_size=10, n_generations=5, mutation_rate=0.1)

100%|██████████| 5/5 [00:00<00:00, 20.74it/s]

Best individual: [0.50067352 0.92748053 0.00317369]
Best fitness: -0.8595744680851064
Accuracy 0.8595744680851064





array([[605,   0,   0],
       [ 52,   1,   0],
       [ 45,   2,   0]])

In [8]:
sub_efficientnet_b2_db = pd.read_csv(db_models_path.format("efficientnet_b2-DB-c3p1b16e20f14"))
accuracy, precision, cm, s = calculate_scores(sub_efficientnet_b2_db, fold_sol)
print(accuracy, precision, s)
cm

0.6533333333333333 0.6320141633865707 0.8452039033084205


array([[1890,  385,  371],
       [ 202,  260,  136],
       [  35,   93,  153]])

In [9]:
optimize(fold_sol, sub_efficientnet_b2_db, "spinal_canal", value_column, population_size=10, n_generations=5, mutation_rate=0.1)

100%|██████████| 5/5 [00:00<00:00, 19.74it/s]

Best individual: [0.80270584 0.10804183 0.22479222]
Best fitness: -0.851063829787234
Accuracy 0.851063829787234





array([[559,   0,  46],
       [ 34,   0,  19],
       [  6,   0,  41]])