In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from single_dataset import read_train_csv, DATA_PATH
from single_train import N_FOLDS, SEED, set_random_seed
from single_test import calculate_scores, calculate_condition_metrics
from genetic_algorithm import optimize

Model name: densenet201
result dir rsna24-data/models_db/densenet201-DB-c3p1b16e20f14


In [2]:
# Root of the project checkout. Defaults to the original author's location so existing
# runs are unchanged; set RSNA24_PROJECT_ROOT to run the notebook from another machine.
PROJECT_ROOT = os.environ.get("RSNA24_PROJECT_ROOT", "/home/sadid-dl/PycharmProjects/rsna24-lsdc").rstrip("/")

# The templates below are consumed with str.format(model_name), so any literal braces
# in the root must be escaped to survive formatting.
_project_root_tpl = PROJECT_ROOT.replace("{", "{{").replace("}", "}}")

# "{}" is filled with a model directory name; each directory holds a submission.csv.
db_models_path = _project_root_tpl + "/rsna24-data/models_db/{}/submission.csv"
models_path = _project_root_tpl + "/rsna24-data/models/{}/submission.csv"

# Severity columns shared by the solution table and every submission file.
value_column = ["normal_mild", "moderate", "severe"]

In [3]:
# Seed through the project helper before the stochastic steps below.
# NOTE(review): which RNGs the helper seeds, and whether `optimize` draws from them,
# is not visible in this notebook — confirm in single_train / genetic_algorithm.
set_random_seed(SEED)

In [4]:
# Load the training tables from DATA_PATH; the third return value is discarded.
# Later cells read `df.study_id` for the fold split and use `solution` (study_id, row_id,
# the three severity columns and sample_weight) as the ground truth.
df, solution, _ = read_train_csv(DATA_PATH)

In [5]:
# Split the unique study ids into N_FOLDS shuffled folds and keep the study ids of
# fold 0 as the validation set used by every evaluation below.
skf = KFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)
train_desc = pd.read_csv(DATA_PATH / "train_series_descriptions.csv")

study_ids = np.array(df["study_id"].unique())
scores = []
val_study_id = []

# KFold only needs the number of samples, so it is handed positions rather than ids.
fold_splits = skf.split(range(len(study_ids)))
for fold, (trn_idx, val_idx) in enumerate(fold_splits):
    if fold == 0:
        print(f"Test fold {fold}")
        print("train size", len(trn_idx), "test size", len(val_idx))
        val_study_id = study_ids[val_idx]


Test fold 0
train size 1833 test size 141


In [6]:
# Ground truth for the validation fold, ordered by row_id with a fresh 0..n-1 index.
in_val_fold = solution["study_id"].isin(val_study_id)
fold_sol = solution.loc[in_val_fold].sort_values(by="row_id").reset_index(drop=True)

# Boolean masks selecting the rows of each condition family by row_id substring.
row_ids = fold_sol["row_id"]
spinal_canal = row_ids.str.contains("spinal_canal")
neural_foraminal = row_ids.str.contains("neural_foraminal")
subarticular = row_ids.str.contains("subarticular")

# Keep only the columns the scoring helpers are given.
fold_sol = fold_sol[["row_id", "normal_mild", "moderate", "severe", "sample_weight"]]

print(
    "total spinal", spinal_canal.sum(),
    "total sub", subarticular.sum(),
    "total neural", neural_foraminal.sum(),
)
print(
    "total normal_mild", fold_sol["normal_mild"].sum(),
    "total moderate", fold_sol["moderate"].sum(),
    "total severe", fold_sol["severe"].sum(),
)
fold_sol.shape

total spinal 705 total sub 1410 total neural 1410
total normal_mild 2646.0 total moderate 598.0 total severe 281.0


(3525, 5)

In [7]:
# Preview the first ground-truth rows of the validation fold.
fold_sol.head()

Unnamed: 0,row_id,normal_mild,moderate,severe,sample_weight
0,1004726367_left_neural_foraminal_narrowing_l1_l2,1.0,0.0,0.0,1.0
1,1004726367_left_neural_foraminal_narrowing_l2_l3,1.0,0.0,0.0,1.0
2,1004726367_left_neural_foraminal_narrowing_l3_l4,1.0,0.0,0.0,1.0
3,1004726367_left_neural_foraminal_narrowing_l4_l5,1.0,0.0,0.0,1.0
4,1004726367_left_neural_foraminal_narrowing_l5_s1,1.0,0.0,0.0,1.0


# Best accuracy for each condition

## best spinal canal accuracy

In [8]:
# Score the densenet201 "DB" submission against the fold-0 ground truth.
# calculate_scores is unpacked as (accuracy, precision, confusion matrix, s).
# In the recorded output the confusion-matrix rows sum to the per-class truth totals
# of fold_sol, so rows are true classes and columns are predicted classes.
# NOTE(review): `s` is presumably the competition score — confirm in single_test.
sub_densenet201_db = pd.read_csv(db_models_path.format("densenet201-DB-c3p1b16e20f14"))
accuracy, precision, cm, s = calculate_scores(sub_densenet201_db, fold_sol)
print(accuracy, precision, s)
cm

0.6473758865248227 0.5986632130352175 0.7795870031833588


array([[1908,  611,  127],
       [ 238,  279,   81],
       [  70,  116,   95]])

In [9]:
# Evaluate the same submission on the "spinal_canal" condition only.
# This rebinds `accuracy` and `cm`, replacing the all-condition values.
# NOTE(review): the meaning of the trailing `True` flag is defined in single_test — confirm.
accuracy, cm = calculate_condition_metrics(fold_sol, sub_densenet201_db, "spinal_canal", True)
print(accuracy)
cm

0.8156028368794326


array([[564,   0,  41],
       [ 48,   0,   5],
       [ 36,   0,  11]])

In [10]:
# Sanity check on the 3x3 confusion matrix `cm`: the diagonal holds the correctly
# classified rows, everything off the diagonal is a misclassification.
n_total = np.sum(cm)
n_correct = np.trace(cm)

# Accuracy (printed) followed by the complementary error rate (cell output).
print(n_correct / n_total)
(n_total - n_correct) / n_total

0.8156028368794326


0.18439716312056736

In [11]:
# Genetic-algorithm search on the spinal_canal rows; the three numbers it reports as
# "Best individual" are reused later in this notebook as per-class multipliers.
# The search budget here is small (population 10, 5 generations).
# In the recorded run the winning individual predicts normal_mild for every row:
# accuracy rises, but no moderate or severe case is predicted at all.
optimize(fold_sol, sub_densenet201_db, "spinal_canal", value_column, population_size=10, n_generations=5, mutation_rate=0.1)

100%|██████████| 5/5 [00:00<00:00, 20.03it/s]

Best individual: [0.71553492 0.19971297 0.22479222]
Best fitness: -0.8581560283687943
Accuracy 0.8581560283687943





array([[605,   0,   0],
       [ 53,   0,   0],
       [ 47,   0,   0]])

## best subarticular accuracy

In [12]:
# Score the densenet161 submission (read from the non-DB models directory) against
# the fold-0 ground truth; rebinds accuracy, precision, cm and s.
sub_densenet161 = pd.read_csv(models_path.format("densenet161-c3p1b16e20f14"))
accuracy, precision, cm, s = calculate_scores(sub_densenet161, fold_sol)
print(accuracy, precision, s)
cm

0.7407092198581561 0.6008971489931029 0.7810797800373651


array([[2349,  191,  106],
       [ 380,  185,   33],
       [ 109,   95,   77]])

In [13]:
# Evaluate densenet161 on the "subarticular" condition only; rebinds `accuracy` and `cm`.
# NOTE(review): the meaning of the trailing `True` flag is defined in single_test — confirm.
accuracy, cm = calculate_condition_metrics(fold_sol, sub_densenet161, "subarticular", True)
print(accuracy)
cm

0.7127659574468085


array([[898,  55,  22],
       [191,  65,  12],
       [ 82,  43,  42]])

In [14]:
# Genetic-algorithm search on the subarticular rows (population 50, 20 generations).
# In the recorded run accuracy moves from 0.713 to 0.721 while correctly predicted
# moderate cases drop from 65 to 23, so the gain comes from favouring normal_mild.
optimize(fold_sol, sub_densenet161, "subarticular", value_column, population_size=50, n_generations=20, mutation_rate=0.1)

100%|██████████| 20/20 [00:04<00:00,  4.10it/s]

Best individual: [0.80735625 0.4546022  0.47419877]
Best fitness: -0.7205673758865249
Accuracy 0.7205673758865249





array([[953,   9,  13],
       [234,  23,  11],
       [119,   8,  40]])

## best neural foraminal accuracy

In [15]:
# Score the xception41 submission against the fold-0 ground truth; rebinds accuracy,
# precision, cm and s.
# NOTE(review): the variable carries a `_db` suffix but the file is read from
# `models_path`, not `db_models_path` — confirm which directory is intended.
sub_xception41_db = pd.read_csv(models_path.format("xception41-c3p1b16e20f14"))
accuracy, precision, cm, s = calculate_scores(sub_xception41_db, fold_sol)
print(accuracy, precision, s)
cm

0.7438297872340426 0.6343239731631869 0.7230042625644407


array([[2294,  154,  198],
       [ 307,  197,   94],
       [  74,   76,  131]])

In [16]:
# Evaluate xception41 on the "neural_foraminal" condition only; rebinds `accuracy` and `cm`.
# NOTE(review): the meaning of the trailing `True` flag is defined in single_test — confirm.
accuracy, cm = calculate_condition_metrics(fold_sol, sub_xception41_db, "neural_foraminal", True)
print(accuracy)
cm

0.7978723404255319


array([[972,  86,   8],
       [135, 128,  14],
       [ 10,  32,  25]])

In [17]:
# Genetic-algorithm search on the neural_foraminal rows (population 50, 20 generations).
# In the recorded run the three multipliers are nearly equal (about 0.89 each) and
# accuracy is essentially unchanged (0.7979 -> 0.7986), i.e. the search found no
# meaningful re-weighting for this condition.
optimize(fold_sol, sub_xception41_db, "neural_foraminal", value_column, population_size=50, n_generations=20, mutation_rate=0.1)

100%|██████████| 20/20 [00:04<00:00,  4.08it/s]

Best individual: [0.89500223 0.90110027 0.88358764]
Best fitness: -0.798581560283688
Accuracy 0.798581560283688





array([[969,  89,   8],
       [131, 133,  13],
       [ 10,  33,  24]])

# Combine the results

In [25]:
# Assemble one submission by taking each condition's rows from the model picked for
# it in the sections above, then score the combination on the validation fold.
spinal_rows = sub_densenet201_db["row_id"].str.contains("spinal_canal")
subarticular_rows = sub_densenet161["row_id"].str.contains("subarticular")
foraminal_rows = sub_xception41_db["row_id"].str.contains("neural_foraminal")

sub_spinal_canal = sub_densenet201_db[spinal_rows]
sub_subarticular = sub_densenet161[subarticular_rows]
sub_neural_foraminal = sub_xception41_db[foraminal_rows]

condition_parts = [sub_spinal_canal, sub_neural_foraminal, sub_subarticular]
sub = pd.concat(condition_parts, axis=0)
accuracy, precision, cm, s = calculate_scores(sub, fold_sol)
print(accuracy, precision, s)
cm

0.7673758865248227 0.6312593392808913 0.7756013553017106


array([[2434,  141,   71],
       [ 374,  193,   31],
       [ 128,   75,   78]])

# Generating output from the best coefficients

In [26]:
# Per-condition copies of the chosen models' rows; copies so the scaling below does
# not touch the loaded submissions.
sub_spinal_canal = sub_densenet201_db[sub_densenet201_db["row_id"].str.contains("spinal_canal")].copy()
sub_subarticular = sub_densenet161[sub_densenet161["row_id"].str.contains("subarticular")].copy()
sub_neural_foraminal = sub_xception41_db[sub_xception41_db["row_id"].str.contains("neural_foraminal")].copy()

# Per-class multipliers, pasted from the "Best individual" lines printed by the
# optimize runs above (order follows value_column).
spinal_canal_coef = [0.71553492, 0.19971297, 0.22479222]
subarticular_coef = [0.80735625, 0.4546022, 0.47419877]
neural_foraminal_coef = [0.89500223, 0.90110027, 0.88358764]

# Scale each condition's class columns by its multipliers (broadcast across rows).
scaling_plan = (
    (sub_spinal_canal, spinal_canal_coef),
    (sub_subarticular, subarticular_coef),
    (sub_neural_foraminal, neural_foraminal_coef),
)
for condition_frame, class_coef in scaling_plan:
    condition_frame[value_column] = condition_frame[value_column].to_numpy() * np.asarray(class_coef)

# NOTE(review): in the recorded runs accuracy rises after scaling, but `s` moves from
# 0.776 to 0.927; if `s` is a loss this is a regression — confirm before relying on it.
sub = pd.concat([sub_spinal_canal, sub_neural_foraminal, sub_subarticular], axis=0)
accuracy, precision, cm, s = calculate_scores(sub, fold_sol)
print(accuracy, precision, s)
cm

0.7792907801418439 0.6485962488160577 0.9269087789124617


array([[2527,   98,   21],
       [ 418,  156,   24],
       [ 176,   41,   64]])

In [28]:
# Summary for the combined, coefficient-scaled submission; the accuracy in the recorded
# output matches the all-row accuracy from calculate_scores.
# NOTE(review): other calls in this notebook pass a condition name plus a flag and
# unpack (accuracy, cm); here the return value is printed directly — confirm what the
# helper returns for this two-argument form.
print(calculate_condition_metrics(fold_sol, sub))

accuracy 0.7792907801418439 
normal   c/w 4.254 acc 0.955
moderate c/w 1.122 acc 0.261 
severe   c/w 1.422 acc 0.228


# Improving Spinal Canal

In [21]:
# Load the densenet201 "DB" submission, rebinding the same name (and reading the same
# file) as the spinal-canal baseline earlier in the notebook.
sub_densenet201_db = pd.read_csv(db_models_path.format("densenet201-DB-c3p1b16e20f14"))
sub_densenet201_db.head()

Unnamed: 0,row_id,normal_mild,moderate,severe
0,1004726367_left_neural_foraminal_narrowing_l1_l2,0.872883,0.11075,0.016367
1,1004726367_left_neural_foraminal_narrowing_l2_l3,0.756032,0.219034,0.024934
2,1004726367_left_neural_foraminal_narrowing_l3_l4,0.492753,0.388327,0.11892
3,1004726367_left_neural_foraminal_narrowing_l4_l5,0.353325,0.38196,0.264715
4,1004726367_left_neural_foraminal_narrowing_l5_s1,0.300521,0.415248,0.284231


In [22]:
# Load the efficientnet_b0 "DB" submission, a second candidate for the blend below.
sub_efficientnet_b0_db = pd.read_csv(db_models_path.format("efficientnet_b0-DB-c3p1b16e20f14"))
sub_efficientnet_b0_db.head()

Unnamed: 0,row_id,normal_mild,moderate,severe
0,1004726367_left_neural_foraminal_narrowing_l1_l2,0.776178,0.164194,0.059629
1,1004726367_left_neural_foraminal_narrowing_l2_l3,0.745312,0.187615,0.067073
2,1004726367_left_neural_foraminal_narrowing_l3_l4,0.62088,0.313907,0.065213
3,1004726367_left_neural_foraminal_narrowing_l4_l5,0.509696,0.325105,0.165199
4,1004726367_left_neural_foraminal_narrowing_l5_s1,0.513971,0.253372,0.232657


In [23]:
# Load the efficientnet_b2 "DB" submission, the third candidate for the blend below.
sub_efficient_b2_db = pd.read_csv(db_models_path.format("efficientnet_b2-DB-c3p1b16e20f14"))
sub_efficient_b2_db.head()

Unnamed: 0,row_id,normal_mild,moderate,severe
0,1004726367_left_neural_foraminal_narrowing_l1_l2,0.70111,0.17454,0.124351
1,1004726367_left_neural_foraminal_narrowing_l2_l3,0.77645,0.192427,0.031124
2,1004726367_left_neural_foraminal_narrowing_l3_l4,0.66742,0.27156,0.06102
3,1004726367_left_neural_foraminal_narrowing_l4_l5,0.65259,0.232052,0.115359
4,1004726367_left_neural_foraminal_narrowing_l5_s1,0.481094,0.299677,0.219229


In [24]:
# Weighted blend of the three "DB" submissions' class probabilities.
# NOTE(review): the weights sum to 0.9, so every blended row sums to about 0.9 rather
# than 1 (visible in the recorded head). Argmax-based metrics are unaffected, but a
# probability-based score may not be — confirm whether 0.2 was meant to be 0.3 or
# whether the rows should be renormalised.
blend_sources = [
    (sub_densenet201_db, 0.4),
    (sub_efficientnet_b0_db, 0.3),
    (sub_efficient_b2_db, 0.2),
]

# The blend is purely positional, so every submission must list the same row_id
# values in the same order; otherwise probabilities of different rows get mixed.
reference_ids = sub_efficient_b2_db["row_id"].to_numpy()
if not all(np.array_equal(frame["row_id"].to_numpy(), reference_ids) for frame, _weight in blend_sources):
    raise ValueError("submissions must list the same row_id values in the same order before blending")

# Work on plain arrays for every term so no term is silently index-aligned.
blended = sum(frame[value_column].to_numpy() * weight for frame, weight in blend_sources)

# Same type and labels as before: a DataFrame on the efficientnet_b2 index.
values = pd.DataFrame(blended, columns=value_column, index=sub_efficient_b2_db.index)
sub = sub_efficient_b2_db.copy()
sub[value_column] = values
sub.head()

Unnamed: 0,row_id,normal_mild,moderate,severe
0,1004726367_left_neural_foraminal_narrowing_l1_l2,0.722228,0.128466,0.049305
1,1004726367_left_neural_foraminal_narrowing_l2_l3,0.681296,0.182383,0.03632
2,1004726367_left_neural_foraminal_narrowing_l3_l4,0.516849,0.303815,0.079336
3,1004726367_left_neural_foraminal_narrowing_l4_l5,0.424757,0.296726,0.178517
4,1004726367_left_neural_foraminal_narrowing_l5_s1,0.370618,0.302046,0.227335
