In [14]:
import math
import warnings

import torch
import torchinfo
from pandas.errors import SettingWithCopyWarning
from torch import nn
from torch.utils.data import DataLoader
import timm
import glob
import os
import numpy as np
import pandas as pd
from glob import glob
import tqdm
from torch.utils.data import Dataset
import albumentations as A
import pydicom
from single_dataset import DATA_PATH
from single_train import set_random_seed

In [15]:
# Single seed for the notebook: passed to set_random_seed() below and reused as
# the KFold random_state in the split cell.
SEED = 8620
# Number of KFold splits made over the unique study ids in the split cell.
N_FOLDS = 14

# Helper imported from single_train; presumably seeds the RNGs used for
# training - TODO confirm which libraries it covers.
set_random_seed(SEED)

In [16]:
# Training table read from DATA_PATH / "temp_train.csv". Later cells rely on the
# columns study_id, plane, condition, level, instance_number and label.
train_merged = pd.read_csv(DATA_PATH / "temp_train.csv")

In [17]:
# Rows labelled as "Spinal Canal Stenosis" on the "Sagittal T1" plane are
# reassigned to the "Sagittal T2/STIR" plane (in place, on train_merged).
is_sagittal_t1 = train_merged.plane == "Sagittal T1"
is_canal_stenosis = train_merged.condition == "Spinal Canal Stenosis"
train_merged.loc[is_sagittal_t1 & is_canal_stenosis, "plane"] = "Sagittal T2/STIR"

In [18]:
# Number of non-null instance_number entries for every
# (plane, condition, level, label) combination in the full training table.
dist_keys = ["plane", "condition", "level", "label"]
dist = (
    train_merged
    .groupby(dist_keys)[["instance_number"]]
    .count()
    .reset_index()
)
print(dist.to_string())

               plane                         condition  level        label  instance_number
0           Axial T2        Left Subarticular Stenosis  L1/L2     Moderate               93
1           Axial T2        Left Subarticular Stenosis  L1/L2  Normal/Mild             1688
2           Axial T2        Left Subarticular Stenosis  L1/L2       Severe               28
3           Axial T2        Left Subarticular Stenosis  L2/L3     Moderate              254
4           Axial T2        Left Subarticular Stenosis  L2/L3  Normal/Mild             1554
5           Axial T2        Left Subarticular Stenosis  L2/L3       Severe               83
6           Axial T2        Left Subarticular Stenosis  L3/L4     Moderate              454
7           Axial T2        Left Subarticular Stenosis  L3/L4  Normal/Mild             1323
8           Axial T2        Left Subarticular Stenosis  L3/L4       Severe              193
9           Axial T2        Left Subarticular Stenosis  L4/L5     Moderate      

In [20]:
from sklearn.model_selection import KFold

# Split on study ids rather than on rows, so that every row of a study ends up
# on the same side of the split.
skf = KFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)
fold_score = []  # not used in this cell; kept because a later cell may fill it
study_ids = np.array(train_merged.study_id.unique())

# Only the first of the N_FOLDS splits is used as the hold-out set, so take it
# directly instead of looping and breaking after one iteration.
# KFold.split only uses the number of samples, so passing the id array yields
# the same indices as passing range(len(study_ids)).
fold, (trn_idx, val_idx) = next(enumerate(skf.split(study_ids)))
print("train size", len(trn_idx), "test size", len(val_idx))
trx_study_id = study_ids[trn_idx]
val_study_id = study_ids[val_idx]

# .copy() gives independent frames: later column assignments on df_train /
# df_valid no longer raise SettingWithCopyWarning and cannot be confused with
# writes to train_merged.
df_train = train_merged.loc[train_merged.study_id.isin(trx_study_id)].copy()
df_valid = train_merged.loc[train_merged.study_id.isin(val_study_id)].copy()

# Sanity checks: no study is shared between the two sides, and no row is lost.
assert set(df_train.study_id).isdisjoint(df_valid.study_id), "study leakage between train and valid"
assert len(df_train) + len(df_valid) == len(train_merged), "rows lost in the split"

train size 1833 test size 141


In [21]:
# Per-(plane, condition, level, label) count of non-null instance_number
# entries in the training side of the split.
train_dist_keys = ["plane", "condition", "level", "label"]
df_train.groupby(train_dist_keys)[["instance_number"]].count().reset_index()

Unnamed: 0,plane,condition,level,label,instance_number
0,Axial T2,Left Subarticular Stenosis,L1/L2,Moderate,86
1,Axial T2,Left Subarticular Stenosis,L1/L2,Normal/Mild,1573
2,Axial T2,Left Subarticular Stenosis,L1/L2,Severe,21
3,Axial T2,Left Subarticular Stenosis,L2/L3,Moderate,239
4,Axial T2,Left Subarticular Stenosis,L2/L3,Normal/Mild,1443
...,...,...,...,...,...
70,Sagittal T2/STIR,Spinal Canal Stenosis,L4/L5,Normal/Mild,1379
71,Sagittal T2/STIR,Spinal Canal Stenosis,L4/L5,Severe,228
72,Sagittal T2/STIR,Spinal Canal Stenosis,L5/S1,Moderate,44
73,Sagittal T2/STIR,Spinal Canal Stenosis,L5/S1,Normal/Mild,1767


In [22]:
# Per-(plane, condition, level, label) count of non-null instance_number
# entries in the validation side of the split.
valid_dist_keys = ["plane", "condition", "level", "label"]
df_valid.groupby(valid_dist_keys)[["instance_number"]].count().reset_index()

Unnamed: 0,plane,condition,level,label,instance_number
0,Axial T2,Left Subarticular Stenosis,L1/L2,Moderate,7
1,Axial T2,Left Subarticular Stenosis,L1/L2,Normal/Mild,115
2,Axial T2,Left Subarticular Stenosis,L1/L2,Severe,7
3,Axial T2,Left Subarticular Stenosis,L2/L3,Moderate,15
4,Axial T2,Left Subarticular Stenosis,L2/L3,Normal/Mild,111
...,...,...,...,...,...
68,Sagittal T2/STIR,Spinal Canal Stenosis,L4/L5,Normal/Mild,102
69,Sagittal T2/STIR,Spinal Canal Stenosis,L4/L5,Severe,26
70,Sagittal T2/STIR,Spinal Canal Stenosis,L5/S1,Moderate,7
71,Sagittal T2/STIR,Spinal Canal Stenosis,L5/S1,Normal/Mild,133
