### Model Updation with MGMT

In [1]:
from model import SwinUNETR

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
model = SwinUNETR(
        img_size=(128, 128, 128),
        in_channels=4,
        out_channels=3,
        feature_size=48,
        use_checkpoint=False,
    )

### Delete samples without MGMT Labels

In [30]:
import os
import shutil
import json
import pandas as pd
import numpy as np

In [11]:
df = pd.read_csv("./data/mgmt_labels.csv")
id_ls = list(df["BraTS21ID"])

path = "./data/TrainingData/"
name_ls = os.listdir(path)

for name in name_ls:
    if "json" not in name:
        id_ = int(name[-5:])
        if id_ not in id_ls:
            shutil.rmtree(os.path.join(path, name))

### Update Data split json

In [32]:
with open("./data/TrainingData/brats21_folds.json", "r") as f:
    fold_dict = json.load(f)
    
count = 0
new_ls = []
for ls in fold_dict["training"]:
    id_ = int(ls["image"][0].split("/")[-1].split("_")[1])
    if id_ in id_ls:
        new_ls.append(ls)
        ls["mgmt_label"] = int(df[df["BraTS21ID"] == id_]["MGMT_value"].values[0])

fold_dict["training"] = new_ls

In [31]:
class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super(NpEncoder, self).default(obj)


with open("./data/TrainingData/brats21_folds.json", 'w') as fp:
    json.dump(fold_dict, fp, cls=NpEncoder)

### Training Command

In [None]:
!python main.py --data_dir="./data" --val_every=1 --roi_x=96 --roi_y=96 --roi_z=96  --in_channels=4 --spatial_dims=3 \
    --use_checkpoint --feature_size=12 --max_epochs 200 --save_checkpoint --distributed

### Make JSON file for Data split

In [None]:
import os
import json

In [1]:
import shutil

shutil.make_archive("../SwinUNETR", "zip", "./")

'/data1/students/sainath/SwinUNETR.zip'

In [8]:
import os
os.path.getsize("../TestDataDICOM.zip")/(1024*1024*1024)

1.43603515625

## Unarchive Test Data

In [9]:
import shutil

shutil.unpack_archive("../TestDataDICOM.zip", "./data/TestDataDICOM", "zip")

## Test data JSON file

In [10]:
import glob
import os
import json

In [11]:
flair = ["/".join(i.split("/")[-3:]) for i in glob.glob("./data/ValidationData/*/*flair*")]
t1 = ["/".join(i.split("/")[-3:]) for i in glob.glob("./data/ValidationData/*/*t1.nii.gz")]
t1ce = ["/".join(i.split("/")[-3:]) for i in glob.glob("./data/ValidationData/*/*t1ce*")]
t2 = ["/".join(i.split("/")[-3:]) for i in glob.glob("./data/ValidationData/*/*t2*")]
ids = [i.split("/")[-2].split("_")[-1] for i in flair]

clf_ids = os.listdir("./data/TestDataDICOM")

test_dict = {"training" : []}

for i, id_ in enumerate(ids):
    #if id_ in clf_ids
        dict_ = {
            "fold" : 1,
            "image" : [
                flair[i],
                t1[i],
                t1ce[i],
                t2[i]
            ],
            "id" : id_
        }
        test_dict["training"].append(dict_)

In [12]:
class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super(NpEncoder, self).default(obj)


with open("./jsons/val_data.json", 'w') as fp:
    json.dump(test_dict, fp, cls=NpEncoder)

## Inference

In [None]:
# https://github.com/Project-MONAI/tutorials/blob/main/modules/load_medical_images.ipynb

In [None]:
!python test.py --data_dir ./data --json_list ./jsons/metadata.json --feature_size 12 --roi_x=96 --roi_y=96 --roi_z=96 --pretrained_dir ./runs/alpha_0.9_enc3

## Get submission file

In [None]:
import glob
import os
import numpy as np
import pandas as pd

In [None]:
output_dir = "./runs/test"
path_ls = glob.glob(f"{output_dir}/outputs/*.npy")
sub_df = pd.read_csv("./data/sample_submission.csv")

for path in path_ls:
    pred = np.load(path)
    id_ = path.split("/")[-1].split("_")[0]
    pred = [1 if pred>0.5 else 0]
    sub_df[sub_df["BraTS21ID"] == id_]["MGMT_value"] = pred

sub_df.to_csv(f"{output_dir}/submission.csv")
sub_df.head()

## Compare Seg and Classification data Validation IDs

In [2]:
import os

clf_ids = os.listdir("./data/TestDataDICOM")
seg_ids = [i.split("_")[-1] for i in os.listdir("./data/ValidationData")]
count = 0

for id_ in seg_ids:
    if id_ in clf_ids:
        count += 1

count

86

## Final Data Split

In [None]:
#######################
### Total: 577
### Training: 450
### Validation: 50
### Test: 77
#######################

'''
{
    "training": [
        {
            "fold": 0,
            "image": [
                "TrainingData/BraTS2021_00483/BraTS2021_00483_flair.nii.gz",
                "TrainingData/BraTS2021_00483/BraTS2021_00483_t1ce.nii.gz",
                "TrainingData/BraTS2021_00483/BraTS2021_00483_t1.nii.gz",
                "TrainingData/BraTS2021_00483/BraTS2021_00483_t2.nii.gz"
            ],
            "label": "TrainingData/BraTS2021_00483/BraTS2021_00483_seg.nii.gz",
            "mgmt_label": 1
        },
        ...

}
'''

In [1]:
import os
import json
import glob

import numpy as np
import pandas as pd

from sklearn.model_selection import StratifiedKFold

In [2]:
class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super(NpEncoder, self).default(obj)

In [12]:
mgmt_df = pd.read_csv("../data/mgmt_labels.csv")
path_df = pd.DataFrame.from_dict({
    "BraTS21ID" : [int(i.split("/")[-2].split("_")[1]) for i in glob.glob(f"../data/TrainingData/*/*_seg*")],
    "t1" : [i[8:] for i in glob.glob(f"../data/TrainingData/*/*_t1.nii*")],
    "t1ce" : [i[8:] for i in glob.glob(f"../data/TrainingData/*/*_t1ce*")],
    "t2" : [i[8:] for i in glob.glob(f"../data/TrainingData/*/*_t2*")],
    "flair" : [i[8:] for i in glob.glob(f"../data/TrainingData/*/*_flair*")],
    "seg" : [i[8:] for i in glob.glob(f"../data/TrainingData/*/*_seg*")]
})

df = pd.merge(mgmt_df, path_df, on="BraTS21ID")

rand_ids = np.random.permutation(np.arange(len(df)))
train_df = df #df.iloc[rand_ids[:-77]]
test_df = df.iloc[rand_ids[-77:]]

skf = StratifiedKFold(n_splits=10)
skf.get_n_splits(train_df, train_df["MGMT_value"])
ids, folds = [], []

for i, (train_index, test_index) in enumerate(skf.split(train_df, train_df["MGMT_value"])):
    ids.extend(list(train_df.iloc[test_index]["BraTS21ID"]))
    folds.extend([i]*len(test_index))
    
fold_df = pd.DataFrame.from_dict({
    "BraTS21ID" : ids,
    "fold" : folds
})
train_df = pd.merge(train_df, fold_df, on="BraTS21ID")
train_df.head()

Unnamed: 0,BraTS21ID,MGMT_value,t1,t1ce,t2,flair,seg,fold
0,0,1,TrainingData/BraTS2021_00000/BraTS2021_00000_t...,TrainingData/BraTS2021_00000/BraTS2021_00000_t...,TrainingData/BraTS2021_00000/BraTS2021_00000_t...,TrainingData/BraTS2021_00000/BraTS2021_00000_f...,TrainingData/BraTS2021_00000/BraTS2021_00000_s...,0
1,2,1,TrainingData/BraTS2021_00002/BraTS2021_00002_t...,TrainingData/BraTS2021_00002/BraTS2021_00002_t...,TrainingData/BraTS2021_00002/BraTS2021_00002_t...,TrainingData/BraTS2021_00002/BraTS2021_00002_f...,TrainingData/BraTS2021_00002/BraTS2021_00002_s...,0
2,3,0,TrainingData/BraTS2021_00003/BraTS2021_00003_t...,TrainingData/BraTS2021_00003/BraTS2021_00003_t...,TrainingData/BraTS2021_00003/BraTS2021_00003_t...,TrainingData/BraTS2021_00003/BraTS2021_00003_f...,TrainingData/BraTS2021_00003/BraTS2021_00003_s...,0
3,5,1,TrainingData/BraTS2021_00005/BraTS2021_00005_t...,TrainingData/BraTS2021_00005/BraTS2021_00005_t...,TrainingData/BraTS2021_00005/BraTS2021_00005_t...,TrainingData/BraTS2021_00005/BraTS2021_00005_f...,TrainingData/BraTS2021_00005/BraTS2021_00005_s...,0
4,6,1,TrainingData/BraTS2021_00006/BraTS2021_00006_t...,TrainingData/BraTS2021_00006/BraTS2021_00006_t...,TrainingData/BraTS2021_00006/BraTS2021_00006_t...,TrainingData/BraTS2021_00006/BraTS2021_00006_f...,TrainingData/BraTS2021_00006/BraTS2021_00006_s...,0


In [13]:
json_dict = {"training" : []}

for i in train_df.values:
    dict_ = {
        "fold" : 1, #i[-1]
        "image" : list(i[2:6]),
        "label" : i[-2],
        "mgmt_label" : i[1],
        "id" : i[-2].split("/")[-2].split("_")[1]
    }
    json_dict["training"].append(dict_)

with open("../jsons/metadata.json", 'w') as fp:
    json.dump(json_dict, fp, cls=NpEncoder)

In [7]:
json_dict = {"training" : []}

for i in test_df.values:
    dict_ = {
        "fold" : 1,
        "image" : list(i[2:6]),
        "label" : i[-1],
        "mgmt_label" : i[1],
        "id" : i[-1].split("/")[-2].split("_")[1]
    }
    json_dict["training"].append(dict_)

with open("./jsons/test_metadata.json", 'w') as fp:
    json.dump(json_dict, fp, cls=NpEncoder)

### Feature Vector check

In [2]:
import numpy as np

arr = np.load("../runs/alpha_0.1/outputs/00000.npy")
arr.shape

(1, 96)

### Zip folder except dataset and output

In [1]:
import zipfile
import os

In [2]:
zf = zipfile.ZipFile("../../source_code.zip", "w")

for dirname, subdirs, files in os.walk("../"):
    if 'exclude directory' in ["data", "runs"]:
        subdirs.remove('exclude directory')
    zf.write(dirname)
    for filename in files:
        zf.write(os.path.join(dirname, filename))
zf.close()

In [5]:
os.path.getsize("../../source_code.zip")/(1024*1024*1024)

6.651053890585899