In [None]:
import glob
import pandas as pd
import os
import pydicom
from tqdm import tqdm

def get_metadata(dcm_dir):
    """Collect selected DICOM header fields for one series directory.

    Every ``*.dcm`` file in ``dcm_dir`` is read in ascending order of the
    integer embedded in its file name (``Image-<n>.dcm``). For the elements
    named "Percent Phase Field of View", "Echo Train Length" and
    "Series Description", the value from the last file that carries the
    element wins.

    Args:
        dcm_dir: Path of the directory holding the series' ``.dcm`` files.

    Returns:
        dict mapping ``"<series>_<field>"`` to the field value, plus
        ``"<series>_shape"`` holding the number of ``.dcm`` files found.
        ``<series>`` is the Series Description read from the files; when no
        file provides one (including an empty or missing directory) the last
        component of ``dcm_dir`` is used instead, so the function no longer
        fails with ``KeyError`` on such directories.

    Raises:
        ValueError: If a file name, after stripping ``Image-`` and ``.dcm``,
            is not an integer.
    """
    wanted = ("Percent Phase Field of View", "Echo Train Length", "Series Description")

    dcm_paths = sorted(
        glob.glob(os.path.join(dcm_dir, "*.dcm")),
        key=lambda p: int(os.path.basename(p).replace(".dcm", "").replace("Image-", "")),
    )

    fields = {}
    for dcm_path in dcm_paths:
        # Only header elements are inspected, so skip loading the pixel data.
        img = pydicom.dcmread(dcm_path, stop_before_pixels=True)
        for elem in img:
            if elem.name in wanted:
                fields[elem.name] = elem.value

    # The series description is used as the key prefix, not as a feature.
    series = fields.pop("Series Description", None)
    if not series:
        # No usable description: fall back to the directory name as the prefix.
        series = os.path.basename(os.path.normpath(dcm_dir))

    fields["shape"] = len(dcm_paths)

    return {str(series) + "_" + key: value for key, value in fields.items()}

In [None]:
# Build one feature row per training case from its FLAIR and T2w series headers,
# attach the MGMT label as "target", and persist the table as X_train.csv.
train_root = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train'
labels = pd.read_csv(
    '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv',
    dtype={'BraTS21ID': object},  # keep IDs as strings; they are used as directory names
)

rows = []
for row_idx in tqdm(range(len(labels))):
    case_id = labels.loc[row_idx, 'BraTS21ID']

    # FLAIR is read first, then T2w; the two dicts are merged into a single row.
    per_series = [
        get_metadata(os.path.join(train_root, case_id, series_name))
        for series_name in ('FLAIR', 'T2w')
    ]
    features = dict(**per_series[0], **per_series[1])
    features["target"] = labels.loc[row_idx, 'MGMT_value']

    rows.append(features)

X = pd.DataFrame(rows)
X.to_csv("X_train.csv", index=False)

In [None]:
# Build one feature row per test case from its FLAIR and T2w series headers and
# persist the table as X_test.csv. The "target" column here is copied from the
# MGMT_value column of the sample submission file.
test_root = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/test'
labels = pd.read_csv(
    '../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv',
    dtype={'BraTS21ID': object},  # keep IDs as strings; they are used as directory names
)

rows = []
for row_idx in tqdm(range(len(labels))):
    case_id = labels.loc[row_idx, 'BraTS21ID']

    # FLAIR is read first, then T2w; the two dicts are merged into a single row.
    per_series = [
        get_metadata(os.path.join(test_root, case_id, series_name))
        for series_name in ('FLAIR', 'T2w')
    ]
    features = dict(**per_series[0], **per_series[1])
    features["target"] = labels.loc[row_idx, 'MGMT_value']

    rows.append(features)

X = pd.DataFrame(rows)
X.to_csv("X_test.csv", index=False)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss
import numpy as np

# Model inputs, selected explicitly by name so the train and test matrices share
# the same column order regardless of how the columns are ordered in each CSV.
FEATURE_COLUMNS = ['T2w_Percent Phase Field of View',
                   'FLAIR_Echo Train Length',
                   'T2w_shape']
# Fold count, shared by the splitter and by the test-prediction averaging below
# so the two can never drift apart.
N_SPLITS = 200

# Missing metadata values are replaced with 0 before modelling.
train_df = pd.read_csv("X_train.csv", usecols=FEATURE_COLUMNS + ['target']).fillna(0)
y = train_df["target"].values
X = train_df[FEATURE_COLUMNS].values

X_test = pd.read_csv("X_test.csv", usecols=FEATURE_COLUMNS).fillna(0)[FEATURE_COLUMNS].values

fold_aucs = []
fold_losses = []
predicted_test = np.zeros(X_test.shape[0])

# NOTE(review): with this many folds each validation fold is very small, and
# roc_auc_score / log_loss are called on each fold separately — confirm every
# fold contains both classes for this dataset.
splitter = StratifiedKFold(n_splits=N_SPLITS)
for train_index, val_index in splitter.split(X, y):
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    regr = LogisticRegression()
    regr.fit(X_train, y_train)

    # Column 1 of predict_proba is the probability of the positive class.
    y_pred = regr.predict_proba(X_val)[:, 1]

    fold_aucs.append(roc_auc_score(y_val, y_pred))
    fold_losses.append(log_loss(y_val, y_pred))

    # Accumulate this fold's test predictions; averaged after the loop.
    predicted_test += regr.predict_proba(X_test)[:, 1]

print("Loss", np.mean(fold_losses), np.std(fold_losses))
print("AUC", np.mean(fold_aucs), np.std(fold_aucs))

predicted_test /= N_SPLITS

In [None]:
# Fill the sample submission's MGMT_value column with the averaged test
# predictions and write the result as submission.csv.
submission_template = pd.read_csv(
    '../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv',
    dtype={'BraTS21ID': object},
)
labels = submission_template.assign(MGMT_value=predicted_test)
labels.to_csv("submission.csv", index=False)

In [None]:
# Display the submitted MGMT_value predictions as a plain Python list.
labels.loc[:, "MGMT_value"].tolist()