<a href="https://colab.research.google.com/github/mille055/AIPI540_individual_project/blob/main/notebooks/AIPI540_IP_fusion_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a href='https://ai.meng.duke.edu'> = <img align="left" style="padding-top:10px;" src=https://storage.googleapis.com/aipi_datasets/Duke-AIPI-Logo.png>

In [1]:
COLAB_FLAG = False   # whether running on colab or locally on computer

In [2]:
if COLAB_FLAG:
    !pip install pydicom==2.1.2
    !pip install monai seaborn
    !git clone 'https://github.com/mille055/AIPI540_individual_project.git'

    


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import torchvision
import pydicom
import monai
import pickle
import glob
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import MinMaxScaler

import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import time
import os
import copy

import monai
from monai.data import DataLoader, ImageDataset
from monai.transforms import AddChannel, Compose, RandRotate90, Resize, ScaleIntensity, EnsureType
from pydicom.dataset import Dataset as DcmDataset
from pydicom.tag import BaseTag as DcmTag
from pydicom.multival import MultiValue as DcmMultiValue
import sys
import warnings
warnings.filterwarnings("ignore")

In [4]:
#local imports
if COLAB_FLAG:
    sys.path.append('/content/AIPI540_individual_project/scripts/')

else: # running locally
    sys.path.append('/Users/cmm/Documents/GitHub/AIPI540_individual_project/scripts/')

### local imports ###
from config import new_dict, file_dict, feats, column_lists
from config import abd_label_dict, val_list, train_val_split_percent, random_seed, data_transforms
from config import sentence_encoder, series_description_column, text_label
from utils import *

# from AIPI540_individual_project.scripts.train_pixel_model import train_model
# from AIPI540_individual_project.scripts.train_text_model import load_text_data, train_text_model, list_incorrect_text_predictions
# from AIPI540_individual_project.scripts.utils import *

2023-04-06 17:15:07,769 - NumExpr defaulting to 8 threads.


# Load Datasets


In [5]:
if COLAB_FLAG:
    train_datafile = '/content/AIPI540_individual_project/data/trainfiles.csv'
    test_datafile = '/content/AIPI540_individual_project/data/testfiles.csv'
else:
    # for local
    train_datafile = '../data/trainfiles.csv'
    test_datafile = '../data/testfiles.csv'

In [6]:
# def load_csv_dataset(train_file, test_file, val = True, val_lists = None):
#     train_df = pd.read_csv(train_file)
#     test_df = pd.read_csv(test_file)
#     train_df.drop('Unnamed: 0', axis=1, inplace=True)
#     test_df.drop('Unnamed: 0', axis=1, inplace=True)
#     if val:
#         if val_lists:
#             val_df = train_df[train_df.patientID.isin(val_lists)]
#             train_df = train_df[~train_df.index.isin(val_df.index)] 
#         else:
#             train_set, val_set = next(GroupShuffleSplit(test_size=.20, n_splits=1, random_state = 42).split(train_df, groups=train_df['patientID']))
#             train_df, val_df = train_set, val_set
#         return train_df, val_df, test_df

#     else: 
#         return train_df, test_df


In [6]:
train, val, test = load_csv_dataset(train_datafile, test_datafile, val_lists = val_list)
train = shorten_df(train)
val = shorten_df(val)
test = shorten_df(test)

In [7]:

train


Unnamed: 0,label,patientID,series,file_info
0,8,104,18(35-68),/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/18 (35-68)/0052.dcm
1,8,104,18(1-34),/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/18 (1-34)/0018.dcm
2,16,104,20,/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/20/0037.dcm
3,9,104,9,/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/9/0017.dcm
4,9,104,11,/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/11/0017.dcm
...,...,...,...,...
1407,7,94,3,/volumes/cm7/Abdominal_MRI_dataset_split/train/094/exam1/3/0015.dcm
1408,2,94,12,/volumes/cm7/Abdominal_MRI_dataset_split/train/094/exam1/12/0037.dcm
1409,2,94,13,/volumes/cm7/Abdominal_MRI_dataset_split/train/094/exam1/13/0037.dcm
1410,19,94,5,/volumes/cm7/Abdominal_MRI_dataset_split/train/094/exam1/5/0037.dcm


In [9]:
# import importlib
# import utils

# importlib.reload(utils)

In [8]:
def prepare_df(df):
    df1 = df.copy()
    filenames = df1.file_info.tolist()
    getdicoms = pd.DataFrame.from_dicoms(filenames)
    merged = getdicoms.merge(df1, left_on='fname', right_on='file_info')
    merged.drop(columns=['file_info'], inplace=True)
    artpooled = pool_arterial_labels(merged)
    artpooled['contrast'] = artpooled.apply(detect_contrast)
    artpooled['plane'] = artpooled.apply(compute_plane)


    return artpooled

In [9]:

train_df = prepare_df(train)
val_df = prepare_df(val)
test_df = prepare_df(test)



In [10]:
train_df

Unnamed: 0,ImageType,SOPClassUID,SeriesDescription,ContrastBolusAgent,BodyPartExamined,ScanningSequence,SequenceVariant,ScanOptions,MRAcquisitionType,AngioFlag,...,InstanceNumber,ImageOrientationPatient,PhotometricInterpretation,PixelSpacing,fname,TriggerTime,InversionTime,label,patientID,series
0,"[DERIVED, PRIMARY, DIFFUSION, TRACEW, DIS2D]",MR Image Storage,ax diff_TRACEW,Multihance,ABDOMEN,EP,"[SK, SP]","[PFP, SFS]",2D,N,...,52,"[1, 0, 0, 0, 1, 0]",MONOCHROME2,"[0.9375, 0.9375]",/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/18 (35-68)/0052.dcm,,,8,104,18(35-68)
1,"[DERIVED, PRIMARY, DIFFUSION, TRACEW, DIS2D]",MR Image Storage,ax diff_TRACEW,Multihance,ABDOMEN,EP,"[SK, SP]","[PFP, SFS]",2D,N,...,18,"[1, 0, 0, 0, 1, 0]",MONOCHROME2,"[0.9375, 0.9375]",/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/18 (1-34)/0018.dcm,,,8,104,18(1-34)
2,"[ORIGINAL, PRIMARY, M, NORM, DIS2D]",MR Image Storage,ax equilibrium,Multihance,ABDOMEN,GR,"[SP, OSP]","[PFP, FS]",3D,N,...,37,"[1, 0, 0, 0, 1, 0]",MONOCHROME2,"[0.703125, 0.703125]",/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/20/0037.dcm,,,16,104,20
3,"[ORIGINAL, PRIMARY, M, NORM, DIS2D]",MR Image Storage,ax haste bh,,ABDOMEN,SE,"[SK, SP, OSP]","[PFP, SAT2, SFS]",2D,N,...,17,"[1, 0, 0, 0, 1, 0]",MONOCHROME2,"[1.5625, 1.5625]",/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/9/0017.dcm,,,9,104,9
4,"[ORIGINAL, PRIMARY, M, NORM, DIS2D]",MR Image Storage,ax t2 triggerred,,ABDOMEN,SE,"[SK, SP]","[PFP, SAT2, FS]",2D,N,...,17,"[1, 0, 0, 0, 1, 0]",MONOCHROME2,"[1.5625, 1.5625]",/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/11/0017.dcm,,,9,104,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1407,"[ORIGINAL, PRIMARY, M, NORM, DIS2D]",MR Image Storage,cor haste,,ABDOMEN,SE,"[SK, SP, OSP]",PFP,2D,N,...,15,"[1, 0, 0, 0, 0, -1]",MONOCHROME2,"[1.5625, 1.5625]",/volumes/cm7/Abdominal_MRI_dataset_split/train/094/exam1/3/0015.dcm,,,7,94,3
1408,"[ORIGINAL, PRIMARY, M, NORM, DIS2D]",MR Image Storage,ax dyn,Multihance,ABDOMEN,GR,"[SP, OSP]","[PFP, FS]",3D,N,...,37,"[1, 0, 0, 0, 1, 0]",MONOCHROME2,"[0.78125, 0.78125]",/volumes/cm7/Abdominal_MRI_dataset_split/train/094/exam1/12/0037.dcm,,,2,94,12
1409,"[ORIGINAL, PRIMARY, M, NORM, DIS2D]",MR Image Storage,ax dyn,Multihance,ABDOMEN,GR,"[SP, OSP]","[PFP, FS]",3D,N,...,37,"[1, 0, 0, 0, 1, 0]",MONOCHROME2,"[0.78125, 0.78125]",/volumes/cm7/Abdominal_MRI_dataset_split/train/094/exam1/13/0037.dcm,,,2,94,13
1410,"[ORIGINAL, PRIMARY, M, NORM, DIS2D]",MR Image Storage,ax dixon_opp,,ABDOMEN,GR,SP,"[PFP, SAT2]",3D,N,...,37,"[1, 0, 0, 0, 1, 0]",MONOCHROME2,"[1.5625, 1.5625]",/volumes/cm7/Abdominal_MRI_dataset_split/train/094/exam1/5/0037.dcm,,,19,94,5


In [13]:
train_merge.label.value_counts()

9     140
8     137
25    136
2     124
18    106
16     92
11     74
14     73
19     71
7      70
23     67
17     66
0      61
6      56
3      40
12     24
4      22
21     13
22     11
26     10
13      6
29      6
1       2
15      1
10      1
5       1
20      1
28      1
Name: label, dtype: int64

In [21]:
train_merge

Unnamed: 0,ImageType,SOPClassUID,SeriesDescription,ContrastBolusAgent,BodyPartExamined,ScanningSequence,SequenceVariant,ScanOptions,MRAcquisitionType,AngioFlag,...,InstanceNumber,ImageOrientationPatient,PhotometricInterpretation,PixelSpacing,fname,TriggerTime,InversionTime,label,patientID,series
0,"[DERIVED, PRIMARY, DIFFUSION, TRACEW, DIS2D]",MR Image Storage,ax diff_TRACEW,Multihance,ABDOMEN,EP,"[SK, SP]","[PFP, SFS]",2D,N,...,52,"[1, 0, 0, 0, 1, 0]",MONOCHROME2,"[0.9375, 0.9375]",/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/18 (35-68)/0052.dcm,,,8,104,18(35-68)
1,"[DERIVED, PRIMARY, DIFFUSION, TRACEW, DIS2D]",MR Image Storage,ax diff_TRACEW,Multihance,ABDOMEN,EP,"[SK, SP]","[PFP, SFS]",2D,N,...,18,"[1, 0, 0, 0, 1, 0]",MONOCHROME2,"[0.9375, 0.9375]",/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/18 (1-34)/0018.dcm,,,8,104,18(1-34)
2,"[ORIGINAL, PRIMARY, M, NORM, DIS2D]",MR Image Storage,ax equilibrium,Multihance,ABDOMEN,GR,"[SP, OSP]","[PFP, FS]",3D,N,...,37,"[1, 0, 0, 0, 1, 0]",MONOCHROME2,"[0.703125, 0.703125]",/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/20/0037.dcm,,,16,104,20
3,"[ORIGINAL, PRIMARY, M, NORM, DIS2D]",MR Image Storage,ax haste bh,,ABDOMEN,SE,"[SK, SP, OSP]","[PFP, SAT2, SFS]",2D,N,...,17,"[1, 0, 0, 0, 1, 0]",MONOCHROME2,"[1.5625, 1.5625]",/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/9/0017.dcm,,,9,104,9
4,"[ORIGINAL, PRIMARY, M, NORM, DIS2D]",MR Image Storage,ax t2 triggerred,,ABDOMEN,SE,"[SK, SP]","[PFP, SAT2, FS]",2D,N,...,17,"[1, 0, 0, 0, 1, 0]",MONOCHROME2,"[1.5625, 1.5625]",/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/11/0017.dcm,,,9,104,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1407,"[ORIGINAL, PRIMARY, M, NORM, DIS2D]",MR Image Storage,cor haste,,ABDOMEN,SE,"[SK, SP, OSP]",PFP,2D,N,...,15,"[1, 0, 0, 0, 0, -1]",MONOCHROME2,"[1.5625, 1.5625]",/volumes/cm7/Abdominal_MRI_dataset_split/train/094/exam1/3/0015.dcm,,,7,94,3
1408,"[ORIGINAL, PRIMARY, M, NORM, DIS2D]",MR Image Storage,ax dyn,Multihance,ABDOMEN,GR,"[SP, OSP]","[PFP, FS]",3D,N,...,37,"[1, 0, 0, 0, 1, 0]",MONOCHROME2,"[0.78125, 0.78125]",/volumes/cm7/Abdominal_MRI_dataset_split/train/094/exam1/12/0037.dcm,,,2,94,12
1409,"[ORIGINAL, PRIMARY, M, NORM, DIS2D]",MR Image Storage,ax dyn,Multihance,ABDOMEN,GR,"[SP, OSP]","[PFP, FS]",3D,N,...,37,"[1, 0, 0, 0, 1, 0]",MONOCHROME2,"[0.78125, 0.78125]",/volumes/cm7/Abdominal_MRI_dataset_split/train/094/exam1/13/0037.dcm,,,2,94,13
1410,"[ORIGINAL, PRIMARY, M, NORM, DIS2D]",MR Image Storage,ax dixon_opp,,ABDOMEN,GR,SP,"[PFP, SAT2]",3D,N,...,37,"[1, 0, 0, 0, 1, 0]",MONOCHROME2,"[1.5625, 1.5625]",/volumes/cm7/Abdominal_MRI_dataset_split/train/094/exam1/5/0037.dcm,,,19,94,5


In [20]:
train_merge.columns

Index(['ImageType', 'SOPClassUID', 'SeriesDescription', 'ContrastBolusAgent',
       'BodyPartExamined', 'ScanningSequence', 'SequenceVariant',
       'ScanOptions', 'MRAcquisitionType', 'AngioFlag', 'SliceThickness',
       'RepetitionTime', 'EchoTime', 'MagneticFieldStrength',
       'EchoTrainLength', 'StudyInstanceUID', 'SeriesInstanceUID',
       'SeriesNumber', 'AcquisitionNumber', 'InstanceNumber',
       'ImageOrientationPatient', 'PhotometricInterpretation', 'PixelSpacing',
       'fname', 'TriggerTime', 'InversionTime', 'label', 'patientID',
       'series'],
      dtype='object')

In [21]:
train_processed = preprocess(train_merge)
train_processed

Preprocessing metadata for Random Forest classifier.
Have received 1412 entries.


Unnamed: 0,fname,StudyInstanceUID,SeriesInstanceUID,SeriesNumber,SeriesDescription,AcquisitionNumber,InstanceNumber,ImageOrientationPatient,MRAcquisitionType,AngioFlag,...,opt_M,opt_MT,opt_MT_GEMS,opt_PROP_GEMS,opt_R,opt_RAMP_IS_GEMS,opt_SAT1,opt_T2FLAIR_GEMS,opt_VB_GEMS,opt__
0,/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/18 (35-68)/0052.dcm,1.1.8.4.1.6.82337.2.1.1.0708013300075333860875050288577,1.5.1.5.3.4.42814.2.1.1.81476774016713805121083713442731,18,ax diff_TRACEW,1.0,52,"[1, 0, 0, 0, 1, 0]",0,1,...,0,0,0,0,0,0,0,0,0,0
1,/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/18 (1-34)/0018.dcm,1.1.8.4.1.6.82337.2.1.1.0708013300075333860875050288577,1.5.1.5.3.4.42814.2.1.1.81476774016713805121083713442731,18,ax diff_TRACEW,1.0,18,"[1, 0, 0, 0, 1, 0]",0,1,...,0,0,0,0,0,0,0,0,0,0
2,/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/20/0037.dcm,1.1.8.4.1.6.82337.2.1.1.0708013300075333860875050288577,1.5.1.5.3.4.42814.2.1.1.04131462062642286243287442333661,20,ax equilibrium,1.0,37,"[1, 0, 0, 0, 1, 0]",1,1,...,0,0,0,0,0,0,0,0,0,0
3,/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/9/0017.dcm,1.1.8.4.1.6.82337.2.1.1.0708013300075333860875050288577,1.5.1.5.3.4.42814.2.1.1.82437672634667806116366638011114,9,ax haste bh,1.0,17,"[1, 0, 0, 0, 1, 0]",0,1,...,0,0,0,0,0,0,0,0,0,0
4,/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/11/0017.dcm,1.1.8.4.1.6.82337.2.1.1.0708013300075333860875050288577,1.5.1.5.3.4.42814.2.1.1.18541470475235807143366788873886,11,ax t2 triggerred,1.0,17,"[1, 0, 0, 0, 1, 0]",0,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1407,/volumes/cm7/Abdominal_MRI_dataset_split/train/094/exam1/3/0015.dcm,6.4.2.8.2.7.53708.2.1.1.3837268650553342350621875478481,0.3.4.1.7.3.18404.2.1.1.30504247673163608470362631276851,3,cor haste,1.0,15,"[1, 0, 0, 0, 0, -1]",0,1,...,0,0,0,0,0,0,0,0,0,0
1408,/volumes/cm7/Abdominal_MRI_dataset_split/train/094/exam1/12/0037.dcm,6.4.2.8.2.7.53708.2.1.1.3837268650553342350621875478481,0.3.4.1.7.3.18404.2.1.1.58154752405583585275757005610747,12,ax dyn,1.0,37,"[1, 0, 0, 0, 1, 0]",1,1,...,0,0,0,0,0,0,0,0,0,0
1409,/volumes/cm7/Abdominal_MRI_dataset_split/train/094/exam1/13/0037.dcm,6.4.2.8.2.7.53708.2.1.1.3837268650553342350621875478481,0.3.4.1.7.3.18404.2.1.1.01530466514228680554687652718480,13,ax dyn,2.0,37,"[1, 0, 0, 0, 1, 0]",1,1,...,0,0,0,0,0,0,0,0,0,0
1410,/volumes/cm7/Abdominal_MRI_dataset_split/train/094/exam1/5/0037.dcm,6.4.2.8.2.7.53708.2.1.1.3837268650553342350621875478481,0.3.4.1.7.3.18404.2.1.1.14641454880764801065367514835145,5,ax dixon_opp,1.0,37,"[1, 0, 0, 0, 1, 0]",1,1,...,0,0,0,0,0,0,0,0,0,0


In [22]:
print([x for x in train_processed.columns.tolist()])

['fname', 'StudyInstanceUID', 'SeriesInstanceUID', 'SeriesNumber', 'SeriesDescription', 'AcquisitionNumber', 'InstanceNumber', 'ImageOrientationPatient', 'MRAcquisitionType', 'AngioFlag', 'SliceThickness', 'RepetitionTime', 'EchoTime', 'EchoTrainLength', 'PixelSpacing', 'ContrastBolusAgent', 'InversionTime', 'seq_E', 'seq_EP', 'seq_G', 'seq_IR', 'seq_P', 'seq_R', 'seq_S', 'seq_SE', 'var_E', 'var_K', 'var_N', 'var_O', 'var_OSP', 'var_P', 'var_S', 'var_SK', 'var_SP', 'var_SS', 'opt_2', 'opt_A', 'opt_ACC_GEMS', 'opt_D', 'opt_EDR_GEMS', 'opt_EPI_GEMS', 'opt_F', 'opt_FAST_GEMS', 'opt_FC', 'opt_FC_SLICE_AX_GEMS', 'opt_FILTERED_GEMS', 'opt_FS', 'opt_FT_GEMS', 'opt_I', 'opt_IDEAL_GEMS', 'opt_MP_GEMS', 'opt_MRCP_GEMS', 'opt_NPW', 'opt_P', 'opt_PFF', 'opt_PFP', 'opt_RTR_GEMS', 'opt_S', 'opt_SAT2', 'opt_SAT_GEMS', 'opt_SEQ_GEMS', 'opt_SFS', 'opt_SP', 'opt_SS_GEMS', 'opt_T', 'opt_TRF_GEMS', 'opt_VASCTOF_GEMS', 'opt_W', 'opt_X', 'type_', 'type_ADC', 'type_COLLAPSE', 'type_CSA MPR', 'type_CSAPARALLE

In [23]:
train_processed.SeriesDescription.value_counts()

ax diff_TRACEW                   86
ax dyn                           69
localizer                        57
cor haste                        57
ax diff_ADC                      55
                                 ..
fat quant_WP                      1
ax dixon fat quant 1_Output_W     1
WATER:cor lava flex               1
cor thk slab                      1
cor 3d MRCP                       1
Name: SeriesDescription, Length: 188, dtype: int64

In [24]:
old_version = pd.read_pickle('../data/X_train02282023.pkl')
old_version.columns

Index(['fname_x', 'StudyInstanceUID', 'SeriesInstanceUID', 'SeriesNumber',
       'SeriesDescription_x', 'AcquisitionNumber', 'InstanceNumber',
       'ImageOrientationPatient', 'MRAcquisitionType', 'AngioFlag',
       ...
       'GT plane', 'GT contrast', 'original_label_code', 'con_label_code',
       'MB_label_code', 'prob_avg', 'pixel_label_code', 'textual_label_code',
       'ap_label_code', 'pixel_ap_label_code'],
      dtype='object', length=156)

In [18]:
old_version_cols = old_version.columns
new_version_cols = train_processed.columns

set(old_version_cols) ^ set(new_version_cols)

{'GT contrast',
 'GT label',
 'GT plane',
 'MB_label_code',
 'Parent_folder',
 'SeriesDescription',
 'SeriesDescription_x',
 'SeriesDescription_y',
 'ap_label_code',
 'con_label_code',
 'contrast',
 'exam',
 'filename',
 'fname_x',
 'fname_y',
 'label_code',
 'original_label_code',
 'parent folder',
 'patientID',
 'pixel_ap_label_code',
 'pixel_label_code',
 'plane',
 'prob_avg',
 'seq_label',
 'series',
 'textual_label_code'}

In [None]:
# fns = train.file_info.tolist()

# # Create an empty list to store the DICOM metadata
# data = []

# # Loop over the DICOM file names and extract the metadata
# for file_name in file_names:
#     # Read in the DICOM file
#     ds = pydicom.dcmread(file_name)
    
#     d = {}
#     for col in dicom_cols:
#         d.key = col
#         d.value = ds['col']
#     # Store the metadata in a dictionary
#     d = {
#         'patient_id': patient_id,
#         'study_date': study_date,
#         # ... add other metadata as needed ...
#     }
    
#     # Append the dictionary to the data list
#     data.append(d)

# # Create a DataFrame from the data list
# df = pd.DataFrame(data)
# This code reads in a list of DICOM file names, then loops over the files and extracts the metadata of interest using pydicom. It stores the metadata in a dictionary, then appends the dictionary to a list of dictionaries (data). Finally, it creates a Pandas DataFrame from the list of dictionaries using pd.DataFrame(data). You can modify this code to extract the specific metadata that you are interested in from your DICOM files.








In [25]:
train_label_df = train[['fname_x', 'filename', 'Parent_folder', 'patientID', 'exam', 'series', 'SeriesDescription_y', 'plane', 'seq_label', 'contrast', 'original_label_code', 'ap_label_code']]
train_label_df.rename(columns={'SeriesDescription_y':'SeriesDescription'}, inplace=True)
train_label_df

KeyError: "['Parent_folder', 'contrast', 'seq_label', 'fname_x', 'plane', 'filename', 'exam', 'original_label_code', 'ap_label_code', 'SeriesDescription_y'] not in index"

In [None]:
test_label_df = test[['fname_x', 'filename', 'Parent_folder', 'patientID', 'exam', 'series', 'SeriesDescription_y', 'plane', 'seq_label', 'contrast', 'original_label_code', 'ap_label_code']]
test_label_df.rename(columns={'SeriesDescription_y':'SeriesDescription'}, inplace=True)
test_label_df

Unnamed: 0,fname_x,filename,Parent_folder,patientID,exam,series,SeriesDescription,plane,seq_label,contrast,original_label_code,ap_label_code
0,Abdominal MRI data/102/exam1/16 (1-40)/0010.dcm,0010.dcm,Abdominal MRI data,102,exam1,16(1-40),ax diff_TRACEW,ax,dwi,0,8,8
1,Abdominal MRI data/102/exam1/16 (41-80)/0073.dcm,0073.dcm,Abdominal MRI data,102,exam1,16(41-80),ax diff_TRACEW,ax,dwi,0,8,8
2,Abdominal MRI data/102/exam1/18/0010.dcm,0010.dcm,Abdominal MRI data,102,exam1,18,ax equilibrium new,ax,equilibrium,1,16,16
3,Abdominal MRI data/102/exam1/11/0010.dcm,0010.dcm,Abdominal MRI data,102,exam1,11,ax dyn new,ax,early dynamic,1,3,2
4,Abdominal MRI data/102/exam1/7/0010.dcm,0010.dcm,Abdominal MRI data,102,exam1,7,ax dixon_W,ax,unknown,0,25,25
...,...,...,...,...,...,...,...,...,...,...,...,...
427,Abdominal MRI data/058/exam1/16 (1-36)/0010.dcm,0010.dcm,Abdominal MRI data,58,exam1,16(1-36),ax diff_TRACEW,ax,dwi,0,8,8
428,Abdominal MRI data/058/exam1/2/0010.dcm,0010.dcm,Abdominal MRI data,58,exam1,2,cor haste,cor,t2,0,7,7
429,Abdominal MRI data/058/exam1/13/0010.dcm,0010.dcm,Abdominal MRI data,58,exam1,13,ax dyn new,ax,early dynamic,1,2,2
430,Abdominal MRI data/058/exam1/5/0010.dcm,0010.dcm,Abdominal MRI data,58,exam1,5,ax dixon_in,ax,in phase,0,14,14


In [None]:
# for colab
# trainids = pd.read_csv('/content/AIPI540_individual_project/data/trainfiles.csv')
# for local
trainids = pd.read_csv('./data/trainfiles.csv')


In [None]:
trainids.iloc[6].file_info

'/volumes/cm7/Abdominal_MRI_dataset_split/train/104/exam1/18 (35-68)/0059.dcm'

In [None]:
val_df = pd.read_pickle('/content/AIPI540_individual_project/data/val_dicom_data.pkl')

FileNotFoundError: [Errno 2] No such file or directory: '/content/AIPI540_individual_project/data/val_dicom_data.pkl'

In [None]:
val_df

Unnamed: 0,label_code_x,predictions,patientID,series,img_label_code,GT label,prediction label,probability,original_label_code,SeriesDescription_y,plane,seq_label,contrast,label_code_y,final_prediction
0,8,8,41,16(37-72),8,dwi,dwi,0.976494,8,ax diff_TRACEW,ax,dwi,0,8,8
1,30,30,41,18,16,T1 post con,T1 post con fat sat,0.956994,16,ax equilibrium new,ax,equilibrium,1,16,16
2,30,30,41,11,3,T1 post con,T1 post con fat sat,0.956994,3,ax dyn new,ax,early dynamic,1,3,3
3,25,25,41,7,25,T1 fat sat,T1 fat sat,0.957891,25,ax dixon_W,ax,unknown,0,25,25
4,11,11,41,6,11,dixon fat,dixon fat,0.955352,11,ax dixon_F,ax,unknown,0,11,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
349,9,9,66,12,9,t2,t2,0.977097,9,ax t2 resp trig,ax,t2,0,9,9
350,7,7,66,2,17,t2,t2,0.914605,7,cor haste,cor,t2,0,7,7
351,11,11,66,5,11,dixon fat,dixon fat,0.834404,11,ax dixon top_F_FIL,ax,unknown,0,11,11
352,0,0,66,14,0,adc,adc,0.449541,0,ax diff_ADC,ax,dwi,0,0,0


In [None]:
### does not have the filename colums.... will look for another
