In [2]:
### Import Required Libraries

import sys  
sys.path.insert(0, '../../workspace/')
sys.path.insert(0, '../utilities/')
import os, os.path as osp
import pandas as pd
import pydicom
from tqdm import tqdm
import json
import re
import pickle
from sklearn.model_selection import StratifiedKFold, KFold
import sklearn
import numpy as np
import splitting

import matplotlib.pyplot as plt
pd.set_option('display.max_columns', 500)
pd.set_option('max_colwidth', 400)

## Quick function to show dcm image

In [3]:
def show_image(filename):
    """Display the pixel data of a DICOM file.

    Reads the file at `filename` with pydicom and draws its `pixel_array`
    on the current matplotlib axes using the `bone` colormap.
    """
    pixels = pydicom.dcmread(filename).pixel_array
    plt.imshow(pixels, cmap=plt.cm.bone)

## Extract info from dicom files
#### Include study description to compare with train_labels_with_splits

In [4]:
# Adapted from cspine_hardware/cspine-det/src/etl/1_parse_dicom_uids.py

# Root folder that is walked for DICOM files
DICOM_DIR = '../cspine-det/data/dicoms_072919/'

# Column names of the DICOM extract, in output order. 'filepath' is the file
# location; every other entry is filled from the matching DICOM attribute.
_DCM_COLUMNS = (
    'filepath',
    'SOPClassUID',
    'SOPInstanceUID',
    'StudyDate',
    'Modality',
    'StudyDescription',
    'SeriesDescription',
    'PatientID',
    "Patient'sBirthDate",
    "Patient'sSex",
    'BodyPartExamined',
    'ViewPosition',
    'StudyInstanceUID',
    'SeriesInstanceUID',
    'PatientOrientation',
)

# One independent, initially empty list per column; a value is appended to
# each list for every DICOM file that is read.
dcm_dict = {column: [] for column in _DCM_COLUMNS}

# Function to extract data from DICOM files:
def try_to_access(dcm, name):
    """Best-effort read of one attribute from a DICOM dataset.

    Parameters
    ----------
    dcm : object
        Dataset-like object (e.g. the result of `pydicom.dcmread`).
    name : str
        `'height'` or `'width'` return the first / second dimension of
        `dcm.pixel_array`; any other value is read with `getattr(dcm, name)`.

    Returns
    -------
    The requested value, or `None` if it could not be read (attribute
    missing, pixel data not decodable, ...).
    """
    try:
        if name == 'height':
            return dcm.pixel_array.shape[0]
        elif name == 'width':
            return dcm.pixel_array.shape[1]
        else:
            return getattr(dcm, name)
    except Exception:
        # Deliberately tolerant: a missing tag must not abort the whole extraction.
        # `Exception` (rather than a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate so a long-running walk can still be stopped.
        return None

# pydicom exposes data elements under their DICOM keyword. Two of the column
# names in dcm_dict contain an apostrophe and are not valid keywords, so looking
# them up directly always fails and the columns end up entirely None.
# Translate them to the real keywords while keeping the column names unchanged.
COLUMN_TO_DICOM_KEYWORD = {
    "Patient'sBirthDate": 'PatientBirthDate',
    "Patient'sSex": 'PatientSex',
}

# Walk DICOM_DIR and append one value per column for every file found
for root, dirs, files in tqdm(os.walk(DICOM_DIR, topdown=True)):
    for f in files:
        filename = osp.join(root, f)
        dcm = pydicom.dcmread(filename)
        dcm_dict['filepath'].append(filename.replace('../../data/', ''))
        # Every column other than 'filepath' is read from the dataset;
        # attributes that cannot be read are stored as None by try_to_access.
        for column in dcm_dict:
            if column == 'filepath':
                continue
            keyword = COLUMN_TO_DICOM_KEYWORD.get(column, column)
            dcm_dict[column].append(try_to_access(dcm, keyword))

dcm_df = pd.DataFrame(dcm_dict) #Save DICOM data to dataframe
# NOTE(review): written to the working directory, while the next cell reads
# '../DICOM_Extract_v2.csv' - confirm which location is intended.
dcm_df.to_csv('DICOM_Extract_v2.csv') #Save as a CSV file
  

0it [00:00, ?it/s]


In [5]:
# Reload the DICOM metadata extract from disk.
# NOTE(review): this reads '../DICOM_Extract_v2.csv', whereas the extraction cell
# writes 'DICOM_Extract_v2.csv' into the working directory - confirm which copy is intended.
dcm_df = pd.read_csv('../DICOM_Extract_v2.csv')
# Distinct study descriptions, used to decide which study types are kept
study_types = dcm_df['StudyDescription'].unique()
print(study_types)

['XR SPINE CERVICAL 2-3 VIEWS' 'XR Spine Cervical 4-5 Views'
 'XR SPINE CERVICAL 1 VIEW' 'XR SPINE CERVICAL MYELOGRAM'
 'XR SPINE CERVICAL 4-5 VIEWS' 'XR SPINE CERVICAL 4 VIEWS'
 'XR SPINE MYELOGRAM CERV  THOR' 'XR SPINE CERVICAL 2-3 VWS STND PROTOCOL'
 'CT SPINE CERVICAL W CONTRAST' 'XR Spine Cervical 2-3 Vws Stnd Protocol'
 'XR Spine Cervical 2-3 Views' 'XR SPINE CERVICAL COMP OBLIQ  FLEXEXT'
 'XR SPINE CERVICAL COMP FLEXEXT']


#### CT SPINE CERVICAL W CONTRAST not appropriate
#### XR SPINE CERVICAL COMP OBLIQ  FLEXEXT not appropriate
#### XR SPINE CERVICAL COMP FLEXEXT not appropriate
#### XR SPINE CERVICAL MYELOGRAM not appropriate
#### XR SPINE MYELOGRAM CERV  THOR not appropriate
#### All other study types can be used

## Import brands from spreadsheet and map to dictionary

In [6]:
# only include appropriate studies
# One entry per line: a missing comma between two adjacent string literals makes
# Python silently concatenate them into a single string that matches nothing, which
# previously dropped both the '4-5 VIEWS' and the '4 VIEWS' studies.
studies = [
    'XR SPINE CERVICAL 2-3 VIEWS',
    'XR Spine Cervical 4-5 Views',
    'XR SPINE CERVICAL 1 VIEW',
    'XR SPINE CERVICAL 4-5 VIEWS',
    'XR SPINE CERVICAL 4 VIEWS',
    'XR SPINE CERVICAL 2-3 VWS STND PROTOCOL',
    'XR Spine Cervical 2-3 Vws Stnd Protocol',
    'XR Spine Cervical 2-3 Views',
]
dcm_df = dcm_df[dcm_df['StudyDescription'].isin(studies)]

# Load the spreadsheet that maps patients to hardware brands
df = pd.read_excel('../../hdw_merged_anon_final.xlsx')
# Keep only the named columns (discard the 'Unnamed' ones) and de-duplicate rows
named_columns = [col for col in df.columns if not re.search('Unnamed', col)]
df = df[named_columns].drop_duplicates().reset_index(drop=True)

# One row per (patient, date of surgery, hardware) combination; the patient ID
# column is renamed to 'PatientID' so it can be merged with the DICOM table
patient_df = (
    df[['AnonymizedPatientID', 'Anonymized DOS', 'Anterior HDW', 'Posterior HDW', 'Cage']]
    .drop_duplicates()
    .reset_index(drop=True)
    .rename(columns={'AnonymizedPatientID': 'PatientID'})
)

# Note: patient CSP_00015 seems to have had their Anterior HDW changed from one brand to
# another, with x-rays for both present in the dataset, so all of this patient's images
# would otherwise carry 2 brands.
# The patient before the second surgery stays CSP_00015; the patient after the second
# surgery is treated as a new patient, CSP_00191.
# ***Assumption: any images taken on the day of the surgery were pre-surgery and should
# be labeled as Atlantis
# NOTE(review): row 15 is addressed by position - presumably the second CSP_00015 row;
# verify if the spreadsheet changes.
patient_df.at[15, 'PatientID'] = 'CSP_00191'

# Many brands do not have enough patients to stratify them (< 3; we need 3 since we
# are splitting into 3 sets), so:
#  - Posterior HDW gets one 'other' category (VIRAGE, OASYS, Centerpiece/Vertex)
#  - Anterior HDW gets another 'other' category (HELIX and ZEVO)
# Posterior and Anterior patients are kept in separate dataframes.

Posterior_brand_dict = {
    "MOUNTAINEER" : 0,
    "ARCH" : 1,
    "Vertex" : 2,
    "VIRAGE" : 3,
    "OASYS" : 3,
    "Centerpiece/Vertex" : 3
}

Anterior_brand_dict = {
    "ATLANTIS" : 0,
    "ATLANTIS " : 0,  # legacy spelling with a trailing space, kept for compatibility
    "ARCHON" : 1,
    "MAXAN" : 2,
    "HELIX" : 3,
    "ZEVO" : 3,
}


def _label_patients(patients, brand_column, brand_to_label):
    """Return a re-indexed copy of `patients` with a numeric 'Label' column.

    Brand names in `brand_column` are stripped of surrounding whitespace first, so
    that a value such as 'ATLANTIS ' is stored as 'ATLANTIS' and later exact
    comparisons on the brand name (e.g. when holding out a brand) find every
    patient of that brand. 'Label' is stored as float.

    Raises KeyError if a brand is not present in `brand_to_label`.
    """
    labelled = patients.reset_index(drop=True)
    labelled[brand_column] = labelled[brand_column].map(
        lambda brand: brand.strip() if isinstance(brand, str) else brand
    )
    labels = labelled[brand_column].map(brand_to_label)
    unknown = labelled.loc[labels.isna(), brand_column].unique()
    if len(unknown) > 0:
        raise KeyError('No label defined for brand(s): %s' % list(unknown))
    labelled['Label'] = labels.astype(float)
    return labelled


# Order of selection is Posterior, then Anterior: a patient with both Posterior and
# Anterior hardware is identified by their Posterior hardware.
# Patients with no hardware get excluded (4 patients)
has_posterior = patient_df['Posterior HDW'].notna()
has_anterior = patient_df['Anterior HDW'].notna()

# Boolean masks are used instead of concat + drop_duplicates(keep=False); the result
# is the same for a de-duplicated patient_df, without relying on whole-row equality.
Posterior_patients = _label_patients(
    patient_df[has_posterior], 'Posterior HDW', Posterior_brand_dict
)
Anterior_patients = _label_patients(
    patient_df[~has_posterior & has_anterior], 'Anterior HDW', Anterior_brand_dict
)


Types of Posterior Brands : Virage, Oasys, and Centerpiece/Vertex should go in the Posterior Other Category

In [7]:
# Number of patient rows per Posterior hardware brand
patient_df['Posterior HDW'].value_counts()

MOUNTAINEER           13
ARCH                   4
Vertex                 3
OASYS                  1
VIRAGE                 1
Centerpiece/Vertex     1
Name: Posterior HDW, dtype: int64

Types of Anterior Brands: Helix and Zevo should go in the Anterior Other Category

In [8]:
# Number of patient rows per Anterior hardware brand
patient_df['Anterior HDW'].value_counts()

ATLANTIS     122
ARCHON        32
MAXAN         13
ZEVO           2
HELIX          2
ATLANTIS       1
Name: Anterior HDW, dtype: int64

Types of Cage Brands

In [9]:
# Number of patient rows per Cage brand
patient_df['Cage'].value_counts()

Nuvasive    1
Name: Cage, dtype: int64

## merge brands with images, remove images before DoS, remove multilabel images,
## remove other views

In [10]:
# Step 1: join the DICOM table with each patient table on patient ID
Posterior_branded_dcm = dcm_df.merge(Posterior_patients, on='PatientID')
Anterior_branded_dcm = dcm_df.merge(Anterior_patients, on='PatientID')

# Images of CSP_00015 with a StudyDate after 2015-09-01 are relabeled: brand ZEVO
# (label 3), date of surgery 2015-09-01, and the new patient ID CSP_00191.
# The mask is computed once, before PatientID is overwritten.
after_second_surgery = (
    (Anterior_branded_dcm['StudyDate'] > 20150901)
    & (Anterior_branded_dcm['PatientID'] == 'CSP_00015')
)
Anterior_branded_dcm.loc[after_second_surgery, 'Anterior HDW'] = 'ZEVO'
Anterior_branded_dcm.loc[after_second_surgery, 'Label'] = 3
Anterior_branded_dcm.loc[after_second_surgery, 'Anonymized DOS'] = pd.to_datetime('20150901', format='%Y%m%d')
Anterior_branded_dcm.loc[after_second_surgery, 'PatientID'] = 'CSP_00191'

# Step 2: keep only images taken strictly after the date of surgery (DOS)
# Images before DOS are not used as 'no hardware' examples because we are unsure
# whether the patient came in with some hardware already.
# One posterior patient is dropped because all their images were before DOS and did
# not contain an implant (but were still labeled with the hardware).
posterior_study_dates = pd.to_datetime(Posterior_branded_dcm['StudyDate'], format='%Y%m%d')
Posterior_branded_dcm = Posterior_branded_dcm[
    posterior_study_dates > Posterior_branded_dcm['Anonymized DOS']
]

anterior_study_dates = pd.to_datetime(Anterior_branded_dcm['StudyDate'], format='%Y%m%d')
Anterior_branded_dcm = Anterior_branded_dcm[
    anterior_study_dates > Anterior_branded_dcm['Anonymized DOS']
]

# Step 3: All patients who have multiple implants should be in the Posterior_branded_dcm df (confirmed)
# Add a binary 'Multiple' indicator to both dfs showing whether each image comes from a
# patient with more than one kind of hardware.

# Posterior table: posterior hardware together with anterior hardware or a cage
for row_label, record in Posterior_branded_dcm.iterrows():
    has_anterior = not pd.isna(record['Anterior HDW'])
    has_posterior = not pd.isna(record['Posterior HDW'])
    has_cage = not pd.isna(record['Cage'])
    Posterior_branded_dcm.at[row_label, 'Multiple'] = 1 if has_posterior and (has_anterior or has_cage) else 0

# Anterior table: anterior hardware together with posterior hardware or a cage
for row_label, record in Anterior_branded_dcm.iterrows():
    has_anterior = not pd.isna(record['Anterior HDW'])
    has_posterior = not pd.isna(record['Posterior HDW'])
    has_cage = not pd.isna(record['Cage'])
    Anterior_branded_dcm.at[row_label, 'Multiple'] = 1 if has_anterior and (has_posterior or has_cage) else 0

# Step 4: Drop views that are not AP, LATERAL, LATERAL FLEX, LATERAL EXT, LL
views = ['AP', 'LATERAL','LATERAL FLEX', 'LATERAL EXT', 'LL']
keep_posterior = Posterior_branded_dcm['ViewPosition'].isin(views)
keep_anterior = Anterior_branded_dcm['ViewPosition'].isin(views)
Posterior_branded_dcm = Posterior_branded_dcm[keep_posterior]
Anterior_branded_dcm = Anterior_branded_dcm[keep_anterior]

# New 'View' column: 'AP' for AP images, 'L' for every remaining (lateral) view
for row_label, record in Posterior_branded_dcm.iterrows():
    Posterior_branded_dcm.at[row_label, 'View'] = 'AP' if record['ViewPosition'] == 'AP' else 'L'
for row_label, record in Anterior_branded_dcm.iterrows():
    Anterior_branded_dcm.at[row_label, 'View'] = 'AP' if record['ViewPosition'] == 'AP' else 'L'

# Renumber the rows; reset_index() without drop keeps the old index as an 'index' column
Posterior_branded_dcm = Posterior_branded_dcm.reset_index()
Anterior_branded_dcm = Anterior_branded_dcm.reset_index()

In [11]:
# remove images where pixel array does not exist (seems like they are all in anterior)

no_pixels = [
    '../cspine-det/data/dicoms_072919/Csp_00075_20130622/IM-2297-0001-0002.dcm',
    '../cspine-det/data/dicoms_072919/Csp_00075_20130622/IM-2297-0001-0003.dcm',
    '../cspine-det/data/dicoms_072919/Csp_00075_20130622/IM-2297-0001-0005.dcm',
    '../cspine-det/data/dicoms_072919/Csp_00075_20130622/IM-2297-0001-0004.dcm',
    '../cspine-det/data/dicoms_072919/Csp_00001_20130622/IM-2288-0001-0005.dcm',
    '../cspine-det/data/dicoms_072919/Csp_00001_20130622/IM-2288-0001-0004.dcm',
    '../cspine-det/data/dicoms_072919/Csp_00001_20130622/IM-2288-0001-0002.dcm',
    '../cspine-det/data/dicoms_072919/Csp_00001_20130622/IM-2288-0001-0003.dcm',
]

# Drop every row whose filepath is in the list, from both tables, in a single call each
Posterior_branded_dcm = Posterior_branded_dcm.drop(
    Posterior_branded_dcm.index[Posterior_branded_dcm['filepath'].isin(no_pixels)]
)

Anterior_branded_dcm = Anterior_branded_dcm.drop(
    Anterior_branded_dcm.index[Anterior_branded_dcm['filepath'].isin(no_pixels)]
)

# remove images of teeth in posterior dataset
posterior_teeth = ['../cspine-det/data/dicoms_072919/Csp_00095_20140411/IM-1973-0003.dcm',
'../cspine-det/data/dicoms_072919/Csp_00182_20151227/IM-1432-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00157_20161113/IM-0913-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00004_20100119/IM-2548-0003.dcm',
'../cspine-det/data/dicoms_072919/Csp_00004_20100119/IM-2548-0004.dcm',
'../cspine-det/data/dicoms_072919/Csp_00030_20110202/IM-0010-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00133_20131111/IM-2132-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00184_20160315/IM-1299-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00183_20151221/IM-1439-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00183_20160507/IM-1202-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00085_20131127/IM-2115-0002.dcm',
'../cspine-det/data/dicoms_072919/Csp_00117_20160405/IM-1267-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00006_20100202/IM-2543-0003.dcm',
'../cspine-det/data/dicoms_072919/Csp_00006_20100202/IM-2543-0004.dcm',
'../cspine-det/data/dicoms_072919/Csp_00108_20150829/IM-1585-0003.dcm',
'../cspine-det/data/dicoms_072919/Csp_00071_20130117/IM-2423-0002.dcm',
'../cspine-det/data/dicoms_072919/Csp_00079_20140129/IM-2040-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00079_20170129/IM-0755-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00178_20151002/IM-1539-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00147_20180107/IM-0313-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00120_20160523/IM-1167-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00185_20160716/IM-1058-0001.dcm']

# Drop all listed files in one pass instead of re-scanning the table once per path
Posterior_branded_dcm = Posterior_branded_dcm.drop(
    Posterior_branded_dcm.index[Posterior_branded_dcm['filepath'].isin(posterior_teeth)]
)
# drop=True: the old index is discarded rather than inserted as yet another
# 'index' / 'level_0' column. Without it, re-running this cell raises
# "ValueError: cannot insert level_0, already exists".
Posterior_branded_dcm = Posterior_branded_dcm.reset_index(drop=True)

# remove images of teeth in anterior dataset
anterior_teeth = ['../cspine-det/data/dicoms_072919/Csp_00081_20130803/IM-2232-0004.dcm',
'../cspine-det/data/dicoms_072919/Csp_00081_20130803/IM-2232-0003.dcm',
'../cspine-det/data/dicoms_072919/Csp_00089_20140325/IM-1988-0003.dcm',
'../cspine-det/data/dicoms_072919/Csp_00132_20180314/IM-0208-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00132_20180316/IM-0207-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00132_20160419/IM-1238-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00063_20121003/IM-2471-0004.dcm',
'../cspine-det/data/dicoms_072919/Csp_00077_20130302/IM-2408-0003.dcm',
'../cspine-det/data/dicoms_072919/Csp_00064_20120804/IM-2491-0005.dcm',
'../cspine-det/data/dicoms_072919/Csp_00094_20140823/IM-1868-0003.dcm',
'../cspine-det/data/dicoms_072919/Csp_00179_20160531/IM-1157-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00015_20100706/IM-2500-0002.dcm',
'../cspine-det/data/dicoms_072919/Csp_00015_20150902/IM-1569-0003.dcm',
'../cspine-det/data/dicoms_072919/Csp_00165_20160901/IM-1000-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00034_20110510/IM-2595-0003.dcm',
'../cspine-det/data/dicoms_072919/Csp_00036_20110426/IM-2599-0003.dcm',
'../cspine-det/data/dicoms_072919/Csp_00020_20110101/IM-0021-0004.dcm',
'../cspine-det/data/dicoms_072919/Csp_00020_20110101/IM-0021-0003.dcm',
'../cspine-det/data/dicoms_072919/Csp_00113_20161108/IM-0916-0001.dcm',
'../cspine-det/data/dicoms_072919/Csp_00103_20150801/IM-1628-0004.dcm',
'../cspine-det/data/dicoms_072919/Csp_00142_20140415/IM-1968-0001.dcm'
]

# Drop all listed files in one pass instead of re-scanning the table once per path
Anterior_branded_dcm = Anterior_branded_dcm.drop(
    Anterior_branded_dcm.index[Anterior_branded_dcm['filepath'].isin(anterior_teeth)]
)
# drop=True: the old index is discarded rather than inserted as yet another
# 'index' / 'level_0' column. Without it, re-running this cell raises
# "ValueError: cannot insert level_0, already exists".
Anterior_branded_dcm = Anterior_branded_dcm.reset_index(drop=True)

# Create master csv files for images

One master file for anterior patients, one for posterior
<br>Can create stratified files by function call to repeatedly stratify data
<br>SeriesInstanceUID and ViewPosition are for the models that take data from the same series together (AuxLoss, HeMIS)

In [12]:
# Columns shared by both master tables; the hardware-specific brand column is
# inserted before 'SeriesInstanceUID' and renamed to the common name 'Brand'.
_leading_columns = ['PatientID', 'filepath', 'Label', 'View', 'Multiple']
_trailing_columns = ['SeriesInstanceUID', 'ViewPosition']

Master_Posterior_branded_dcm = (
    Posterior_branded_dcm[_leading_columns + ['Posterior HDW'] + _trailing_columns]
    .rename(columns={'Posterior HDW': 'Brand'})
)
Master_Anterior_branded_dcm = (
    Anterior_branded_dcm[_leading_columns + ['Anterior HDW'] + _trailing_columns]
    .rename(columns={'Anterior HDW': 'Brand'})
)
# Writing the master tables to disk is currently disabled:
# Master_Posterior_branded_dcm.to_csv('Master_Posterior_HDW.csv', index=False)
# Master_Anterior_branded_dcm.to_csv('Master_Anterior_HDW.csv', index=False)

## Basic Train-Test-Validate Stratify function
#### Formalized in utilities/splitting.py

In [10]:
import sklearn.model_selection

def split_data(master, suffix):
    """Split `master` into train / validation / test sets by patient and write them to CSV.

    Patients (unique 'PatientID' / 'Label' pairs) are split 80:20 into train and
    test, stratified by label; the train part is split 80:20 again into train and
    validation. All rows of a patient follow that patient's assignment. Both splits
    use random_state=1.

    Writes 'Train_<suffix>.csv', 'Val_<suffix>.csv' and 'Test_<suffix>.csv' to the
    working directory (without the index) and returns None.
    """
    patients = master[['PatientID', 'Label']].drop_duplicates().reset_index()
    patient_ids = patients['PatientID']
    patient_labels = patients['Label']

    # First split: hold out 20% of the patients as the test set
    train_ids, test_ids, train_labels, _test_labels = sklearn.model_selection.train_test_split(
        patient_ids, patient_labels, test_size = 0.2, random_state=1, stratify=patient_labels
    )
    # Second split: carve the validation set out of the remaining training patients
    train_ids, val_ids, train_labels, _val_labels = sklearn.model_selection.train_test_split(
        train_ids, train_labels, test_size = 0.2, random_state=1, stratify=train_labels
    )

    # Select the image rows of each patient group, then write the three files
    frames = [
        ('Train', master[master['PatientID'].isin(train_ids)]),
        ('Val', master[master['PatientID'].isin(val_ids)]),
        ('Test', master[master['PatientID'].isin(test_ids)]),
    ]
    for prefix, frame in frames:
        frame.to_csv(prefix + '_' + suffix + '.csv', index=False)

## Train-Test-Validate Stratify Function with Holdout

#### Formalized in utilities/splitting.py

This function lets the user choose which brand from the dictionary is held out of the training and validation sets and placed in the test set. The brand is selected by name rather than by label, since the held-out brand may belong to the 'other' category.

In [27]:
def holdout_data(master, suffix, brand):
    """Split `master` into train / validation / test sets with one brand held out.

    All rows whose 'Brand' equals `brand` are kept out of training and validation
    and appended to the test set. The remaining patients (unique 'PatientID' /
    'Label' / 'Brand' combinations) are split 80:20 into train and test, stratified
    by label, and the train part is split 80:20 again into train and validation.
    Both splits use random_state=1.

    Writes 'Train_<suffix>.csv', 'Val_<suffix>.csv' and 'Test_<suffix>.csv' to the
    working directory (without the index) and returns None.
    """
    # Separate the held-out brand with a boolean mask. The previous
    # concat + drop_duplicates(keep=False) anti-join also deleted any
    # non-holdout rows that happened to be exact duplicates of each other.
    is_holdout = master['Brand'] == brand
    holdouts = master[is_holdout]
    removed = master[~is_holdout]

    patients = removed[['PatientID', 'Label', 'Brand']].drop_duplicates().reset_index()
    patient_ids = patients['PatientID']
    patient_labels = patients['Label']

    # split the patients in the removed set into train and test sets (80:20)
    Train_IDs_Strat, Test_IDs_Strat, Train_Labels_Strat, Test_Labels_Strat = sklearn.model_selection.train_test_split(patient_ids, patient_labels, test_size = 0.2, random_state=1, stratify=patient_labels)
    # split the training set again to get a validation set
    Train_IDs_Strat, Val_IDs_Strat, Train_Labels_Strat, Val_Labels_Strat = sklearn.model_selection.train_test_split(Train_IDs_Strat, Train_Labels_Strat, test_size = 0.2, random_state=1, stratify=Train_Labels_Strat)

    Train_DCMs_Strat = removed[removed['PatientID'].isin(Train_IDs_Strat)]
    Val_DCMs_Strat = removed[removed['PatientID'].isin(Val_IDs_Strat)]
    Test_DCMs_Strat = removed[removed['PatientID'].isin(Test_IDs_Strat)]

    # add the held-out images to the test set
    Test_DCMs_Strat = pd.concat([Test_DCMs_Strat, holdouts])

    Train_file = 'Train_' + suffix + '.csv'
    Val_file = 'Val_' + suffix + '.csv'
    Test_file = 'Test_' + suffix + '.csv'

    Train_DCMs_Strat.to_csv(Train_file, index=False)
    Val_DCMs_Strat.to_csv(Val_file, index=False)
    Test_DCMs_Strat.to_csv(Test_file, index=False)

116

## Create master files for multi-view learning

In [122]:
def write_multiview_master(master, hardware):
    """Reformat a master table into AP / lateral image pairs, one row per pair.

    The result has the columns expected by split_data and holdout_data
    ('PatientID', 'Label', 'Brand') plus 'filepath_AP', 'filepath_L' and 'Multiple'.

    Parameters
    ----------
    master : pandas.DataFrame
        Needs the columns 'SeriesInstanceUID', 'View' ('AP' or 'L'), 'filepath',
        'PatientID', 'Label', 'Multiple' and 'Brand'.
    hardware : str
        Used in the output file name 'Master_<hardware>_HDW_MultiView.csv'.

    Returns
    -------
    pandas.DataFrame
        The paired table; it is also written to the CSV file (without the index).

    Within each series (set of x-rays taken together) the i-th AP image is paired
    with the i-th L image. If the counts differ, the missing side of the excess rows
    is the string 'NaN'. File paths are prefixed with '../'. Patient, label, multiple
    flag and brand are taken from the first row of the series.
    """
    filepath_AP = []
    filepath_L = []
    label = []
    patients = []
    multiple = []
    brand = []

    # A single groupby pass replaces one full-table scan per series. sort=False keeps
    # the series in order of first appearance. No reset_index() is needed, so a master
    # table that already carries an 'index' column no longer raises.
    for _, series_df in master.groupby('SeriesInstanceUID', sort=False):
        ap_paths = series_df.loc[series_df['View'] == 'AP', 'filepath'].tolist()
        l_paths = series_df.loc[series_df['View'] == 'L', 'filepath'].tolist()

        series_label = series_df['Label'].iloc[0]
        series_patient = series_df['PatientID'].iloc[0]
        series_multiple = series_df['Multiple'].iloc[0]
        series_brand = series_df['Brand'].iloc[0]

        # Pair views by position; pad the shorter side with the 'NaN' placeholder
        for position in range(max(len(ap_paths), len(l_paths))):
            if position < len(ap_paths):
                filepath_AP.append('../' + ap_paths[position])
            else:
                filepath_AP.append('NaN')
            if position < len(l_paths):
                filepath_L.append('../' + l_paths[position])
            else:
                filepath_L.append('NaN')

            label.append(series_label)
            patients.append(series_patient)
            multiple.append(series_multiple)
            brand.append(series_brand)

    views_df = pd.DataFrame({'PatientID':patients,'filepath_AP':filepath_AP, 'filepath_L':filepath_L,'Label':label,'Multiple':multiple,'Brand':brand})
    name = 'Master_' + hardware + '_HDW_MultiView.csv'
    views_df.to_csv(name, index=False)

    return views_df

In [123]:
# Build the multi-view master table for each hardware group; each call also writes
# 'Master_<hardware>_HDW_MultiView.csv' to the working directory
PosteriorViews = write_multiview_master(Master_Posterior_branded_dcm, 'Posterior')
AnteriorViews = write_multiview_master(Master_Anterior_branded_dcm, 'Anterior')