In [2]:
# default_exp series.preproc

# series.preproc

> Tools for preprocessing DICOM metadata imported using `dicomtools.core` into a `pandas.DataFrame`, in preparation for training a RandomForest classifier to predict series type.

In [3]:
#hide
from nbdev.showdoc import *

In [4]:
#export
from dicomtools.imports import *
from dicomtools.core import *
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MultiLabelBinarizer

In [5]:
#export
# Lookup table for abdominal series labels, keyed by the label code as a string.
# Each entry holds four string fields: 'long', 'short', 'plane' and 'contrast'.
# Rows below are (code, long, short, plane, contrast); insertion order is preserved.
# NOTE(review): code '17' spells its plane 'unkwn' while code '1' uses 'unknown',
# and code '16' spells "Dyanmic" -- both are runtime values and are kept verbatim.
abd_label_dict = {
    code: {'long': long_name, 'short': short_name, 'plane': plane, 'contrast': contrast}
    for code, long_name, short_name, plane, contrast in (
        ('1', 'Anythingelse', 'unknown', 'unknown', 'unknown'),
        ('2', 'Arterial T1w', 'early dynamic', 'ax', '1'),
        ('3', 'Early Arterial T1w', 'early dynamic', 'ax', '1'),
        ('4', 'Late Arterial T1w', 'early dynamic', 'ax', '1'),
        ('5', 'Arterial Subtraction', 'art sub', 'ax', '1'),
        ('6', 'Coronal Late Dynamic T1w', 't1 late', 'cor', '1'),
        ('7', 'Coronal T2w', 't2', 'cor', '0'),
        ('8', 'Axial DWI', 'dwi', 'ax', '0'),
        ('9', 'Axial T2w', 't2', 'ax', '0'),
        ('10', 'Coronal DWI', 'diff', 'cor', '0'),
        ('11', 'Fat Only', 'dixon fat', 'ax', '0'),
        ('12', 'Axial Transitional_Hepatocyte T1w', 'hepatobiliary', 'ax', '1'),
        ('13', 'Coronal Transitional_Hepatocyte T1w', 'hepatobiliary', 'cor', '1'),
        ('14', 'Axial In Phase', 'in phase', 'ax', '0'),
        ('15', 'Coronal In Phase', 'in phase', 'cor', '0'),
        ('16', 'Axial Late Dyanmic T1w', 'equilibrium', 'ax', '1'),
        ('17', 'Localizers', 'localizers', 'unkwn', '0'),
        ('18', 'MRCP', 'mrcp', 'cor', '0'),
        ('19', 'Axial Opposed Phase', 'opposed phase', 'ax', '0'),
        ('20', 'Coronal Opposed Phase', 'opposed phase', 'cor', '0'),
        ('21', 'Proton Density Fat Fraction', 'fat quant', 'ax', '0'),
        ('22', 'Water Density Fat Fraction', 'water fat quant', 'ax', '0'),
        ('23', 'Portal Venous T1w', 'portal venous', 'ax', '1'),
        ('24', 'Coronal Precontrast Fat Suppressed T1w', 'T1 fat sat', 'cor', '0'),
        ('25', 'Axial Precontrast Fat Suppressed T1w', 'T1 fat sat', 'ax', '0'),
        ('26', 'R*2', 'r*2', 'ax', '0'),
        ('27', 'Axial Steady State Free Precession', 'ssfse', 'ax', '0'),
        ('28', 'Coronal Steady State Free Precession', 'ssfse', 'cor', '1'),
        ('29', 'Venous Subtraction', 'ven sub', 'ax', '1'),
        ('0', 'Axial ADC', 'adc', 'ax', '0'),
        ('30', 'Axial Post Contrast Fat Suppressed T1w', 'T1 post con fat sat', 'ax', '1'),
        ('31', 'Coronal Post Contrast Fat Suppressed T1w', 'T1 post con fat sat', 'cor', '1'),
        ('32', 'Post Contrast Fat Suppressed T1w', 'T1 post con fat sat', 'ax/cor', '1'),
    )
}

In [6]:
#export
# Second label lookup table with the same codes and 'long' names as `abd_label_dict`,
# but with underscore-style 'short' names and 'other' for code '1'.
# Rows below are (code, long, short, plane, contrast); insertion order is preserved.
abd_label_dict_updated = {
    code: {'long': long_name, 'short': short_name, 'plane': plane, 'contrast': contrast}
    for code, long_name, short_name, plane, contrast in (
        ('1', 'Anythingelse', 'other', 'other', 'other'),
        ('2', 'Arterial T1w', 'arterial', 'ax', '1'),
        ('3', 'Early Arterial T1w', 'early_arterial', 'ax', '1'),
        ('4', 'Late Arterial T1w', 'late_arterial', 'ax', '1'),
        ('5', 'Arterial Subtraction', 'arterial_sub', 'ax', '1'),
        ('6', 'Coronal Late Dynamic T1w', 'dynamic_late', 'cor', '1'),
        ('7', 'Coronal T2w', 't2', 'cor', '0'),
        ('8', 'Axial DWI', 'dwi', 'ax', '0'),
        ('9', 'Axial T2w', 't2', 'ax', '0'),
        ('10', 'Coronal DWI', 'dwi', 'cor', '0'),
        ('11', 'Fat Only', 'dixon_fat', 'ax', '0'),
        ('12', 'Axial Transitional_Hepatocyte T1w', 'hepatobiliary', 'ax', '1'),
        ('13', 'Coronal Transitional_Hepatocyte T1w', 'hepatobiliary', 'cor', '1'),
        ('14', 'Axial In Phase', 'in_phase', 'ax', '0'),
        ('15', 'Coronal In Phase', 'in_phase', 'cor', '0'),
        ('16', 'Axial Late Dyanmic T1w', 'dynamic_equilibrium', 'ax', '1'),
        ('17', 'Localizers', 'loc', 'unknown', '0'),
        ('18', 'MRCP', 'mrcp', 'cor', '0'),
        ('19', 'Axial Opposed Phase', 'opposed_phase', 'ax', '0'),
        ('20', 'Coronal Opposed Phase', 'opposed_phase', 'cor', '0'),
        ('21', 'Proton Density Fat Fraction', 'fat_quant', 'ax', '0'),
        ('22', 'Water Density Fat Fraction', 'water_fat_quant', 'ax', '0'),
        ('23', 'Portal Venous T1w', 'portal_venous', 'ax', '1'),
        ('24', 'Coronal Precontrast Fat Suppressed T1w', 't1_fat_sat', 'cor', '0'),
        ('25', 'Axial Precontrast Fat Suppressed T1w', 't1_fat_sat', 'ax', '0'),
        ('26', 'R*2', 'r_star_2', 'ax', '0'),
        ('27', 'Axial Steady State Free Precession', 'ssfse', 'ax', '0'),
        ('28', 'Coronal Steady State Free Precession', 'ssfse', 'cor', '1'),
        ('29', 'Venous Subtraction', 'venous_sub', 'ax', '1'),
        ('0', 'Axial ADC', 'adc', 'ax', '0'),
        ('30', 'Axial Post Contrast Fat Suppressed T1w', 't1_fat_sat', 'ax', '1'),
        ('31', 'Coronal Post Contrast Fat Suppressed T1w', 't1_fat_sat', 'cor', '1'),
        ('32', 'Post Contrast Fat Suppressed T1w', 't1_fat_sat', 'ax/cor', '1'),
    )
}

In [7]:
#export
def exclude_other(df):
    """Drop rows that are not relevant MR series.

    If `df` has no `BodyPartExamined` column it is returned unchanged (same object,
    no filtering at all). Otherwise a new frame with a fresh 0..n-1 index is returned
    that keeps only rows where

    * `BodyPartExamined` is not one of SPINE / CSPINE / PELVIS / PROSTATE,
    * `SeriesDescription` does not mention cervical / thoracic / lumbar
      (case-insensitive; missing descriptions are kept), and
    * `SOPClassUID` equals "MR Image Storage".
    """
    if 'BodyPartExamined' not in df.columns: return df
    other = ['SPINE', 'CSPINE', 'PELVIS', 'PROSTATE']
    is_other_part = df.BodyPartExamined.isin(other)
    # Non-capturing group: a capturing group makes `str.contains` emit a
    # "pattern has match groups" UserWarning on every call.
    is_spine_desc = df.SeriesDescription.str.contains(r'(?:cervical|thoracic|lumbar)', case=False, na=False)
    is_mr = df.SOPClassUID == "MR Image Storage"
    # One combined mask selects the same rows, in the same order, as filtering in three passes.
    return df[~is_other_part & ~is_spine_desc & is_mr].reset_index(drop=True)


In [8]:
#export
def get_series_fp(fn):
    "Return the parent directory of file path `fn` as a `Path`"
    file_path = Path(fn)
    return file_path.parent

In [9]:
#export
def compute_plane(row):
    '''
    Return the imaging plane (`'sag'`, `'cor'` or `'ax'`) implied by `ImageOrientationPatient`.

    The field holds six direction cosines, `[x1, y1, z1, x2, y2, z2]`, for the first row
    and first column of the pixel data. Their cross product is the slice normal; the axis
    on which that normal has the largest magnitude selects the plane.

    `row` may carry the six values as one sequence under `ImageOrientationPatient`, or spread
    over several keys whose names contain `ImageOrientationPatient` (detected by the presence
    of `ImageOrientationPatient1`), in which case they are collected in iteration order.
    '''
    if 'ImageOrientationPatient1' in row.keys():
        cosines = [value for key, value in row.items() if 'ImageOrientationPatient' in key]
    else:
        cosines = row['ImageOrientationPatient']
    row_dir, col_dir = np.array(cosines).reshape(2, 3)
    normal = np.abs(np.cross(row_dir, col_dir))
    # index 0 -> normal along x, 1 -> along y, 2 -> along z
    return ('sag', 'cor', 'ax')[np.argmax(normal)]


In [10]:
#export
# Matches a bracketed span: an opening `<`, `(` or `[`, then the shortest run of
# characters up to the next `]`, `)` or `>`. Opener and closer need not be the same kind.
_re_extra_info = re.compile(r'[<\([].*?[\]\)>]')


In [11]:
#export
def rm_extra_info(t):
    "Delete every bracketed span matched by `_re_extra_info` from `t`, then trim surrounding whitespace"
    without_brackets = _re_extra_info.sub('', t)
    return without_brackets.strip()


In [12]:
# Each bracket style is removed together with the whitespace it leaves behind.
for raw, cleaned in [
    ('ax t1 <mpr>', 'ax t1'),
    ('adc (mm^2/s)', 'adc'),
    ('ax t1 [date]', 'ax t1'),
]:
    assert rm_extra_info(raw) == cleaned

In [13]:
#export
#added other possible words for post contrast images in the abdomen
_c = re.compile(r'(\+-?c|post|with|dyn|portal|equilibrium|hepatobiliary|delayed)')

In [14]:
#export
def detect_contrast(row):
    """Return 1 if `row` looks contrast-enhanced, else 0.

    `row` is a mapping-like record (dict or `pandas.Series`). It is flagged when either

    * its `SeriesDescription` (lower-cased, bracketed extras removed) matches `_c`, or
    * its `ContrastBolusAgent` is a non-blank string.

    A missing `ContrastBolusAgent` key, a NaN / non-string value, or an empty or
    whitespace-only string all count as "no agent recorded".
    """
    sd = rm_extra_info(str(row['SeriesDescription']).lower())
    if _c.search(sd): return 1
    # `.get` tolerates records without the field instead of raising KeyError
    agent = row.get('ContrastBolusAgent')
    if isinstance(agent, str) and agent.strip(): return 1
    return 0


In [15]:
# Sample records: the description and the agent field may agree, disagree, or be missing.
row = dict(SeriesDescription='ax t1 +c', ContrastBolusAgent='Gadavist')
row1 = dict(SeriesDescription='ax t1', ContrastBolusAgent=np.nan)
row2 = dict(SeriesDescription='ax t1', ContrastBolusAgent='Gadavist')  # example of discordant SD
row3 = dict(SeriesDescription='AX T1 POST', ContrastBolusAgent=np.nan)  # also discordant
row4 = dict(SeriesDescription='ax with', ContrastBolusAgent='MultiHance')
row5 = dict(SeriesDescription='ax dyn', ContrastBolusAgent=np.nan)
row6 = dict(SeriesDescription='ax equilibrium', ContrastBolusAgent='Eovist')

for sample, expected in [(row, 1), (row1, 0), (row2, 1), (row3, 1), (row4, 1), (row5, 1), (row6, 1)]:
    assert detect_contrast(sample) == expected



In [16]:
#export
_t1 = re.compile(r't1')
_t1_in = re.compile(r'dixon_in')
_t1_out = re.compile(r'opp')
_water = re.compile(r'dixon_W|water')
_fat = re.compile(r'dixon_F|fat')
_pv = re.compile(r'portal')
_eq = re.compile(r'equilibrium')
_art = re.compile(r'art|arterial|dyn|early')
_delayed = re.compile(r'delay|15|20|hepatobiliary')
_spgr = re.compile(r'spgr|mprage|bravo|vibe')
_t2 = re.compile(r't2|haste')
_flair = re.compile(r'flair')
_swi = re.compile(r'swi|gre|susc|mag|pha|sw|hemo')
_adc = re.compile(r'adc|apparent')
_eadc = re.compile(r'exp|eadc')
_dwi = re.compile(r'diff|dwi|trace')
_mra = re.compile(r'mra|angio|cow|tof|mip|mrv')
_loc = re.compile(r'loc|scout|smartbrain')
_other = re.compile(r'ciss|fiesta|stir|pd|cube|pc')

In [17]:
#export
def _find_seq(sd):
    """Map series-description text `sd` to a coarse sequence label.

    Patterns are tried in a fixed priority order and the first hit wins:

    1. If `sd` mentions T1: 'spgr', then the phase/Dixon/dynamic checks, else 't1'.
    2. Otherwise the same phase/Dixon/dynamic checks.
    3. If `sd` mentions T2: 'flair', then 'swi', else 't2'.
    4. The remaining keyword checks, ending with 'unknown' when nothing matches.
    """
    # Checks shared by T1-tagged and untagged descriptions, highest priority first.
    phase_and_dynamic = (
        (_t1_in, 'in phase'),
        (_t1_out, 'opposed phase'),
        (_water, 'dixon water'),
        (_fat, 'dixon fat'),
        (_pv, 'portal venous'),
        (_eq, 'equilibrium'),
        (_art, 'early dynamic'),
        (_delayed, 'hepatobiliary'),
    )
    # Checks tried once neither T1 nor the phase/dynamic keywords decided the label.
    remaining = (
        (_flair, 'flair'),
        (_swi, 'swi'),
        (_dwi, 'dwi'),
        (_adc, 'dwi'),
        (_eadc, 'dwi'),
        (_mra, 'mra'),
        (_loc, 'loc'),
        (_other, 'other'),
    )

    def first_match(table, default=None):
        "Label of the first pattern in `table` found in `sd`, or `default`"
        for pattern, label in table:
            if pattern.search(sd): return label
        return default

    if _t1.search(sd):
        if _spgr.search(sd): return 'spgr'
        return first_match(phase_and_dynamic, 't1')

    label = first_match(phase_and_dynamic)
    if label is not None: return label

    if _t2.search(sd):
        # within T2, only FLAIR and SWI take precedence over the plain 't2' label
        return first_match(remaining[:2], 't2')
    return first_match(remaining, 'unknown')



In [18]:
def test_find_seq(sd, targ):
    "Assert that `_find_seq` labels description `sd` as `targ`"
    assert _find_seq(sd) == targ

for sd, targ in [
    ('ax t1 +c', 't1'),
    ('ax t1 flair +c', 't1'),
    ('ax t2 +c', 't2'),
    ('ax t2 flair', 'flair'),
    ('ax t2 gre', 'swi'),
    ('ax swi', 'swi'),
    ('ax susc', 'swi'),
    ('adc', 'dwi'),
    ('eadc', 'dwi'),
    ('ax dwi', 'dwi'),
    ('ax diffusion', 'dwi'),
    ('localizer', 'loc'),
    ('dixon_opp', 'opposed phase'),
]:
    test_find_seq(sd, targ)

In [19]:
#export
def _extract_label(sd):
    "Lower-case `sd` (coerced to `str`), strip bracketed extras, and classify the rest with `_find_seq`"
    return _find_seq(rm_extra_info(str(sd).lower()))


In [20]:
# Bracketed extras are ignored; a description that is only a bracketed span is 'unknown'.
for description, label in [
    ('ax t1 +c', 't1'),
    ('ax t1 +c [date]', 't1'),
    ('<MPR Thick Range>', 'unknown'),
]:
    assert _extract_label(description) == label

In [21]:
#export
def extract_labels(df):
    """Extract candidate labels from Series Descriptions and computed plane.

    Returns a new frame with columns:

    * `fname` -- each file path replaced by its parent directory (`get_series_fp`),
    * `SeriesDescription` -- copied from `df`,
    * `plane` -- `compute_plane` applied to every row of `df`,
    * `seq_label` -- `_extract_label` applied to the description,
    * `contrast` -- `detect_contrast` applied to every row of `df`.

    Progress messages are printed before each step.
    """
    df1 = df[['fname', 'SeriesDescription']].copy()
    df1['fname'] = df1.fname.apply(get_series_fp)
    print("Computing planes of imaging from `ImageOrientationPatient`.")
    df1['plane'] = df.apply(compute_plane, axis=1)
    print("Extracting candidate labels from `SeriesDescription`.")
    df1['seq_label'] = df1['SeriesDescription'].apply(_extract_label)
    # The message now names the field `detect_contrast` actually reads.
    print("Detecting contrast from `SeriesDescription` and `ContrastBolusAgent`.")
    df1['contrast'] = df.apply(detect_contrast, axis=1)
    return df1


In [22]:
#export
_keep = [
    'fname',
    # Patient info
    'PatientID',
    # Study info
    'StudyInstanceUID',
    'StudyID',
    # Series info
    'SeriesInstanceUID',
    'SeriesNumber',
    'SeriesDescription',
    'AcquisitionNumber',
    # Image info and features
    'InstanceNumber',
    'ImageOrientationPatient',
    'ScanningSequence',
    'SequenceVariant',
    'ScanOptions',
    'MRAcquisitionType',
    'AngioFlag',
    'SliceThickness',
    'RepetitionTime',
    'EchoTime',
    'EchoTrainLength',
    'PixelSpacing',
    'ContrastBolusAgent',
    'InversionTime',
    'DiffusionBValue',
    'ImageType',
    # Labels
    'plane',
    'seq_label',
    'contrast'
]

_dummies = [
    'ScanningSequence',
    'SequenceVariant',
    'ScanOptions',
    'ImageType'
]

_d_prefixes = [
    'seq',
    'var',
    'opt',
    'type'
]

_binarize = [
    'MRAcquisitionType',
    'AngioFlag',
    'ContrastBolusAgent',
    'DiffusionBValue'
]

_rescale = [
    'SliceThickness',
    'RepetitionTime',
    'EchoTime',
    'EchoTrainLength',
    'PixelSpacing',
    'InversionTime'
]

In [23]:
#export
def _make_col_binary(df, col):
    s = df[col].isna()
    if any(s):
        df[col] = s.apply(lambda x: 0 if x else 1)
    else:
        targ = df.loc[0, col]
        df[col] = df[col].apply(lambda x: 0 if x == targ else 1)


In [24]:
#export
def make_binary_cols(df, cols):
    """Return a copy of `df` in which every column named in `cols` is 0/1.

    A column missing from `df` is first created filled with 0; every requested
    column is then passed through `_make_col_binary`. `df` itself is not modified.
    """
    result = df.copy()
    for name in cols:
        if name not in df.columns:
            result[name] = 0
        _make_col_binary(result, name)
    return result



In [25]:
#export
def rescale_cols(df, cols):
    """Return a copy of `df` with the columns in `cols` min-max scaled and NaNs filled with 0.

    * Columns of `cols` present in `df` are scaled together with `MinMaxScaler`.
    * Columns of `cols` absent from `df` are created filled with 0 (mirroring
      `make_binary_cols`) instead of raising KeyError.
    * An empty frame skips scaling, since the scaler cannot be fitted on zero rows.
    * `fillna(0)` is applied to the whole frame, not only to `cols`.
    """
    df1 = df.copy()
    present = [c for c in cols if c in df1.columns]
    absent = [c for c in cols if c not in df1.columns]
    if present and len(df1):
        scaler = MinMaxScaler()
        df1[present] = scaler.fit_transform(df1[present])
    for c in absent:
        df1[c] = 0
    return df1.fillna(0)


In [26]:
#export
def get_dummies(df, cols=_dummies, prefix=_d_prefixes):
    """Return a copy of `df` with each column in `cols` replaced by 0/1 indicator columns.

    For the i-th column, missing values are filled with the string 'NONE', the column is
    encoded with a fresh `MultiLabelBinarizer`, and one column per class is appended,
    named `f'{prefix[i]}_{class}'`. The source column is removed.

    NOTE(review): `MultiLabelBinarizer` treats every cell as an iterable of labels, so a
    plain-string cell (including the 'NONE' filler) appears to be split into its
    characters -- presumably why `_features` lists single-letter names such as `seq_E`.
    Confirm before changing how cells are encoded.
    """
    df1 = df.copy()
    for i, col in enumerate(cols):
        values = df1.pop(col).fillna('NONE')
        mlb = MultiLabelBinarizer()
        # Reuse df1's index so the join lines rows up even when the index is not 0..n-1
        # (a default RangeIndex here silently misaligned such frames).
        indicators = pd.DataFrame(mlb.fit_transform(values), columns=mlb.classes_, index=df1.index)
        df1 = df1.join(indicators.add_prefix(f'{prefix[i]}_'))
    return df1


In [27]:
#export
_features = ['MRAcquisitionType', 'AngioFlag', 'SliceThickness', 'RepetitionTime',
       'EchoTime', 'EchoTrainLength', 'PixelSpacing', 'ContrastBolusAgent',
       'InversionTime', 'DiffusionBValue', 'seq_E', 'seq_EP', 'seq_G',
       'seq_GR', 'seq_I', 'seq_IR', 'seq_M', 'seq_P', 'seq_R', 'seq_S',
       'seq_SE', 'var_E', 'var_K', 'var_MP', 'var_MTC', 'var_N', 'var_O',
       'var_OSP', 'var_P', 'var_S', 'var_SK', 'var_SP', 'var_SS', 'var_TOF',
       'opt_1', 'opt_2', 'opt_A', 'opt_ACC_GEMS', 'opt_B', 'opt_C', 'opt_D',
       'opt_E', 'opt_EDR_GEMS', 'opt_EPI_GEMS', 'opt_F', 'opt_FAST_GEMS',
       'opt_FC', 'opt_FC_FREQ_AX_GEMS', 'opt_FC_SLICE_AX_GEMS',
       'opt_FILTERED_GEMS', 'opt_FR_GEMS', 'opt_FS', 'opt_FSA_GEMS',
       'opt_FSI_GEMS', 'opt_FSL_GEMS', 'opt_FSP_GEMS', 'opt_FSS_GEMS', 'opt_G',
       'opt_I', 'opt_IFLOW_GEMS', 'opt_IR', 'opt_IR_GEMS', 'opt_L', 'opt_M',
       'opt_MP_GEMS', 'opt_MT', 'opt_MT_GEMS', 'opt_NPW', 'opt_P', 'opt_PFF',
       'opt_PFP', 'opt_PROP_GEMS', 'opt_R', 'opt_RAMP_IS_GEMS', 'opt_S',
       'opt_SAT1', 'opt_SAT2', 'opt_SAT_GEMS', 'opt_SEQ_GEMS', 'opt_SP',
       'opt_T', 'opt_T2FLAIR_GEMS', 'opt_TRF_GEMS', 'opt_VASCTOF_GEMS',
       'opt_VB_GEMS', 'opt_W', 'opt_X', 'opt__', 'type_ADC', 'type_DIFFUSION', 'type_DERIVED']

In [28]:
#export
def preprocess(df, keep=_keep, dummies=_dummies, d_prefixes=_d_prefixes, binarize=_binarize, rescale=_rescale):
    """Preprocess metadata for Random Forest classifier to predict sequence type.

    Steps, in order:

    1. `exclude_other` removes rows that are not relevant MR series.
    2. Only the columns of `keep` that exist are retained.
    3. A multi-valued `PixelSpacing` entry is reduced to its first element.
    4. `get_dummies(dummies, d_prefixes)`, `make_binary_cols(binarize)` and
       `rescale_cols(rescale)` encode the remaining columns.
    5. Every name in `_features` that is still absent is added as a column of 0.

    Returns the resulting new frame; progress messages are printed along the way.
    """
    def _first_spacing(value):
        "First element of a multi-valued entry; scalars, strings and missing values pass through"
        if isinstance(value, str): return value
        try:
            return value[0]
        except (TypeError, IndexError):
            # float / NaN (not subscriptable) or an empty sequence
            return value

    print("Preprocessing metadata for Random Forest classifier.")
    df1 = exclude_other(df)
    print(f"Have received {df1.shape[0]} entries.")
    # `.copy()` so the column assignment below acts on an independent frame, not on a slice.
    df1 = df1[[col for col in keep if col in df1.columns]].copy()
    # The previous guard tested the bound method `Series.any` (always truthy), and the
    # unconditional `x[0]` failed on NaN or already-scalar spacing values.
    if 'PixelSpacing' in df1.columns:
        df1['PixelSpacing'] = df1['PixelSpacing'].apply(_first_spacing)
    df1 = get_dummies(df1, dummies, d_prefixes)
    df1 = make_binary_cols(df1, binarize)
    df1 = rescale_cols(df1, rescale)
    for f in _features:
        if f not in df1.columns:
            df1[f] = 0
    return df1



In [29]:
#export

def labels_from_file(label_path, column_names):
    """Read a comma-delimited label file that has no header row.

    `label_path` is passed to `pandas.read_csv`; `column_names` are then assigned as
    the column labels of the resulting frame, which is returned.
    """
    frame = pd.read_csv(label_path, header=None)
    frame.columns = column_names
    return frame



In [30]:
#export

def convert_labels_from_file(label_df):
    """Return a copy of `label_df` with text labels looked up from `abd_label_dict`.

    The `label_code` column is cast to `str` and used as the key into `abd_label_dict`
    to add three columns: `GT label` (the 'short' name), `GT plane` and `GT contrast`.
    `patientID` is cast to `str`. No columns derived from the file path are added here.
    """
    labels = label_df.copy()
    codes = labels['label_code'].astype(str)
    for column, field in (('GT label', 'short'), ('GT plane', 'plane'), ('GT contrast', 'contrast')):
        labels[column] = codes.apply(lambda code: abd_label_dict[code][field])
    labels['patientID'] = labels['patientID'].astype(str)
    return labels



In [31]:
#export

def expand_filename_into_columns(df, cols):
    """Split `df['fname']` on '/' and store the leading components in the columns named by `cols`.

    The i-th name in `cols` receives the i-th path component of every row.
    `df` is modified in place and also returned.
    """
    for position, column in enumerate(cols):
        df[column] = df['fname'].astype(str).apply(lambda path: path.split('/')[position])
    return df

        