# Ullevål MRI protocol exploration

In [1]:
import pandas as pd
import pandera as pa
import numpy as np
import regex as re

* Load dataset and delete first empty column. 
* Name columns.
* Create column that indicates if child protocol or not

In [2]:
mr_protokoller = pd.read_csv('protokoller.csv')
mr_protokoller = mr_protokoller.iloc[:,1:]
mr_protokoller.columns = ['Protocol name', 'Nativ', 'Siemens', 'Philips', 'GE', 'Contrast-comments', 'CC-Siemens', 'CC-Philips', 'CC-GE', 'Post-contrast', 'PC-Siemens', 'PC-Philips', 'PC-GE',
       'AC-1', 'AC-2', 'AC-3', 'AC-4', 'AC-5', 'AV-6', 'AC-7', 'AC-8']
mr_protokoller.loc[mr_protokoller['Protocol name'].str.contains('BKC|Barn|BORB|BARN|BØNH', regex=True),'Barneprotokoll'] = 1
mr_protokoller.loc[mr_protokoller['Barneprotokoll'] != 1, 'Barneprotokoll'] = 0
mr_protokoller['Barneprotokoll'] = mr_protokoller['Barneprotokoll'].astype(int)
mr_protokoller = mr_protokoller.loc[mr_protokoller['Barneprotokoll'] == 0, :]


In [3]:
# Delete Loc from protocol
def del_loc(protocol_string):
    if protocol_string is np.nan:
        return protocol_string
    elif protocol_string[:3] == 'Loc':
        return protocol_string[4:]
    else:
        return protocol_string

### Start with Siemens adult protocols

In [4]:
mr_protokoller['Siemens'] = mr_protokoller['Siemens'].apply(del_loc) 
siemens_pre_contrast = mr_protokoller.iloc[:,:3]
siemens_pre_contrast

Unnamed: 0,Protocol name,Nativ,Siemens
0,MR KC1: Caput standard utredning (MRKC),Nativ,"Sag T1 3D mprage,Tra DWI,Sag 3D Flair FS,Tra T..."
1,MR KC2: Caput m/kontrast (MRKCK),Prekontrast,"Sag T1 3D Space,Tra DWI"
2,MR KC3: Caput Cerebralt infarkt(MRKCINF),Nativ,"Sag T1 TSE,Tra T2 4mm,Tra Flair 4mm,Tra SWI,T..."
3,MR KC4: Caput demyeliniserende sykdom (MRKCMS),Prekontrast,"Sag T1 3D Space,Tra DWI"
4,MR KC5: Caput Traume (MRKCTR),Nativ,"Sag T1 3D mprage,Sag 3D Flair,Tra T2 TSE,Tra S..."
...,...,...,...
57,MR PB2: Plexus brachialis traume,Nativ,"Cor T2 3D STIR Space,Cor T1 3D Space,Cor T2 3D..."
58,MR PB3: Plexus brachialis tumor,Prekontrast,"Cor T2,3D,STIR,Cor T1 3D Space,Cor T2 3D Space"
59,MR PL1: Plexus lumbosacralis standard,Nativ,"Cor T1 3D Vibe Dixon,Cor T2 3D Space STIR,Konf..."
60,MR PL2: Plexus lumbosacralis traume,Nativ,"Cor T1 3D Vibe Dixon,Cor T2 3D STIR Space"


In [5]:
# Make number of columns for dataframe that matches number of sequences
def make_columns(antall, word):
    n = 1
    columns = []
    for i in range(antall):
        name = word+'_'+str(n)
        n += 1
        columns.append(name)
    return columns 

In [6]:
siemens_antall_precontrast = siemens_pre_contrast['Siemens'].str.split(',', expand=True)
antall = len(siemens_antall_precontrast.columns)
siemens_antall_precontrast.columns = make_columns(antall, 'Siemens')
siemens_pre_contrast = siemens_pre_contrast[['Protocol name', 'Nativ']].join(siemens_antall_precontrast)
siemens_pre_contrast.to_csv('siemens.csv')
siemens_pre_contrast.head(4)

Unnamed: 0,Protocol name,Nativ,Siemens_1,Siemens_2,Siemens_3,Siemens_4,Siemens_5,Siemens_6,Siemens_7,Siemens_8,Siemens_9,Siemens_10,Siemens_11,Siemens_12,Siemens_13,Siemens_14
0,MR KC1: Caput standard utredning (MRKC),Nativ,Sag T1 3D mprage,Tra DWI,Sag 3D Flair FS,Tra T2 TSE,,,,,,,,,,
1,MR KC2: Caput m/kontrast (MRKCK),Prekontrast,Sag T1 3D Space,Tra DWI,,,,,,,,,,,,
2,MR KC3: Caput Cerebralt infarkt(MRKCINF),Nativ,Sag T1 TSE,Tra T2 4mm,Tra Flair 4mm,Tra SWI,Tra DWI resolve 4mm,,,,,,,,,
3,MR KC4: Caput demyeliniserende sykdom (MRKCMS),Prekontrast,Sag T1 3D Space,Tra DWI,,,,,,,,,,,,


In [7]:
plane = ['sag', 'cor', 'tra']
sequence_contrast = ['T1','T2','DWI','PD']
mode = ['2D','3D']
sequence_type = ['MPRAGE','SPACE','TSE','TOF','DIXON','VIBE']
term_dict = {'plane':plane, 'sequence contrast': sequence_contrast, 'mode': mode, 'sequence type': sequence_type}

In [8]:
def make_reg_ex_or(reg_terms):
    return ('|').join(reg_terms)
    

In [9]:
def search_term(element, key, reg_term):
    if isinstance(element, str):
        result = re.search(reg_term, element, re.IGNORECASE)
        if result:
            term = result.group(0)
            term = term.upper()
            #term = term[0].upper()+term[1:]
            data[key].append(term)
        else:
            data[key].append(np.nan)
    else:
        data[key].append(np.nan)

In [10]:
'2d'.upper()

'2D'

In [11]:
pandas_series = siemens_pre_contrast['Siemens_1']
data = {}.fromkeys(term_dict.keys(), [])
for entry in pandas_series.tolist():
        for column in data.keys():
                term = make_reg_ex_or(term_dict[column])
                search_term(entry, column, term)

In [12]:
pd.DataFrame(data).to_csv('plane_contrast.csv')

In [13]:
results = pd.DataFrame(data)
results

Unnamed: 0,plane,sequence contrast,mode,sequence type
0,SAG,SAG,SAG,SAG
1,T1,T1,T1,T1
2,3D,3D,3D,3D
3,MPRAGE,MPRAGE,MPRAGE,MPRAGE
4,SAG,SAG,SAG,SAG
...,...,...,...,...
243,VIBE,VIBE,VIBE,VIBE
244,COR,COR,COR,COR
245,T1,T1,T1,T1
246,3D,3D,3D,3D
