In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os

In [2]:
mri_df = pd.read_csv('mri_progression.csv')
events_df = pd.read_csv('events_biomarkers.csv')

In [3]:
print(mri_df.head(1))

   Unnamed: 0  Image Data ID  Subject    Group Sex  Age  Visit Modality  \
0           3         406544     3817  Control   M   75      0      MRI   

                 Description       Type   Acq Date Format Downloaded  \
0  T2 in T1-anatomical space  Processed  1/15/2013  NiFTI        NaN   

   Progression  Stage  
0            0      0  


In [12]:
def event_index(image_id):
    img_row = mri_df[mri_df['Image Data ID'] == image_id]
    visit = int(img_row['Visit'])
    
    years_rows = events_df[events_df['PATNO'] == img_row['Subject'].iloc[0]]
    y1 = years_rows[years_rows['YEAR'] == 1]
    y2 = years_rows[years_rows['YEAR'] == 2]
    y3 = years_rows[years_rows['YEAR'] == 3]
    
    
    if visit < 12 and y1.shape[0] > 0:
        return y1.index[0]
    elif visit < 24 and y2.shape[0] > 0:
        return y2.index[0]
    elif visit < 36 and y3.shape[0] > 0:
        return y3.index[0]
    else:
        return -1
    
mri_df['Event ID'] = [event_index(i) for i in mri_df['Image Data ID']]

In [13]:
mri_df.head(1)

Unnamed: 0.1,Unnamed: 0,Image Data ID,Subject,Group,Sex,Age,Visit,Modality,Description,Type,Acq Date,Format,Downloaded,Progression,Stage,Event ID
0,3,406544,3817,Control,M,75,0,MRI,T2 in T1-anatomical space,Processed,1/15/2013,NiFTI,,0,0,348


In [14]:
no_prog_mri = mri_df[mri_df['Event ID'] == -1]
print(no_prog_mri.shape)

(2043, 16)


In [15]:
no_prog_mri.head(1)

Unnamed: 0.1,Unnamed: 0,Image Data ID,Subject,Group,Sex,Age,Visit,Modality,Description,Type,Acq Date,Format,Downloaded,Progression,Stage,Event ID
1,10,410941,3816,Control,M,66,0,MRI,T2 in T1-anatomical space,Processed,12/04/2012,NiFTI,,0,0,-1


In [16]:
print(mri_df.shape)

(4518, 16)


In [38]:
import pickle

features_train = pickle.load( open( "features_train.pickle", "rb" ) )
features_test = pickle.load( open( "features_test.pickle", "rb" ) )

In [43]:
def get_image_id(filename):
    parts = filename.split('/')
    patno = pd.to_numeric(parts[4])
    descr = ' '.join(parts[5].split('_'))
    
    pat_rows = mri_df[mri_df['Subject'] == patno]
    descr_rows = pat_rows[pat_rows['Description'] == descr]
    
    if descr_rows.shape[0] > 0:
        return descr_rows.iloc[0]['Image Data ID']
    
# get_image_id('../ppmi/cd-pd/PPMI/3122/Axial_PD-T2_TSE/2012-05-09_09_21_52.0/S151230')

In [44]:
def feature_row(filename, dataset):
    img = get_image_id(filename)
    if img:
        img_feat = dataset[filename].flatten()
        return pd.DataFrame([[img] + img_feat])

feature_df = pd.concat([feature_row(f, features) for f in features], ignore_index=True)
print(feature_df.shape)

(177, 409600)
