In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os

In [2]:
mri_df = pd.read_csv('mri_progression.csv')
events_df = pd.read_csv('events_biomarkers.csv')

In [3]:
def event_index(image_id):
    img_row = mri_df[mri_df['Image Data ID'] == image_id]
    visit = int(img_row['Visit'])
    
    years_rows = events_df[events_df['PATNO'] == img_row['Subject'].iloc[0]]
    y1 = years_rows[years_rows['YEAR'] == 1]
    y2 = years_rows[years_rows['YEAR'] == 2]
    y3 = years_rows[years_rows['YEAR'] == 3]
    
    
    if visit < 12 and y1.shape[0] > 0:
        return y1.index[0]
    elif visit < 24 and y2.shape[0] > 0:
        return y2.index[0]
    elif visit < 36 and y3.shape[0] > 0:
        return y3.index[0]
    else:
        return -1
    
mri_df['Event ID'] = [event_index(i) for i in mri_df['Image Data ID']]

In [4]:
train_ids1 = pd.read_csv('train_ids0.txt', header=None)
test_ids1 = pd.read_csv('test_ids0.txt', header=None)
train_y1 = pd.read_csv('train_y0.txt', header=None)
test_y1 = pd.read_csv('test_y0.txt', header=None)

In [5]:
print(train_ids1.head(1))

                                                   0
0  ../ppmi/cd-pd/PPMI/3817/Axial_PD-T2_TSE/2011-1...


In [6]:
def get_image_id(filename):
    parts = filename.split('/')
    patno = pd.to_numeric(parts[4])
    descr = ' '.join(parts[5].split('_'))
    
    pat_rows = mri_df[mri_df['Subject'] == patno]
    descr_rows = pat_rows[pat_rows['Description'] == descr]
    
    if descr_rows.shape[0] > 0:
        r = descr_rows.iloc[0]
        return [r['Image Data ID'], r['Event ID']]
    else:
        return [-1, -1]
    
train_ids1['Image Data ID'] = [get_image_id(f)[0] for f in train_ids1[0]]
test_ids1['Image Data ID'] = [get_image_id(f)[0] for f in test_ids1[0]]
# print(len(test_ids1.index))
# print(len([get_image_id(f)[0] for f in train_ids1[0]]))

train_ids1['Event ID'] = [get_image_id(f)[1] for f in train_ids1[0]]
test_ids1['Event ID'] = [get_image_id(f)[1] for f in test_ids1[0]]

In [7]:
train_ids1['Prediction'] = train_y1
test_ids1['Prediction'] = test_y1

In [8]:
def get_image_prediction(event_id, dataset):
    d_rows = dataset[dataset['Event ID'] == event_id]
    pred = d_rows['Prediction'].sum() / d_rows.shape[0]
    return pred

train_events = train_ids1['Event ID'].value_counts().index
events_train = events_df[events_df.index.isin(train_events)]
events_train['Prediction'] = [get_image_prediction(eid, train_ids1) for eid in events_train.index]
print(events_train.head(1))

test_events = test_ids1['Event ID'].value_counts().index
events_test = events_df[events_df.index.isin(test_events)]
events_test['Prediction'] = [get_image_prediction(eid, test_ids1) for eid in events_test.index]
print(events_test.head(1))

   Unnamed: 0  APOE_e4  APPRDX  EDUCYRS EVENT_ID  HISPLAT  HVLTFPRL  HVLTRDLY  \
1          16        0       2       14      V04        2         0        11   

   HVLTREC  MAPT_cat  ...  stai_trait    tau  tau_asyn  urate  Progression  \
1       12         1  ...          31  298.9   0.11694    321            0   

   race_white  race_black  race_asian  race_other  Prediction  
1         1.0         0.0         0.0         0.0         1.0  

[1 rows x 74 columns]
   Unnamed: 0  APOE_e4  APPRDX  EDUCYRS EVENT_ID  HISPLAT  HVLTFPRL  HVLTRDLY  \
0          13        0       2       16      V06        2         0         8   

   HVLTREC  MAPT_cat  ...  stai_trait    tau  tau_asyn  urate  Progression  \
0       12         2  ...          29  125.6   0.11285    464            0   

   race_white  race_black  race_asian  race_other  Prediction  
0         1.0         0.0         0.0         0.0         1.0  

[1 rows x 74 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]


In [9]:
events_train.to_csv('events_train0.csv')
events_test.to_csv('events_test0.csv')

In [None]:
a0 = pd.read_csv('train_ids0.txt', header=None)
b0 = pd.read_csv('train_ids0.txt', header=None)

a1 = 