# 00 Comparing subjects I have data for vs those in Randy's Database

In [1]:
# install required packages - commented out so it doesn't install every time
#%conda install -n Lauren openpyxl numpy pandas statsmodels plotnine matplotlib scikit-learn scipy mizani nbconvert pandoc pyreadstat kmodes seaborn

# import required packages
import openpyxl; 
import datetime;
import numpy as np;
import pandas as pd;
import statsmodels as sm;
import statsmodels.formula.api as smf;
import plotnine as p9;
import matplotlib;
#import sklearn;
import scipy;
from scipy.stats import linregress;
from scipy.stats import pearsonr;
import itertools
from sklearn import preprocessing 
import inspect
import pickle

data_folder_loc = "//admsyn/Primary/ADM/CustomerStudies/Rockefeller/Riluzole_Biomarkers/"
code_folder_loc = "//admsyn/homes/@DH-ADMDX/0/lauren.koenig-1606/code/Riluzole FDG/"

In [2]:
# read in data
all_data, all_data_age_correct = pd.read_pickle(code_folder_loc + '/output/02_data.pkl')

In [4]:

# define biomarker groups
FDG_columns = ['AC_gm','Avg_Hip','R_Hip', 'L_Hip', 'Avg_MedOrbFrontal', 'Avg_PCC', 'FRONTAL_gm', 'MTL_gm',  'Par_gm', 'PostCing_gm', 'Precun_gm', 'Temp', 'Temp_gm', 'CV1']

plasma_columns = [ 'Ab40', 'Ab42', 'Ab42_40', 'GFAP','NFL',  'pTau181', 'pTau217',  'pTau231', 'pTau181_Ab42']
cog_columns = ['Screening_MMSE', 'adascogtotal', 'bvrt', 'dstotal','tma','tmb',  'cowattotal', 'cdrtotal', 'cdrsum', 'adltotal', 'npitotal', 'gds']
FDG_columns_pons = ['Temp_gm_ponsref', 'MTL_gm_ponsref', 'PostCing_gm_ponsref', 'Par_gm_ponsref', 'Precun_gm_ponsref', 'AC_gm_ponsref']
FDG_columns_para2 = ['Temp_gm_refpara2', 'MTL_gm_refpara2', 'PostCing_gm_refpara2', 'Par_gm_refpara2', 'Precun_gm_refpara2', 'AC_gm_refpara2', 'Avg_PCC_refpara2']
vol_columns = ['VOL_Precun_Lz', 'VOL_Precun_Rz', 'VOL_InfPar_Lz', 'VOL_InfPar_Rz', 'VOL_Hip_Lz', 'VOL_Hip_Rz', 'VOL_TotalGrayz', 'VOL_Inf_Mid_Fus_Temp_Lz', 
'VOL_Inf_Mid_Fus_Temp_Rz','VOL__MidFront_Lz', 'VOL_MidFront_Rz']

# all imaging data

In [5]:
randy_db = pd.read_excel(data_folder_loc + "original/discrete/Rockefeller_Database__Ver27__090920.xlsx", sheet_name='RKF Database', skiprows=13)

In [6]:
randy_db['FDG_output'] = randy_db['Grey matter'].notna()

randy_db['FS_output'] = randy_db['AparcPl_meancurv_R_TRANSVTEMP'].replace('pending', np.nan).astype('float64').notna()
randy_db['FS_pending'] = randy_db['AparcPl_meancurv_R_TRANSVTEMP'].isin(['pending'])

randy_db['SPM_output'] = randy_db['Total'].replace('pending?', np.nan).notna()
randy_db['SPM_pending'] = randy_db['AparcPl_meancurv_R_TRANSVTEMP'].isin(['pending?'])


randy_db['PredictedClass_output'] = randy_db['PredictedClass:'].notna()


randy_db['any_mri_output'] = randy_db[[ 'FS_output', 'SPM_output', 'PredictedClass_output']].any(axis = 1)

In [7]:
randy_db.columns[0:100]

Index(['INDEX-2', 'ProcDate', 'FDG multiframe?', 'INDEX-1', 'FDG Visit',
       'Completed Protocol? (FDG)', 'Completed Protocol? (MRI)', 'Unnamed: 7',
       'Subject', 'AGE', 'Gender', 'Visit (ALPHA) FDG', 'Visit (ALPHA)  MRI',
       'Visit (ADM)', 'Image source', 'Unnamed: 15',
       'FDG Visit Date (from DICOM)', 'Unnamed: 17', 'FileName:', 'CV1:',
       'CV2:', 'CV3:', 'CV4:', 'CV5:', 'P_NL:', 'P_MA:', 'P_AD:', 'P_SAD:',
       'P_FTD:', 'P_LBD:', 'P_SD:', 'PredictedClass:', 'Unnamed: 32',
       'FileName:.1', 'CV1:.1', 'P_1.1:', 'P_1.2:', 'P_2.1:', 'P_2.2:',
       'P_4.1:', 'P_6.1:', 'P_6.2:', 'P_7.1:', 'P_8.1:', 'P_8.3:',
       'PredictedClass:.1', 'Unnamed: 46', 'LPT Source', 'x', 'Grey matter',
       'Whole Brain Without Ventricle', 'R Occipital Lobe', 'L Occipital Lobe',
       'R Inferior Parietal Lobe', 'L Inferior Parietal Lobe', 'R MFG',
       'L MFG', 'R Prefrontal', 'L Prefrontal', 'R Lateral Temporal Lobe',
       'L Lateral Temporal Lobe', 'R MTL', 'L MTL', 'R

In [8]:
randy_db['FileName:']

0                                                    NaN
1      n2a_sUR_i_n2a_rADNI_APE_792_201A__static-FDG__...
2                                                    NaN
3      n2a_sUR_i_n2a_rADNI_APE_792_201B__static-FDG__...
4                                                    NaN
                             ...                        
330                                                  NaN
331                                                  NaN
332                                                  NaN
333                                                  NaN
334                                                  NaN
Name: FileName:, Length: 335, dtype: object

In [9]:
randy_db['PredictedClass:'].isna().value_counts()

True     236
False     99
Name: PredictedClass:, dtype: int64

In [10]:
randy_db['FS_output'].value_counts()

False    260
True      75
Name: FS_output, dtype: int64

In [11]:
randy_db[['Completed Protocol? (FDG)','FDG_output']].value_counts()

Completed Protocol? (FDG)  FDG_output
Y                          True          84
X                          False         43
N                          True          15
                           False          1
dtype: int64

In [12]:
randy_db.loc[randy_db['Completed Protocol? (FDG)'].isin(['N']) & (randy_db['FDG_output']==True), 
             [
                 
                  'Subject', 'AGE', 'Gender', 'Visit (ADM)',       
          
        'FDG Visit', 'FDG Visit Date (from DICOM)', 'Unnamed: 17',    
        #'FDG multiframe?',  'Image source',  'ProcDate',  
       'Completed Protocol? (FDG)',
        'LPT Source', 'FDG_output',  
             ]
             ]

Unnamed: 0,Subject,AGE,Gender,Visit (ADM),FDG Visit,FDG Visit Date (from DICOM),Unnamed: 17,Completed Protocol? (FDG),LPT Source,FDG_output
21,RKF-APE-009,75,M,m00,Visit-1 only,2014-05-20 00:00:00,,N,lpt-n2a_sUR_i_n2a_rADNI_APE_792_209A__static-F...,True
61,RKF-APE-021,UNK,O,m00,Visit-1 only,2015-02-23 00:00:00,,N,lpt-i_n2a_sUR_rADNI_mc_APE_792_221a_nifti_4fra...,True
65,RKF-APE-022,UNK,O,m00,Visit-1 only,2015-02-24 00:00:00,,N,lpt-n2a_sUR_i_n2a_rADNI_mc_APE_792_222A__dyn-F...,True
69,RKF-APE-023,UNK,O,m00,Visit-1 only,2015-03-10 00:00:00,,N,lpt-n2a_sUR_i_n2a_rADNI_mc_APE_792_223A__dyn-F...,True
97,RKF-APE-035,UNK,O,m00,Visit-1 only,UNK,,N,lpt-i_n2a_sUR_rADNI_mc_APE_792_235a_nifti_4fra...,True
107,RKF-APE-039,UNK,O,m00,Visit-1 only,UNK,,N,lpt-i_n2a_sUR_rADNI_mc_APE_792_239a_nifti_4fra...,True
111,RKF-APE-040,UNK,O,m00,Visit-1 only,UNK,,N,lpt-i_n2a_sUR_rADNI_mc_APE_792_240a_nifti_4fra...,True
156,RKF-APE-060,UNK,O,m00,Visit-1 only,2016-08-23 00:00:00,,N,lpt-i_n2a_sUR_rADNI_mc_AEP_792_260A_multiframe...,True
166,RKF-APE-065,81,M,m00,Visit-1 only,2017-05-17 00:00:00,,N,lpt-i_n2a_sUR_rADNI_mc_APE_792_265A_mutiframe_...,True
176,RKF-APE-071,69,M,m00,Visit-1 only,2018-07-20 00:00:00,,N,lpt-i_n2a_sUR_rADNI_mc_APE_792_171A_multiframe...,True


In [13]:
randy_db[['MRI ReName',        '7-Character machine UID', 
]].notna().value_counts()

MRI ReName  7-Character machine UID
False       False                      217
True        True                       110
False       True                         4
True        False                        4
dtype: int64

In [14]:
randy_db[['any_mri_output', 
    'PredictedClass:', 
       ]].value_counts().sort_index()

any_mri_output  PredictedClass:
True            AD                 55
                LBD                 1
                MA                 10
                NL                  7
                SAD                25
                SD                  1
dtype: int64

In [15]:
randy_db.loc[randy_db['MRI ReName'].notna() & randy_db['any_mri_output']== False,
             [

       
       'MRI ReName',  'any_mri_output' 

       ]
             ].value_counts()

MRI ReName  any_mri_output
1.5456      False             1
1545.6000   False             1
dtype: int64

In [16]:
randy_db.loc[randy_db['MRI ReName'].notna() & randy_db['any_mri_output']== False,
             [
   
       'Subject', 'AGE', 'Gender', 'Visit (ADM)',       
          
       # 'FDG Visit', 'FDG Visit Date (from DICOM)', 'Unnamed: 17',    
        #'FDG multiframe?',  'Image source',  'ProcDate',  
       #'Completed Protocol? (FDG)',
       # 'LPT Source', 'FDG_output',  
       
       'MRI ReName',   
       #'Comment(s)', 'MRI Visit Date',
       'Unnamed: 218', 
       #'Completed Protocol? (MRI)',  
       #'7-Character machine UID', 
       #'Comment(s) -- MRIs as received',   'Image data format',
        #'YYYYMMDD', 'FileName:', 
'any_mri_output', 
    'PredictedClass:', 
       'FS_output', 'FS_pending', 
       'SPM_output', 'SPM_pending'
       ]
             ]

Unnamed: 0,Subject,AGE,Gender,Visit (ADM),MRI ReName,Unnamed: 218,any_mri_output,PredictedClass:,FS_output,FS_pending,SPM_output,SPM_pending
0,,,,,,,False,,False,False,False,False
4,,,,,,,False,,False,False,False,False
5,,,,,,,False,,False,False,False,False
6,,,,,,,False,,False,False,False,False
8,RKF-APE-005,81,M,m04,,2014-08-11 00:00:00,False,,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
330,,,,,,,False,,False,False,False,False
331,,,,,,,False,,False,False,False,False
332,,,,,,,False,,False,False,False,False
333,,,,,1.5456,,False,,False,False,False,False


In [17]:
randy_db[[ 'any_mri_output']].value_counts()

any_mri_output
False             222
True              113
dtype: int64

In [18]:
randy_db[[
   
       'Subject', 'AGE', 'Gender', 'Visit (ADM)',       
          
       # 'FDG Visit', 'FDG Visit Date (from DICOM)', 'Unnamed: 17',    
        #'FDG multiframe?',  'Image source',  'ProcDate',  
       #'Completed Protocol? (FDG)',
       # 'LPT Source', 'FDG_output',  
       
       'MRI ReName', 
       #'Comment(s)',   'MRI Visit Date', 'Unnamed: 218', 'Completed Protocol? (MRI)',  
       #'Comment(s) -- MRIs as received',   'Image data format',
       # 'YYYYMMDD', 
       '7-Character machine UID',
       'FileName:', 
'any_mri_output', 
    'PredictedClass:', 
       'FS_output', 'FS_pending', 
       'SPM_output', 'SPM_pending'
       ]]

Unnamed: 0,Subject,AGE,Gender,Visit (ADM),MRI ReName,7-Character machine UID,FileName:,any_mri_output,PredictedClass:,FS_output,FS_pending,SPM_output,SPM_pending
0,,,,,,,,False,,False,False,False,False
1,RKF-APE-001,76,M,m00,RKF-APE-001__m00__T1-a__20131125__e010542.nii,e010542,n2a_sUR_i_n2a_rADNI_APE_792_201A__static-FDG__...,True,MA,True,False,True,False
2,RKF-APE-001,76,M,m04,RKF-APE-001__m04__T1-b__20140403__e010678.nii,e010678,,True,,True,False,True,False
3,RKF-APE-001,76,M,m06,RKF-APE-001__m06__T1-c__20140701__n010799.nii,n010799,n2a_sUR_i_n2a_rADNI_APE_792_201B__static-FDG__...,True,MA,True,False,True,False
4,,,,,,,,False,,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
330,,,,,,,,False,,False,False,False,False
331,,,,,,,,False,,False,False,False,False
332,,,,,,0.5035,,False,,False,False,False,False
333,,,,,1.5456,0.5035,,False,,False,False,False,False


In [19]:
randy_db.columns

Index(['INDEX-2', 'ProcDate', 'FDG multiframe?', 'INDEX-1', 'FDG Visit',
       'Completed Protocol? (FDG)', 'Completed Protocol? (MRI)', 'Unnamed: 7',
       'Subject', 'AGE',
       ...
       'AparcPl_meancurv_R_TRANSVTEMP', 'INDEX-1.1', 'INDEX-2.1', 'FDG_output',
       'FS_output', 'FS_pending', 'SPM_output', 'SPM_pending',
       'PredictedClass_output', 'any_mri_output'],
      dtype='object', length=1923)

In [20]:
randy_db[['INDEX-2', 'ProcDate', 'FDG multiframe?', 'INDEX-1', 'FDG Visit',
       'Completed Protocol? (FDG)', 'Completed Protocol? (MRI)', 'Unnamed: 7',
       'Subject', 'AGE', 'Gender', 'Visit (ALPHA) FDG', 'Visit (ALPHA)  MRI',
       'Visit (ADM)', 'Image source', 'Unnamed: 15',
       'FDG Visit Date (from DICOM)', 'Unnamed: 17', 'FileName:', 'PredictedClass:', 'Grey matter', 'L_Hippocampal_tail']]

Unnamed: 0,INDEX-2,ProcDate,FDG multiframe?,INDEX-1,FDG Visit,Completed Protocol? (FDG),Completed Protocol? (MRI),Unnamed: 7,Subject,AGE,...,Visit (ALPHA) MRI,Visit (ADM),Image source,Unnamed: 15,FDG Visit Date (from DICOM),Unnamed: 17,FileName:,PredictedClass:,Grey matter,L_Hippocampal_tail
0,1,,,,,,,,,,...,,,,,,,,,,
1,2,20150320,,1.0,Visit-1 (FDG),Y,Y,,RKF-APE-001,76,...,A,m00,static,,2013-11-26 00:00:00,2013-11-25 00:00:00,n2a_sUR_i_n2a_rADNI_APE_792_201A__static-FDG__...,MA,147453.0,394.74403
2,3,20190604,,2.0,Visit-1.5 (MRI),X,MID,,RKF-APE-001,76,...,B,m04,,,,,,,,387.888615
3,4,20150320,,3.0,Visit-2 (FDG),Y,Y,,RKF-APE-001,76,...,C,m06,static,,2014-06-30 00:00:00,2014-06-30 00:00:00,n2a_sUR_i_n2a_rADNI_APE_792_201B__static-FDG__...,MA,148920.0,337.627806
4,5,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
330,331,,,,,,,,,,...,,,,,,,,,,
331,332,,,,,,,,,,...,,,,,,,,,,
332,333,,,,,,,,,,...,,,,,,,,,,
333,334,,,,,,,,,,...,,,,,,,,,,


In [21]:
randy_db_subset = randy_db[[
       'Subject', 'AGE', 'Gender', 'Visit (ADM)',       
        'LPT Source', 'FileName:', 'FDG_output',  
       'MRI ReName',  'FS_output'

]]

randy_db_subset = randy_db_subset[randy_db_subset['Visit (ADM)'].isin(['m00', 'm00x', np.nan])].reset_index(drop = True)
randy_db_subset = randy_db_subset[randy_db_subset['Subject'].notna()].reset_index(drop = True)

#randy_db_subset = randy_db_subset[(randy_db_subset['FDG_output'] == True) | (randy_db_subset['FS_output'] == True) | (randy_db_subset['MRI ReName'].notna())]


In [22]:
randy_db_subset['Subject_Label'] = randy_db_subset['Subject'].str[4:7].str.replace('APE', 'APE-792')
randy_db_subset['Subject_Num'] = randy_db_subset['Subject'].str[8:].astype('int')
randy_db_subset['ID_ADM'] = randy_db_subset['Subject_Label'] + '_' + randy_db_subset['Subject_Num'].astype('str')

In [23]:
randy_db_subset

Unnamed: 0,Subject,AGE,Gender,Visit (ADM),LPT Source,FileName:,FDG_output,MRI ReName,FS_output,Subject_Label,Subject_Num,ID_ADM
0,RKF-APE-001,76,M,m00,lpt-n2a_sUR_i_n2a_rADNI_APE_792_201A__static-F...,n2a_sUR_i_n2a_rADNI_APE_792_201A__static-FDG__...,True,RKF-APE-001__m00__T1-a__20131125__e010542.nii,True,APE-792,1,APE-792_1
1,RKF-APE-005,81,M,m00,lpt-n2a_sUR_i_n2a_rADNI_APE_792_205A__static-F...,n2a_sUR_i_n2a_rADNI_APE_792_205A__static-FDG__...,True,RKF-APE-005__m00__T1-a__20140428__e010707.nii,True,APE-792,5,APE-792_5
2,RKF-APE-007,67,F,m00,lpt-sUR_i_n2a_rADNI_APE_792_207A_frame1__nmsks...,sUR_i_n2a_rADNI_APE_792_207A_frame1__nmsks.sn.img,True,RKF-APE-007__m00__T1-a__20140418__e010695.nii,True,APE-792,7,APE-792_7
3,RKF-APE-009,75,M,m00,lpt-n2a_sUR_i_n2a_rADNI_APE_792_209A__static-F...,n2a_sUR_i_n2a_rADNI_APE_792_209A__static-FDG__...,True,RKF-APE-009__m00__T1-a__20140520__e010737.nii,True,APE-792,9,APE-792_9
4,RKF-APE-010,58,F,m00,lpt-n2a_sUR_i_n2a_rADNI_APE_792_210A__static-F...,n2a_sUR_i_n2a_rADNI_APE_792_210A__static-FDG__...,True,RKF-APE-010__m00__T1-a__20140530__e010753.nii,True,APE-792,10,APE-792_10
5,RKF-APE-011,81,F,m00,lpt-n2a_sUR_i_n2a_rADNI_APE_792_211A__static-F...,n2a_sUR_i_n2a_rADNI_APE_792_211A__static-FDG__...,True,RKF-APE-011__m00__T1-a__20140527__e010746.nii,True,APE-792,11,APE-792_11
6,RKF-APE-013,80,F,m00,lpt-n2a_sUR_i_n2a_rADNI_mc_APE_792_213A__dyn-F...,n2a_sUR_i_n2a_rADNI_mc_APE_792_213A__dyn-FDG_0...,True,RKF-APE-013__m00__T1-a__20140630__e010796.nii,True,APE-792,13,APE-792_13
7,RKF-APE-016,77,F,m00,lpt-n2a_sUR_i_n2a_rADNI_mc_APE_792_216A__dyn-F...,n2a_sUR_i_n2a_rADNI_mc_APE_792_216A__dyn-FDG_0...,True,RKF-APE-016__m00__T1-a__20141121__n115548.nii,True,APE-792,16,APE-792_16
8,RKF-APE-018,73,F,m00,lpt-n2a_sUR_i_n2a_rADNI_mc_APE_792_218A__dyn-F...,n2a_sUR_i_n2a_rADNI_mc_APE_792_218A__dyn-FDG_0...,True,RKF-APE-018__m00__T1-a__20141125__n102556.nii,True,APE-792,18,APE-792_18
9,RKF-APE-020,71,F,m00,lpt-n2a_sUR_i_n2a_rADNI_mc_APE_792_220A__dyn-F...,n2a_sUR_i_n2a_rADNI_mc_APE_792_220A__dyn-FDG_0...,True,RKF-APE-020__m00__T1-a__20141126__n112426.nii,True,APE-792,20,APE-792_20


In [24]:
randy_db_subset['Subject'].value_counts()

RKF-APE-001    1
RKF-RIL-013    1
RKF-APE-066    1
RKF-APE-071    1
RKF-RIL-001    1
RKF-RIL-002    1
RKF-RIL-003    1
RKF-RIL-005    1
RKF-RIL-007    1
RKF-RIL-008    1
RKF-RIL-009    1
RKF-RIL-010    1
RKF-RIL-011    1
RKF-RIL-012    1
RKF-RIL-014    1
RKF-APE-005    1
RKF-RIL-015    1
RKF-RIL-017    1
RKF-RIL-018    1
RKF-RIL-019    1
RKF-RIL-020    1
RKF-RIL-021    1
RKF-RIL-022    1
RKF-RIL-024    1
RKF-RIL-025    1
RKF-RIL-027    1
RKF-RIL-028    1
RKF-RIL-029    1
RKF-APE-065    1
RKF-APE-062    1
RKF-APE-060    1
RKF-APE-058    1
RKF-APE-007    1
RKF-APE-009    1
RKF-APE-010    1
RKF-APE-011    1
RKF-APE-013    1
RKF-APE-016    1
RKF-APE-018    1
RKF-APE-020    1
RKF-APE-021    1
RKF-APE-022    1
RKF-APE-023    1
RKF-APE-026    1
RKF-APE-027    1
RKF-APE-033    1
RKF-APE-034    1
RKF-APE-035    1
RKF-APE-036    1
RKF-APE-039    1
RKF-APE-040    1
RKF-APE-046    1
RKF-APE-047    1
RKF-APE-050    1
RKF-APE-053    1
RKF-APE-055    1
RKF-APE-056    1
RKF-RIL-031    1
Name: Subject,

## compare to plasma

In [25]:
randy_db_subset

Unnamed: 0,Subject,AGE,Gender,Visit (ADM),LPT Source,FileName:,FDG_output,MRI ReName,FS_output,Subject_Label,Subject_Num,ID_ADM
0,RKF-APE-001,76,M,m00,lpt-n2a_sUR_i_n2a_rADNI_APE_792_201A__static-F...,n2a_sUR_i_n2a_rADNI_APE_792_201A__static-FDG__...,True,RKF-APE-001__m00__T1-a__20131125__e010542.nii,True,APE-792,1,APE-792_1
1,RKF-APE-005,81,M,m00,lpt-n2a_sUR_i_n2a_rADNI_APE_792_205A__static-F...,n2a_sUR_i_n2a_rADNI_APE_792_205A__static-FDG__...,True,RKF-APE-005__m00__T1-a__20140428__e010707.nii,True,APE-792,5,APE-792_5
2,RKF-APE-007,67,F,m00,lpt-sUR_i_n2a_rADNI_APE_792_207A_frame1__nmsks...,sUR_i_n2a_rADNI_APE_792_207A_frame1__nmsks.sn.img,True,RKF-APE-007__m00__T1-a__20140418__e010695.nii,True,APE-792,7,APE-792_7
3,RKF-APE-009,75,M,m00,lpt-n2a_sUR_i_n2a_rADNI_APE_792_209A__static-F...,n2a_sUR_i_n2a_rADNI_APE_792_209A__static-FDG__...,True,RKF-APE-009__m00__T1-a__20140520__e010737.nii,True,APE-792,9,APE-792_9
4,RKF-APE-010,58,F,m00,lpt-n2a_sUR_i_n2a_rADNI_APE_792_210A__static-F...,n2a_sUR_i_n2a_rADNI_APE_792_210A__static-FDG__...,True,RKF-APE-010__m00__T1-a__20140530__e010753.nii,True,APE-792,10,APE-792_10
5,RKF-APE-011,81,F,m00,lpt-n2a_sUR_i_n2a_rADNI_APE_792_211A__static-F...,n2a_sUR_i_n2a_rADNI_APE_792_211A__static-FDG__...,True,RKF-APE-011__m00__T1-a__20140527__e010746.nii,True,APE-792,11,APE-792_11
6,RKF-APE-013,80,F,m00,lpt-n2a_sUR_i_n2a_rADNI_mc_APE_792_213A__dyn-F...,n2a_sUR_i_n2a_rADNI_mc_APE_792_213A__dyn-FDG_0...,True,RKF-APE-013__m00__T1-a__20140630__e010796.nii,True,APE-792,13,APE-792_13
7,RKF-APE-016,77,F,m00,lpt-n2a_sUR_i_n2a_rADNI_mc_APE_792_216A__dyn-F...,n2a_sUR_i_n2a_rADNI_mc_APE_792_216A__dyn-FDG_0...,True,RKF-APE-016__m00__T1-a__20141121__n115548.nii,True,APE-792,16,APE-792_16
8,RKF-APE-018,73,F,m00,lpt-n2a_sUR_i_n2a_rADNI_mc_APE_792_218A__dyn-F...,n2a_sUR_i_n2a_rADNI_mc_APE_792_218A__dyn-FDG_0...,True,RKF-APE-018__m00__T1-a__20141125__n102556.nii,True,APE-792,18,APE-792_18
9,RKF-APE-020,71,F,m00,lpt-n2a_sUR_i_n2a_rADNI_mc_APE_792_220A__dyn-F...,n2a_sUR_i_n2a_rADNI_mc_APE_792_220A__dyn-FDG_0...,True,RKF-APE-020__m00__T1-a__20141126__n112426.nii,True,APE-792,20,APE-792_20


In [26]:
all_data_base = all_data[all_data['timepoint'].isin(['base'])]

In [27]:
randy_db_subset.columns[randy_db_subset.columns.isin(all_data_base.columns)]

Index(['Subject_Label', 'ID_ADM'], dtype='object')

In [28]:
all_imaging_plasma = pd.merge(
all_data_base, randy_db_subset, how = 'outer'
    
)

In [29]:
all_imaging_plasma['has_plasma'] = all_imaging_plasma[['Ab40',
 'Ab42',
 'GFAP',
 'NFL',
 'pTau181',
 'pTau217',
 'pTau231']].notna().any(axis = 1)

In [30]:
all_imaging_plasma['old_has_FDG'] = all_imaging_plasma[FDG_columns].notna().any(axis = 1)

In [31]:
all_imaging_plasma = all_imaging_plasma[['ID_ADM', 'timepoint', 'Subject_ID', 'Subject', 'AGE', 'Gender',
       'Visit (ADM)', 'FileName:', 'FDG_output', 'MRI ReName', 'FS_output', 'Subject_Num',
       'has_plasma', 'old_has_FDG']]

In [32]:
all_imaging_plasma

Unnamed: 0,ID_ADM,timepoint,Subject_ID,Subject,AGE,Gender,Visit (ADM),FileName:,FDG_output,MRI ReName,FS_output,Subject_Num,has_plasma,old_has_FDG
0,APE-792_1,base,APE-792-001,RKF-APE-001,76,M,m00,n2a_sUR_i_n2a_rADNI_APE_792_201A__static-FDG__...,True,RKF-APE-001__m00__T1-a__20131125__e010542.nii,True,1,True,True
1,APE-792_10,base,APE-792-010,RKF-APE-010,58,F,m00,n2a_sUR_i_n2a_rADNI_APE_792_210A__static-FDG__...,True,RKF-APE-010__m00__T1-a__20140530__e010753.nii,True,10,True,True
2,APE-792_11,base,APE-792-011,RKF-APE-011,81,F,m00,n2a_sUR_i_n2a_rADNI_APE_792_211A__static-FDG__...,True,RKF-APE-011__m00__T1-a__20140527__e010746.nii,True,11,True,True
3,APE-792_13,base,APE-792-013,RKF-APE-013,80,F,m00,n2a_sUR_i_n2a_rADNI_mc_APE_792_213A__dyn-FDG_0...,True,RKF-APE-013__m00__T1-a__20140630__e010796.nii,True,13,True,True
4,APE-792_16,base,APE-792-016,RKF-APE-016,77,F,m00,n2a_sUR_i_n2a_rADNI_mc_APE_792_216A__dyn-FDG_0...,True,RKF-APE-016__m00__T1-a__20141121__n115548.nii,True,16,True,True
5,APE-792_18,base,APE-792-018,RKF-APE-018,73,F,m00,n2a_sUR_i_n2a_rADNI_mc_APE_792_218A__dyn-FDG_0...,True,RKF-APE-018__m00__T1-a__20141125__n102556.nii,True,18,True,True
6,APE-792_20,base,APE-792-020,RKF-APE-020,71,F,m00,n2a_sUR_i_n2a_rADNI_mc_APE_792_220A__dyn-FDG_0...,True,RKF-APE-020__m00__T1-a__20141126__n112426.nii,True,20,True,True
7,APE-792_22,base,,RKF-APE-022,UNK,O,m00,n2a_sUR_i_n2a_rADNI_mc_APE_792_222A__dyn-FDG_0...,True,RKF-APE-022__m00__T1-a__20150224__n123534.nii,True,22,True,False
8,APE-792_26,base,APE-792-026,RKF-APE-026,72 (DICOM says 74),F,m00,sUR_i_n2a_rADNI_mc_APE-792-226A__dyn-FDG_00001...,True,PENDING,False,26,True,True
9,APE-792_27,base,APE-792-027,RKF-APE-027,84,F,m00,sUR_i_n2a_rADNI_mc_APE-792-227A__dyn-FDG_00001...,True,RKF-APE-027__m00__T1-a__2015xxxx__saNiFTI.nii,True,27,True,True


In [33]:
all_imaging_plasma[['FDG_output', 'has_plasma','old_has_FDG'  ]].value_counts().sort_index()

FDG_output  has_plasma  old_has_FDG
False       False       False           1
True        False       False           7
                        True            3
            True        False           8
                        True           39
dtype: int64

In [34]:
all_imaging_plasma[[ 'has_plasma','old_has_FDG', 'FS_output'  ]].value_counts().sort_index()

has_plasma  old_has_FDG  FS_output
False       False        False         3
                         True          5
            True         True          3
True        False        False         2
                         True          6
            True         False        10
                         True         29
dtype: int64

In [35]:
all_imaging_plasma

Unnamed: 0,ID_ADM,timepoint,Subject_ID,Subject,AGE,Gender,Visit (ADM),FileName:,FDG_output,MRI ReName,FS_output,Subject_Num,has_plasma,old_has_FDG
0,APE-792_1,base,APE-792-001,RKF-APE-001,76,M,m00,n2a_sUR_i_n2a_rADNI_APE_792_201A__static-FDG__...,True,RKF-APE-001__m00__T1-a__20131125__e010542.nii,True,1,True,True
1,APE-792_10,base,APE-792-010,RKF-APE-010,58,F,m00,n2a_sUR_i_n2a_rADNI_APE_792_210A__static-FDG__...,True,RKF-APE-010__m00__T1-a__20140530__e010753.nii,True,10,True,True
2,APE-792_11,base,APE-792-011,RKF-APE-011,81,F,m00,n2a_sUR_i_n2a_rADNI_APE_792_211A__static-FDG__...,True,RKF-APE-011__m00__T1-a__20140527__e010746.nii,True,11,True,True
3,APE-792_13,base,APE-792-013,RKF-APE-013,80,F,m00,n2a_sUR_i_n2a_rADNI_mc_APE_792_213A__dyn-FDG_0...,True,RKF-APE-013__m00__T1-a__20140630__e010796.nii,True,13,True,True
4,APE-792_16,base,APE-792-016,RKF-APE-016,77,F,m00,n2a_sUR_i_n2a_rADNI_mc_APE_792_216A__dyn-FDG_0...,True,RKF-APE-016__m00__T1-a__20141121__n115548.nii,True,16,True,True
5,APE-792_18,base,APE-792-018,RKF-APE-018,73,F,m00,n2a_sUR_i_n2a_rADNI_mc_APE_792_218A__dyn-FDG_0...,True,RKF-APE-018__m00__T1-a__20141125__n102556.nii,True,18,True,True
6,APE-792_20,base,APE-792-020,RKF-APE-020,71,F,m00,n2a_sUR_i_n2a_rADNI_mc_APE_792_220A__dyn-FDG_0...,True,RKF-APE-020__m00__T1-a__20141126__n112426.nii,True,20,True,True
7,APE-792_22,base,,RKF-APE-022,UNK,O,m00,n2a_sUR_i_n2a_rADNI_mc_APE_792_222A__dyn-FDG_0...,True,RKF-APE-022__m00__T1-a__20150224__n123534.nii,True,22,True,False
8,APE-792_26,base,APE-792-026,RKF-APE-026,72 (DICOM says 74),F,m00,sUR_i_n2a_rADNI_mc_APE-792-226A__dyn-FDG_00001...,True,PENDING,False,26,True,True
9,APE-792_27,base,APE-792-027,RKF-APE-027,84,F,m00,sUR_i_n2a_rADNI_mc_APE-792-227A__dyn-FDG_00001...,True,RKF-APE-027__m00__T1-a__2015xxxx__saNiFTI.nii,True,27,True,True


# Main Comparisons

new FDG data that has plasma that we'll add to dataset:  
APE-792_22  
    - originally did not have demographics  
    - 67 yo F  
    - 1/3 backfills that required FDG sampling
    - listed in X:\CustomerStudies\Rockefeller\RIL_n3_FDG-GM-Sampling_072924.xlsx
APE-792_65  
    - Listed in the X:\CustomerStudies\Rockefeller\RIL_n8_FDG-GM-Sampling_072224.xlsx
APE-792_9  
    - non-completer  
    - will be added  
    - 1/3 backfills that required FDG sampling
    - listed in X:\CustomerStudies\Rockefeller\RIL_n3_FDG-GM-Sampling_072924.xlsx
RIL_17  
    - Listed in the X:\CustomerStudies\Rockefeller\RIL_n8_FDG-GM-Sampling_072224.xlsx
RIL_24  
    - Listed in the X:\CustomerStudies\Rockefeller\RIL_n8_FDG-GM-Sampling_072224.xlsx


new FDG data that has plasma but that can't be added:  
APE-792_39  
    - originally did not have demographics  
    - was excluded by neuropsychological testing as not meeting criteria for AD  
    - Listed in the X:\CustomerStudies\Rockefeller\RIL_n8_FDG-GM-Sampling_072224.xlsx
APE-792_60  
    - never received a T1. Randy says: 'I have no record of an acquisition date. I did ask multiple times 2019/2020 for this.'  
    - originally did not have demographics  
    - 84yo F  
RIL_5  
    - 'completer'  
    - originally did not have demographics  
    - 64 yo F  
    - never received T1. Randy says 'I have no record of an acquisition date. I did ask multiple times 2019/2020 for this.'


Additional ppts Randy was looking at adding (no plasma):
  
APE_23  
    - originally did not have demographics  
    -  was excluded because he had a more Lewy Body Disease pattern. He is male and age 70yo
    - 1/3 of 'backfills' that completed without issue
    - Listed in the X:\CustomerStudies\Rockefeller\RIL_n8_FDG-GM-Sampling_072224.xlsx
APE_35  
    - originally did not have demographics  
    -  was excluded because Dawn said the FDG Pet was not characteristic of AD. Female and 75yo
    - 1/3 of 'backfills' that completed without issue
    - Listed in the X:\CustomerStudies\Rockefeller\RIL_n8_FDG-GM-Sampling_072224.xlsx
APE_71  
    - Listed in the X:\CustomerStudies\Rockefeller\RIL_n8_FDG-GM-Sampling_072224.xlsx
RIL_13  
 - non-completer
 - Randy said  'appears to have an issue in the R. Inf. Frontal, but the FDG Co-registers just fine.  I’ll be curious how CI treats this'
 - 1/3 backfills that required FDG sampling
 - listed in X:\CustomerStudies\Rockefeller\RIL_n3_FDG-GM-Sampling_072924.xlsx
RIL_21  
    - originally did not have demographics  
    - was excluded as not a characteristic pattern of AD on FDG PET. Female and 72 yo.
    - 1/3 of 'backfills' that completed without issue
    - Listed in the X:\CustomerStudies\Rockefeller\RIL_n8_FDG-GM-Sampling_072224.xlsx


Other subjects Randy's mentioned:
 The two subjects that exceeded time limit did so again. (APE-046; RIL-009). Those two did complete the protocol, so some sort of time-extension to the hardcode might be needed.
APE-062 (also a “completer”) did finish CI this time (had had an issue with “rh.hippoSfLabels-T1.v10.FSvoxelSpace.mgz”, but that seemed resolved this time around).


In [36]:
# new FDG data that has plasma
print(all_imaging_plasma.loc[(all_imaging_plasma['has_plasma']==True) & (all_imaging_plasma['FDG_output']==True) & (all_imaging_plasma['old_has_FDG']==False), 'ID_ADM'].to_list())
print(all_imaging_plasma.loc[(all_imaging_plasma['has_plasma']==True) & (all_imaging_plasma['FDG_output']==True) & (all_imaging_plasma['old_has_FDG']==False), 'FileName:'].to_list())

all_imaging_plasma.loc[(all_imaging_plasma['has_plasma']==True) & (all_imaging_plasma['old_has_FDG']==False)]

['APE-792_22', 'APE-792_39', 'APE-792_60', 'APE-792_65', 'APE-792_9', 'RIL_17', 'RIL_24', 'RIL_5']
['n2a_sUR_i_n2a_rADNI_mc_APE_792_222A__dyn-FDG_00001__nmsks.sn.img', 'i_n2a_sUR_rADNI_mc_APE_792_239a_nifti_4frame_00001__nmsks.sn.img', 'i_n2a_sUR_rADNI_mc_AEP_792_260A_multiframe_00001__nmsks.sn.img', 'i_n2a_sUR_rADNI_mc_APE_792_265A_mutiframe_00001__nmsks.sn.img', 'n2a_sUR_i_n2a_rADNI_APE_792_209A__static-FDG__nmsks.sn.img', 'i_n2a_sUR_rADNI_mc_uo_RIL_S_2017A_multiframe_00001__nmsks.sn.img', 'i_n2a_sUR_rADNI_mc_o_RKF-RIL_S_2024A_multiframe_00001__nmsks.sn.img', 'i_n2a_sUR_rADNI_mc_uo_RIL_S_2005A_multiframe_00001__nmsks.sn.img']


Unnamed: 0,ID_ADM,timepoint,Subject_ID,Subject,AGE,Gender,Visit (ADM),FileName:,FDG_output,MRI ReName,FS_output,Subject_Num,has_plasma,old_has_FDG
7,APE-792_22,base,,RKF-APE-022,UNK,O,m00,n2a_sUR_i_n2a_rADNI_mc_APE_792_222A__dyn-FDG_0...,True,RKF-APE-022__m00__T1-a__20150224__n123534.nii,True,22,True,False
13,APE-792_39,base,,RKF-APE-039,UNK,O,m00,i_n2a_sUR_rADNI_mc_APE_792_239a_nifti_4frame_0...,True,RKF-APE-039__m00__T1-a__2015xxxx__saNiFTI.nii,True,39,True,False
21,APE-792_60,base,,RKF-APE-060,UNK,O,m00,i_n2a_sUR_rADNI_mc_AEP_792_260A_multiframe_000...,True,PENDING?,False,60,True,False
23,APE-792_65,base,,RKF-APE-065,81,M,m00,i_n2a_sUR_rADNI_mc_APE_792_265A_mutiframe_0000...,True,RKF-APE-065__m00__T1-a__20170517__e012169.nii,True,65,True,False
26,APE-792_9,base,,RKF-APE-009,75,M,m00,n2a_sUR_i_n2a_rADNI_APE_792_209A__static-FDG__...,True,RKF-APE-009__m00__T1-a__20140520__e010737.nii,True,9,True,False
33,RIL_17,base,,RKF-RIL-017,76,F,m00,i_n2a_sUR_rADNI_mc_uo_RIL_S_2017A_multiframe_0...,True,RKF-RIL-017__m00__T1-a__20190225__e013451.nii,True,17,True,False
39,RIL_24,base,,RKF-RIL-024,59,F,m00,i_n2a_sUR_rADNI_mc_o_RKF-RIL_S_2024A_multifram...,True,RKF-RIL-024__m00__T1-a__20190724__e013826.nii,True,24,True,False
46,RIL_5,base,,RKF-RIL-005,UNK,O,,i_n2a_sUR_rADNI_mc_uo_RIL_S_2005A_multiframe_0...,True,PENDING?,False,5,True,False


In [37]:
# old FDG data that doesn't have plasma

print(all_imaging_plasma.loc[(all_imaging_plasma['has_plasma']==False) & (all_imaging_plasma['FDG_output']==True) & (all_imaging_plasma['old_has_FDG']==True), 'ID_ADM'].to_list())
all_imaging_plasma.loc[(all_imaging_plasma['has_plasma']==False) & (all_imaging_plasma['FDG_output']==True) & (all_imaging_plasma['old_has_FDG']==True)]

['APE-792_58', 'RIL_20', 'RIL_22']


Unnamed: 0,ID_ADM,timepoint,Subject_ID,Subject,AGE,Gender,Visit (ADM),FileName:,FDG_output,MRI ReName,FS_output,Subject_Num,has_plasma,old_has_FDG
20,APE-792_58,base,APE-792-058,RKF-APE-058,78,M,m00,i_n2a_sUR_rADNI_mc_APE_792_258A_multiframe_000...,True,RKF-APE-058__m00__T1-a__20161011__e011842.nii,True,58,False,True
37,RIL_20,base,RIL-020,RKF-RIL-020,81,F,m00,i_n2a_sUR_rADNI_mc_o_RIL_S_2020A_multiframe_00...,True,RKF-RIL-020__m00__T1-a__20190410__e013552.nii,True,20,False,True
38,RIL_22,base,RIL-022,RKF-RIL-022,76,F,m00,i_n2a_sUR_rADNI_mc_o_RIL_S_2022A_multiframe_00...,True,RKF-RIL-022__m00__T1-a__20190528__e013678.nii,True,22,False,True


In [38]:
# new FDG data that doesn't have plasma
print(all_imaging_plasma.loc[(all_imaging_plasma['has_plasma']==False) & (all_imaging_plasma['FDG_output']==True) & (all_imaging_plasma['old_has_FDG']==False), 'ID_ADM'].to_list())
all_imaging_plasma.loc[(all_imaging_plasma['has_plasma']==False) & (all_imaging_plasma['FDG_output']==True) & (all_imaging_plasma['old_has_FDG']==False)]

['APE-792_21', 'APE-792_23', 'APE-792_35', 'APE-792_40', 'APE-792_71', 'RIL_13', 'RIL_21']


Unnamed: 0,ID_ADM,timepoint,Subject_ID,Subject,AGE,Gender,Visit (ADM),FileName:,FDG_output,MRI ReName,FS_output,Subject_Num,has_plasma,old_has_FDG
50,APE-792_21,,,RKF-APE-021,UNK,O,m00,i_n2a_sUR_rADNI_mc_APE_792_221a_nifti_4frame_0...,True,PENDING?,False,21,False,False
51,APE-792_23,,,RKF-APE-023,UNK,O,m00,n2a_sUR_i_n2a_rADNI_mc_APE_792_223A__dyn-FDG_0...,True,RKF-APE-023__m00__T1-a__20150310__n114941.nii,True,23,False,False
52,APE-792_35,,,RKF-APE-035,UNK,O,m00,i_n2a_sUR_rADNI_mc_APE_792_235a_nifti_4frame_0...,True,RKF-APE-035__m00__T1-a__2015xxxx__saNiFTI.nii,True,35,False,False
53,APE-792_40,,,RKF-APE-040,UNK,O,m00,i_n2a_sUR_rADNI_mc_APE_792_240a_multiframe_000...,True,PENDING?,False,40,False,False
55,APE-792_71,,,RKF-APE-071,69,M,m00,i_n2a_sUR_rADNI_mc_APE_792_171A_multiframe_000...,True,RKF-APE-071__m00__T1-a__20180720__e012994.nii,True,71,False,False
56,RIL_13,,,RKF-RIL-013,70,M,m00,i_n2a_sUR_rADNI_mc_uo_RIL_S_2013A_multiframe_0...,True,RKF-RIL-013__m00__T1-a__20190129__e013380.nii,True,13,False,False
57,RIL_21,,,RKF-RIL-021,UNK,O,m00,i_n2a_sUR_rADNI_mc_o_RKF-RIL_2021A_multiframe_...,True,RKF-RIL-021__m00__T1-a__20190515__e013645.nii,True,21,False,False
