In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pathlib import Path
import scipy.stats as stats
from scipy.stats import ttest_rel

Read in files and perform statistical tests of independence for Pearson scores at each walk length

In [8]:
# Read data
main_path = '/home/paul/thesis/dev/SAY_sf_prediction_v2/dataset/'
# main_path = '/home/paul/thesis/dev/SAY_sf_prediction_v2/scrambled_dataset/'
file_list = Path(main_path).glob('*batch??.csv')
for file in file_list:
    print(file)
    df = pd.read_csv(file)
    # ,ave to a copy of df, then perform Fisher's r to z transformation on Pearson scores
    df_z = df.copy()
    for i in range(1, 6):
        df_z[str(i)] = np.arctanh(df_z[str(i)])
    # print(df_z)
    print(file.stem)
    # compare difference in score between Recon methods for each walk length column
    for i in range(1, 6):
        print('Walk Length: '+str(i))
        # get unique values of Recon column
        recon_list = df['Recon'].unique()
        df_stats_all = pd.DataFrame()
        df_stats = pd.DataFrame()
        # loop through each unique value of Recon column
        for recon in recon_list:
            print(recon)
            print(df[df['Recon'] == str(recon)][str(i)])
            print('Statistics for '+str(recon))
            print('Mean')
            print(df[df['Recon'] == str(recon)][str(i)].mean())
            print('Stdev')
            print(df[df['Recon'] == str(recon)][str(i)].std())
            print('Median')
            print(df[df['Recon'] == str(recon)][str(i)].median())
            print('IQR')
            print(df[df['Recon'] == str(recon)][str(i)].quantile(
                q=0.75)-df[df['Recon'] == str(recon)][str(i)].quantile(q=0.25))
            # combine statistics from each Recon method into dataframe
            df_stats = pd.DataFrame({'Recon': recon_list, 'Mean': df[df['Recon'] == str(recon)][str(i)].mean(), 'Stdev': df[df['Recon'] == str(recon)][str(i)].std(
            ), 'Median': df[df['Recon'] == str(recon)][str(i)].median(), 'IQR': df[df['Recon'] == str(recon)][str(i)].quantile(q=0.75)-df[df['Recon'] == str(recon)][str(i)].quantile(q=0.25)})
            # combine df_stats from each Recon method into dataframe
            df_stats_all = pd.concat([df_stats, df_stats_all], axis=0)
        print(df_stats_all)
        # save dataframe to csv
        df_stats_all.to_csv(main_path+'/stats/'+file.stem +
                            '_walk_length_'+str(i)+'_stats.csv', index=False)
        # calculate p-value for difference in score between Recon methods in recon_list, with comparisons between each pair of Recon methods, save all statistics and p-values to csv
        df_ttest_results = pd.DataFrame()
        df_ttest_rel_results = pd.DataFrame()
        df_z_ttest_results = pd.DataFrame()
        df_z_ttest_rel_results = pd.DataFrame()
        for recon in recon_list:
            for recon2 in recon_list:
                if recon != recon2:
                    print(recon)
                    print(recon2)
                    # print the length of df for each Recon method
                    print(len(df[df['Recon'] == str(recon)][str(i)]))
                    print(len(df[df['Recon'] == str(recon2)][str(i)]))
                    # calculate t-test on Pearson scores, get t-statistic and p-value
                    ttest_pearson_score = stats.ttest_ind(df[df['Recon'] == str(
                        recon)][str(i)], df[df['Recon'] == str(recon2)][str(i)], alternative='less', nan_policy='propagate')
                    ttest_rel_pearson_score = ttest_rel(df[df['Recon'] == str(
                        recon)][str(i)], df[df['Recon'] == str(recon2)][str(i)], alternative='less', nan_policy='propagate')
                    ttest_z_score = stats.ttest_ind(df_z[df_z['Recon'] == str(
                        recon)][str(i)], df_z[df_z['Recon'] == str(recon2)][str(i)], alternative='less', nan_policy='propagate')
                    ttest_rel_z_score = ttest_rel(df_z[df_z['Recon'] == str(
                        recon)][str(i)], df_z[df_z['Recon'] == str(recon2)][str(i)], alternative='less', nan_policy='propagate')
                    # combine t-test results into dataframe
                    df_ttest_results = pd.concat([df_ttest_results, pd.DataFrame({'Recon 1': recon, 'Recon 2': recon2, 'T-statistic Pearson Score t-test': ttest_pearson_score[0],
                                                 'p-value Pearson Score t-test':ttest_pearson_score[1]}, index=[0])], axis=0)  # ,'T-statistic z-score':ttest_z_score[0],'p-value z-score':ttest_z_score[1]
                    df_ttest_rel_results = pd.concat([df_ttest_rel_results, pd.DataFrame({'Recon 1': recon, 'Recon 2': recon2, 'T-statistic Pearson Score t-test': ttest_rel_pearson_score[0],
                                                                                          'p-value Pearson Score t-test':ttest_rel_pearson_score[1]}, index=[0])], axis=0)  # ,'T-statistic z-score':ttest_z_score[0],'p-value z-score':ttest_z_score[1]
                    df_z_ttest_rel_results = pd.concat([df_z_ttest_rel_results, pd.DataFrame({'Recon 1': recon, 'Recon 2': recon2, 'T-statistic z-score t-test': ttest_rel_z_score[0],
                                                                                              'p-value z-score t-test':ttest_rel_z_score[1]}, index=[0])], axis=0)  # ,'T-statistic z-score':ttest_z_score[0],'p-value z-score':ttest_z_score[1]
                    df_z_ttest_results = pd.concat([df_z_ttest_results, pd.DataFrame({'Recon 1': recon, 'Recon 2': recon2, 'T-statistic z-score t-test': ttest_z_score[0],
                                                    'p-value z-score t-test':ttest_z_score[1]}, index=[0])], axis=0)  # ,'T-statistic z-score':ttest_z_score[0],'p-value z-score':ttest_z_score[1]
        print('T-test ind results')
        print(df_ttest_results)
        print('T-test rel results')
        print(df_ttest_rel_results)
        print('T-test ind z-score results')
        print(df_z_ttest_results)
        print('T-test rel z-score results')
        print(df_z_ttest_rel_results)

        # save dataframe to csv
        df_ttest_results.to_csv(
            main_path+'/stats/'+file.stem+'_walk_length_'+str(i)+'_ttest_results.csv', index=False)
        df_ttest_rel_results.to_csv(
            main_path+'/stats/'+file.stem+'_walk_length_'+str(i)+'_ttest_rel_results.csv', index=False)
        df_z_ttest_results.to_csv(
            main_path+'/stats/'+file.stem+'_walk_length_'+str(i)+'_z_score_ttest_results.csv', index=False)
        df_z_ttest_rel_results.to_csv(
            main_path+'/stats/'+file.stem+'_walk_length_'+str(i)+'_z_score_ttest_rel_results.csv', index=False)
        # # calculate non-parametric statistics for difference in score between Recon methods in recon_list, with comparisons between each pair of Recon methods, save all statistics and p-values to csv
        # df_mannwhitneyu_results = pd.DataFrame()
        # for recon in recon_list:
        #     for recon2 in recon_list:
        #         if recon != recon2:
        #             # print(recon)
        #             # print(recon2)
        #             # print(stats.mannwhitneyu(df[df['Recon']==str(recon)][str(i)],df[df['Recon']==str(recon2)][str(i)]))
        #             # calculate Mann-Whitney U test on Pearson scores, get U-statistic and p-value
        #             mannwhitneyu = stats.mannwhitneyu(df[df['Recon'] == str(
        #                 recon)][str(i)], df[df['Recon'] == str(recon2)][str(i)])
        #             # # calculate Mann-Whitney U test on Fisher's r to z transformed Pearson scores, get U-statistic and p-value
        #             # mannwhitneyu_z = stats.mannwhitneyu(df_z[df_z['Recon']==str(recon)]['z'],df_z[df_z['Recon']==str(recon2)]['z'])
        #             # combine Mann-Whitney U test results into dataframe
        #             # , 'U-statistic z-score':mannwhitneyu_z[0],'p-value z-score':mannwhitneyu_z[1]
        #             df_mannwhitneyu_results = pd.concat([df_mannwhitneyu_results, pd.DataFrame(
        #                 {'Recon 1': recon, 'Recon 2': recon2, 'U-statistic Pearson Score Mann-Whitney': mannwhitneyu[0], 'p-value Pearson Score Mann-Whitney':mannwhitneyu[1]}, index=[0])], axis=0)
        # # save dataframe to csv
        # df_mannwhitneyu_results.to_csv(
        #     main_path+'/'+file.stem+'_walk_length_'+str(i)+'_mannwhitneyu_results.csv', index=False)

/home/paul/thesis/dev/SAY_sf_prediction_v2/dataset/count_all_batch41.csv
count_all_batch41
Walk Length: 1
MSMT CSD SIFT2 Streamline Count
0     0.613709
1     0.615947
2     0.459425
3     0.537682
4     0.661649
5     0.422555
6     0.603934
7     0.622703
8     0.634884
9     0.628968
10    0.575197
11    0.610931
12    0.640958
13    0.563378
14    0.621856
15    0.572169
16    0.596888
17    0.591063
18    0.558532
19    0.494764
20    0.532592
Name: 1, dtype: float64
Statistics for MSMT CSD SIFT2 Streamline Count
Mean
0.579037373184194
Stdev
0.06130506207767524
Median
0.5968877830015069
IQR
0.06332347969518848
GQI Streamline Count
21    0.593448
22    0.592647
23    0.454313
24    0.546210
25    0.644447
26    0.430576
27    0.556105
28    0.615597
29    0.608029
30    0.585431
31    0.540963
32    0.594848
33    0.609132
34    0.565851
35    0.592926
36    0.558713
37    0.562922
38    0.570820
39    0.531897
40    0.478631
41    0.516424
Name: 1, dtype: float64
Statistics for GQ

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0


GQI Streamline Count
DTI Streamline Count
21
21
DTI Streamline Count
MSMT CSD SIFT2 Streamline Count
21
21
DTI Streamline Count
GQI Streamline Count
21
21
T-test ind results
                           Recon 1                          Recon 2  \
0  MSMT CSD SIFT2 Streamline Count             GQI Streamline Count   
0  MSMT CSD SIFT2 Streamline Count             DTI Streamline Count   
0             GQI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             GQI Streamline Count             DTI Streamline Count   
0             DTI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             DTI Streamline Count             GQI Streamline Count   

   T-statistic Pearson Score t-test  p-value Pearson Score t-test  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                     

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0



MSMT CSD SIFT2 Streamline Count
21
21
GQI Streamline Count
DTI Streamline Count
21
21
DTI Streamline Count
MSMT CSD SIFT2 Streamline Count
21
21
DTI Streamline Count
GQI Streamline Count
21
21
T-test ind results
                           Recon 1                          Recon 2  \
0  MSMT CSD SIFT2 Streamline Count             GQI Streamline Count   
0  MSMT CSD SIFT2 Streamline Count             DTI Streamline Count   
0             GQI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             GQI Streamline Count             DTI Streamline Count   
0             DTI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             DTI Streamline Count             GQI Streamline Count   

   T-statistic Pearson Score t-test  p-value Pearson Score t-test  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                

  cond2 = (x >= np.asarray(_b)) & cond0


MSMT CSD Mean Length
GQI Mean Length
21
21
MSMT CSD Mean Length
DTI Mean Length
21
21
GQI Mean Length
MSMT CSD Mean Length
21
21
GQI Mean Length
DTI Mean Length
21
21
DTI Mean Length
MSMT CSD Mean Length
21
21
DTI Mean Length
GQI Mean Length
21
21
T-test ind results
                Recon 1               Recon 2  \
0  MSMT CSD Mean Length       GQI Mean Length   
0  MSMT CSD Mean Length       DTI Mean Length   
0       GQI Mean Length  MSMT CSD Mean Length   
0       GQI Mean Length       DTI Mean Length   
0       DTI Mean Length  MSMT CSD Mean Length   
0       DTI Mean Length       GQI Mean Length   

   T-statistic Pearson Score t-test  p-value Pearson Score t-test  
0                         -7.633080                  1.250658e-09  
0                         -6.839185                  1.571332e-08  
0                          7.633080                  1.000000e+00  
0                          1.555978                  9.362046e-01  
0                          6.839185              

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0


DTI Node Volume Weighted Streamline Count
MSMT CSD SIFT2 Node Volume Weighted Streamline Count
21
21
DTI Node Volume Weighted Streamline Count
GQI Node Volume Weighted Streamline Count
21
21
T-test ind results
                                             Recon 1  \
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          GQI Node Volume Weighted Streamline Count   
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   

                                             Recon 2  \
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          DTI Node Volume Weighted Streamline Count   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          GQI Node Volume Weighted Streamline Count   

   T

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0



MSMT CSD SIFT2 Node Volume Weighted Streamline Count
21
21
GQI Node Volume Weighted Streamline Count
DTI Node Volume Weighted Streamline Count
21
21
DTI Node Volume Weighted Streamline Count
MSMT CSD SIFT2 Node Volume Weighted Streamline Count
21
21
DTI Node Volume Weighted Streamline Count
GQI Node Volume Weighted Streamline Count
21
21
T-test ind results
                                             Recon 1  \
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          GQI Node Volume Weighted Streamline Count   
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   

                                             Recon 2  \
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          DTI Node Vol

  cond2 = (x >= np.asarray(_b)) & cond0



MSMT CSD Mean Length
21
21
DTI Mean Length
GQI Mean Length
21
21
T-test ind results
                Recon 1               Recon 2  \
0  MSMT CSD Mean Length       GQI Mean Length   
0  MSMT CSD Mean Length       DTI Mean Length   
0       GQI Mean Length  MSMT CSD Mean Length   
0       GQI Mean Length       DTI Mean Length   
0       DTI Mean Length  MSMT CSD Mean Length   
0       DTI Mean Length       GQI Mean Length   

   T-statistic Pearson Score t-test  p-value Pearson Score t-test  
0                         -9.196832                  1.020005e-11  
0                         -7.883817                  5.684263e-10  
0                          9.196832                  1.000000e+00  
0                          1.808238                  9.609539e-01  
0                          7.883817                  1.000000e+00  
0                         -1.808238                  3.904609e-02  
T-test rel results
                Recon 1               Recon 2  \
0  MSMT CSD Mean Length    

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0


0.06874142574550862
GQI Mean Length
21    0.480046
22    0.593578
23    0.542238
24    0.502666
25    0.460249
26    0.590776
27    0.477968
28    0.533064
29    0.576985
30    0.375998
31    0.509465
32    0.529967
33    0.497294
34         NaN
35    0.488638
36    0.458456
37    0.444174
38    0.391963
39    0.518320
40    0.556935
41    0.562950
Name: 2, dtype: float64
Statistics for GQI Mean Length
Mean
0.5045863991548815
Stdev
0.06000273927695526
Median
0.50606524141729
IQR
0.07237370903375262
DTI Mean Length
42    0.555553
43    0.531129
44    0.626483
45    0.555021
46    0.626662
47    0.599404
48    0.578306
49    0.406138
50    0.622971
51    0.553634
52    0.518978
53    0.581991
54    0.566279
55    0.616764
56    0.587683
57    0.565670
58    0.537095
59    0.563278
60    0.553336
61    0.529007
62    0.592748
Name: 2, dtype: float64
Statistics for DTI Mean Length
Mean
0.565148964042776
Stdev
0.04878096358840082
Median
0.5656698369948139
IQR
0.03941242537319667
           

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0


GQI Mean Length
DTI Mean Length
21
21
DTI Mean Length
MSMT CSD Mean Length
21
21
DTI Mean Length
GQI Mean Length
21
21
T-test ind results
                Recon 1               Recon 2  \
0  MSMT CSD Mean Length       GQI Mean Length   
0  MSMT CSD Mean Length       DTI Mean Length   
0       GQI Mean Length  MSMT CSD Mean Length   
0       GQI Mean Length       DTI Mean Length   
0       DTI Mean Length  MSMT CSD Mean Length   
0       DTI Mean Length       GQI Mean Length   

   T-statistic Pearson Score t-test  p-value Pearson Score t-test  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
T-test rel results
                Recon 1

  cond2 = (x >= np.asarray(_b)) & cond0


0.5537141851463867
IQR
0.03719286296607738
                                               Recon      Mean     Stdev  \
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...  0.557686  0.048517   
1          GQI Node Volume Weighted Streamline Count  0.557686  0.048517   
2          DTI Node Volume Weighted Streamline Count  0.557686  0.048517   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...  0.568469  0.049897   
1          GQI Node Volume Weighted Streamline Count  0.568469  0.049897   
2          DTI Node Volume Weighted Streamline Count  0.568469  0.049897   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...  0.582062  0.056607   
1          GQI Node Volume Weighted Streamline Count  0.582062  0.056607   
2          DTI Node Volume Weighted Streamline Count  0.582062  0.056607   

     Median       IQR  
0  0.553714  0.037193  
1  0.553714  0.037193  
2  0.553714  0.037193  
0  0.576154  0.055040  
1  0.576154  0.055040  
2  0.576154  0.055040  
0  0.592226  0.075246  
1  0

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0



T-test rel z-score results
                                             Recon 1  \
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          GQI Node Volume Weighted Streamline Count   
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   

                                             Recon 2  \
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          DTI Node Volume Weighted Streamline Count   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          GQI Node Volume Weighted Streamline Count   

   T-statistic z-score t-test  p-value z-score t-test  
0                         NaN                     NaN  
0                         NaN                     NaN  
0                 

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0



T-test ind z-score results
                                             Recon 1  \
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          GQI Node Volume Weighted Streamline Count   
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   

                                             Recon 2  \
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          DTI Node Volume Weighted Streamline Count   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          GQI Node Volume Weighted Streamline Count   

   T-statistic z-score t-test  p-value z-score t-test  
0                         NaN                     NaN  
0                         NaN                     NaN  
0                 

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0


0.06819528260952834
GQI Mean Length
21    0.493144
22    0.439891
23    0.511492
24    0.607055
25    0.569730
26    0.528889
27    0.553271
28    0.564763
29    0.516577
30    0.411584
31    0.602176
32    0.525191
33    0.566608
34    0.527448
35    0.602215
36    0.442172
37         NaN
38    0.541362
39    0.571366
40    0.550139
41    0.522651
Name: 2, dtype: float64
Statistics for GQI Mean Length
Mean
0.532386204139852
Stdev
0.05379504417442483
Median
0.5351255161158748
IQR
0.052082412152006596
DTI Mean Length
42    0.624282
43    0.650951
44    0.568856
45    0.622757
46    0.593426
47    0.584873
48    0.521836
49    0.510290
50    0.604763
51    0.652527
52    0.616175
53    0.489204
54    0.600477
55    0.560112
56    0.662397
57    0.536778
58    0.518934
59    0.601479
60    0.628891
61    0.539951
62    0.573909
Name: 2, dtype: float64
Statistics for DTI Mean Length
Mean
0.5839461179277257
Stdev
0.05009331070342644
Median
0.5934257286186191
IQR
0.08280656872502301
        

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0



T-test rel results
                Recon 1               Recon 2  \
0  MSMT CSD Mean Length       GQI Mean Length   
0  MSMT CSD Mean Length       DTI Mean Length   
0       GQI Mean Length  MSMT CSD Mean Length   
0       GQI Mean Length       DTI Mean Length   
0       DTI Mean Length  MSMT CSD Mean Length   
0       DTI Mean Length       GQI Mean Length   

   T-statistic Pearson Score t-test  p-value Pearson Score t-test  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
T-test ind z-score results
                Recon 1               Recon 2  T-statistic z-score t-test  \
0  MSMT CSD Mean Length       GQI Mean Length           

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0


DTI Streamline Count
GQI Streamline Count
21
21
T-test ind results
                           Recon 1                          Recon 2  \
0  MSMT CSD SIFT2 Streamline Count             GQI Streamline Count   
0  MSMT CSD SIFT2 Streamline Count             DTI Streamline Count   
0             GQI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             GQI Streamline Count             DTI Streamline Count   
0             DTI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             DTI Streamline Count             GQI Streamline Count   

   T-statistic Pearson Score t-test  p-value Pearson Score t-test  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                          

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0


GQI Streamline Count
MSMT CSD SIFT2 Streamline Count
21
21
GQI Streamline Count
DTI Streamline Count
21
21
DTI Streamline Count
MSMT CSD SIFT2 Streamline Count
21
21
DTI Streamline Count
GQI Streamline Count
21
21
T-test ind results
                           Recon 1                          Recon 2  \
0  MSMT CSD SIFT2 Streamline Count             GQI Streamline Count   
0  MSMT CSD SIFT2 Streamline Count             DTI Streamline Count   
0             GQI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             GQI Streamline Count             DTI Streamline Count   
0             DTI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             DTI Streamline Count             GQI Streamline Count   

   T-statistic Pearson Score t-test  p-value Pearson Score t-test  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0


T-test ind results
                           Recon 1                          Recon 2  \
0  MSMT CSD SIFT2 Streamline Count             GQI Streamline Count   
0  MSMT CSD SIFT2 Streamline Count             DTI Streamline Count   
0             GQI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             GQI Streamline Count             DTI Streamline Count   
0             DTI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             DTI Streamline Count             GQI Streamline Count   

   T-statistic Pearson Score t-test  p-value Pearson Score t-test  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
T-test 

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0


T-test ind results
                           Recon 1                          Recon 2  \
0  MSMT CSD SIFT2 Streamline Count             GQI Streamline Count   
0  MSMT CSD SIFT2 Streamline Count             DTI Streamline Count   
0             GQI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             GQI Streamline Count             DTI Streamline Count   
0             DTI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             DTI Streamline Count             GQI Streamline Count   

   T-statistic Pearson Score t-test  p-value Pearson Score t-test  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
T-test 

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0


DTI Node Volume Weighted Streamline Count
MSMT CSD SIFT2 Node Volume Weighted Streamline Count
21
21
DTI Node Volume Weighted Streamline Count
GQI Node Volume Weighted Streamline Count
21
21
T-test ind results
                                             Recon 1  \
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          GQI Node Volume Weighted Streamline Count   
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   

                                             Recon 2  \
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          DTI Node Volume Weighted Streamline Count   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          GQI Node Volume Weighted Streamline Count   

   T

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0



DTI Node Volume Weighted Streamline Count
MSMT CSD SIFT2 Node Volume Weighted Streamline Count
21
21
DTI Node Volume Weighted Streamline Count
GQI Node Volume Weighted Streamline Count
21
21
T-test ind results
                                             Recon 1  \
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          GQI Node Volume Weighted Streamline Count   
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   

                                             Recon 2  \
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          DTI Node Volume Weighted Streamline Count   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          GQI Node Volume Weighted Streamline Count   

   

  cond2 = (x >= np.asarray(_b)) & cond0


GQI Node Volume Weighted Streamline Count
MSMT CSD SIFT2 Node Volume Weighted Streamline Count
21
21
GQI Node Volume Weighted Streamline Count
DTI Node Volume Weighted Streamline Count
21
21
DTI Node Volume Weighted Streamline Count
MSMT CSD SIFT2 Node Volume Weighted Streamline Count
21
21
DTI Node Volume Weighted Streamline Count
GQI Node Volume Weighted Streamline Count
21
21
T-test ind results
                                             Recon 1  \
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          GQI Node Volume Weighted Streamline Count   
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   

                                             Recon 2  \
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0  MSMT CSD SIFT2 Node Volume Weighted

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0


21
21
MSMT CSD Mean Length
DTI Mean Length
21
21
GQI Mean Length
MSMT CSD Mean Length
21
21
GQI Mean Length
DTI Mean Length
21
21
DTI Mean Length
MSMT CSD Mean Length
21
21
DTI Mean Length
GQI Mean Length
21
21
T-test ind results
                Recon 1               Recon 2  \
0  MSMT CSD Mean Length       GQI Mean Length   
0  MSMT CSD Mean Length       DTI Mean Length   
0       GQI Mean Length  MSMT CSD Mean Length   
0       GQI Mean Length       DTI Mean Length   
0       DTI Mean Length  MSMT CSD Mean Length   
0       DTI Mean Length       GQI Mean Length   

   T-statistic Pearson Score t-test  p-value Pearson Score t-test  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                 

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0



IQR
0.08542094187712734
                  Recon      Mean     Stdev    Median       IQR
0  MSMT CSD Mean Length  0.577436  0.054248  0.570321  0.085421
1       GQI Mean Length  0.577436  0.054248  0.570321  0.085421
2       DTI Mean Length  0.577436  0.054248  0.570321  0.085421
0  MSMT CSD Mean Length  0.511877  0.068965  0.536415  0.103516
1       GQI Mean Length  0.511877  0.068965  0.536415  0.103516
2       DTI Mean Length  0.511877  0.068965  0.536415  0.103516
0  MSMT CSD Mean Length  0.496316  0.076943  0.526432  0.112630
1       GQI Mean Length  0.496316  0.076943  0.526432  0.112630
2       DTI Mean Length  0.496316  0.076943  0.526432  0.112630
MSMT CSD Mean Length
GQI Mean Length
21
21
MSMT CSD Mean Length
DTI Mean Length
21
21
GQI Mean Length
MSMT CSD Mean Length
21
21
GQI Mean Length
DTI Mean Length
21
21
DTI Mean Length
MSMT CSD Mean Length
21
21
DTI Mean Length
GQI Mean Length
21
21
T-test ind results
                Recon 1               Recon 2  \
0  MSMT CSD Mean Le

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0



                                             Recon 1  \
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          GQI Node Volume Weighted Streamline Count   
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   

                                             Recon 2  \
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          DTI Node Volume Weighted Streamline Count   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          GQI Node Volume Weighted Streamline Count   

   T-statistic Pearson Score t-test  p-value Pearson Score t-test  
0                               NaN                           NaN  
0                               NaN                           NaN  
0        

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0



MSMT CSD SIFT2 Node Volume Weighted Streamline Count
21
21
DTI Node Volume Weighted Streamline Count
GQI Node Volume Weighted Streamline Count
21
21
T-test ind results
                                             Recon 1  \
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          GQI Node Volume Weighted Streamline Count   
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   

                                             Recon 2  \
0          GQI Node Volume Weighted Streamline Count   
0          DTI Node Volume Weighted Streamline Count   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          DTI Node Volume Weighted Streamline Count   
0  MSMT CSD SIFT2 Node Volume Weighted Streamline...   
0          GQI Node Volume Weighted Streamline Count   

   T-statistic Pearson Score t-test  p-value 

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0



GQI Streamline Count
21    0.507628
22    0.607147
23    0.572364
24    0.512039
25    0.479021
26    0.599358
27    0.521401
28    0.584273
29    0.621728
30    0.425719
31    0.532138
32    0.602109
33    0.541469
34         NaN
35    0.586183
36    0.496229
37    0.571665
38    0.438039
39    0.574754
40    0.564950
41    0.594897
Name: 3, dtype: float64
Statistics for GQI Streamline Count
Mean
0.5466555495919072
Stdev
0.056195449401343786
Median
0.5683073244454511
IQR
0.07742564350185621
DTI Streamline Count
42    0.537562
43    0.519601
44    0.359469
45    0.638324
46    0.639715
47    0.371027
48    0.298451
49    0.445419
50    0.656281
51    0.593069
52    0.233297
53    0.615743
54    0.600920
55    0.371972
56    0.593129
57    0.579526
58    0.509779
59    0.592708
60    0.284592
61    0.543838
62    0.609098
Name: 3, dtype: float64
Statistics for DTI Streamline Count
Mean
0.5044532696885619
Stdev
0.1319312933162957
Median
0.5438378274056029
IQR
0.22894735590007576
       

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0


T-test ind results
                           Recon 1                          Recon 2  \
0  MSMT CSD SIFT2 Streamline Count             GQI Streamline Count   
0  MSMT CSD SIFT2 Streamline Count             DTI Streamline Count   
0             GQI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             GQI Streamline Count             DTI Streamline Count   
0             DTI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             DTI Streamline Count             GQI Streamline Count   

   T-statistic Pearson Score t-test  p-value Pearson Score t-test  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                               NaN                           NaN  
T-test 

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0


                  Recon      Mean     Stdev    Median       IQR
0  MSMT CSD Mean Length  0.542501  0.068310  0.535020  0.110485
1       GQI Mean Length  0.542501  0.068310  0.535020  0.110485
2       DTI Mean Length  0.542501  0.068310  0.535020  0.110485
0  MSMT CSD Mean Length  0.517675  0.063036  0.522445  0.076611
1       GQI Mean Length  0.517675  0.063036  0.522445  0.076611
2       DTI Mean Length  0.517675  0.063036  0.522445  0.076611
0  MSMT CSD Mean Length  0.456080  0.074864  0.467311  0.096309
1       GQI Mean Length  0.456080  0.074864  0.467311  0.096309
2       DTI Mean Length  0.456080  0.074864  0.467311  0.096309
MSMT CSD Mean Length
GQI Mean Length
21
21
MSMT CSD Mean Length
DTI Mean Length
21
21
GQI Mean Length
MSMT CSD Mean Length
21
21
GQI Mean Length
DTI Mean Length
21
21
DTI Mean Length
MSMT CSD Mean Length
21
21
DTI Mean Length
GQI Mean Length
21
21
T-test ind results
                Recon 1               Recon 2  \
0  MSMT CSD Mean Length       GQI Mean Lengt

  cond2 = (x >= np.asarray(_b)) & cond0
  cond2 = (x >= np.asarray(_b)) & cond0


                  Recon      Mean     Stdev    Median       IQR
0  MSMT CSD Mean Length  0.551084  0.070191  0.535277  0.118328
1       GQI Mean Length  0.551084  0.070191  0.535277  0.118328
2       DTI Mean Length  0.551084  0.070191  0.535277  0.118328
0  MSMT CSD Mean Length  0.515405  0.059612  0.515827  0.064427
1       GQI Mean Length  0.515405  0.059612  0.515827  0.064427
2       DTI Mean Length  0.515405  0.059612  0.515827  0.064427
0  MSMT CSD Mean Length  0.500395  0.067947  0.511294  0.081773
1       GQI Mean Length  0.500395  0.067947  0.511294  0.081773
2       DTI Mean Length  0.500395  0.067947  0.511294  0.081773
MSMT CSD Mean Length
GQI Mean Length
21
21
MSMT CSD Mean Length
DTI Mean Length
21
21
GQI Mean Length
MSMT CSD Mean Length
21
21
GQI Mean Length
DTI Mean Length
21
21
DTI Mean Length
MSMT CSD Mean Length
21
21
DTI Mean Length
GQI Mean Length
21
21
T-test ind results
                Recon 1               Recon 2  \
0  MSMT CSD Mean Length       GQI Mean Lengt

Make Box Plots

In [9]:
# Read data
main_path = '/home/paul/thesis/dev/SAY_sf_prediction_v2/dataset/'
# main_path = '/home/paul/thesis/dev/SAY_sf_prediction_v3/scrambled_dataset/'
file_list = Path(main_path).glob('*batch??.csv')
for file in file_list:
    print(file)
    df = pd.read_csv(file)
    print(file.stem)
    # melt data for boxplot
    dd=pd.melt(df,id_vars=['Recon'],value_vars=['1','2','3','4','5'],var_name='Walk Length')
    # seaborn boxplot with hue based on recon method
    sns.boxplot(x='Walk Length',y='value',data=dd,hue='Recon')
    # plt.show()
    plt.ylabel('Pearson Score')
    # save figure
    plt.savefig(main_path+'/figures/'+file.stem+'_box_plot.png')
    plt.close()

/home/paul/thesis/dev/SAY_sf_prediction_v2/dataset/count_all_batch41.csv
count_all_batch41
/home/paul/thesis/dev/SAY_sf_prediction_v2/dataset/volume_weighted_all_batch42.csv
volume_weighted_all_batch42
/home/paul/thesis/dev/SAY_sf_prediction_v2/dataset/count_all_batch46.csv
count_all_batch46
/home/paul/thesis/dev/SAY_sf_prediction_v2/dataset/volume_weighted_all_batch46.csv
volume_weighted_all_batch46
/home/paul/thesis/dev/SAY_sf_prediction_v2/dataset/count_all_batch32.csv
count_all_batch32
/home/paul/thesis/dev/SAY_sf_prediction_v2/dataset/mean_length_all_batch42.csv
mean_length_all_batch42
/home/paul/thesis/dev/SAY_sf_prediction_v2/dataset/volume_weighted_all_batch31.csv
volume_weighted_all_batch31
/home/paul/thesis/dev/SAY_sf_prediction_v2/dataset/mean_length_all_batch46.csv
mean_length_all_batch46
/home/paul/thesis/dev/SAY_sf_prediction_v2/dataset/count_all_batch33.csv
count_all_batch33
/home/paul/thesis/dev/SAY_sf_prediction_v2/dataset/mean_length_all_batch47.csv
mean_length_all_ba