In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pathlib import Path
import scipy.stats as stats
from scipy.stats import ttest_rel

Read in files and perform statistical tests of independence for Pearson scores at each walk length

In [5]:
# Read data
# main_path = '/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/'
# main_path = '/home/paul/thesis/dev/SAY_sf_prediction_v3/scrambled_dataset/'
file_list = Path(main_path).glob('*batch??.csv')
for file in file_list:
    print(file)
    df = pd.read_csv(file)
    # ,ave to a copy of df, then perform Fisher's r to z transformation on Pearson scores
    df_z = df.copy()
    for i in range(1, 6):
        df_z[str(i)] = np.arctanh(df_z[str(i)])
    # print(df_z)
    print(file.stem)
    # compare difference in score between Recon methods for each walk length column
    for i in range(1, 6):
        print('Walk Length: '+str(i))
        # get unique values of Recon column
        recon_list = df['Recon'].unique()
        df_stats_all = pd.DataFrame()
        df_stats = pd.DataFrame()
        # loop through each unique value of Recon column
        for recon in recon_list:
            print(recon)
            print(df[df['Recon'] == str(recon)][str(i)])
            print('Statistics for '+str(recon))
            print('Mean')
            print(df[df['Recon'] == str(recon)][str(i)].mean())
            print('Stdev')
            print(df[df['Recon'] == str(recon)][str(i)].std())
            print('Median')
            print(df[df['Recon'] == str(recon)][str(i)].median())
            print('IQR')
            print(df[df['Recon'] == str(recon)][str(i)].quantile(
                q=0.75)-df[df['Recon'] == str(recon)][str(i)].quantile(q=0.25))
            # combine statistics from each Recon method into dataframe
            df_stats = pd.DataFrame({'Recon': recon_list, 'Mean': df[df['Recon'] == str(recon)][str(i)].mean(), 'Stdev': df[df['Recon'] == str(recon)][str(i)].std(
            ), 'Median': df[df['Recon'] == str(recon)][str(i)].median(), 'IQR': df[df['Recon'] == str(recon)][str(i)].quantile(q=0.75)-df[df['Recon'] == str(recon)][str(i)].quantile(q=0.25)})
            # combine df_stats from each Recon method into dataframe
            df_stats_all = pd.concat([df_stats, df_stats_all], axis=0)
        print(df_stats_all)
        # save dataframe to csv
        df_stats_all.to_csv(main_path+'/'+file.stem +
                            '_walk_length_'+str(i)+'_stats.csv', index=False)
        # calculate p-value for difference in score between Recon methods in recon_list, with comparisons between each pair of Recon methods, save all statistics and p-values to csv
        df_ttest_results = pd.DataFrame()
        df_ttest_rel_results = pd.DataFrame()
        df_z_ttest_results = pd.DataFrame()
        df_z_ttest_rel_results = pd.DataFrame()
        for recon in recon_list:
            for recon2 in recon_list:
                if recon != recon2:
                    print(recon)
                    print(recon2)
                    # calculate t-test on Pearson scores, get t-statistic and p-value
                    ttest_pearson_score = stats.ttest_ind(df[df['Recon'] == str(
                        recon)][str(i)], df[df['Recon'] == str(recon2)][str(i)], alternative='less')
                    ttest_rel_pearson_score = ttest_rel(df[df['Recon'] == str(
                        recon)][str(i)], df[df['Recon'] == str(recon2)][str(i)], alternative='less')
                    ttest_z_score = stats.ttest_ind(df_z[df_z['Recon'] == str(
                        recon)][str(i)], df_z[df_z['Recon'] == str(recon2)][str(i)], alternative='less')
                    ttest_rel_z_score = ttest_rel(df_z[df_z['Recon'] == str(
                        recon)][str(i)], df_z[df_z['Recon'] == str(recon2)][str(i)], alternative='less')
                    # combine t-test results into dataframe
                    df_ttest_results = pd.concat([df_ttest_results, pd.DataFrame({'Recon 1': recon, 'Recon 2': recon2, 'T-statistic Pearson Score t-test': ttest_pearson_score[0],
                                                 'p-value Pearson Score t-test':ttest_pearson_score[1]}, index=[0])], axis=0)  # ,'T-statistic z-score':ttest_z_score[0],'p-value z-score':ttest_z_score[1]
                    df_ttest_rel_results = pd.concat([df_ttest_rel_results, pd.DataFrame({'Recon 1': recon, 'Recon 2': recon2, 'T-statistic Pearson Score t-test': ttest_rel_pearson_score[0],
                                                    'p-value Pearson Score t-test':ttest_rel_pearson_score[1]}, index=[0])], axis=0)  # ,'T-statistic z-score':ttest_z_score[0],'p-value z-score':ttest_z_score[1]
                    df_z_ttest_rel_results = pd.concat([df_z_ttest_rel_results, pd.DataFrame({'Recon 1': recon, 'Recon 2': recon2, 'T-statistic z-score t-test': ttest_rel_z_score[0],
                                                    'p-value z-score t-test':ttest_rel_z_score[1]}, index=[0])], axis=0)  # ,'T-statistic z-score':ttest_z_score[0],'p-value z-score':ttest_z_score[1]
                    df_z_ttest_results = pd.concat([df_z_ttest_results, pd.DataFrame({'Recon 1': recon, 'Recon 2': recon2, 'T-statistic z-score t-test': ttest_z_score[0],
                                                    'p-value z-score t-test':ttest_z_score[1]}, index=[0])], axis=0)  # ,'T-statistic z-score':ttest_z_score[0],'p-value z-score':ttest_z_score[1]
        print('T-test ind results')
        print(df_ttest_results)
        print('T-test rel results')
        print(df_ttest_rel_results)
        print('T-test ind z-score results')
        print(df_z_ttest_results)
        print('T-test rel z-score results')
        print(df_z_ttest_rel_results)

        # save dataframe to csv
        df_ttest_results.to_csv(
            main_path+'/'+file.stem+'_walk_length_'+str(i)+'_ttest_results.csv', index=False)
        df_ttest_rel_results.to_csv(
            main_path+'/'+file.stem+'_walk_length_'+str(i)+'_ttest_rel_results.csv', index=False)
        df_z_ttest_results.to_csv(
            main_path+'/'+file.stem+'_walk_length_'+str(i)+'_z_score_ttest_results.csv', index=False)
        df_z_ttest_rel_results.to_csv(
            main_path+'/'+file.stem+'_walk_length_'+str(i)+'_z_score_ttest_rel_results.csv', index=False)
        # # calculate non-parametric statistics for difference in score between Recon methods in recon_list, with comparisons between each pair of Recon methods, save all statistics and p-values to csv
        # df_mannwhitneyu_results = pd.DataFrame()
        # for recon in recon_list:
        #     for recon2 in recon_list:
        #         if recon != recon2:
        #             # print(recon)
        #             # print(recon2)
        #             # print(stats.mannwhitneyu(df[df['Recon']==str(recon)][str(i)],df[df['Recon']==str(recon2)][str(i)]))
        #             # calculate Mann-Whitney U test on Pearson scores, get U-statistic and p-value
        #             mannwhitneyu = stats.mannwhitneyu(df[df['Recon'] == str(
        #                 recon)][str(i)], df[df['Recon'] == str(recon2)][str(i)])
        #             # # calculate Mann-Whitney U test on Fisher's r to z transformed Pearson scores, get U-statistic and p-value
        #             # mannwhitneyu_z = stats.mannwhitneyu(df_z[df_z['Recon']==str(recon)]['z'],df_z[df_z['Recon']==str(recon2)]['z'])
        #             # combine Mann-Whitney U test results into dataframe
        #             # , 'U-statistic z-score':mannwhitneyu_z[0],'p-value z-score':mannwhitneyu_z[1]
        #             df_mannwhitneyu_results = pd.concat([df_mannwhitneyu_results, pd.DataFrame(
        #                 {'Recon 1': recon, 'Recon 2': recon2, 'U-statistic Pearson Score Mann-Whitney': mannwhitneyu[0], 'p-value Pearson Score Mann-Whitney':mannwhitneyu[1]}, index=[0])], axis=0)
        # # save dataframe to csv
        # df_mannwhitneyu_results.to_csv(
        #     main_path+'/'+file.stem+'_walk_length_'+str(i)+'_mannwhitneyu_results.csv', index=False)



/home/paul/thesis/dev/SAY_sf_prediction_v3/scrambled_dataset/mean_length_all_percent_batch26.csv
mean_length_all_percent_batch26
Walk Length: 1
MSMT CSD Mean Length
0     0.357959
1     0.198632
2     0.348130
3     0.252825
4     0.301341
5     0.477540
6     0.421624
7     0.434281
8     0.455945
9     0.220632
10    0.374796
11    0.352692
12    0.365303
13    0.366185
14    0.384996
15    0.273719
16    0.341852
17    0.342268
18    0.319967
19    0.460355
20    0.445645
Name: 1, dtype: float64
Statistics for MSMT CSD Mean Length
Mean
0.3569850782912871
Stdev
0.07769852387819227
Median
0.3579586163656797
IQR
0.1016568172778618
GQI Mean Length
21   NaN
22   NaN
23   NaN
24   NaN
25   NaN
26   NaN
27   NaN
28   NaN
29   NaN
30   NaN
31   NaN
32   NaN
33   NaN
34   NaN
35   NaN
36   NaN
37   NaN
38   NaN
39   NaN
40   NaN
41   NaN
Name: 1, dtype: float64
Statistics for GQI Mean Length
Mean
nan
Stdev
nan
Median
nan
IQR
nan
DTI Mean Length
42    0.494375
43    0.370792
44    0.459570
45

  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  cond2 = (x >= np.asarray(_b)) & cond0


GQI Mean Length
MSMT CSD Mean Length
GQI Mean Length
DTI Mean Length
DTI Mean Length
MSMT CSD Mean Length
DTI Mean Length
GQI Mean Length
T-test ind results
                Recon 1               Recon 2  \
0  MSMT CSD Mean Length       GQI Mean Length   
0  MSMT CSD Mean Length       DTI Mean Length   
0       GQI Mean Length  MSMT CSD Mean Length   
0       GQI Mean Length       DTI Mean Length   
0       DTI Mean Length  MSMT CSD Mean Length   
0       DTI Mean Length       GQI Mean Length   

   T-statistic Pearson Score t-test  p-value Pearson Score t-test  
0                          0.900975                  8.135027e-01  
0                         -8.050634                  3.375130e-10  
0                         -0.900975                  1.864973e-01  
0                         -9.044716                  1.608593e-11  
0                          8.050634                  1.000000e+00  
0                          9.044716                  1.000000e+00  
T-test rel results
    

  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  cond2 = (x >= np.asarray(_b)) & cond0
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  cond2 = (x >= np.asarray(_b)) & cond0


DTI Streamline Count
MSMT CSD SIFT2 Streamline Count
DTI Streamline Count
GQI Streamline Count
T-test ind results
                           Recon 1                          Recon 2  \
0  MSMT CSD SIFT2 Streamline Count             GQI Streamline Count   
0  MSMT CSD SIFT2 Streamline Count             DTI Streamline Count   
0             GQI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             GQI Streamline Count             DTI Streamline Count   
0             DTI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             DTI Streamline Count             GQI Streamline Count   

   T-statistic Pearson Score t-test  p-value Pearson Score t-test  
0                               NaN                           NaN  
0                          0.233439                      0.591695  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                         -0.233439             

  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  cond2 = (x >= np.asarray(_b)) & cond0
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  cond2 = (x >= np.asarray(_b)) & cond0


DTI Streamline Count
GQI Streamline Count
T-test ind results
                           Recon 1                          Recon 2  \
0  MSMT CSD SIFT2 Streamline Count             GQI Streamline Count   
0  MSMT CSD SIFT2 Streamline Count             DTI Streamline Count   
0             GQI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             GQI Streamline Count             DTI Streamline Count   
0             DTI Streamline Count  MSMT CSD SIFT2 Streamline Count   
0             DTI Streamline Count             GQI Streamline Count   

   T-statistic Pearson Score t-test  p-value Pearson Score t-test  
0                               NaN                           NaN  
0                         -0.330724                       0.37129  
0                               NaN                           NaN  
0                               NaN                           NaN  
0                          0.330724                       0.62871  
0                               N

  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  cond2 = (x >= np.asarray(_b)) & cond0


Make Box Plots

In [6]:
# Read data
# main_path = '/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/'
# main_path = '/home/paul/thesis/dev/SAY_sf_prediction_v3/scrambled_dataset/'
file_list = Path(main_path).glob('*batch??.csv')
for file in file_list:
    print(file)
    df = pd.read_csv(file)
    print(file.stem)
    # melt data for boxplot
    dd=pd.melt(df,id_vars=['Recon'],value_vars=['1','2','3','4','5'],var_name='Walk Length')
    # seaborn boxplot with hue based on recon method
    sns.boxplot(x='Walk Length',y='value',data=dd,hue='Recon')
    # plt.show()
    plt.ylabel('Pearson Score')
    # save figure
    plt.savefig(main_path+'/'+file.stem+'_box_plot.png')
    plt.close()

/home/paul/thesis/dev/SAY_sf_prediction_v3/scrambled_dataset/mean_length_all_percent_batch26.csv
mean_length_all_percent_batch26
/home/paul/thesis/dev/SAY_sf_prediction_v3/scrambled_dataset/count_all_percent_batch26.csv
count_all_percent_batch26


In [5]:
# read in z-score paired t-test results from all batches in main_path/stats with walk length 4
main_path = '/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/'
# main_path = '/home/paul/thesis/dev/SAY_sf_prediction_v3/scrambled_dataset/'
file_list = Path(main_path+'/stats').glob('volume_*_walk_length_4_z_score_ttest_rel_results.csv')
df = pd.DataFrame()
for file in file_list:
    print(file)
    df = pd.concat([df,pd.read_csv(file)],axis=0)
# get mean and standard deviation, max, min of t-statistic and p-value for the row where Recon 1 is DTI Node Volume Weighted Streamline Count and Recon 2 is MSMT CSD SIFT2 Node Volume Weighted Streamline Count
print('mean')
print(df[(df['Recon 1']=='DTI Node Volume Weighted Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Node Volume Weighted Streamline Count')].mean())
print('std')
print(df[(df['Recon 1']=='DTI Node Volume Weighted Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Node Volume Weighted Streamline Count')].std())
print('max')
print(df[(df['Recon 1']=='DTI Node Volume Weighted Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Node Volume Weighted Streamline Count')].max())
print('min')
print(df[(df['Recon 1']=='DTI Node Volume Weighted Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Node Volume Weighted Streamline Count')].min())

# save out mean, std, max, min to csv
df[(df['Recon 1']=='DTI Node Volume Weighted Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Node Volume Weighted Streamline Count')].mean().to_csv(main_path+'/stats/walk_4_volume_weighted_paired_ttest_results_mean.csv')
df[(df['Recon 1']=='DTI Node Volume Weighted Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Node Volume Weighted Streamline Count')].std().to_csv(main_path+'/stats/walk_4_volume_weighted_paired_ttest_results_std.csv')
df[(df['Recon 1']=='DTI Node Volume Weighted Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Node Volume Weighted Streamline Count')].max().to_csv(main_path+'/stats/walk_4_volume_weighted_paired_ttest_results_max.csv')
df[(df['Recon 1']=='DTI Node Volume Weighted Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Node Volume Weighted Streamline Count')].min().to_csv(main_path+'/stats/walk_4_volume_weighted_paired_ttest_results_min.csv')



/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/volume_weighted_all_percent_batch60_walk_length_4_z_score_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/volume_weighted_all_percent_batch14_walk_length_4_z_score_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/volume_weighted_all_percent_batch85_walk_length_4_z_score_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/volume_weighted_all_percent_batch26_walk_length_4_z_score_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/volume_weighted_all_percent_batch53_walk_length_4_z_score_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/volume_weighted_all_percent_batch77_walk_length_4_z_score_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/volume_weighted_all_percent_batch25_walk_length_4_z_score_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v

  print(df[(df['Recon 1']=='DTI Node Volume Weighted Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Node Volume Weighted Streamline Count')].mean())
  print(df[(df['Recon 1']=='DTI Node Volume Weighted Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Node Volume Weighted Streamline Count')].std())
  df[(df['Recon 1']=='DTI Node Volume Weighted Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Node Volume Weighted Streamline Count')].mean().to_csv(main_path+'/stats/walk_4_volume_weighted_paired_ttest_results_mean.csv')
  df[(df['Recon 1']=='DTI Node Volume Weighted Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Node Volume Weighted Streamline Count')].std().to_csv(main_path+'/stats/walk_4_volume_weighted_paired_ttest_results_std.csv')


In [6]:
# read in z-score paired t-test results from all batches in main_path/stats with walk length 4
main_path = '/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/'
# main_path = '/home/paul/thesis/dev/SAY_sf_prediction_v3/scrambled_dataset/'
file_list = Path(main_path+'/stats').glob('count_*_walk_length_4_z_score_ttest_rel_results.csv')
df = pd.DataFrame()
for file in file_list:
    print(file)
    df = pd.concat([df,pd.read_csv(file)],axis=0)
# get mean and standard deviation, max, min of t-statistic and p-value for the row where Recon 1 is DTI Node Volume Weighted Streamline Count and Recon 2 is MSMT CSD SIFT2 Node Volume Weighted Streamline Count
print('mean')
print(df[(df['Recon 1']=='DTI Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Streamline Count')].mean())
print('std')
print(df[(df['Recon 1']=='DTI Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Streamline Count')].std())
print('max')
print(df[(df['Recon 1']=='DTI Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Streamline Count')].max())
print('min')
print(df[(df['Recon 1']=='DTI Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Streamline Count')].min())

# save out mean, std, max, min to csv
df[(df['Recon 1']=='DTI Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Streamline Count')].mean().to_csv(main_path+'/stats/walk_4_count_paired_ttest_results_mean.csv')
df[(df['Recon 1']=='DTI Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Streamline Count')].std().to_csv(main_path+'/stats/walk_4_count_paired_ttest_results_std.csv')
df[(df['Recon 1']=='DTI Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Streamline Count')].max().to_csv(main_path+'/stats/walk_4_count_paired_ttest_results_max.csv')
df[(df['Recon 1']=='DTI Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Streamline Count')].min().to_csv(main_path+'/stats/walk_4_count_paired_ttest_results_min.csv')

/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/count_all_percent_batch41_walk_length_4_z_score_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/count_all_percent_batch91_walk_length_4_z_score_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/count_all_percent_batch11_walk_length_4_z_score_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/count_all_percent_batch65_walk_length_4_z_score_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/count_all_percent_batch29_walk_length_4_z_score_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/count_all_percent_batch90_walk_length_4_z_score_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/count_all_percent_batch55_walk_length_4_z_score_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/count_all_percent_batch63_walk_length_4_z_score_ttest_

  print(df[(df['Recon 1']=='DTI Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Streamline Count')].mean())
  print(df[(df['Recon 1']=='DTI Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Streamline Count')].std())
  df[(df['Recon 1']=='DTI Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Streamline Count')].mean().to_csv(main_path+'/stats/walk_4_count_paired_ttest_results_mean.csv')
  df[(df['Recon 1']=='DTI Streamline Count') & (df['Recon 2']=='MSMT CSD SIFT2 Streamline Count')].std().to_csv(main_path+'/stats/walk_4_count_paired_ttest_results_std.csv')


In [7]:
# read in paired t-test results from all batches in main_path/stats with walk length 4
main_path = '/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/'
# main_path = '/home/paul/thesis/dev/SAY_sf_prediction_v3/scrambled_dataset/'
file_list = Path(main_path+'/stats').glob('mean_length_*_walk_length_4_ttest_rel_results.csv')
df = pd.DataFrame()
for file in file_list:
    print(file)
    df = pd.concat([df,pd.read_csv(file)],axis=0)
# get mean and standard deviation, max, min of t-statistic and p-value for the row where Recon 1 is DTI Mean Length and Recon 2 is MSMT CSD SIFT2 Mean Length
print('mean')
print(df[(df['Recon 2']=='DTI Mean Length') & (df['Recon 1']=='MSMT CSD Mean Length')].mean())
print('std')
print(df[(df['Recon 2']=='DTI Mean Length') & (df['Recon 1']=='MSMT CSD Mean Length')].std())
print('max')
print(df[(df['Recon 2']=='DTI Mean Length') & (df['Recon 1']=='MSMT CSD Mean Length')].max())
print('min')
print(df[(df['Recon 2']=='DTI Mean Length') & (df['Recon 1']=='MSMT CSD Mean Length')].min())

# save out mean, std, max, min to one combined csv
df[(df['Recon 2']=='DTI Mean Length') & (df['Recon 1']=='MSMT CSD Mean Length')].mean().to_csv(main_path+'/stats/walk_4_mean_length_paired_ttest_results_mean.csv')
df[(df['Recon 2']=='DTI Mean Length') & (df['Recon 1']=='MSMT CSD Mean Length')].std().to_csv(main_path+'/stats/walk_4_mean_length_paired_ttest_results_std.csv')
df[(df['Recon 2']=='DTI Mean Length') & (df['Recon 1']=='MSMT CSD Mean Length')].max().to_csv(main_path+'/stats/walk_4_mean_length_paired_ttest_results_max.csv')
df[(df['Recon 2']=='DTI Mean Length') & (df['Recon 1']=='MSMT CSD Mean Length')].min().to_csv(main_path+'/stats/walk_4_mean_length_paired_ttest_results_min.csv')

# read in mean, std, max, and min csvs and save out to one combined csv
main_path = '/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/'
# main_path = '/home/paul/thesis/dev/SAY_sf_prediction_v3/scrambled_dataset/stats/'
file_list = Path(main_path).glob('walk_4_mean_length_paired_ttest_results_*.csv')
df = pd.DataFrame()
for file in file_list:
    print(file)
    if 'mean.csv' in str(file):
        # set a column "Statistic" to "mean"
        df = pd.concat([df,pd.read_csv(file).assign(Statistic='mean')],axis=0)
    elif 'std.csv' in str(file):
        # set a column "Statistic" to "std"
        df = pd.concat([df,pd.read_csv(file).assign(Statistic='std')],axis=0)
    elif 'max.csv' in str(file):
        # set a column "Statistic" to "max"
        df = pd.concat([df,pd.read_csv(file).assign(Statistic='max')],axis=0)
    elif 'min.csv' in str(file):
        # set a column "Statistic" to "min"
        df = pd.concat([df,pd.read_csv(file).assign(Statistic='min')],axis=0)

df.to_csv(main_path+'walk_4_mean_length_paired_ttest_results.csv')

/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/mean_length_all_percent_batch68_walk_length_4_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/mean_length_all_percent_batch28_walk_length_4_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/mean_length_all_percent_batch52_walk_length_4_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/mean_length_all_percent_batch72_walk_length_4_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/mean_length_all_percent_batch53_walk_length_4_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/mean_length_all_percent_batch98_walk_length_4_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/mean_length_all_percent_batch70_walk_length_4_ttest_rel_results.csv
/home/paul/thesis/dev/SAY_sf_prediction_v3/dataset/stats/mean_length_all_percent_batch81_walk_length_4_ttest_rel_results.csv


  print(df[(df['Recon 2']=='DTI Mean Length') & (df['Recon 1']=='MSMT CSD Mean Length')].mean())
  print(df[(df['Recon 2']=='DTI Mean Length') & (df['Recon 1']=='MSMT CSD Mean Length')].std())
  df[(df['Recon 2']=='DTI Mean Length') & (df['Recon 1']=='MSMT CSD Mean Length')].mean().to_csv(main_path+'/stats/walk_4_mean_length_paired_ttest_results_mean.csv')
  df[(df['Recon 2']=='DTI Mean Length') & (df['Recon 1']=='MSMT CSD Mean Length')].std().to_csv(main_path+'/stats/walk_4_mean_length_paired_ttest_results_std.csv')
