In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from functools import reduce

In [2]:
# Load in data
bb_data = pd.read_csv('bbdata_area.csv')
bb_data.set_index('pat_id',inplace=True)

# Calculate effective diameter from area
bb_data['effective_diameter'] = bb_data['area'].apply(lambda x: np.sqrt((4 * x)/np.pi))

# Calculate percent oversizing
bb_data['percent_oversizing'] = 100*((bb_data['graft'] / bb_data['effective_diameter'])-1)

# Curvature * diameter metric
bb_data['CD'] = bb_data['effective_diameter'] * bb_data['curve']

# Group by BBH
bb_data['group'] = bb_data['bbh'].apply(lambda x: 'BB' if x >= 5 else 'NBB')

results_data_all = bb_data.copy()
results_data_all['group'] = 'All'
results_data = pd.concat([bb_data, results_data_all])


results_data.rename(columns={
                                    'bbh': 'BBH (mm)', 
                                    'curve':'Curvature (mm-1)', 
                                    'effective_diameter':'Diameter (mm)', 
                                    'percent_oversizing':'Graft Oversizing (%)',
                                    'bba': 'BBA (deg)',
                                    'bbl': 'BBL (mm)',
                                    'graft': 'Proximal Graft Diameter (mm)',
                                    'area': 'Aortic Area (mm2)'},inplace=True)
results_data_output = results_data.groupby('group').agg([np.mean, np.std]).reset_index()
results_data_output



Unnamed: 0_level_0,group,BBH (mm),BBH (mm),BBL (mm),BBL (mm),BBA (deg),BBA (deg),Proximal Graft Diameter (mm),Proximal Graft Diameter (mm),Aortic Area (mm2),Aortic Area (mm2),Curvature (mm-1),Curvature (mm-1),Diameter (mm),Diameter (mm),Graft Oversizing (%),Graft Oversizing (%),CD,CD
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
0,All,4.746377,2.687358,4.981404,4.416005,24.297597,9.946546,37.238095,5.166283,908.146843,207.53541,0.036532,0.014275,33.769589,4.08638,10.650743,11.930359,1.223223,0.487977
1,BB,6.830481,1.642831,8.352839,3.281711,28.490593,8.20095,38.727273,3.977208,975.716606,142.722109,0.041799,0.01488,35.157305,2.629332,10.856152,15.802188,1.458089,0.502663
2,NBB,2.453864,1.378984,1.272825,1.481658,19.6853,9.999964,35.6,6.003703,833.820103,247.713069,0.030738,0.011644,32.243102,4.948077,10.424794,6.223641,0.96487,0.326849


In [3]:
results_data_output
one_sigfig = ['BBL (mm)', 'BBH (mm)','Diameter (mm)','CD']
three_sigfig = ['Curvature (mm-1)']
results_temp = []
cols = ['Proximal Graft Diameter (mm)','Aortic Area (mm2)','Curvature (mm-1)', 'Diameter (mm)', 'CD', 'Graft Oversizing (%)','BBL (mm)', 'BBH (mm)', 'BBA (deg)']
for col in cols:
    fstring = "{0:.3f} ± {1:.3f}" if col in three_sigfig else ("{0:.1f} ± {1:.1f}" if col in one_sigfig else "{0:.0f} ± {1:.0f}")
    mean_std = results_data_output[col]
    mean_std['mean ± std'] = mean_std.apply(lambda x: fstring.format(x['mean'], x['std']),axis=1)
    mean_std.drop(columns=['mean', 'std'],inplace=True)
    mean_std.index = ['All', 'BB', 'NBB']
    mean_std.columns = pd.MultiIndex.from_product([[col], mean_std.columns])
    results_temp.append(mean_std)
results_data_output = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True),results_temp)
display(results_data_output)    
for feat in cols:
    bbgroup = results_data[results_data['group'] == 'BB']
    nobbgroup = results_data[results_data['group'] == 'NBB']
    s, p = stats.ttest_ind(bbgroup[feat], nobbgroup[feat])
    results_data_output.loc['p-value', feat] = ['{:.3f}'.format(p)]
results_data_output.fillna('',inplace=True)
    
results_data_output = results_data_output.transpose()
results_data_output = results_data_output.reindex(sorted(results_data_output.index.values))
display(results_data_output)
#writer = pd.ExcelWriter("/Users/maxfrohlich/Dropbox/Stanford-SJSU-Manuscript/Manuscript/Figures/bb_groups_demo.xlsx")
#results_data_output.to_excel(writer, 'sheet1')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


Unnamed: 0_level_0,Proximal Graft Diameter (mm),Aortic Area (mm2),Curvature (mm-1),Diameter (mm),CD,Graft Oversizing (%),BBL (mm),BBH (mm),BBA (deg)
Unnamed: 0_level_1,mean ± std,mean ± std,mean ± std,mean ± std,mean ± std,mean ± std,mean ± std,mean ± std,mean ± std
All,37 ± 5,908 ± 208,0.037 ± 0.014,33.8 ± 4.1,1.2 ± 0.5,11 ± 12,5.0 ± 4.4,4.7 ± 2.7,24 ± 10
BB,39 ± 4,976 ± 143,0.042 ± 0.015,35.2 ± 2.6,1.5 ± 0.5,11 ± 16,8.4 ± 3.3,6.8 ± 1.6,28 ± 8
NBB,36 ± 6,834 ± 248,0.031 ± 0.012,32.2 ± 4.9,1.0 ± 0.3,10 ± 6,1.3 ± 1.5,2.5 ± 1.4,20 ± 10


Unnamed: 0,Unnamed: 1,All,BB,NBB,p-value
Aortic Area (mm2),mean ± std,908 ± 208,976 ± 143,834 ± 248,0.12
BBA (deg),mean ± std,24 ± 10,28 ± 8,20 ± 10,0.039
BBH (mm),mean ± std,4.7 ± 2.7,6.8 ± 1.6,2.5 ± 1.4,0.0
BBL (mm),mean ± std,5.0 ± 4.4,8.4 ± 3.3,1.3 ± 1.5,0.0
CD,mean ± std,1.2 ± 0.5,1.5 ± 0.5,1.0 ± 0.3,0.016
Curvature (mm-1),mean ± std,0.037 ± 0.014,0.042 ± 0.015,0.031 ± 0.012,0.075
Diameter (mm),mean ± std,33.8 ± 4.1,35.2 ± 2.6,32.2 ± 4.9,0.104
Graft Oversizing (%),mean ± std,11 ± 12,11 ± 16,10 ± 6,0.937
Proximal Graft Diameter (mm),mean ± std,37 ± 5,39 ± 4,36 ± 6,0.172


In [4]:
bb_data
stats.pearsonr(bb_data['bbh'], bb_data['bbl'])

(0.9192792446264377, 3.943269304222449e-09)

In [11]:
results_data_corr = results_data[results_data.group == 'All']
bba_out = []
bbl_out = []
bbh_out = []
for feat in ['Proximal Graft Diameter (mm)', 'Aortic Area (mm2)', 'Curvature (mm-1)', 'Diameter (mm)', 'Graft Oversizing (%)', 'CD']:
    current_feat = results_data_corr[feat]
    bba = results_data_corr['BBA (deg)']
    bbl = results_data_corr['BBL (mm)']
    bbh = results_data_corr['BBH (mm)']
    bba_r, bba_p = stats.pearsonr(current_feat, bba)
    bbl_r, bbl_p = stats.pearsonr(current_feat, bbl)
    bbh_r, bbh_p = stats.pearsonr(current_feat, bbh)
    bba_out.append(pd.DataFrame({'r-value': [bba_r], 'p-value':[bba_p]},index=[feat]))
    bbl_out.append(pd.DataFrame({'r-value': [bbl_r], 'p-value':[bbl_p]},index=[feat]))
    bbh_out.append(pd.DataFrame({'r-value': [bbh_r], 'p-value':[bbh_p]},index=[feat]))
bbl_corr = pd.concat(bbl_out)
bba_corr = pd.concat(bba_out)
bbh_corr = pd.concat(bbh_out)
bbal_corr = pd.concat({'BBL Correlation':bbl_corr.sort_index(), 
                       'BBA Correlation': bba_corr.sort_index(),
                       'BBH Correlation': bbh_corr.sort_index()})

#bbal_corr.to_csv('/Users/maxfrohlich/Dropbox/figure_1_raw/bba_bbl.csv')
def bold_significance(val):
    fontweight = 'bold' if val<0.05 else 'normal'
    return 'font-weight: {}'.format(fontweight)

bbal_corr.style.applymap(bold_significance, subset='p-value')

Unnamed: 0,Unnamed: 1,r-value,p-value
BBA Correlation,Aortic Area (mm2),0.105167,0.650054
BBA Correlation,CD,0.735811,0.000143682
BBA Correlation,Curvature (mm-1),0.709587,0.000315058
BBA Correlation,Diameter (mm),0.115314,0.618664
BBA Correlation,Graft Oversizing (%),0.190032,0.409332
BBA Correlation,Proximal Graft Diameter (mm),0.231689,0.312229
BBH Correlation,Aortic Area (mm2),0.33324,0.1399
BBH Correlation,CD,0.581368,0.0057069
BBH Correlation,Curvature (mm-1),0.443567,0.0439975
BBH Correlation,Diameter (mm),0.354616,0.114725
