A remake of the bar-and-whisker plot of statistical significance for the RDA paper. It is based on RDA_bars_with_whiskers, but has been rewritten in a cleaner style and takes advantage of more efficient data loading.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Read the statistics table and sort both the row index and the columns in one
# chain, so `results_df` is bound exactly once. The plotting cell slices the
# columns with `slice(None)`, which pandas only supports on a sorted MultiIndex.
data_file = 'Data/decadal_stats.hdf'
results_df = (
    pd.read_hdf(data_file)
    .sort_index()
    .sort_index(axis=1)
)

In [3]:
# Display the sorted table: rows are indexed by (Metric, Stat) and columns by
# (Period, Years), as shown in the output below.
results_df

Unnamed: 0_level_0,Period,all,all,fall,fall,meiyu,meiyu,post-meiyu,post-meiyu,pre-meiyu,pre-meiyu,spring,spring
Unnamed: 0_level_1,Years,8007_5179,9407_8093,8007_5179,9407_8093,8007_5179,9407_8093,8007_5179,9407_8093,8007_5179,9407_8093,8007_5179,9407_8093
Metric,Stat,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
frequency,diff_p2p1,-0.00489237,0.000782779,0.0138612,-0.00759878,-0.0243227,-0.00357143,-0.000607328,0.0136986,-0.0601293,0.00178571,0.0119317,-0.0433255
frequency,mean_p1,0.386301,0.381018,0.260455,0.278116,0.668966,0.646429,0.421351,0.413894,0.591379,0.530357,0.465235,0.498829
frequency,mean_p2,0.381409,0.3818,0.274316,0.270517,0.644643,0.642857,0.420744,0.427593,0.53125,0.532143,0.477166,0.455504
frequency,pval,0.234094,0.532461,0.791132,0.378688,0.110641,0.450316,0.484179,0.734773,0.00188095,0.523874,0.759492,0.0363975
frequency,std_p1,0.00473255,0.00679362,0.0118878,0.0174676,0.0138169,0.0202025,0.0107317,0.0154066,0.0144333,0.0210899,0.0118592,0.0171096
frequency,std_p2,0.00480476,0.00679629,0.012299,0.0173178,0.0143015,0.0202481,0.0109195,0.0154754,0.0149111,0.0210852,0.0120857,0.0170418
intensity,diff_p2p1,0.36322,0.674041,0.753374,1.39691,0.315293,2.66856,0.184915,-0.410096,0.362703,1.32048,0.828424,0.0227796
intensity,mean_p1,23.2812,23.307,20.2594,20.3311,28.1122,27.0932,25.459,25.8524,25.392,25.0903,19.7509,20.5685
intensity,mean_p2,23.6445,23.981,21.0128,21.728,28.4275,29.7618,25.6439,25.4423,25.7547,26.4108,20.5794,20.5913
intensity,pval,0.945905,0.978802,0.856614,0.908909,0.725827,0.9998,0.639436,0.276672,0.755924,0.951505,0.983802,0.519848


In [4]:
# Custom helper that adds overbars (brackets) marking the time periods being
# compared and, on top of those, a p-value for the significance of the change.
def _format_pval(pval):
    """Return ``(label, font_kwargs)`` for one p-value annotation.

    Values below .001 / above .999 are clipped to 'p<.001' / 'p>.999' and set
    in bold italic; other values in the two-sided 5% tails (< .025 or > .975)
    are printed rounded to three decimals in bold; everything else is printed
    rounded to three decimals in the default weight.
    """
    if pval < .001:
        return 'p<.001', {'weight': 'bold', 'style': 'italic'}
    if pval > .999:
        return 'p>.999', {'weight': 'bold', 'style': 'italic'}
    label = 'p=' + str(round(pval, 3))
    if pval < .025 or pval > .975:
        return label, {'weight': 'bold'}
    return label, {}


def _draw_comparison(ax, x_left, x_right, y_base, yhgt, pval, arrowstyle, apex_frac):
    """Draw two brackets at ``x_left``/``x_right``, join them, and label the p-value.

    ``yhgt`` is the y-axis span used to scale all vertical offsets;
    ``arrowstyle`` is the matplotlib bracket style string; ``apex_frac`` is the
    height (as a fraction of ``yhgt`` above ``y_base``) where the two
    connecting lines meet.
    """
    x_mid = (x_left + x_right) / 2

    # The annotation text is white: only the bracket drawn by the "arrow" is
    # meant to be seen.
    for tag, x in (("a", x_left), ("b", x_right)):
        ax.annotate(tag, color='White', xy=(x, y_base), xytext=(x, y_base + .02*yhgt),
            fontsize=15, ha='center', va='bottom',
            arrowprops=dict(arrowstyle=arrowstyle, lw=1.0))

    # Connecting lines from each bracket up to a common apex.
    for x in (x_left, x_right):
        ax.plot([x, x_mid], [y_base + .008*yhgt, y_base + apex_frac*yhgt], 'k-', lw=1)

    label, font = _format_pval(pval)
    ax.text(x_mid, y_base + .06*yhgt, label, horizontalalignment='center',
            fontsize=14, **font)


def addpvals(fig,ax,rects,errs,pvals):
    """Annotate every bar group on ``ax`` with two bracketed p-values.

    Parameters
    ----------
    fig : unused; kept so existing calls keep working.
    ax : axes-like object to draw on. Only ``get_ylim``, ``annotate``, ``plot``
        and ``text`` are called, and everything is drawn on this object rather
        than on pyplot's current axes.
    rects : iterable with one entry per bar group; each entry is the sequence
        of bars in that group, in left-to-right order (for per-series bar
        containers, pass ``zip(*containers)``).
    errs : iterable with one entry per bar group; each entry holds the error
        bar size for each bar of the group, aligned with ``rects``.
    pvals : iterable with one entry per bar group; each entry is a pair
        ``(pval1, pval2)`` for the lower and the upper comparison.

    The lower comparison ('51-'79 v '80-'07 in the original notes) sits just
    above the tallest bar-plus-error of the group; the upper one
    ('80-'93 v '94-'07) sits 13% of the y-axis span higher.
    """
    # Read the axis span once so every group uses the same vertical offsets,
    # even if drawing changes the limits along the way.
    y_lo, y_hi = ax.get_ylim()
    yhgt = y_hi - y_lo

    for group_rects, group_errs, group_pvals in zip(rects, errs, pvals):
        group_rects = tuple(group_rects)
        height = max(rect.get_height() + err
                     for rect, err in zip(group_rects, group_errs))
        pval1, pval2 = group_pvals

        # NOTE(review): the x offsets below are in data units measured from the
        # left edge of the group's first bar; they appear to assume bars of
        # width .35, .175, .175 drawn edge-to-edge -- confirm against the
        # plotting cell if the widths change.
        left = group_rects[0].get_x()
        y = height + .01*yhgt
        yy = y + .13*yhgt

        #'51-'79 v '80-'07
        _draw_comparison(ax, left + .175, left + .525, y, yhgt, pval1,
                         '-[, widthB=1.25, lengthB=.5', .04)

        #'80-'93 v '94-'07
        _draw_comparison(ax, left + .4375, left + .6125, yy, yhgt, pval2,
                         '-[, widthB=.6, lengthB=.5', .03)

In [5]:
# Layout constants for the grouped bar charts.
N = 6                            # number of data entries (bar groups)
ind = np.arange(N)               # x location of each group: 0, 1, ..., N-1
width1, width2 = 0.35, 0.175     # bar widths: wide bar, narrow bar

In [12]:
# One panel per metric, stacked vertically, so metrics with very different
# scales do not share a y-axis.
metrics = ['frequency','latitude','intensity']
fig, axes = plt.subplots(len(metrics), 1, figsize = (12,18))
ax1 = axes[0]                   # first (frequency) panel; used by the tweaks below

rects = []
colors = ['MediumSlateBlue','Tomato','Gold']
my_error_kw={'ecolor':'Black','linewidth':2}

# Each bar series is described by one entry in each of the lists below:
# which period's statistics to read ('mean_p1', 'std_p1', ...), the bar width,
# the x positions, and the Years column tag to select.
periods = ['p1','p1','p2']
widths = [width1, width2, width2]
xpositions = (ind,ind+width1,ind+width1+width2)
year_tags = ['8007_5179','9407_8093','9407_8093']

for ax, met in zip(axes, metrics):
    sub_rects = []

    for (color, p, width, xpos, yr_tag) in zip(colors, periods, widths, xpositions,year_tags):
        vals = results_df.loc[(met,'mean_'+p),(slice(None),yr_tag)].values
        errs = results_df.loc[(met,'std_'+p),(slice(None),yr_tag)].values
        print(vals)
        print(errs)
        # Use this series' own x positions and colour. align='edge' makes xpos
        # the left edge of each bar, so the three bars of a group sit
        # side by side (matplotlib centres bars on x by default).
        sub_rects.append(ax.bar(xpos, vals, width, color=color, yerr=errs,
                                error_kw=my_error_kw, align='edge'))

    rects.append(sub_rects)

# OPTIONAL: Change axes limits if necessary
ax1.set_ylim([0, .94])
for ax in axes:
    # Every panel uses the same group positions, so give them the same x range.
    ax.set_xlim([-.175,5.875])

#addpvals(fig,ax1,rects,)
#         f1_5179_std,f1_8093_std,f1_9407_std,f1_5179_8007_diff,f1_8093_9407_diff)

#ax1.set_ylabel('Frequency', fontsize=14)
#ax1.set_title('a) Primary Rainband Frequency', fontsize=16)
#ax1.set_xticks(ind + width1)
# NOTE(review): results_df columns are sorted alphabetically (all, fall, meiyu,
# post-meiyu, pre-meiyu, spring), which does not match the label order below --
# reorder the data or the labels before enabling this line.
#ax1.set_xticklabels(('Spring', 'Pre-Meiyu', 'Meiyu', 'Post-Meiyu', 'Fall','Full Year'), fontsize=14)

#ax1.legend((rects[0][0], rects[1][0], rects[2][0]), ("'51-'79", "'80-'93", "'94-'07"))

#fig.show()

[0.3863013698630137 0.260454878943507 0.6689655172413793 0.4213509683514407
 0.5913793103448276 0.4652345958168457]
[0.004732550641004436 0.011887777250868767 0.013816864195342799
 0.010731716330114853 0.014433253678897206 0.011859154756504033]
[0.3810176125244618 0.27811550151975684 0.6464285714285715
 0.41389432485322897 0.5303571428571429 0.49882903981264637]
[0.006793619538405638 0.017467610901295116 0.020202487181983125
 0.015406615751405839 0.021089877559996095 0.017109600850954094]
[0.3818003913894325 0.270516717325228 0.6428571428571429
 0.42759295499021527 0.5321428571428571 0.45550351288056207]
[0.006796293057235361 0.017317762503711923 0.02024809676835146
 0.015475415129092993 0.02108515205129522 0.017041761030169463]
[28.734931168945391 29.098469183141315 29.603522256948533
 30.192827126965451 27.498463999419254 27.609162664139919]
[0.064696076602639818 0.20736285780621891 0.15470992701327804
 0.16756188552671861 0.13047697504640665 0.099626976771838724]
[28.567487179460795