# Statistical Analysis

### Program to conduct 2-sample KS tests (one-sided: 'less' and 'greater' alternatives) on the data of all intensities within the 100/200/300 km buffers (or the whole NA basin) from the track files

In [55]:
import xarray as xr
import numpy as np
from scipy import stats
from tc_functions.calculations import get_all_intdist, get_total_storms

#configuration: alternative hypotheses, buffer distances and the paired
#past/future model names, plus the model count and the output file name

alt_hyp = ['less', 'greater']
distance = ['100km', '200km', '300km', 'NA.storms']
model_p = ['REF.COMB', 'REF.COMB', 'HadGEM3-GC31-HH(19852014)', 'CNRM-CM6-1-HR(19852014)']
model_f = ['RCP45.COMB', 'RCP85.COMB', 'HadGEM3-GC31-HH(20212050)', 'CNRM-CM6-1-HR(20212050)']
num_models = len(model_p)
output_file = 'stat_results/kstest_allintdist.txt'

for dist in distance:
    #model_p[n] is always compared against model_f[n]
    for past_model, future_model in zip(model_p, model_f):
        #'NA.storms' selects the un-buffered files; every other entry
        #selects the files inside that distance's analysis folder
        if dist != 'NA.storms':
            tf_past = f'{dist}_analysis(noTD)/{past_model}.NA.landfalling.storms.{dist}.buffer.pts.nc'
            tf_future = f'{dist}_analysis(noTD)/{future_model}.NA.landfalling.storms.{dist}.buffer.pts.nc'
        else:
            tf_past = f'{past_model}.NA.storms.nc'
            tf_future = f'{future_model}.NA.storms.nc'

        #storm counts are only reported, the KS test runs on the distributions
        nstorms_p = get_total_storms(tf_past)
        nstorms_f = get_total_storms(tf_future)
        all_intdist_p = get_all_intdist(tf_past)     #past sample
        all_intdist_f = get_all_intdist(tf_future)   #future sample

        #one 2 sample KS test per alternative hypothesis
        for alternative in alt_hyp:
            statistic, pvalue = stats.ks_2samp(all_intdist_p, all_intdist_f, alternative=alternative)

            #assemble the whole report entry first, then append it in one go
            report = [
                f'2 Sample KS test on the all intensity distributions from the files (alt = {alternative}): \n',
                f'{tf_past}: num storms = {nstorms_p} \n',
                f'{tf_future}: num storms = {nstorms_f} \n',
                f'statistic = {statistic} \n',
                f'p value = {pvalue} \n',
            ]
            if pvalue < 0.05:                        #flag results below the 5% level
                report.append('STATISTICALLY SIGNIFICANT DIFFERENCE \n')
            report.append('\n')                      #blank line between entries

            with open(output_file, 'a') as f:        #append so earlier entries are kept
                f.writelines(report)

### Program to conduct 2-sample KS tests (one-sided: 'less' and 'greater' alternatives) on the data of max intensities within the 100/200/300 km buffers (or the whole NA basin) from the track files

In [7]:
import xarray as xr
import numpy as np
from scipy import stats
from tc_functions.calculations import get_max_intdist

#configuration: alternative hypotheses, buffer distances and the paired
#past/future model names, plus the model count and the output file name

alt_hyp = ['less', 'greater']
distance = ['100km', '200km', '300km', 'NA.storms']
model_p = ['REF.COMB', 'REF.COMB', 'HadGEM3-GC31-HH(19852014)', 'CNRM-CM6-1-HR(19852014)']
model_f = ['RCP45.COMB', 'RCP85.COMB', 'HadGEM3-GC31-HH(20212050)', 'CNRM-CM6-1-HR(20212050)']
num_models = len(model_p)
output_file = 'stat_results/kstest_maxintdist.txt'

for dist in distance:
    #model_p[n] is always compared against model_f[n]
    for past_model, future_model in zip(model_p, model_f):
        #'NA.storms' selects the un-buffered files; every other entry
        #selects the files inside that distance's analysis folder
        if dist != 'NA.storms':
            tf_past = f'{dist}_analysis(noTD)/{past_model}.NA.landfalling.storms.{dist}.buffer.pts.nc'
            tf_future = f'{dist}_analysis(noTD)/{future_model}.NA.landfalling.storms.{dist}.buffer.pts.nc'
        else:
            tf_past = f'{past_model}.NA.storms.nc'
            tf_future = f'{future_model}.NA.storms.nc'

        max_intdist_p = get_max_intdist(tf_past)     #past sample
        max_intdist_f = get_max_intdist(tf_future)   #future sample

        #one 2 sample KS test per alternative hypothesis
        for alternative in alt_hyp:
            statistic, pvalue = stats.ks_2samp(max_intdist_p, max_intdist_f, alternative=alternative)

            #assemble the whole report entry first, then append it in one go;
            #the sample lengths are what gets reported as the storm counts
            report = [
                f'2 Sample KS test on the max intensity distributions from the files (alt = {alternative}): \n',
                f'{tf_past}: num storms = {len(max_intdist_p)} \n',
                f'{tf_future}: num storms = {len(max_intdist_f)} \n',
                f'statistic = {statistic} \n',
                f'p value = {pvalue} \n',
            ]
            if pvalue < 0.05:                        #flag results below the 5% level
                report.append('STATISTICALLY SIGNIFICANT DIFFERENCE \n')
            report.append('\n')                      #blank line between entries

            with open(output_file, 'a') as f:        #append so earlier entries are kept
                f.writelines(report)

### Program to conduct 2-sample KS tests (one-sided: 'less' and 'greater' alternatives) on the data of average intensities within the 100/200/300 km buffers (or the whole NA basin) from the track files

In [6]:
import xarray as xr
import numpy as np
from scipy import stats
from tc_functions.calculations import get_avg_intdist

#configuration: alternative hypotheses, buffer distances and the paired
#past/future model names, plus the model count and the output file name

alt_hyp = ['less', 'greater']
distance = ['100km', '200km', '300km', 'NA.storms']
model_p = ['REF.COMB', 'REF.COMB', 'HadGEM3-GC31-HH(19852014)', 'CNRM-CM6-1-HR(19852014)']
model_f = ['RCP45.COMB', 'RCP85.COMB', 'HadGEM3-GC31-HH(20212050)', 'CNRM-CM6-1-HR(20212050)']
num_models = len(model_p)
output_file = 'stat_results/kstest_avgintdist.txt'

for dist in distance:
    #model_p[n] is always compared against model_f[n]
    for past_model, future_model in zip(model_p, model_f):
        #'NA.storms' selects the un-buffered files; every other entry
        #selects the files inside that distance's analysis folder
        if dist != 'NA.storms':
            tf_past = f'{dist}_analysis(noTD)/{past_model}.NA.landfalling.storms.{dist}.buffer.pts.nc'
            tf_future = f'{dist}_analysis(noTD)/{future_model}.NA.landfalling.storms.{dist}.buffer.pts.nc'
        else:
            tf_past = f'{past_model}.NA.storms.nc'
            tf_future = f'{future_model}.NA.storms.nc'

        avg_intdist_p = get_avg_intdist(tf_past)     #past sample
        avg_intdist_f = get_avg_intdist(tf_future)   #future sample

        #one 2 sample KS test per alternative hypothesis
        for alternative in alt_hyp:
            statistic, pvalue = stats.ks_2samp(avg_intdist_p, avg_intdist_f, alternative=alternative)

            #assemble the whole report entry first, then append it in one go;
            #the sample lengths are what gets reported as the storm counts
            report = [
                f'2 Sample KS test on the avg intensity distributions from the files (alt = {alternative}): \n',
                f'{tf_past}: num storms = {len(avg_intdist_p)} \n',
                f'{tf_future}: num storms = {len(avg_intdist_f)} \n',
                f'statistic = {statistic} \n',
                f'p value = {pvalue} \n',
            ]
            if pvalue < 0.05:                        #flag results below the 5% level
                report.append('STATISTICALLY SIGNIFICANT DIFFERENCE \n')
            report.append('\n')                      #blank line between entries

            with open(output_file, 'a') as f:        #append so earlier entries are kept
                f.writelines(report)

### Program to conduct 2 sample KS tests on the translation speed distributions of two specific files

In [None]:
import xarray as xr
import numpy as np
import math
from scipy import stats
from tc_functions.calculations import get_6hr_tsdist

#configuration: alternative hypotheses, buffer distances and the paired
#past/future model names, plus the model count and the output file name

alt_hyp = ['less', 'greater']
distance = ['100km', '200km', '300km', 'NA.storms']
model_p = ['REF.COMB', 'REF.COMB', 'HadGEM3-GC31-HH(19852014)', 'CNRM-CM6-1-HR(19852014)']
model_f = ['RCP45.COMB', 'RCP85.COMB', 'HadGEM3-GC31-HH(20212050)', 'CNRM-CM6-1-HR(20212050)']
num_models = len(model_p)
output_file = 'stat_results/kstest_tsdist.txt'

for dist in distance:
    #model_p[n] is always compared against model_f[n]
    for past_model, future_model in zip(model_p, model_f):
        #'NA.storms' selects the un-buffered files; every other entry
        #selects the files inside that distance's analysis folder
        if dist != 'NA.storms':
            tf_past = f'{dist}_analysis(noTD)/{past_model}.NA.landfalling.storms.{dist}.buffer.pts.nc'
            tf_future = f'{dist}_analysis(noTD)/{future_model}.NA.landfalling.storms.{dist}.buffer.pts.nc'
        else:
            tf_past = f'{past_model}.NA.storms.nc'
            tf_future = f'{future_model}.NA.storms.nc'

        ts_dist_p = get_6hr_tsdist(tf_past)      #past sample
        ts_dist_f = get_6hr_tsdist(tf_future)    #future sample

        #one 2 sample KS test per alternative hypothesis
        for alternative in alt_hyp:
            statistic, pvalue = stats.ks_2samp(ts_dist_p, ts_dist_f, alternative=alternative)

            #assemble the whole report entry first, then append it in one go;
            #the sample lengths are reported as the number of data points
            report = [
                f'2 Sample KS test on the ts distributions from the files (alt = {alternative}): \n',
                f'{tf_past}: num data points = {len(ts_dist_p)} \n',
                f'{tf_future}: num data points = {len(ts_dist_f)} \n',
                f'statistic = {statistic} \n',
                f'p value = {pvalue} \n',
            ]
            if pvalue < 0.05:                    #flag results below the 5% level
                report.append('STATISTICALLY SIGNIFICANT DIFFERENCE \n')
            report.append('\n')                  #blank line between entries

            with open(output_file, 'a') as f:    #append so earlier entries are kept
                f.writelines(report)

### Program to plot distributions from two files to confirm results from a KS test

In [None]:
import numpy as np
import xarray as xr
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

#get_max_intdist is otherwise only imported by the max-intensity KS cell;
#import it here too so this cell also runs on its own in a fresh kernel
from tc_functions.calculations import get_max_intdist

#track files to compare: reference run vs. RCP8.5 run (un-buffered NA files)
tf1 = 'REF.COMB.NA.storms.nc'
tf2 = 'RCP85.COMB.NA.storms.nc'

res1 = get_max_intdist(tf1)   #max intensity distribution of the REF file
res2 = get_max_intdist(tf2)   #max intensity distribution of the RCP8.5 file

#optional sanity checks on the plotted samples
#(the KS lines need `from scipy import stats` when re-enabled)
#print(max(res1), min(res1))
#print(max(res2), min(res2))

#statistic, pvalue = stats.ks_2samp(res1, res2, alternative='greater')
#print(statistic, pvalue)

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12,7)) #initialize figure and axes

bin_intervals = np.arange(0, 110, 1)       #bin edges 0, 1, ..., 109

#cumulative=True together with density=True draws each sample as a
#normalized cumulative histogram, so the two step curves can be compared
#like the empirical CDFs a KS test looks at
#label fixed: res2 comes from the RCP85 file, so it is RCP8.5 (was 'RCP4.5')
ax.hist(res2, bins=bin_intervals, cumulative=True, density=True, histtype = 'step',
        color='orange', linewidth=2, label='RCP8.5')
ax.hist(res1, bins=bin_intervals, cumulative=True, density=True, histtype = 'step',
        color='b', linewidth=2, label='REF')

fontsize = 12
ax.set_title('REF/RCP8.5 max int dist cdf (entire NA basin)', fontsize=fontsize)  #set specific titles for each axis
ax.set_ylabel('Probability', fontsize=fontsize)
ax.set_xlabel('Wind Speed (m/s)', fontsize=fontsize)
ax.legend(loc='upper left')

plt.savefig("A_NAtest_refrcp85_maxcdf.png")           #save and close figure
plt.close(fig)