In [1]:
## Non-parametric calculation of the significance of changes in intensity for intensity Hovmoller plots
# based in large part on china_rain_diff.ipynb, and previous library of bootstrap codes (bootstrap.py)

In [2]:
import math
import matplotlib.pyplot as plt
import numpy as np
import time
import os
import netCDF4 as nc
import matplotlib.pyplot as plt
%matplotlib notebook
%load_ext autotime
from bootstrap import bs_means_diff, collect
from shutil import copyfile
import numpy.ma as ma

In [3]:
# Open (read-only) the NetCDF file holding the intensity Hovmoller fields.
# `file1` and `hovf` are reused by the bootstrapping cells further down.
ferretpath = "/Users/Siwen/Desktop/ferret/bin/"
hovfile = "RDA_int_hov.nc"
file1 = os.path.join(ferretpath, hovfile)
hovf = nc.Dataset(file1, mode='r')

# The Hovmoller climatology (mean, standard deviation and standard deviation of the
# sample mean) is currently not loaded; the disabled code is kept for reference.
#climofile = "RDA_int_climo.nc"
#file2 = ferretpath + climofile
#climof = nc.Dataset(file2, 'r')

time: 9.47 ms


In [5]:
## BOOTSTRAPPING ##
# From previous testing, the autocorrelation time scale of rainfall was determined to be
# between 2 and 3 days. Will test both and see the effect on p-values.
#
# General expectation: longer block lengths will lead to fewer significant p-values
# (smaller effective sample size).
#
# REWRITTEN SEPTEMBER 26, 2016: the previous version had to run for potentially days at a
# time before anything was saved.
# NOTE(review): as written, each p-value variable is assigned to the output file once the
# whole grid for its period pair has been computed (not row by row).
# NOTE(review): an earlier note here said "daysmth is defined above"; nothing in this cell
# uses that name.

niter = 2000 #how many iterations each time?

# Period pairs to compare, matched by position: p1 = [p1_beg, p1_end], p2 = [p2_beg, p2_end].
p1_beg = [1951,1980]
p1_end = [1979,1993]
p2_beg = [1980,1994]
p2_end = [2007,2007]

dayrange = 15 #what range of days are we smoothing over?
latrange = 5 #what range of latitude are we smoothing over? (1 degree increments)

save_path = "RDA_int_diff_bootstrap_" + str(niter) + "iter_perm.nc"
main_folder = "/Users/Siwen/RDA/Data/"
main_save = main_folder + save_path
backup_folder = "/Users/Siwen/Desktop/Ferret/bin/"
backup_save = backup_folder + save_path

# Start from a clean slate: drop any output/backup left over from an earlier run.
if os.path.isfile(main_save):
    os.remove(main_save)

if os.path.isfile(backup_save):
    os.remove(backup_save)

fileout = nc.Dataset(main_save, "w")

try:
    days_dim = fileout.createDimension('time', None)
    # NOTE(review): the latitude size is hard-coded; it presumably has to match
    # int_p1.shape[1] of the Hovmoller fields loaded below - confirm.
    lat_dim = fileout.createDimension('lat', 22)
    dim_tuple = ("time","lat")

    ## PRIMARY FRONTS ONLY ##
    # Actual bootstrapping loop - loads data from the periods of interest, then performs
    # the bootstrapping calculations.
    for p1_b, p1_e, p2_b, p2_e in zip(p1_beg, p1_end, p2_beg, p2_end):

        # Period labels are two-digit start year + two-digit end year, e.g. 1951-1979 -> "5179".
        years_p1= str(p1_b-1900) + str("{0:0=2d}".format((p1_e-1900)%100))
        print(years_p1)
        years_p2= str(p2_b-1900) + str("{0:0=2d}".format((p2_e-1900)%100))
        print(years_p2)
        years = years_p2 + '_' + years_p1

        int_p1 = hovf['int_1_hov_' + years_p1][:]
        int_p2 = hovf['int_1_hov_' + years_p2][:]

        # Pre-fill with NaN so cells that cannot be tested stay NaN (and get masked below).
        pval = np.full((int_p1.shape[0], int_p1.shape[1]), np.nan)

        #BOOTSTRAPPING CALCULATION OF P-VALUE OF INTENSITY CHANGES BETWEEN TIME PERIODS
        for i in range(int_p1.shape[0]):

            print(i)

            for j in range(int_p1.shape[1]):

                s1 = collect(int_p1, i,j, dayrange, latrange)
                s2 = collect(int_p2, i,j, dayrange, latrange)

                # Only test cells where both periods contributed at least one sample.
                if len(s1) > 0 and len(s2) > 0:
                    #pval[i,j] = bs_means_diff(s2, s1, niter)[1]
                    pval[i,j] = bs_means_diff(s2, s1, niter, method='perm')[1]

        ## SAVE OUTPUT AS NETCDF FILE
        #create variables inside netCDF file
        pval_out_int_1 = fileout.createVariable("int_1_pval_" + years, 'f8', dim_tuple)
        pval_out_int_1[:] = ma.array(pval, mask = np.isnan(pval))
finally:
    # Close both datasets even if the (long) loop above fails part-way through.
    fileout.close()
    hovf.close()

# Back the file up in case of corruption. This is done only after closing the dataset so
# the copy is taken from a file whose contents have been written out.
copyfile(main_save, backup_save)

SyntaxError: positional argument follows keyword argument (<ipython-input-5-b2eec625f93b>, line 67)

In [None]:
## ALL FRONTS ##
# Actual bootstrapping loop - loads data from the periods of interest, then performs the
# bootstrapping calculations. Relies on the settings defined in the bootstrapping cell
# above (niter, p1_beg/p1_end/p2_beg/p2_end, dayrange, latrange, main_save, backup_save,
# dim_tuple) and on file1 from the loading cell.

# Append to the output file and re-open the Hovmoller input.
fileout = nc.Dataset(main_save, "a")
hovf = nc.Dataset(file1, 'r')

try:
    for p1_b, p1_e, p2_b, p2_e in zip(p1_beg, p1_end, p2_beg, p2_end):

        # Period labels are two-digit start year + two-digit end year, e.g. 1951-1979 -> "5179".
        years_p1= str(p1_b-1900) + str("{0:0=2d}".format((p1_e-1900)%100))
        print(years_p1)
        years_p2= str(p2_b-1900) + str("{0:0=2d}".format((p2_e-1900)%100))
        print(years_p2)
        years = years_p2 + '_' + years_p1

        int_p1 = hovf['int_hov_' + years_p1][:]
        int_p2 = hovf['int_hov_' + years_p2][:]

        # Pre-fill with NaN so cells that cannot be tested stay NaN (and get masked below).
        pval = np.full((int_p1.shape[0], int_p1.shape[1]), np.nan)

        #BOOTSTRAPPING CALCULATION OF P-VALUE OF INTENSITY CHANGES BETWEEN TIME PERIODS
        for i in range(int_p1.shape[0]):

            print(i)

            for j in range(int_p1.shape[1]):

                # Use the shared smoothing window (dayrange, latrange) rather than
                # repeating its literal values here.
                s1 = collect(int_p1, i,j, dayrange, latrange)
                s2 = collect(int_p2, i,j, dayrange, latrange)

                # Only test cells where both periods contributed at least one sample.
                if len(s1) > 0 and len(s2) > 0:
                    #pval[i,j] = bs_means_diff(s2, s1, niter)[1]
                    pval[i,j] = bs_means_diff(s2, s1, niter, method='perm')[1]

        ## SAVE OUTPUT AS NETCDF FILE
        # The file is opened in append mode, so a previous run of this cell may already
        # have created the variable; reuse it in that case instead of creating it again.
        var_name = "int_pval_" + years
        if var_name in fileout.variables:
            pval_out_int = fileout.variables[var_name]
        else:
            pval_out_int = fileout.createVariable(var_name, 'f8', dim_tuple)
        pval_out_int[:] = ma.array(pval, mask = np.isnan(pval))
finally:
    # Close both datasets even if the (long) loop above fails part-way through.
    fileout.close()
    hovf.close()

# Back the file up in case of corruption. This is done only after closing the dataset so
# the copy is taken from a file whose contents have been written out.
copyfile(main_save, backup_save)