In [1]:
#MAIN LOOP - where we run the simulations to figure out statistical significance of changes

In [14]:
import matplotlib.pyplot as plt
import numpy as np
import time
import os
import netCDF4 as nc
import matplotlib.pyplot as plt
%matplotlib notebook
%load_ext autotime
from bootstrap import *

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
time: 4.35 ms


In [2]:
# Location and name of the input netCDF file holding the rainfall fields.
ferretpath = "/Users/Siwen/Desktop/ferret/bin/"
ff = "Pchina_type.nc"

# ferretpath already ends in a separator, so joining yields ferretpath + ff.
filename = os.path.join(ferretpath, ff)

# Open read-only; the handle stays open because later cells read from it.
myf = nc.Dataset(filename, mode='r')

time: 6.57 ms


In [3]:
# Pull the three rainfall fields (total, banded, local) fully into memory.
P_total, P_band, P_local = (
    myf.variables[varname][:]
    for varname in ('Pchina_total', 'Pchina_band', 'Pchina_local')
)

time: 47.7 ms


In [4]:
def smooth(P,dayrange,latrange):
    """Box-average P over a window of days (axis 0) and latitudes (axis 1).

    Parameters
    ----------
    P : array-like with at least two axes
        Axis 0 is treated as the day axis and axis 1 as the latitude axis.
    dayrange : int
        Full window width along axis 0. The half-width used is
        int((dayrange-1)/2). The window wraps around the ends of axis 0.
    latrange : int
        Full window width along axis 1. The half-width used is
        int((latrange-1)/2). The window is clipped (not wrapped) at the
        edges of axis 1, so edge points average over fewer columns.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as P holding the windowed means.
    """
    Psmooth = np.zeros(P.shape)

    ll = int((dayrange-1)/2)
    yy = int((latrange-1)/2)

    days = P.shape[0]
    lats = P.shape[1]

    for d in range(days):
        # The day window does not depend on the latitude index, so build it
        # once per day instead of once per (day, latitude) pair.
        Psample = P.take(range(d-ll, d+ll+1), mode='wrap', axis=0)

        for j in range(lats):
            # Clip the latitude window to the latitude axis. The upper bound
            # must be the number of latitudes, not the number of days,
            # otherwise the window is cut short whenever days < lats.
            Ps = Psample[:, max(0, j-yy):min(lats, j+yy+1)]
            Psmooth[d, j] = np.mean(Ps)

    return Psmooth

time: 7.25 ms


In [5]:
# Periods to compare, given as first/last year with 1-based, inclusive indices.
# Entry k of each list describes comparison k: period 1 runs p1_yr_b[k]..p1_yr_e[k],
# period 2 runs p2_yr_b[k]..p2_yr_e[k].
p1_yr_b = [1,30]
p1_yr_e = [29,43]
p2_yr_b = [30,44]
p2_yr_e = [57,57]

P_total_diff, P_band_diff, P_local_diff = [], [], []

# For every comparison, store (period-2 mean) - (period-1 mean) along axis 2
# for each rainfall type.
for i, (p1_b, p1_e, p2_b, p2_e) in enumerate(zip(p1_yr_b, p1_yr_e, p2_yr_b, p2_yr_e)):

    # convert the 1-based inclusive year range into a 0-based slice
    early = slice(p1_b-1, p1_e)
    late = slice(p2_b-1, p2_e)

    for field, out in ((P_total, P_total_diff),
                       (P_band, P_band_diff),
                       (P_local, P_local_diff)):
        late_mean = np.mean(field[:,:,late], axis=2)
        early_mean = np.mean(field[:,:,early], axis=2)
        out.append(late_mean - early_mean)

time: 21.1 ms


In [6]:
## SMOOTHING ##
# Smooth each period-difference field in time only: a 15-day window along the
# day axis. latsmth = 0 gives smooth() a latitude half-width of 0, i.e. no
# latitudinal smoothing, since the rainfall is already effectively smoothed
# in latitude.
daysmth = 15
latsmth = 0

# Apply the same smoothing to every comparison of every rainfall type.
P_total_diff_smth, P_band_diff_smth, P_local_diff_smth = (
    [smooth(diff, daysmth, latsmth) for diff in diffs]
    for diffs in (P_total_diff, P_band_diff, P_local_diff)
)

time: 3.77 s


In [None]:
# Quick-look plot of the smoothed local-rainfall difference for the second
# period comparison (list index 1); the field is transposed before plotting.
plt.pcolor(P_local_diff_smth[1].T)

In [18]:
## BOOTSTRAPPING ##
#from previous testing, determined that the autocorrelation time scale of rainfall is between
#2 and 3 days. Will test both and see effect on p-values.

#General expectation would be that longer block lengths will lead to fewer significant p-values
#(smaller effective sample size).

#daysmth is defined above
niter = 500 #how many iterations each time? (passed to bs_diff_rain and written into the file name)
blklen = 2 #block length passed to bs_diff_rain; appears in the file name as "<blklen>day"

#common prefix of every output file
filepath = "/Users/Siwen/Desktop/ferret/bin/chinarain_diff_pval_"

#define netCDF dimensions
dim_tuple = ("days","lat")

#each of these loops is very time-consuming, hence why we split them up
#TOTAL RAINFALL
for p1_b, p1_e, p2_b, p2_e in zip(p1_yr_b, p1_yr_e, p2_yr_b, p2_yr_e):

    pval = bs_diff_rain(P_total,[p1_b,p1_e],[p2_b,p2_e],daysmth,niter,blklen)

    #netCDF formatting
    #year indices are offset by 50 in the file name (presumably to give two-digit
    #calendar years - TODO confirm)
    yearsout = str(p1_b+50) + str(p1_e+50) + "_" + str(p2_b+50) + str(p2_e+50) + "_"
    fullname = filepath + yearsout + "total" + "_" + str(blklen) +"day_" + str(niter) + "iter.nc"

    #the with-block closes the file even if creating or filling the variable fails
    with nc.Dataset(fullname, "w") as fileout:
        fileout.createDimension('days', None) #unlimited
        fileout.createDimension('lat', 80) #hard-coded; must match the second axis of pval

        pval_var = fileout.createVariable("pval", 'f8', dim_tuple)
        pval_var[:] = pval







<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    dimensions(sizes): 
    variables(dimensions): 
    groups: 

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    dimensions(sizes): 
    variables(dimensions): 
    groups: 

time: 53.2 s


In [None]:
#BANDED RAINFALL
#Bootstrap p-values for the change in banded rainfall, one output file per period comparison.
for p1_b, p1_e, p2_b, p2_e in zip(p1_yr_b, p1_yr_e, p2_yr_b, p2_yr_e):

    pval = bs_diff_rain(P_band,[p1_b,p1_e],[p2_b,p2_e],daysmth,niter,blklen)

    #netCDF formatting
    #year indices are offset by 50 in the file name (presumably to give two-digit
    #calendar years - TODO confirm)
    yearsout = str(p1_b+50) + str(p1_e+50) + "_" + str(p2_b+50) + str(p2_e+50) + "_"
    fullname = filepath + yearsout + "band" + "_" + str(blklen) +"day_" + str(niter) + "iter.nc"

    #the with-block closes the file even if creating or filling the variable fails
    with nc.Dataset(fullname, "w") as fileout:
        fileout.createDimension('days', None) #unlimited
        fileout.createDimension('lat', 80) #hard-coded; must match the second axis of pval

        pval_var = fileout.createVariable("pval", 'f8', dim_tuple)
        pval_var[:] = pval

In [None]:
#LOCAL RAINFALL
#Bootstrap p-values for the change in local rainfall, one output file per period comparison.
for p1_b, p1_e, p2_b, p2_e in zip(p1_yr_b, p1_yr_e, p2_yr_b, p2_yr_e):

    pval = bs_diff_rain(P_local,[p1_b,p1_e],[p2_b,p2_e],daysmth,niter,blklen)

    #netCDF formatting
    #year indices are offset by 50 in the file name (presumably to give two-digit
    #calendar years - TODO confirm)
    yearsout = str(p1_b+50) + str(p1_e+50) + "_" + str(p2_b+50) + str(p2_e+50) + "_"
    fullname = filepath + yearsout + "local" + "_" + str(blklen) +"day_" + str(niter) + "iter.nc"

    #the with-block closes the file even if creating or filling the variable fails
    with nc.Dataset(fullname, "w") as fileout:
        fileout.createDimension('days', None) #unlimited
        fileout.createDimension('lat', 80) #hard-coded; must match the second axis of pval

        pval_var = fileout.createVariable("pval", 'f8', dim_tuple)
        pval_var[:] = pval