In [1]:
#quick script to find the autocorrelation in time of front frequency for each 1 degree latitude bin
#without accounting for autocorrelation, changes in frequency will be found to be more significant
#than they actually are

#a bunch of this code is copied over from RDA_hov_periods.nc and slightly adapted.

In [1]:
import numpy as np
import time
import os
import netCDF4 as nc
import datetime
import matplotlib.pyplot as plt
from autocorr import autocorr
%matplotlib notebook

In [2]:
#Access NetCDF files (currently hosted in Ferret directory)
RDA_path_1 = "/Users/Siwen/Desktop/ferret/bin/meiyu_clean.nc"
RDA_path_2 = "/Users/Siwen/Desktop/ferret/bin/meiyu_2_clean.nc"
RDA_1 = nc.Dataset(RDA_path_1, 'r')
RDA_2 = nc.Dataset(RDA_path_2, 'r')

In [3]:
#load data from NetCDF files to notebook
lat_1_all =  RDA_1.variables['lat_115'][:]
lat_2_all =  RDA_2.variables['lat_115'][:]
intensity_1_all = RDA_1.variables['intensity'][:] 
intensity_2_all = RDA_2.variables['intensity'][:]

RDA_1.close()
RDA_2.close()

In [4]:
#Assign a calendar date to each time point
startday = datetime.datetime(1951,1,1)
date_list = np.array([datetime.timedelta(days=x) + startday for x in range(0, 20819)])

In [5]:
#quick function that bins everything in a 2D array and puts floor and ceiling on array values.
def findbins(lat):
    index = np.ndarray.round(lat-19.5)
    index[index < 0] = 0
    index[index > 21] = 21
    return index

In [6]:
#bin latitudes
lat_1_indices = findbins(lat_1_all)
lat_2_indices = findbins(lat_2_all)



In [7]:
#create a time series of whether a rainband exists for each latitude bin
occupancy_1 = np.zeros([20819,22])
occupancy_2 = np.zeros([20819,22])

for i, index in enumerate(lat_1_indices):
    if ~np.isnan(index):
        occupancy_1[i,index] = 1
        
for i, index in enumerate(lat_2_indices):
    if ~np.isnan(index):
        occupancy_2[i,index] = 1

#also create combined occupancy matrix of both primary and secondary fronts
occupancy = occupancy_1 + occupancy_2



In [8]:
def smooth(P,dayrange,latrange):
    Psmooth=np.zeros(P.shape)
    
    ll=int((dayrange-1)/2)
    yy=int((latrange-1)/2)
    
    days = P.shape[0]
    lats = P.shape[1]
    
    for d in range(days):
        for j in range(lats):
            
            Psample = P.take(range(d-ll,d+ll+1), mode='wrap', axis=0)
            Ps = Psample[:,max(0,j-yy):min(days,j+yy+1)]
            Psmooth[d,j] = np.mean(np.mean(Ps))
    
    return Psmooth

In [9]:
#switching it to binary - hence the *5. Wanted to keep same def of smooth function as elsewhere
#NOTE: in some cases, values can reach 2 (if both fronts are within small latitude range on a given day)
#should be a minimal effect - we test both keeping the 2 and reducing to 1.
occupancy_1_smth = smooth(occupancy_1,1,5)*5
occupancy_2_smth = smooth(occupancy_2,1,5)*5
occupancy_smth = smooth(occupancy,1,5)*5


In [10]:
#as constructed, occupancy_smth can have values of 2 if fronts are close enough - this fixes that.
occupancy_smth_binary = np.copy(occupancy_smth) #without using copy, still points to same array.
occupancy_smth_binary[occupancy_smth_binary>1] = 1

In [11]:
#quick comparison of how much the smoothing in latitude changes mean front occupancy
print(np.mean(occupancy, axis=0))
print(np.mean(occupancy_smth, axis=0))

[ 0.00048033  0.00086459  0.00384264  0.02877179  0.02656227  0.03227821
  0.03443969  0.03818627  0.04303761  0.03669725  0.03247034  0.02987656
  0.01902109  0.01998175  0.01565877  0.01508238  0.01196023  0.00979874
  0.00667659  0.00614823  0.00403478  0.00134493]
[ 0.00864595  0.04244921  0.06052164  0.09231952  0.12589462  0.16023824
  0.17450406  0.18463903  0.18483116  0.18026802  0.16110284  0.13804698
  0.1170085   0.09962054  0.08170421  0.07248187  0.05917671  0.04966617
  0.03861857  0.02800327  0.02275566  0.01921322]


In [16]:
ac_1 = np.array([ autocorr(x)[1] for x in occupancy_1.T ])
ac_2 = np.array([ autocorr(x)[1] for x in occupancy_2.T ])
ac = np.array([ autocorr(x)[1] for x in occupancy.T ])

ac_smth_1 = np.array([ autocorr(x)[1] for x in occupancy_1_smth.T ])
ac_smth_2 = np.array([ autocorr(x)[1] for x in occupancy_2_smth.T ])
ac_smth = np.array([ autocorr(x)[1] for x in occupancy_smth.T ])

ac_smth_binary = np.array([ autocorr(x)[1] for x in occupancy_smth_binary.T ])

In [17]:
print(ac_smth_binary)

[ 1.26800427  2.06563188  2.19680495  2.41971398  2.63872406  2.91905467
  3.13065542  3.14236761  3.09594696  3.02263046  2.7621429   2.7015617
  2.78921635  3.09993799  3.48683284  3.55399918  3.32081316  3.06894202
  2.65484529  2.4286421   2.09128261  1.93877854]


In [36]:
## FIND AUTOCORRELATION FOR ARBITRARY TIME PERIODS ##
#much like the RDA_freq_diff script, now taking the next step and letting autocorrelation be calculated over arbitrary
#time periods.

#it's reasonable for autocorrelation to change a lot across the year, so worth checking.
day_beg = [1,121,304,61,121,161,201,274,321]
day_end = [365,303,120,120,160,200,273,320,60]

#stores autocorrelation time scales for each latitude box.
tau_1 = [] #primary front only
tau_2 = [] #secondary front only
tau = [] #both primary and secondary

for db, de in zip(day_beg, day_end):
    
    #date_list defined above as list of dates between January 1, 1951 and Dec 31, 2007
    #mysterious .timetuple().tm_yday function below returns day of year given date.
    if db <= de:
        mymap = map(lambda date: (date.timetuple().tm_yday >= db) & (date.timetuple().tm_yday <= de), date_list)
    else: #if de is earlier in the year than db, then we have to wrap around the new year.
        mymap = map(lambda date: not((date.timetuple().tm_yday > de) & (date.timetuple().tm_yday < db)), date_list)
        
    extract = np.array(list(mymap))
    
    ## primary fronts
    oc_1 = np.copy(occupancy_1_smth)
    oc_1_mean = np.mean(occupancy_1_smth[extract],0) #this is the mean during time period of interest
    oc_1[~extract] = oc_1_mean #change all values outside of time period of interest to mean
    #therefore, won't contribute to autocorrelation
    
    ## secondary fronts
    oc_2 = np.copy(occupancy_2_smth)
    oc_2_mean = np.mean(occupancy_2_smth[extract],0) #this is the mean during time period of interest
    oc_2[~extract] = oc_2_mean #change all values outside of time period of interest to mean
    #therefore, won't contribute to autocorrelation
    
    ## all fronts
    oc = np.copy(occupancy_smth_binary)
    oc_mean = np.mean(occupancy_smth_binary[extract],0) #this is the mean during time period of interest
    oc[~extract] = oc_mean #change all values outside of time period of interest to mean
    #therefore, won't contribute to autocorrelation
    
    tau_1.append(np.array([ autocorr(x)[1] for x in oc_1.T ]))
    tau_2.append(np.array([ autocorr(x)[1] for x in oc_2.T ]))
    tau.append(np.array([ autocorr(x)[1] for x in oc.T ]))
    
    print(db,de)
    #print(tau_1[-1])
    #print(tau_2[-1])
    print(tau[-1])
    
    #small debugging module to verify that the actual standard deviation of our hacked time series is the same as
    #the theoretical expectation for a Bernoulli time-series (p*(1-p))**.5
    #print((oc_mean*(1-oc_mean))**.5)
    #print(np.std(oc,0))

1 365
[ 1.26786473  2.06815904  2.19667512  2.41851382  2.63731245  2.91657888
  3.12864151  3.14013304  3.0929943   3.02005989  2.75926658  2.69866058
  2.78678746  3.09818152  3.48510832  3.55233922  3.31952351  3.067708
  2.65380782  2.42789364  2.0907967   1.93847172]
121 303
[ 1.25910986  1.7751358   1.77983667  1.94169548  2.24122216  2.55674112
  3.13345049  3.18741965  3.04275965  3.07778672  2.65459302  2.55384506
  2.59913389  2.80532258  3.00901351  2.93672505  2.70481988  2.43793793
  2.09884922  2.01092165  1.80647995  1.75284482]


  yy_norm = (yy - mn) / stdv


304 120
[ 0.97307575  1.30752568  1.83982645  2.31805864  2.54438857  2.91290018
  2.94216576  2.9485238   2.98618978  2.76847747  2.57404321  2.36503662
  2.10530157  1.75479571  1.70420055  1.52759612  1.31999537  1.19659115
  1.13164305  1.09312178  1.10815138  0.98879426]
61 120
[ 0.93190695  1.20765554  1.61502509  1.97290452  1.87763004  1.93885051
  1.73141673  1.71222539  1.8471951   1.95880991  1.90398223  1.85987348
  1.84289566  1.55381197  1.66999157  1.5074714   1.15597072  1.07008494
  1.08501374  1.0341087   0.95000157  0.97677   ]
121 160
[ 1.1175325   1.15846921  1.2845494   1.31815761  1.56142744  1.61892266
  1.7742786   1.82963536  1.3958954   1.61416402  1.54258473  1.61677349
  1.82639008  1.6085412   1.36110305  1.33357804  1.14662994  1.15135891
  1.14879411  1.14182157  1.27146641  1.17023637]
161 200
[ 1.08979805  1.3084742   1.42795773  1.51423239  1.80624523  2.25555689
  2.80670439  3.08561437  3.22276015  3.08813804  2.67759279  2.4756915
  2.63995025  2.8

In [19]:
## clear from the above that autocorrelation of rainbands changes dramatically between seasons.
# the yearly behavior is dominated by what goes on during Meiyu season, when the autocorrelation is super-high,
# but then the decorrelation time scale is significantly longer outside of it.

#Therefore, let's experiment with both - use a single yearly autocorrelation time scale value, and another, highly
#seasonalized value, and see what it does to the resulting frequency difference graphs.

In [48]:
## based on autocorrelation estimates above, create 3 sets of seasonally changing decorrelation time scales:
# 1) constant
# 2) half-years
# 3) seasonal, as defined in paper.

#each is defined for each latitude bin.

tau_constant = np.zeros([365,22])
tau_halfyear = np.zeros([365,22])
tau_seasonal = np.zeros([365,22])

#relies on the list of days defined earlier in this script:
#day_beg = [1,121,304,61,121,161,201,274,321]
#day_end = [365,303,120,120,160,200,273,320,60]


## CONSTANT YEARLY VALUE FOR TAU
for dd in range(365):
    tau_constant[dd,:] = tau[0]
    
print(tau_constant[:,10])
            

## HALF-YEAR TAU VALUES
for mytau, db, de in zip(tau[1:3],day_beg[1:3],day_end[1:3]):
    
    if db <= de:
        for dd in range(db-1,de):
            tau_halfyear[dd,:] = mytau
        
    else:
        for dd in range(db-1,de+365):
            tau_halfyear[dd%365,:] = mytau
            
print(tau_halfyear[:,10])            


## SEASONAL TAU VALUES
for mytau, db, de in zip(tau[3:],day_beg[3:],day_end[3:]):
    
    if db <= de:
        for dd in range(db-1,de):
            tau_seasonal[dd,:] = mytau
        
    else:
        for dd in range(db-1,de+365):
            tau_seasonal[dd%365,:] = mytau
            
print(tau_seasonal[:,10])

[ 2.75926658  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658
  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658
  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658
  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658
  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658
  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658
  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658
  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658
  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658
  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658
  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658
  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658
  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658  2.75926658
  2.75926658  2.75926658  2.75926658  2.75926658  2

In [None]:
## SAVE AS NETCDF ##
