In [1]:
#quick script to find the autocorrelation in time of front frequency for each 1 degree latitude bin
#without accounting for autocorrelation, changes in frequency will be found to be more significant
#than they actually are

#a bunch of this code is copied over from RDA_hov_periods.nc and slightly adapted.

In [2]:
import numpy as np
import time
import os
import netCDF4 as nc
import datetime
import matplotlib.pyplot as plt
from autocorr import autocorr
%matplotlib notebook

In [3]:
#Access NetCDF files (currently hosted in Ferret directory)
RDA_path_1 = "/Users/Siwen/Desktop/ferret/bin/meiyu_clean.nc"
RDA_path_2 = "/Users/Siwen/Desktop/ferret/bin/meiyu_2_clean.nc"
RDA_1 = nc.Dataset(RDA_path_1, 'r')
RDA_2 = nc.Dataset(RDA_path_2, 'r')

In [4]:
#load data from NetCDF files to notebook
lat_1_all =  RDA_1.variables['lat_115'][:]
lat_2_all =  RDA_2.variables['lat_115'][:]
intensity_1_all = RDA_1.variables['intensity'][:] 
intensity_2_all = RDA_2.variables['intensity'][:]

RDA_1.close()
RDA_2.close()

In [5]:
#Assign a calendar date to each time point
startday = datetime.datetime(1951,1,1)
date_list = np.array([datetime.timedelta(days=x) + startday for x in range(0, 20819)])

In [6]:
#quick function that bins everything in a 2D array and puts floor and ceiling on array values.
def findbins(lat):
    index = np.ndarray.round(lat-19.5)
    index[index < 0] = 0
    index[index > 21] = 21
    return index

In [7]:
#bin latitudes
lat_1_indices = findbins(lat_1_all)
lat_2_indices = findbins(lat_2_all)



In [8]:
#create a time series of whether a rainband exists for each latitude bin
occupancy_1 = np.zeros([20819,22])
occupancy_2 = np.zeros([20819,22])

for i, index in enumerate(lat_1_indices):
    if ~np.isnan(index):
        occupancy_1[i,index] = 1
        
for i, index in enumerate(lat_2_indices):
    if ~np.isnan(index):
        occupancy_2[i,index] = 1

#also create combined occupancy matrix of both primary and secondary fronts
occupancy = occupancy_1 + occupancy_2



In [9]:
def smooth(P,dayrange,latrange):
    Psmooth=np.zeros(P.shape)
    
    ll=int((dayrange-1)/2)
    yy=int((latrange-1)/2)
    
    days = P.shape[0]
    lats = P.shape[1]
    
    for d in range(days):
        for j in range(lats):
            
            Psample = P.take(range(d-ll,d+ll+1), mode='wrap', axis=0)
            Ps = Psample[:,max(0,j-yy):min(days,j+yy+1)]
            Psmooth[d,j] = np.mean(np.mean(Ps))
    
    return Psmooth

In [10]:
#switching it to binary - hence the *5. Wanted to keep same def of smooth function as elsewhere
#NOTE: in some cases, values can reach 2 (if both fronts are within small latitude range on a given day)
#should be a minimal effect - we test both keeping the 2 and reducing to 1.
occupancy_1_smth = smooth(occupancy_1,1,5)*5
occupancy_2_smth = smooth(occupancy_2,1,5)*5
occupancy_smth = smooth(occupancy,1,5)*5


In [11]:
#as constructed, occupancy_smth can have values of 2 if fronts are close enough - this fixes that.
occupancy_smth_binary = np.copy(occupancy_smth) #without using copy, still points to same array.
occupancy_smth_binary[occupancy_smth_binary>1] = 1

In [12]:
#quick comparison of how much the smoothing in latitude changes mean front occupancy
print(np.mean(occupancy, axis=0))
print(np.mean(occupancy_smth, axis=0))

[ 0.00048033  0.00086459  0.00384264  0.02877179  0.02656227  0.03227821
  0.03443969  0.03818627  0.04303761  0.03669725  0.03247034  0.02987656
  0.01902109  0.01998175  0.01565877  0.01508238  0.01196023  0.00979874
  0.00667659  0.00614823  0.00403478  0.00134493]
[ 0.00864595  0.04244921  0.06052164  0.09231952  0.12589462  0.16023824
  0.17450406  0.18463903  0.18483116  0.18026802  0.16110284  0.13804698
  0.1170085   0.09962054  0.08170421  0.07248187  0.05917671  0.04966617
  0.03861857  0.02800327  0.02275566  0.01921322]


In [13]:
ac_1 = np.array([ autocorr(x)[1] for x in occupancy_1.T ])
ac_2 = np.array([ autocorr(x)[1] for x in occupancy_2.T ])
ac = np.array([ autocorr(x)[1] for x in occupancy.T ])

ac_smth_1 = np.array([ autocorr(x)[1] for x in occupancy_1_smth.T ])
ac_smth_2 = np.array([ autocorr(x)[1] for x in occupancy_2_smth.T ])
ac_smth = np.array([ autocorr(x)[1] for x in occupancy_smth.T ])

ac_smth_bin = np.array([ autocorr(x)[1] for x in occupancy_smth_bin.T ])

NameError: name 'occupancy_smth_bin' is not defined

In [None]:
print(ac_smth_bin)

In [22]:
## FIND AUTOCORRELATION FOR ARBITRARY TIME PERIODS ##
#much like the RDA_freq_diff script, now taking the next step and letting autocorrelation be calculated over arbitrary
#time periods.

#it's reasonable for autocorrelation to change a lot across the year, so worth checking.
day_beg = [1,61,121,161,201,274,121,304]
day_end = [365,120,160,200,273,320,303,120]

#stores autocorrelation time scales for each latitude box.
tau_1 = [] #primary front only
tau_2 = [] #secondary front only
tau = [] #both primary and secondary

for db, de in zip(day_beg, day_end):
    
    #date_list defined above as list of dates between January 1, 1951 and Dec 31, 2007
    #mysterious .timetuple().tm_yday function below returns day of year given date.
    if db <= de:
        mymap = map(lambda date: (date.timetuple().tm_yday >= db) & (date.timetuple().tm_yday <= de), date_list)
    else: #if de is earlier in the year than db, then we have to wrap around the new year.
        mymap = map(lambda date: not((date.timetuple().tm_yday > de) & (date.timetuple().tm_yday < db)), date_list)
        
    extract = np.array(list(mymap))
    
    ## primary fronts
    oc_1 = np.copy(occupancy_1_smth)
    oc_1_mean = np.mean(occupancy_1_smth[extract],0) #this is the mean during time period of interest
    oc_1[~extract] = oc_1_mean #change all values outside of time period of interest to mean
    #therefore, won't contribute to autocorrelation
    
    ## secondary fronts
    oc_2 = np.copy(occupancy_2_smth)
    oc_2_mean = np.mean(occupancy_2_smth[extract],0) #this is the mean during time period of interest
    oc_2[~extract] = oc_2_mean #change all values outside of time period of interest to mean
    #therefore, won't contribute to autocorrelation
    
    ## all fronts
    oc = np.copy(occupancy_smth)
    oc_mean = np.mean(occupancy_smth[extract],0) #this is the mean during time period of interest
    oc[~extract] = oc_mean #change all values outside of time period of interest to mean
    #therefore, won't contribute to autocorrelation
    
    tau_1.append(np.array([ autocorr(x)[1] for x in oc_1.T ]))
    tau_2.append(np.array([ autocorr(x)[1] for x in oc_2.T ]))
    tau.append(np.array([ autocorr(x)[1] for x in oc.T ]))
    
    print(db,de)
    #print(tau_1[-1])
    #print(tau_2[-1])
    print(tau[-1])
    
    #tau_1.append()
    #tau_2.append()

1 365
[ 1.23863743  1.83195368  2.00617857  2.31454492  2.60842887  2.94896813
  3.16738742  3.17483849  3.10884788  3.02127714  2.76232777  2.72173784
  2.7981953   3.04965482  3.3822395   3.42610165  3.20638706  3.05043119
  2.45100478  2.10334333  1.87109775  1.65179508]
[ 1.23479918  1.53752293  1.62496678  1.64855515  1.62323189  1.55434761
  1.16990725  1.07268825  1.01849921  1.17932918  1.27000542  1.42641524
  1.46986078  1.51219146  1.52991614  1.52594612  1.47225712  1.43363352
  1.57804975  1.59935997  1.49005743  1.51844159]
[ 1.26786473  2.06815904  2.19667512  2.41851382  2.63731245  2.91657888
  3.12864151  3.14235874  3.09085791  3.01693047  2.75478669  2.69934251
  2.78417335  3.09818152  3.48510832  3.55233922  3.31952351  3.067708
  2.65380782  2.42789364  2.0907967   1.93847172]


  yy_norm = (yy - mn) / stdv


61 120
[ 0.93190695  1.20765554  1.61502509  1.98849486  1.89301647  1.95484764
  1.75459228  1.72674992  1.84248906  1.96080999  1.90852329  1.90111025
  1.86724798  1.58585673  1.70911711  1.62778598  1.31087674  1.30385213
  1.36175221  1.24503954  0.98722104  0.99361239]
[        nan         nan         nan  0.99361239  0.99361239  0.99361239
  0.98550989  0.98550989  0.97911243  0.97271122  0.96400118  0.9610705
  0.9610705   1.20534087  1.12374338  0.93715996  0.92782578  0.91495564
  0.93017485  0.95000157  0.96282854  0.98316815]
[ 0.93190695  1.20765554  1.61502509  1.97290452  1.87763004  1.93885051
  1.73141673  1.71222539  1.83860544  1.95678937  1.90398223  1.85987348
  1.84289566  1.55381197  1.66999157  1.5074714   1.15597072  1.07008494
  1.08501374  1.0341087   0.95000157  0.97677   ]
121 160
[ 1.1175325   1.12204437  1.23698711  1.23049316  1.49342381  1.54644015
  1.72427152  1.82678976  1.41019296  1.60420902  1.54368471  1.6509421
  1.86815591  1.82664728  1.621242

In [68]:
date_list[5000].timetuple().tm_yday

253