In [1]:
#Quick script to find the temporal autocorrelation of front frequency in each 1-degree latitude bin.
#If autocorrelation is not accounted for, changes in frequency will appear more significant
#than they actually are.

#Much of this code is copied over from RDA_hov_periods.nc and slightly adapted.

In [1]:
import numpy as np
import time
import os
import netCDF4 as nc
import datetime
import matplotlib.pyplot as plt
from autocorr import autocorr
%matplotlib notebook

In [2]:
# Open the two NetCDF inputs read-only (files currently live in the Ferret directory).
# NOTE(review): these are absolute paths on one machine; adjust before running elsewhere.
RDA_path_1, RDA_path_2 = (
    "/Users/Siwen/Desktop/ferret/bin/meiyu_clean.nc",
    "/Users/Siwen/Desktop/ferret/bin/meiyu_2_clean.nc",
)
RDA_1, RDA_2 = (nc.Dataset(path, 'r') for path in (RDA_path_1, RDA_path_2))

In [3]:
# Pull the 'lat_115' and 'intensity' variables out of each dataset into memory,
# then release both file handles (the datasets are not usable after this cell).
lat_1_all, intensity_1_all = (RDA_1.variables[name][:] for name in ('lat_115', 'intensity'))
lat_2_all, intensity_2_all = (RDA_2.variables[name][:] for name in ('lat_115', 'intensity'))

RDA_1.close()
RDA_2.close()

In [4]:
# Build one calendar date per time step: 20819 consecutive days starting 1951-01-01.
startday = datetime.datetime(1951, 1, 1)
date_list = np.array([startday + datetime.timedelta(days=offset) for offset in range(20819)])

In [5]:
def findbins(lat, lat_origin=19.5, max_index=21):
    """Convert latitudes to latitude-bin indices, clamped to [0, max_index].

    Each value is shifted by ``lat_origin``, rounded to the nearest whole
    number and then clamped, so bin ``k`` is centred on ``lat_origin + k``.
    The operation is element-wise, so ``lat`` may have any shape.

    Parameters
    ----------
    lat : numpy array (or NumPy scalar) of latitudes. NaN entries are allowed.
    lat_origin : latitude that maps to index 0 (default 19.5, as before).
    max_index : largest index returned (default 21, i.e. 22 bins, as before).

    Returns
    -------
    A new float array of bin indices; the input is not modified.  NaN inputs
    stay NaN (which is why the result is float, not int) so callers can tell
    "no value" apart from a real bin; cast to int before using as an index.
    """
    index = np.round(lat - lat_origin)
    # np.clip leaves NaN untouched, matching the old mask-assignment behaviour.
    return np.clip(index, 0, max_index)

In [6]:
# Map the latitudes read from each file onto latitude-bin indices.
lat_1_indices, lat_2_indices = findbins(lat_1_all), findbins(lat_2_all)



In [7]:
#create a time series of whether a rainband exists for each latitude bin
def _occupancy_matrix(bin_indices, n_days, n_bins=22):
    """Return an (n_days, n_bins) array with 1 where a front occupies a bin.

    ``bin_indices`` holds one float bin index per day (as produced by
    findbins); NaN (or masked) entries mean "no front that day" and leave the
    corresponding row all zeros.
    """
    # Masked entries are turned into NaN so both kinds of "missing" are skipped.
    flat = np.asarray(np.ma.filled(bin_indices, np.nan), dtype=float).ravel()
    occ = np.zeros([n_days, n_bins])
    has_front = ~np.isnan(flat)
    # Indices must be integers: NumPy (>= 1.12) raises IndexError on float indices.
    occ[np.flatnonzero(has_front), flat[has_front].astype(int)] = 1
    return occ

# One row per entry of date_list (20819 days), one column per latitude bin.
occupancy_1 = _occupancy_matrix(lat_1_indices, len(date_list))
occupancy_2 = _occupancy_matrix(lat_2_indices, len(date_list))

#also create combined occupancy matrix of both primary and secondary fronts
#(can reach 2 where both fronts fall in the same bin on the same day)
occupancy = occupancy_1 + occupancy_2



In [8]:
def smooth(P,dayrange,latrange):
    """Running-mean smoother over a (day, latitude) window.

    Parameters
    ----------
    P : 2D array, axis 0 = days, axis 1 = latitude bins.
    dayrange : window width along axis 0; the half-width is
        int((dayrange-1)/2), so an even width behaves like the next lower
        odd width.
    latrange : window width along axis 1, same half-width convention.

    Returns
    -------
    Array of the same shape as P where each element is the mean of P over
    the window centred on it.  The day axis wraps around (the last days
    are neighbours of the first), while the latitude axis is truncated at
    the array edges, so edge columns average over fewer bins.
    """
    Psmooth = np.zeros(P.shape)

    ll = int((dayrange-1)/2)
    yy = int((latrange-1)/2)

    days, lats = P.shape[0], P.shape[1]

    for d in range(days):
        # The day window depends only on d, so extract it once per row
        # rather than once per (d, j) pair.
        Psample = P.take(range(d-ll, d+ll+1), mode='wrap', axis=0)
        for j in range(lats):
            # Upper bound must be the number of latitude columns (was `days`,
            # which only gave the right answer when days >= lats).
            Ps = Psample[:, max(0, j-yy):min(lats, j+yy+1)]
            Psmooth[d, j] = np.mean(Ps)

    return Psmooth

In [43]:
#smooth() returns the MEAN over a 5-bin latitude window (1-day window in time); multiplying
#by 5 turns that back into a count so the result is (mostly) binary. Kept this way so the
#definition of smooth() matches its use elsewhere.
#NOTE: values can reach 2 when both fronts fall within the latitude window on the same day;
#should be a minimal effect - we test both keeping the 2 and reducing to 1.
#NOTE(review): smooth() truncates the window at the first/last latitude bins, so there the
#mean is taken over fewer than 5 bins and the *5 over-scales (values above 1 are possible).
occupancy_1_smth, occupancy_2_smth, occupancy_smth = (
    smooth(field, 1, 5) * 5 for field in (occupancy_1, occupancy_2, occupancy)
)


In [44]:
#occupancy_smth can exceed 1 (e.g. 2 when both fronts are close together); cap it at 1 here.
#np.clip returns a new array, so occupancy_smth itself is left untouched.
occupancy_smth_bin = np.clip(occupancy_smth, None, 1)

In [45]:
#compare time-mean front occupancy per latitude bin before and after latitude smoothing
print(occupancy.mean(axis=0))
print(occupancy_smth.mean(axis=0))

[ 0.00048033  0.00086459  0.00384264  0.02877179  0.02656227  0.03227821
  0.03443969  0.03818627  0.04303761  0.03669725  0.03247034  0.02987656
  0.01902109  0.01998175  0.01565877  0.01508238  0.01196023  0.00979874
  0.00667659  0.00614823  0.00403478  0.00134493]
[ 0.00864595  0.04244921  0.06052164  0.09231952  0.12589462  0.16023824
  0.17450406  0.18463903  0.18483116  0.18026802  0.16110284  0.13804698
  0.1170085   0.09962054  0.08170421  0.07248187  0.05917671  0.04966617
  0.03861857  0.02800327  0.02275566  0.01921322]


In [55]:
def _autocorr_per_bin(field):
    """Return element [1] of autocorr()'s result for every latitude column of `field`."""
    return np.array([autocorr(column)[1] for column in field.T])

# unsmoothed occupancy: primary front, secondary front, both combined
ac_1 = _autocorr_per_bin(occupancy_1)
ac_2 = _autocorr_per_bin(occupancy_2)
ac = _autocorr_per_bin(occupancy)

# latitude-smoothed occupancy, same three cases
ac_smth_1 = _autocorr_per_bin(occupancy_1_smth)
ac_smth_2 = _autocorr_per_bin(occupancy_2_smth)
ac_smth = _autocorr_per_bin(occupancy_smth)

# smoothed combined occupancy capped at 1
ac_smth_bin = _autocorr_per_bin(occupancy_smth_bin)

[ 0.99230769  0.98460947  1.17052554  1.69392751  1.48348003  1.53272874
  1.47109105  1.4242533   1.44473318  1.46220886  1.45358057  1.49637819
  1.42839143  1.68752802  1.36595844  1.83961576  1.46821743  1.34332697
  1.30853299  1.68975292  1.16801404  1.10139564]


In [62]:
# Show the per-latitude-bin autocorr result for the smoothed, capped combined occupancy.
print(ac_smth_bin)

[ 1.26800427  2.06563188  2.19680495  2.41971398  2.63872406  2.91905467
  3.13065542  3.14236761  3.09594696  3.02263046  2.7621429   2.7015617
  2.78921635  3.09993799  3.48683284  3.55399918  3.32081316  3.06894202
  2.65484529  2.4286421   2.09128261  1.93877854]


In [83]:
## FIND AUTOCORRELATION FOR ARBITRARY TIME PERIODS ##
#much like the RDA_freq_diff script, now taking the next step and letting autocorrelation be calculated over arbitrary
#time periods.

#it's reasonable for autocorrelation to change a lot across the year, so worth checking.
#each (day_beg, day_end) pair is an inclusive day-of-year range; if day_end < day_beg the
#period wraps around the new year.
#NOTE(review): the 1-365 period leaves out day 366 (Dec 31 of leap years) - confirm intended.
day_beg = [1,61,121,161,201,274,121,304]
day_end = [365,120,160,200,273,320,303,120]

#stores autocorrelation time scales for each latitude box.
tau_1 = [] #primary front only
tau_2 = [] #secondary front only
tau = [] #both primary and secondary

#day of year (1-366) for every entry of date_list; computed once here instead of calling
#timetuple() for every date inside every period.
day_of_year = np.array([date.timetuple().tm_yday for date in date_list])

for db, de in zip(day_beg, day_end):

    #boolean mask over time steps: True for days inside the period of interest
    if db <= de:
        extract = (day_of_year >= db) & (day_of_year <= de)
    else: #if de is earlier in the year than db, then we have to wrap around the new year.
        extract = (day_of_year <= de) | (day_of_year >= db)

    oc_1 = np.copy(occupancy_1_smth)
    oc_1_mean = np.mean(occupancy_1_smth[extract],0) #this is the mean during time period of interest
    print(oc_1_mean)
    oc_1[~extract] = oc_1_mean #change all values outside of time period of interest to mean
    #therefore, won't contribute to autocorrelation

    #spot checks of the first year for two latitude bins (debugging pauses removed: the
    #time.sleep calls added 9 s per period and served no purpose in a non-interactive run)
    print(oc_1[0:365,5])
    print(oc_1[0:365,15])

    #TODO: unfinished - nothing is appended to tau_1 / tau_2 / tau yet.
    #tau_1.append()
    #tau_2.append()

[ 0.00737002  0.03412641  0.05219899  0.0825763   0.11574141  0.15049267
  0.1704398   0.18183129  0.18317712  0.17822639  0.15827926  0.13381399
  0.1111752   0.09118     0.07128094  0.0605143   0.04633502  0.03600096
  0.02576304  0.01749579  0.01273732  0.01001362]
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  1.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  1.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  1.  0.  1.  1.  1.  1.  0.  1.  0.  1.  1.  0.  1.
  1.  0.  1.  0.  1.  1.  0.  0.  1.  0.  0.  0.  0.  1.  1.  0.  0.  1.
  1.  1.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  1.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  1.  1.  1.  1.  0.  0.  0.  0.
  1.  0.  0.  1.  0.  0.  0.  0.  1.  1.  0.  0.  0.  0.  0.  1.  1.  0.
 

KeyboardInterrupt: 

In [9]:
# NOTE(review): this cell looks like a leftover pasted from another notebook. None of
# p1_beg, p1_end, p2_beg, p2_end, hovnc, lats_1_p1, lats_p1, lats_1_p2 or lats_p2 is
# defined anywhere in the cells shown here, so it cannot run as-is - define them above
# or delete the cell.
for p1_b, p1_e, p2_b, p2_e in zip(p1_beg, p1_end, p2_beg, p2_end):

    # Build the period suffix used in the variable names: (start year - 1900) followed
    # by the two-digit, zero-padded end year.
    years_p1= str(p1_b-1900) + str("{0:0=2d}".format((p1_e-1900)%100))
    print(years_p1)
    years_p2= str(p2_b-1900) + str("{0:0=2d}".format((p2_e-1900)%100))
    print(years_p2)
    
    lats_1_p1.append(hovnc['lat_1_hov_smth_' + years_p1][:])
    lats_p1.append(hovnc['lat_hov_smth_' + years_p1][:])
    
    lats_1_p2.append(hovnc['lat_1_hov_smth_' + years_p2][:])
    lats_p2.append(hovnc['lat_hov_smth_' + years_p2][:])
    
hovnc.close()
        
for i, date in enumerate(date_list[0:365]): #go through each day of the year
    pass  # TODO: loop body was never written; placeholder so the cell is not a SyntaxError



In [68]:
# Sanity check: day of year (tm_yday) of the 5001st entry of date_list.
date_list[5000].timetuple().tm_yday

253