Functions for analysis

In [4]:
## Functions for reading in data ##

def CheckLeap(Year):
    '''Return True when Year is a leap year under the Gregorian rule, otherwise False.

    A year qualifies when it is divisible by 400, or when it is divisible
    by 4 without being divisible by 100.
    '''
    # Century years only count when they are multiples of 400
    if Year % 400 == 0:
        return True
    # Every other multiple of 4 counts unless it is a century year
    if Year % 100 != 0 and Year % 4 == 0:
        return True
    return False
    
# Length of each month in a non-leap year, January first
days = [31, 28, 31, 30, 31, 30,
        31, 31, 30, 31, 30, 31]


def dayOfYear(date):
    '''Return the day number within the year for a date string.

    date: string laid out as 'YYYY-MM-DD'. Characters 0-3 are read as the
        year, characters 5-6 as the month, and everything from position 8
        onward as the day of the month.

    Returns an int: 1 for January 1st, and 365 (366 in a leap year) for
    December 31st.
    '''
    year, month, dayOfMonth = int(date[:4]), int(date[5:7]), int(date[8:])

    leapYear = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)

    # From March onward a leap year has one extra day (February 29th) behind it
    total = dayOfMonth + 1 if (month > 2 and leapYear) else dayOfMonth

    # Add the full length of every month that precedes the given one
    for monthIndex in reversed(range(month - 1)):
        total += days[monthIndex]
    return total

def dayOfYearAccountingForLeapYear(date):
    '''Return the day of the year on a fixed 366-slot calendar.

    In a leap year the result runs 1...59, 60, 61...366.
    In any other year it runs 1...59, 61...366: slot 60 (February 29th) is
    skipped, so a calendar date from March onward gets the same number in
    every year.

    date: string laid out as 'YYYY-MM-DD'. Characters 0-3 are read as the
        year, characters 5-6 as the month, and everything from position 8
        onward as the day of the month.
    '''
    year = int(date[:4])
    month = int(date[5:7])
    dayOfMonth = int(date[8:])

    leapYear = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)

    # Ordinary day of the year: day of month, plus February 29th when it has
    # already passed, plus the lengths of all preceding months
    ordinal = dayOfMonth
    if month > 2 and leapYear:
        ordinal += 1
    for monthIndex in reversed(range(month - 1)):
        ordinal += days[monthIndex]

    # Outside leap years, step over the slot reserved for February 29th
    if not leapYear and ordinal >= 60:
        ordinal += 1

    return ordinal



def findYearIndices(startYear,endYear,dates):
    '''Group the positions of dates by winter season.

    A winter is labelled by the year of its spring: winter y covers
    September-December of year y-1 and January-August of year y. For
    example, startYear=2001 begins with the 2000-2001 winter. The original
    note for this data set: the first data point is in January 1930, so the
    start year is 1930.

    startYear, endYear: first and last winter to collect; endYear is included.
    dates: indexable sequence of strings whose characters 0-3 are the year
        and characters 5-6 the month (e.g. 'YYYY-MM-DD').

    Returns a list with one entry per winter, in order; each entry is the
    list of integer positions in dates that belong to that winter.
    '''
    def inWinter(stamp, winter):
        # The month is only parsed once the year has matched one of the two
        # calendar years that make up the winter
        calendarYear = float(stamp[0:4])
        if calendarYear == winter:
            return int(stamp[5:7]) < 9
        if calendarYear == winter - 1:
            return int(stamp[5:7]) >= 9
        return False

    return [
        [pos for pos in range(len(dates)) if inWinter(dates[pos], winter)]
        for winter in np.arange(startYear, endYear + 1)
    ]

def fillInClim(datesByWinter,snowDepthByYears):
    '''Pad every winter out to days 1..366, marking the added days with NaN.

    For each winter, every day number from 1 to 366 that is absent from the
    winter's day array is inserted, and a NaN is inserted at the same
    position in the matching snow-depth array, so the two stay aligned.

    datesByWinter: list with one array of day numbers per winter.
        NOTE(review): assumes each array is in ascending day order -- confirm
        against the caller.
    snowDepthByYears: list with one array of snow depths per winter, aligned
        element-for-element with datesByWinter.

    Both lists are updated in place (their entries are replaced by the padded
    arrays). Returns snowDepthByYears.
    '''
    for iy in range(len(datesByWinter)):
        for dayVal in range(1, 367):
            dayNumbers = np.asarray(datesByWinter[iy])
            if np.any(dayNumbers == dayVal):
                continue

            if dayVal == 1:
                insertAt = 0
            else:
                # dayVal-1 is always present here: the loop ascends, so an
                # earlier missing day has already been inserted. np.insert
                # places the new value *before* the given index, so the slot
                # right after the previous day is its index plus one.
                insertAt = int(np.argwhere(dayNumbers == dayVal - 1)[0][0]) + 1

            datesByWinter[iy] = np.insert(dayNumbers, insertAt, dayVal)
            snowDepthByYears[iy] = np.insert(snowDepthByYears[iy], insertAt, np.nan)
    return snowDepthByYears


def fillInNans(yearlyTimeSeries):
    '''Fill missing values by linear interpolation so the series can be passed to an FFT.

    fft can only be used when no NaNs are present in the data. Every
    non-finite sample (NaN or +/-inf) is replaced by a value interpolated
    linearly, by sample position, between the nearest finite neighbours.
    Non-finite samples before the first or after the last finite sample take
    the value of that first/last finite sample (np.interp's default edge
    handling).

    No samples are forced to zero: the series is only interpolated.

    yearlyTimeSeries: 1-D array-like of numbers.

    Returns a new float array of the same length; the input is not modified.
    Raises ValueError when the series contains no finite value at all.
    '''
    x = np.asarray(yearlyTimeSeries, dtype=float)
    mask = np.isfinite(x)
    if not mask.any():
        raise ValueError('fillInNans needs at least one finite value to interpolate from')

    xi = np.arange(len(x))
    return np.interp(xi, xi[mask], x[mask])

def ampFromFFT(yearlyTimeSeries):
    '''Return the one-sided amplitude spectrum of a series and a frequency axis.

    It assumes daily sampling at 366 samples per year, so frequencies are in
    cycles per year.

    yearlyTimeSeries: 1-D sequence of N samples with no NaNs.

    Returns (xk, freqHalf):
        xk: amplitudes of FFT bins 0 .. N//2 - 1. xk[0] is the DC component;
            every other retained bin is doubled to account for its mirror
            image in the discarded half of the spectrum.
        freqHalf: frequencies of bins 1 .. N//2, i.e. k * 366 / N.

    NOTE(review): xk starts at bin 0 while freqHalf starts at bin 1, so
    freqHalf[i] is the frequency of xk[i + 1], not of xk[i]. This offset is
    how the function has always returned its values and is kept for
    compatibility -- confirm how callers pair the two arrays.
    '''
    sampleFrequency = 366 # samples per year
    N = len(yearlyTimeSeries) # Number of samples
    half = N // 2

    # Amplitude spectrum from FFT
    xk = np.abs(np.fft.fft(yearlyTimeSeries)) / N # Two-sided amplitude
    xk = xk[:half] # One-sided: bins 0 .. half-1 (for even N the Nyquist bin N/2 is left out)
    # None of the retained bins above DC is the Nyquist bin, so all of them
    # are doubled
    xk[1:] = 2 * xk[1:]

    # Bin k of an N-point FFT sits at k * sampleFrequency / N
    freqHalf = np.arange(1, half + 1) * sampleFrequency / N

    return xk,freqHalf

def THD(amplitudes):
    '''Calculate the total harmonic distortion of a set of Fourier amplitudes.

    amplitudes: list or array of Fourier component amplitudes. The data is
        assumed to have the DC component as the first index, followed by the
        fundamental and then the higher harmonics.

    Returns sqrt(sum of squared harmonic amplitudes) / fundamental amplitude.
    Raises IndexError when fewer than two amplitudes are given (there is no
    fundamental to divide by).
    '''
    # Coerce to an array so plain lists support the element-wise square below
    withNoDC = np.asarray(amplitudes)[1:]
    V1 = withNoDC[0]
    harmonics = withNoDC[1:]

    return np.sqrt(np.sum(harmonics**2)) / V1


# Moving Average
def movingaverage(y, N):
    '''Smooth y with a moving average of window length N.

    The series is extended at both ends by repeating its edge values
    (N//2 samples in front, the remaining N-1-N//2 behind), so the result
    has the same length as y.
    '''
    padBefore = N // 2
    padAfter = N - 1 - padBefore
    extended = np.pad(y, (padBefore, padAfter), mode='edge')

    # Equal weights that sum to one
    window = np.ones((N,)) / N
    return np.convolve(extended, window, mode='valid')

# Calculate median and interquartile range
def medIQR(a):
    '''Return the median of a and its interquartile bounds.

    a: array-like of numbers.

    Returns (median, quartiles), where quartiles is an array holding the
    25th and 75th percentiles of a.
    '''
    # Percentiles are taken from the argument itself, not from a notebook global
    return np.median(a), np.percentile(np.array(a),[25, 75])

# Calculate mean and standard deviation
def meanSTD(a):
    '''Return (mean, standard deviation) of a, as given by np.mean and np.std with their default arguments.'''
    average = np.mean(a)
    spread = np.std(a)
    return average, spread