### Function to compute the Standardized Precipitation Index (SPI)

In [None]:
def sdi(x, sc=12, accumulate=True, seasonality=0):
    """Compute a standardized drought index (e.g. SPI) for monthly time series.

    The data are optionally accumulated over a trailing window of ``sc``
    months, converted to empirical non-exceedance probabilities with the
    Gringorten plotting position, and mapped onto a standard normal
    distribution.

    Parameters
    ----------
    x : pandas.DataFrame
        ``n`` monthly observations, one row per month, in chronological order
        and without gaps. Must contain the columns ``'year'`` and ``'month'``;
        every other column is treated as one time series.
    sc : int, default 12
        Time scale of the index in months (must be >= 1).
    accumulate : bool, default True
        If True, each series is summed over a trailing ``sc``-month window
        before ranking (missing values count as zero in the sum). If False,
        the values are assumed to be at the desired time scale already and
        are ranked as they are.
    seasonality : {0, 1, 2}, default 0
        0: ignore seasonality.
        1: subtract the monthly climatology (long-term mean of each calendar
           month) from the observations before any other operation.
        2: keep the data unchanged but rank each window only against windows
           that fall in the same time of the year (e.g. JJA against other
           JJA accumulations).
        Options 1 and 2 are recommended for ``sc`` < 12 months.

    Returns
    -------
    numpy.ndarray
        The standardized index aligned with the rows of ``x``: shape ``(n,)``
        when ``x`` holds a single series, ``(n, m)`` for ``m`` series. When
        accumulating, the first ``sc - 1`` entries are NaN because no complete
        window ends there.

    Raises
    ------
    ValueError
        If ``seasonality`` is not 0, 1 or 2, or if ``sc`` is smaller than 1.
    """
    if seasonality not in (0, 1, 2):
        raise ValueError('Invalid value for seasonality')
    sc = int(sc)
    if sc < 1:
        raise ValueError('sc must be a positive number of months')

    ## Helper functions

    def window_totals(series):
        # Trailing sc-month totals, one value per *complete* window.
        # Truncated windows are never produced, so they cannot distort the
        # ranking. NaNs are treated as zero contributions to the total.
        filled = np.where(np.isnan(series), 0.0, series)
        return np.convolve(filled, np.ones(sc), mode='valid')

    def gringorten(sample):
        # Estimate probabilities using the Gringorten plotting position
        ranks = rankdata(sample, method='min')
        return (ranks - 0.44) / (len(sample) + 0.12)

    def norm_trans(p):
        # Transform probabilities to a standard normal distribution
        return norm.ppf(p)

    series_cols = x.drop(['year', 'month'], axis=1)
    if seasonality == 1:
        # Remove the seasonal cycle. transform() keeps the row order, so the
        # anomalies stay aligned with the original observations.
        month_key = x['month'].to_numpy()
        series_cols = series_cols - series_cols.groupby(month_key).transform('mean')
    data = np.asarray(series_cols, dtype=float)
    if data.ndim == 1:
        data = data.reshape(-1, 1)

    n_obs, n_series = data.shape
    # Number of leading months for which no complete window is available.
    lead = sc - 1 if accumulate else 0
    out = np.full((n_obs, n_series), np.nan)

    if n_obs > lead:
        for k in range(n_series):
            values = window_totals(data[:, k]) if accumulate else data[:, k]
            if seasonality == 2:
                # The same time of the year repeats every 12 steps; rank each
                # of the 12 calendar phases separately.
                scores = np.empty(len(values))
                for phase in range(min(12, len(values))):
                    scores[phase::12] = norm_trans(gringorten(values[phase::12]))
            else:
                scores = norm_trans(gringorten(values))
            # Align every score with the last month of its window.
            out[lead:, k] = scores

    return out[:, 0] if n_series == 1 else out