In [None]:
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import os,errno
import sys
import pandas as pd
import scipy.stats as st
%matplotlib inline

# Absolute data locations (hard-coded; adjust when running on another machine).
# dir2 and dir1 are not referenced in the cells visible in this notebook excerpt.
dir2='/thorncroftlab_rit/ahenny/rain/'
dir1='/thorncroftlab_rit/ahenny/rain/US/ghcnd_all/'
# Directory prefix that the cells below prepend (via `dir+...`) to every NetCDF file name.
# NOTE: this name shadows Python's builtin `dir()` for the rest of the session.
dir='/thorncroftlab_rit/ahenny/rain/DISSERTATION_SCRIPTS_RESULTS/'

In [None]:
#@author: Michael Schramm on GitHub
#This function is derived from code originally posted by Sat Kumar Tomer
#(satkumartomer@gmail.com)
#See also: http://vsp.pnnl.gov/help/Vsample/Design_Trend_Mann_Kendall.htm

from scipy.stats import norm
import scipy.stats as st
def mk_test(x, alpha=0.05):
    """Two-sided Mann-Kendall test for a monotonic trend.

    Parameters
    ----------
    x : sequence of numbers
        Observations in time order; must support ``len`` and integer indexing.
    alpha : float, optional
        Significance level of the two-tailed test (default 0.05).

    Returns
    -------
    trend : str
        ``'increasing'``, ``'decreasing'`` or ``'no trend'``.
    h : bool
        True when ``|z|`` exceeds the normal critical value for ``alpha``.
    p : float
        Two-tailed p-value of ``z`` under the standard normal distribution.
    z : float
        Continuity-corrected, normalised test statistic (0 when S is 0).
    """
    n = len(x)

    # S statistic: sum of the signs of every later-minus-earlier difference.
    s = sum(np.sign(x[later] - x[earlier])
            for earlier in range(n - 1)
            for later in range(earlier + 1, n))

    # Group sizes of identical values are needed for the tie correction.
    distinct, counts = np.unique(x, return_counts=True)
    g = len(distinct)

    # Variance of S; the tie term is only evaluated when duplicates exist.
    if n == g:
        tie_term = 0
    else:
        tie_term = np.sum(counts*(counts-1)*(2*counts+5))
    var_s = (n*(n-1)*(2*n+5) - tie_term)/18

    # Continuity correction moves S one unit towards zero before scaling.
    if s > 0:
        shift = -1
    elif s < 0:
        shift = 1
    else:
        shift = None
    z = 0 if shift is None else (s + shift)/np.sqrt(var_s)

    # Two-tailed p-value and the significance decision at level alpha.
    p = 2*(1-norm.cdf(abs(z)))
    h = abs(z) > norm.ppf(1-alpha/2)

    if h and (z < 0):
        trend = 'decreasing'
    elif h and (z > 0):
        trend = 'increasing'
    else:
        trend = 'no trend'

    return trend, h, p, z

In [None]:
# Quick look at the structure of one extreme-day file.
# NOTE(review): this name puts the season before the thresholds ("winter_95_80"),
# whereas the file lists built later use "95_80_winter" - confirm the file exists.
preview_path=dir+'extreme_days_ghcnd_winter_95_80.nc'
ds=xr.open_dataset(preview_path)
print(ds)

In [None]:
#names=[dir+'extreme_days_ghcnd_90station_70area.nc',dir+'extreme_days_ghcnd_90station_80area.nc',
#       dir+'extreme_days_ghcnd_90station_90area.nc',dir+'extreme_days_ghcnd_95station_70area.nc',
#      dir+'extreme_days_ghcnd_95station_80area.nc',dir+'extreme_days_ghcnd_95station_90area.nc',
#      dir+'extreme_days_ghcnd_99station_70area.nc',dir+'extreme_days_ghcnd_99station_80area.nc',
#      dir+'extreme_days_ghcnd_99station_90area.nc']

# Every season uses the same six threshold tags, in the same order; only the
# trailing season suffix of the file name changes.
threshold_tags=['95_80','95_90','95_95','99_80','99_90','99_95']
file_prefix=dir+'extreme_days_ghcnd_'

names_winter=[file_prefix+tag+'_winter.nc' for tag in threshold_tags]
names_spring=[file_prefix+tag+'_spring.nc' for tag in threshold_tags]
names_summer=[file_prefix+tag+'_summer.nc' for tag in threshold_tags]
names_fall=[file_prefix+tag+'_fall.nc' for tag in threshold_tags]
names_annual=[file_prefix+tag+'_annual.nc' for tag in threshold_tags]

# Order in which the seasons are processed below.
seasons_list=['Winter','Spring','Summer','Fall','annual']

# Station identifiers; only their count is reported here.
dp=xr.open_dataset(dir+'station_numbers_95.nc')
stations=dp.stations.values.tolist()
print(len(stations))

# Years of the analysis period: 1979 through 2019 inclusive.
yrs_neusa=np.arange(1979,2020)

import calendar
from matplotlib.font_manager import FontProperties

# For every season and every threshold file this cell:
#   1. averages the valid observations of each distinct date,
#   2. sums those daily means per calendar year and rescales the sum to a
#      30-day month (sum * 30 / season length),
#   3. fits a least-squares line and runs the Mann-Kendall test on the
#      resulting yearly series,
# and finally renders one summary table per season.

# Set to True to also draw each yearly series with its regression line
# (replaces the former hard-coded `if 1==0:` switch).
plot_timeseries=False

# Input files and fixed season length in days for each entry of seasons_list.
# None marks winter, whose length depends on the year (see below).
# 'annual' uses the same 91-day length as 'Fall'; its table is titled
# 'Fall (annual threshold)'.
season_config={
    'Winter':(names_winter,None),
    'Spring':(names_spring,92.),
    'Summer':(names_summer,92.),
    'Fall':(names_fall,91.),
    'annual':(names_annual,91.),
}

# Table row labels; order matches the order of the files in each names_* list.
station_thresh_list=['95','95','95','99','99','99']
area_thresh_list=['80','90','95','80','90','95']
columns=['Station extreme threshold','Total extreme threshold','Number of days','Percent trend','p-value','M-K significance']

for season in seasons_list:
    # An unknown season now fails here with a KeyError instead of silently
    # reusing the previous season's files and length.
    filenames,fixed_length=season_config[season]
    if fixed_length is None:
        # Winter: 91 days when the calendar year is a leap year, otherwise 90.
        season_lengths=[91. if calendar.isleap(int(yr)) else 90. for yr in yrs_neusa]
    else:
        season_lengths=[fixed_length]*len(yrs_neusa)

    days_list=[]
    percent_trend_list=[]
    mk_list=[]
    p_value_list=[]

    for name in filenames:
        # The context manager releases the file handle as soon as the two
        # arrays have been read into memory.
        with xr.open_dataset(name) as ds:
            dates=ds['dates'].values
            obs=ds['obs'].values.tolist()
        dates_pd=pd.DatetimeIndex(dates)
        dates_unique=dates_pd.unique()
        print(len(dates_unique))

        # Single pass: collect the valid observations of every date, keeping
        # record order so the sums below add in the same order as before.
        # Values outside [0, 2000] (including NaN) are discarded - presumably
        # missing-data sentinels; verify against the file producer.
        obs_by_date={date:[] for date in dates_unique}
        for date,value in zip(dates_pd,obs):
            if 0<=value<=2000:
                obs_by_date[date].append(value)

        # Daily means grouped by calendar year; years outside yrs_neusa are ignored.
        yearly_means={int(yr):[] for yr in yrs_neusa}
        for date in dates_unique:
            valid_obs=obs_by_date[date]
            if not valid_obs:
                # Previously a ZeroDivisionError. The day still counts towards
                # 'Number of days' but contributes nothing to the yearly total.
                print('WARNING: no valid observations on '+str(date)+' in '+name)
                continue
            mean_obs=float(sum(valid_obs))/float(len(valid_obs))
            if date.year in yearly_means:
                yearly_means[date.year].append(mean_obs)

        # Yearly total rescaled to a 30-day month; a year without extreme days gives 0.
        values_list=[sum(yearly_means[int(yr)])*30./length
                     for yr,length in zip(yrs_neusa,season_lengths)]

        r1=st.linregress(np.arange(len(yrs_neusa)),values_list)
        mean_value=float(sum(values_list))/float(len(values_list))
        # Regression slope per year expressed as a percentage of the series mean.
        slope_percent_1b=round(r1[0]/mean_value*100.,2)
        print(slope_percent_1b)
        mk_result=mk_test(values_list,alpha=0.05)
        mk_result1=mk_test(values_list,alpha=0.10)

        days_list.append(len(dates_unique))
        percent_trend_list.append(slope_percent_1b)
        p_value_list.append(round(r1[3],2))
        # Report the strictest level at which Mann-Kendall detects a trend.
        if mk_result[0] in ['increasing','decreasing']:
            mk_list.append('p<0.05')
        elif mk_result1[0] in ['increasing','decreasing']:
            mk_list.append('p<0.10')
        else:
            mk_list.append('None')

        if plot_timeseries:
            fig=plt.figure(figsize=(8,8))
            ax=plt.subplot(1,1,1)
            # The legend label follows the season being processed (it used to say 'Fall' for all).
            ax.plot(yrs_neusa,values_list,color='r',linewidth=1.0,linestyle='solid',label=season+' EP day sum')
            ax.plot(yrs_neusa,[r1[0]*x+r1[1] for x in np.arange(len(yrs_neusa))],color='k',linestyle='--',linewidth=1.5,label='y='+str(round(r1[0],2))+'x+'+str(round(r1[1],2))+', p='+str(round(r1[3],2)))
            ax.tick_params(labelsize=14)
            ax.set_xlabel('Year',fontsize=14)
            ax.set_ylabel('Monthly spatially-averaged precipitation (mm)',fontsize=14)
            ax.set_title('NEUSA Extreme Precipitation Day Totals',fontsize=18)
            plt.legend(loc='upper left',fontsize=12)
            plt.show()
            plt.close(fig)

    # One table row per threshold file.
    cell_text=[list(row) for row in zip(station_thresh_list,area_thresh_list,days_list,
                                        percent_trend_list,p_value_list,mk_list)]
    print(cell_text)

    fig, ax = plt.subplots()
    ax.set_axis_off()
    table=ax.table(cellText=cell_text,rowLabels=['']*len(cell_text),colLabels=columns,cellLoc ='center',loc ='upper left')
    table.auto_set_font_size(False)
    table.set_fontsize(14)
    table.scale(3.2, 1.5)
    for (row, col), cell in table.get_celld().items():
        if (row == 0) or (col == -1):
            # Header row and row-label column are drawn bold.
            # NOTE(review): assigning fontproperties after set_fontsize(20) likely
            # resets the size to the default - confirm the rendered header size.
            cell.get_text().set_fontsize(20)
            cell.set_text_props(fontproperties=FontProperties(weight='bold'))
        else:
            cell.get_text().set_fontsize(14)
    # NOTE(review): only the first five of the six columns are auto-sized, so
    # 'M-K significance' keeps its scaled default width; kept as-is to leave the
    # figure layout unchanged - confirm whether range(len(columns)) was intended.
    table.auto_set_column_width(col=list(range(5)))
    if season=='annual':
        ax.text(1.0,1.02,'Fall (annual threshold)',fontsize=20)
    else:
        ax.text(1.1,1.02,season,fontsize=20)

    plt.show()
    #fig.savefig(dir+'climo_neusa_sensitivity_tables_'+season+'.png',bbox_inches="tight")
    # Release the figure so repeated runs do not accumulate open figures.
    plt.close(fig)