# Analysis of GWL projections

In [1]:
#import libraries
%matplotlib notebook
import geopandas as gpd
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import os
from functions import setinputdataset
#from statsmodels.tsa.seasonal import seasonal_decompose
#from scipy.stats import linregress
import numpy as np
from matplotlib_scalebar.scalebar import ScaleBar
import matplotlib.colors as colors
import contextily as cx
from mpl_toolkits.axes_grid1 import make_axes_locatable
import seaborn as sns
from functions import mapplots, mapplots2, mapplotsproj
from scipy import stats
import warnings
warnings.simplefilter(action='ignore', category=Warning)
import pymannkendall as mk
from matplotlib.colors import LinearSegmentedColormap
import importlib

In [2]:
patht="D:/Erasmus/Thesis/"
pathdat=patht+"/data/"

pathshp=pathdat+"/SHP/SHP2/"
pathrast=pathdat+"/Raster/"
pathpick=pathdat+"/Pickle/"
pathfig=patht+"/Figures/"

path=r"D:\Erasmus\Thesis\Code\08072022\GW_forecasting_ML\projections/"

In [3]:
#list of wells with NSE>0.5 and r2>0.6
well_list = pd.read_csv(path+"/well_list60.txt")

#Name of climate model projections available (given by the DWD)
Proj_names=["MPI_WRF361H", "MPI_CCLM", "MIROC_CCLM", 
         "HadGEM_WRF361H", "ECE_RACMO_r12", "ECE_RACMO_r1"]
#Import spatial information per well -- results of the Accuracy_plot script
gw_sel_int=gpd.read_file(pathshp+"gw_sel_intmod2.shp")

In [4]:
#Check what happened with these wells
#inds=well_list.loc[well_list.MEST_ID.isin([40000231,9700260,40501280,40501871,40504730, 40502600])].index
#well_list=well_list.drop(index=inds)
gw_sel_proj=gw_sel_int.loc[gw_sel_int.MEST_ID.isin(well_list.MEST_ID)]
gw_sel_proj=gw_sel_proj.loc[(gw_sel_proj['r2_2']>0.7)&(gw_sel_proj['NSE_2']>0.6)].reset_index()

In [17]:
#Administrative boundaries
germany_states = gpd.read_file(pathshp+"DEU_adm1.shp")
ND=germany_states[germany_states.NAME_1== "Niedersachsen"]
waterbodies=gpd.read_file(pathshp+"waterbodiesND.shp")
waterways=gpd.read_file(pathshp+"waterwaysND.shp")
citiesND=gpd.read_file(pathshp+"citiesND2.shp")

#project coordinate sytem
proj_coor=4647
gw_sel_proj=gw_sel_proj.to_crs(epsg=proj_coor)
waterbodies=waterbodies.to_crs(epsg=proj_coor)
waterways=waterways.to_crs(epsg=proj_coor)
bound=germany_states.to_crs(gw_sel_proj.crs.to_string()) 
ND=ND.to_crs(epsg=proj_coor)
cities=citiesND.to_crs(epsg=proj_coor)

## Load GWL projections

In [7]:
count=0
gw_sel_projr=gw_sel_proj.reset_index()
well_list=well_list.reset_index()

for pjname in Proj_names:
    gw_sel_projr[pjname+'_scores']=np.zeros(len(gw_sel_projr))
    prdfs, wellidv=[], []
    for wellid in well_list.MEST_ID:
        projgwl=pd.read_csv(path+'/'+str(wellid)+'/'+'ensemble_mean_values_CNN_'
                            +str(wellid)+'_'+pjname+'.txt', sep=';')
        projgwl.index=pd.to_datetime(projgwl['dates'])
        scores=pd.read_csv(path+'/'+str(wellid)+'/'+pjname+'_scores.txt', sep=',')

        iwell=gw_sel_projr.loc[gw_sel_projr.MEST_ID==wellid].index

        if not iwell.empty:
            # select only R2 to check the projections
            gw_sel_projr.at[iwell[0],pjname+'_scores']=round(scores.R2.values[0],3) 
            prdfs.append(projgwl)
            wellidv.append(iwell[0])

    df=pd.DataFrame({'wellids':wellidv, pjname+'_proj': prdfs})
    gw_sel_projr[pjname+'_proj']=df[pjname+'_proj'].copy()     



## Resample time series to yearly resolution

In [8]:
def loadrawGWL(gwldataf, Well_ID):
    #gwldataf in pickle file as a result of the gap-filling process
    datagw=pd.read_pickle(gwldataf) #Select the gap-filled GWL data
    dfaux=datagw.loc[datagw.wellid==int(Well_ID)].reset_index().GW_NN[0]
    GWDatax=dfaux[[dfaux.columns[0],dfaux.columns[-1]]]
    GWDatax['GWL']=dfaux[dfaux.columns[-1]]
    GWDatax.set_index('DATUM', inplace=True)
    
    return GWDatax

In [9]:
gwlfilleddata=pathpick+"/GWfilldatamod2.pkl"

for pjname in Proj_names:
    r2v, wellidv, gwsel_projyv=[], [] , []
    for wid in range(len(gw_sel_projr)):
        #Load GWL filled data 
        wellid=gw_sel_projr.MEST_ID[wid]
        GWDatax=loadrawGWL(gwlfilleddata,wellid)
        GWData=GWDatax['GWL']

        idate=gw_sel_projr[pjname+'_proj'][wid].Sim.index[0].strftime("%Y-%m")
        fgwmonth=GWData.index[-1].month
        fdate=GWData.index[-1].strftime("%Y-%m") if fgwmonth==12 else GWData.index[-1-fgwmonth].strftime("%Y-%m")
        GWDataf=GWData.loc[(GWData.index.strftime("%Y-%m") >=idate)&
               (GWData.index.strftime("%Y-%m")<=fdate)]
        GWdatar=GWDataf.resample('Y').mean()

        gwsel_projy=gw_sel_projr[pjname+'_proj'][wid].Sim.resample('Y').mean()
        gwsel_projf=gwsel_projy.loc[gwsel_projy.index.strftime("%Y-%m")<=fdate]
        r2=stats.pearsonr(GWdatar, gwsel_projf)[1]**2
        r2v.append(r2)
        #print(pjname)
        #print(r2)
        wellidv.append(wellid)
        gwsel_projyv.append(gwsel_projy)
        
        
    df=pd.DataFrame({'wellids':wellidv , pjname+'_projrs': gwsel_projyv})
    gw_sel_projr[pjname+'_projrs']=df[pjname+'_projrs'].copy()   
    gw_sel_projr[pjname+'_scoresrs']=r2v


## Plot correlation between GWL observations and projections

In [16]:
plt.figure(figsize=(4,3))
for pjname in Proj_names:
    gw_sel_projr[pjname+'_scoresrs'].plot.kde(label=pjname,alpha=0.5)
#plt.ylim([0,3.8])
plt.xlabel('r$^2$')
plt.grid(True, alpha=0.1)
plt.legend()
plt.tight_layout()

<IPython.core.display.Javascript object>

In [60]:
plt.figure(figsize=(6,5))
for pjname in Proj_names:
    gw_sel_projr[pjname+'_scores'].plot.kde(label=pjname,alpha=0.5)
plt.ylim([0,3.8])
plt.xlabel('r$^2$')
plt.grid(True, alpha=0.1)
plt.legend()
plt.tight_layout()


<IPython.core.display.Javascript object>

## Trend

__Mann-Kendall test__ -- for seasonal time series\
-_trend_: tells the trend (increasing, decreasing or no trend)\
-_h_: True (if trend is present) or False (if the trend is absence)\
-_p_: p-value of the significance test\
-_z_: normalized test statistics\
-_Tau_: Kendall Tau\
-_s_: Mann-Kendal's score\
-_var_s_: Variance S\
-_slope_: Theil-Sen estimator/slope\
-_intercept_: intercept of Kendall-Theil Robust Line, for seasonal test, full period cycle consider as unit time step

Hussain et al., (2019). pyMannKendall: a python package for non parametric Mann Kendall family of trend tests.. Journal of Open Source Software, 4(39), 1556, https://doi.org/10.21105/joss.01556

In [10]:
datei=pd.to_datetime('01012020', format='%d%m%Y')
datem=pd.to_datetime('01012050', format='%d%m%Y')
datem2=pd.to_datetime('01012060', format='%d%m%Y')
datef=pd.to_datetime('01012090', format='%d%m%Y')
datef2=gw_sel_projr[pjname+'_projrs'][0].index[-1]
deltyrnf=(datem.year-datei.year)
deltyrff=(datef.year-datem2.year)
deltyr=(datef2.year-datei.year)

In [11]:
period=1
alpha=0.05
for pjname in Proj_names:
    
    gw_sel_projr[pjname+'_yr_slopenf']=np.zeros(len(gw_sel_projr))
    gw_sel_projr[pjname+'_yr_trendnf']=np.zeros(len(gw_sel_projr))
    gw_sel_projr[pjname+'_yr_pvalnf']=np.zeros(len(gw_sel_projr))
    gw_sel_projr[pjname+'_yr_deltanf']=np.zeros(len(gw_sel_projr)) 
    gw_sel_projr[pjname+'_yr_slopeff']=np.zeros(len(gw_sel_projr))
    gw_sel_projr[pjname+'_yr_trendff']=np.zeros(len(gw_sel_projr))
    gw_sel_projr[pjname+'_yr_pvalff']=np.zeros(len(gw_sel_projr)) 
    gw_sel_projr[pjname+'_yr_deltaff']=np.zeros(len(gw_sel_projr)) 
    gw_sel_projr[pjname+'_yr_slope']=np.zeros(len(gw_sel_projr))
    gw_sel_projr[pjname+'_yr_trend']=np.zeros(len(gw_sel_projr))
    gw_sel_projr[pjname+'_yr_pval']=np.zeros(len(gw_sel_projr)) 
    gw_sel_projr[pjname+'_yr_delta']=np.zeros(len(gw_sel_projr))     
    
    for i in range(len(gw_sel_projr[pjname+'_projrs'])):
        dfproj=gw_sel_projr[pjname+'_projrs'][i]
        nearfuture=dfproj.loc[(dfproj.index>datei)&
                              (dfproj.index<datem)]
        farfuture=dfproj.loc[dfproj.index>datem2]
        
        future=dfproj.loc[dfproj.index>datei]
        
        overlap=dfproj.loc[(dfproj.index<datei)]
        overlapff=dfproj.loc[(dfproj.index>pd.to_datetime('01012045', format='%d%m%Y')) 
                             & (dfproj.index<datem2)]


        #Seasonal Mann-Kendall test also developed to remove the effect of seasonality
        trendnf, hnf, pnf, znf, Taunf, snf, var_snf, slopenf, interceptnf =mk.yue_wang_modification_test(nearfuture)
        trendff, hff, pff, zff, Tauff, sff, var_sff, slopeff, interceptff =mk.yue_wang_modification_test(farfuture)
        trend, h, p, z, Tau, s, var_s, slope, intercept =mk.yue_wang_modification_test(future)
        
        gw_sel_projr.at[i,pjname+'_yr_slopenf']=slopenf
        gw_sel_projr.at[i,pjname+'_yr_trendnf']=trendnf
        gw_sel_projr.at[i,pjname+'_yr_pvalnf']=pnf
        gw_sel_projr.at[i,pjname+'_yr_deltanf']=(slopenf*deltyrnf)/np.abs(overlap.mean())*100
        
        gw_sel_projr.at[i,pjname+'_yr_slopeff']=slopeff
        gw_sel_projr.at[i,pjname+'_yr_trendff']=trendff
        gw_sel_projr.at[i,pjname+'_yr_pvalff']=pff     
        gw_sel_projr.at[i,pjname+'_yr_deltaff']=(slopeff*deltyrff)/np.abs(overlapff.mean())*100

        gw_sel_projr.at[i,pjname+'_yr_slope']=slope
        gw_sel_projr.at[i,pjname+'_yr_trend']=trend
        gw_sel_projr.at[i,pjname+'_yr_pval']=p    
        gw_sel_projr.at[i,pjname+'_yr_delta']=(slope*deltyr)/np.abs(overlap.mean())*100        
        
    #Exclude the values with no significant trend 
    gw_sel_projr[pjname+'_yr_deltanf2'] = [np.nan if y == 'no trend' else x for x,y in zip(gw_sel_projr[pjname+'_yr_deltanf'],
                                                                                       gw_sel_projr[pjname+'_yr_trendnf'] )]
    gw_sel_projr[pjname+'_yr_deltaff2'] = [np.nan if y == 'no trend' else x for x,y in zip(gw_sel_projr[pjname+'_yr_deltaff'],
                                                                                       gw_sel_projr[pjname+'_yr_trendff'] )]
    gw_sel_projr[pjname+'_yr_delta2'] = [np.nan if y == 'no trend' else x for x,y in zip(gw_sel_projr[pjname+'_yr_delta'],
                                                                                       gw_sel_projr[pjname+'_yr_trend'] )]


        #print(trendnf,trendff)

In [76]:
#Save dataframe to pickle
#gw_sel_projr.to_pickle(pathpick+'gw_sel_projyr.pkl') 

In [14]:
lnames=[' '*30+'ECE_RACMO_r12',' ',' ',
        ' '*30+'ECE_RACMO_r1',' ',' ',
        ' '*30+'HadGEM_WRF361H',' ',' ',
        ' '*30+'MPI_WRF361H', ' ',' ',
        ' '*30+'MPI_CCLM', ' ',' ',
        ' '*30+'MIROC_CCLM',  ' ',' ']
col=['ECE_RACMO_r12_yr_deltanf2','ECE_RACMO_r12_yr_deltaff2','ECE_RACMO_r12_yr_delta2',
     'ECE_RACMO_r1_yr_deltanf2','ECE_RACMO_r1_yr_deltaff2','ECE_RACMO_r1_yr_delta2',
     'HadGEM_WRF361H_yr_deltanf2','HadGEM_WRF361H_yr_deltaff2','HadGEM_WRF361H_yr_delta2',
     'MPI_WRF361H_yr_deltanf2', 'MPI_WRF361H_yr_deltaff2','MPI_WRF361H_yr_delta2',
     'MPI_CCLM_yr_deltanf2', 'MPI_CCLM_yr_deltaff2','MPI_CCLM_yr_delta2',
     'MIROC_CCLM_yr_deltanf2',  'MIROC_CCLM_yr_deltaff2',  'MIROC_CCLM_yr_delta2']
data=gw_sel_projr[col]
long=[str(len(gw_sel_projr[c].dropna())) for c in col]
maxv=30
sns.set_theme(style="darkgrid")
fig, ax = plt.subplots(1,6,figsize=(11,4), sharey=True)

colors = ['#5ed5f8', '#5e7af8','#DD6060']

c=0
for i in range(6):
    dataf=data[col[c:c+3]]
    ax[i] = sns.boxplot(data=dataf, palette=colors, linewidth=0.5, 
                     fliersize=1,width=0.7, ax=ax[i])
    cou=c
    for xtick in ax[i].get_xticks():
        ax[i].text(xtick,10*0.93,long[cou],
                horizontalalignment='center',size='10',color='k')
        cou+=1
        
    ax[i].set_xticklabels(lnames[c:c+3],rotation=0, fontsize=9)


    #ax.set_ylim([-40,40])
    #ax.set_xticklabels([" Short-term: 2021-2070"," Long-term: 2070-2100"])
    #plt.hlines(0,0,17,'r', lw=0.5, linestyles='--')
    #ax.set_aspect(0.03)
    c+=3
#plt.xticklabels(lnames[c:c+3],rotation=0)   
#plt.set_xticklabels(ax.get_xmajorticklabels(), fontsize = 8)
#ax.set_yticklabels(ax.get_yticks(), fontsize = 8)
#fig.text(0.5, 0.004, 'Climate models', ha='center')
fig.text(0.07, 0.5, '$\Delta$GWL (%)- Annual', va='center', rotation='vertical')
plt.ylim([-10,10])
#fig.supylabel('$\Delta$GWL (%)')
#fig.supxlabel('Climate models')
fig.subplots_adjust(wspace=.01)
#plt.tight_layout()
#plt.savefig(path+"boxplot_yr.pdf",bbox_inches="tight")
#plt.savefig(path+"boxplot_yr2.png",bbox_inches="tight",dpi=300)

<IPython.core.display.Javascript object>

In [15]:
cmdfs=gw_sel_projr[col]

l1=cmdfs.where(cmdfs<-100).dropna(axis=0, how='all').index
l2=cmdfs.where(cmdfs>100).dropna(axis=0, how='all').index
lind=l1.append(l2).unique()
gw_sel_projrc=gw_sel_projr.drop(index=lind)

In [33]:
gw_sel_projrcnf[proj+'_yr_deltaff'].min()

-1.8078766806179944

In [37]:
#proj='ECE_RACMO_r12'
#proj='MPI_WRF361H'
#proj='MPI_CCLM'
#proj='MIROC_CCLM'
proj='HadGEM_WRF361H'
#proj='ECE_RACMO_r12'

#listproj=['HadGEM_WRF361H','ECE_RACMO_r12','MPI_CCLM','MPI_WRF361H','ECE_RACMO_r1','MIROC_CCLM']

#for proj in listproj:
    
importlib.reload(plt); importlib.reload(sns)
sns.set_style("ticks")

gw_sel_projrcnf=gw_sel_projrc.loc[gw_sel_projrc[proj+'_yr_trendnf']!='no trend']
gw_sel_projrcff=gw_sel_projrc.loc[gw_sel_projrc[proj+'_yr_trendff']!='no trend']
cmap = LinearSegmentedColormap.from_list( 'mycmap', 
                                        [(0, '#780000'),(0.5, '#df817a'), (0.6, '#90e0ef'),(1, '#023e8a')])

max_of_min=min(gw_sel_projrcff[proj+'_yr_deltaff'].min(), gw_sel_projrcnf[proj+'_yr_deltanf'].min() )
minlim=-30 if max_of_min< -30 else max_of_min
classification_kwds=dict(bins=[-2,-1,-.5,0,.5,1,2])
classification_kwds2=dict(bins=[-2,-1,-.5,0,.5,1,2])
#classification_kwds=dict(bins=[minlim,-3,-1,0,1,3,10]) #HadGEM_WRF361H
legend_kwds={'loc': 3, 'fmt':"{:.1f}",'interval': True, 'fontsize':10,
                     'frameon':False,'title_fontsize':11}

fig, (gw,gw2) = plt.subplots(nrows=1,ncols=2, sharex=False, figsize=(14, 8),linewidth=0.01)
gw=mapplotsproj(column=proj+'_yr_deltanf',bound=bound,axs=gw,gw_s=gw_sel_projrcnf, 
               scheme='User_Defined',
               cities=cities,waterbodies=waterbodies,waterways=waterways,ND=ND, 
               cmap=cmap, marker='h',
               classification_kwds=classification_kwds, ms=20,
               legend_title='$\Delta$GWL(2020-2050)',legend_kwds=legend_kwds)

gw=mapplotsproj(column=proj+'_yr_deltanf',color='#939393',
               bound=bound,axs=gw,gw_s=gw_sel_projr, marker='v',ms=10,zorder=1)


[gw.spines[axis].set_linewidth(0.5) for axis in ['top', 'bottom', 'left', 'right']]

gw2=mapplotsproj(column=proj+'_yr_deltaff',bound=bound,axs=gw2,gw_s=gw_sel_projrcff,
                scheme='User_Defined',
                cities=cities,waterbodies=waterbodies,waterways=waterways,ND=ND, 
                cmap=cmap, marker='h' ,
                classification_kwds=classification_kwds2, ms=20,  
                legend_title='$\Delta$GWL(2060-2090)',legend_kwds=legend_kwds )

gw2=mapplotsproj(column=proj+'_yr_deltaff',color='#939393',
               bound=bound,axs=gw2,gw_s=gw_sel_projr, marker='v',ms=10,zorder=1)


[gw2.spines[axis].set_linewidth(0.5) for axis in ['top', 'bottom', 'left', 'right']]
plt.tight_layout() 
plt.savefig(pathfig+proj+"_delta_Ext_yr.jpg",bbox_inches="tight",dpi=200)


<IPython.core.display.Javascript object>

In [20]:
listproj=['HadGEM_WRF361H','ECE_RACMO_r12','MPI_CCLM','MPI_WRF361H','ECE_RACMO_r1','MIROC_CCLM']
listproj=['HadGEM_WRF361H','ECE_RACMO_r12']
for proj in listproj:
    importlib.reload(plt); importlib.reload(sns)
    sns.set_style("ticks")

    gw_sel_projrcc=gw_sel_projrc.loc[~gw_sel_projrc[proj+'_yr_delta2'].isnull()]
    cmap = LinearSegmentedColormap.from_list( 'mycmap', 
                                             [(0, '#780000'),(0.5, '#df817a'), (0.6, '#90e0ef'),(1, '#023e8a')])
    
    max_of_min=max(gw_sel_projrcc[proj+'_yr_delta'].min(), gw_sel_projrcc[proj+'_yr_delta'].min() )
    minlim=-30 if max_of_min< -30 else max_of_min
    classification_kwds=dict(bins=[minlim,-3,-1,-0.5,0,0.5,1,3]) #HadGEM_WRF361H
    #classification_kwds=dict(bins=[minlim,-3,-1,0,1,3,10]) #ECE_RACMO_r12
    legend_kwds={'loc': 3, 'fmt':"{:.1f}",'interval': True, 'fontsize':10,
                     'frameon':False,'title_fontsize':11}

    fig, (gw) = plt.subplots(nrows=1,ncols=1, sharex=False, figsize=(7, 7),linewidth=0.01)
    gw=mapplotsproj(column=proj+'_yr_delta',bound=bound,axs=gw,gw_s=gw_sel_projrcc, 
                    scheme='User_Defined',
                    cities=cities, waterbodies=waterbodies,waterways=waterways,ND=ND, 
                    cmap=cmap, marker='h' ,
                    classification_kwds=classification_kwds, ms=20, 
                   legend_title='$\Delta$GWL(2020-2100)',legend_kwds=legend_kwds)
    
    gw=mapplotsproj(column=proj+'_yr_delta',color='#939393',
                   bound=bound,axs=gw,gw_s=gw_sel_projr, marker='v',ms=10,zorder=1)


    [gw.spines[axis].set_linewidth(0.5) for axis in ['top', 'bottom', 'left', 'right']]

    plt.tight_layout() 
    plt.savefig(pathfig+proj+"_all_mk.jpg",bbox_inches="tight",dpi=200)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Heatmaps

In [70]:
wellid=well_list.MEST_ID[0]
projname=Proj_names[0]

In [76]:
projgwl=pd.read_csv(path+'/'+str(wellid)+'/'+'ensemble_mean_values_CNN_'+str(wellid)+'_'+projname+'.txt', sep=';')
projgwl.index=pd.to_datetime(projgwl['dates'])
scores=pd.read_csv(path+'/'+str(wellid)+'/'+projname+'_scores.txt', sep=',')


In [73]:
plt.figure(figsize=(10,3))
#plt.plot(hyrasdfcomp.index,hyrasdfcomp['pr'],label='HYRAS',color='darkblue')
#for n in range(len(Projection_name)):
plt.plot(projgwl.index,projgwl['Sim'])
#plt.legend(loc=1)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1c5dc2d5b20>]

In [78]:
projgwl["DATE"]= projgwl.index
projgwl["Year"] = projgwl.DATE.apply(lambda x: x.year)
projgwl["Month"] = projgwl.DATE.apply(lambda x: x.strftime("%B"))
pt=projgwl.pivot_table(index="Year",columns="Month",values="Sim", aggfunc="sum").fillna(0)
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
#pt = pt.reindex_axis(months)
y_axis_labels=np.arange(2014,2100,20)
sns.heatmap(pt, annot=False, cmap='RdYlBu_r',xticklabels=months, alpha=0.7)
plt.yticks(np.arange(0,85,6), np.arange(2014,2100,6))

([<matplotlib.axis.YTick at 0x1c5d7da82e0>,
  <matplotlib.axis.YTick at 0x1c5d7da9850>,
  <matplotlib.axis.YTick at 0x1c5dc2cf610>,
  <matplotlib.axis.YTick at 0x1c5d7d95190>,
  <matplotlib.axis.YTick at 0x1c5d7d95580>,
  <matplotlib.axis.YTick at 0x1c5d7d95160>,
  <matplotlib.axis.YTick at 0x1c5d7dcf220>,
  <matplotlib.axis.YTick at 0x1c5d6ae22b0>,
  <matplotlib.axis.YTick at 0x1c5dbf5d520>,
  <matplotlib.axis.YTick at 0x1c5dc2d5e20>,
  <matplotlib.axis.YTick at 0x1c5dbef61f0>,
  <matplotlib.axis.YTick at 0x1c5dbef6940>,
  <matplotlib.axis.YTick at 0x1c5d7d9f0d0>,
  <matplotlib.axis.YTick at 0x1c5dbef6a30>,
  <matplotlib.axis.YTick at 0x1c5d7d9f700>],
 [Text(0, 0, '2014'),
  Text(0, 6, '2020'),
  Text(0, 12, '2026'),
  Text(0, 18, '2032'),
  Text(0, 24, '2038'),
  Text(0, 30, '2044'),
  Text(0, 36, '2050'),
  Text(0, 42, '2056'),
  Text(0, 48, '2062'),
  Text(0, 54, '2068'),
  Text(0, 60, '2074'),
  Text(0, 66, '2080'),
  Text(0, 72, '2086'),
  Text(0, 78, '2092'),
  Text(0, 84, '2098

In [None]:

most_sightings_years = projgwl.index.dt.year.value_counts().head(10)

month_vs_year = df.pivot_table(columns=df['Date_time'].dt.month,
                               index=df['Date_time'].dt.year.apply(is_top_years),
                               aggfunc='count',values='city')
month_vs_year.columns = month_vs_year.columns.astype(int)

plt.figure(figsize=(10,8))
ax = sns.heatmap(month_vs_year, vmin=0, vmax=4)
ax.set_xlabel('Month').set_size(20)
ax.set_ylabel('Year').set_size(20)