# Annual Maxima - Generalized Extreme Values distribution
Precipitation database

* [1. Input Precipitation](#1)
	* [1.1. Load Dataset](#11)  
	* [1.2. Data Visualization: Time Series](#12)     
	* [1.3. Data Visualization: Histograms](#13)     
    
* [2. Annual Maxima Extremes](#2)
	* [2.1. Calculate Annual Maxima](#21)
    * [2.2. Data visualization - Daily and Annual precipitation](#22)
	* [2.3. Fit Annual Maxima to Generalized Extreme Value](#23)
    * [2.4. Simulate GEV parameters](#24)
    * [2.5. Uncertainty of shape parameter](#25)
    
* [3. Simulate Precipitation](#3)
	* [3.1. Use simulated GEVs to generate Precipitation Annual Maxima](#31)
	* [3.2. Plot Return Period](#32)
    
<hr size="5"/>

In [None]:
import os
import os.path as op

import numpy as np
from numpy.random import multivariate_normal
import pandas as pd
from scipy.stats import  gumbel_l, genextreme

# plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff


## 1. Input Precipitation <a class="anchor" id="1"></a>

### 1.1. Load Dataset <a class="anchor" id="11"></a>

In [None]:
# database folder, resolved two levels above the current working directory
p_db = op.join(os.getcwd(), '..', '..', 'data', 'Precipitation_Cantabria')

# precipitation spreadsheet (xls): no header row, a single column of values
p_dat = op.join(p_db, '1083e_R.xls')
data = pd.read_excel(p_dat, header=None, names=['Precipitation'])

# daily time index named 'time' (the end date of np.arange is exclusive)
data.index = pd.Index(
    np.arange('1970-10-01', '2003-10-01', dtype='datetime64[D]'),
    name='time',
)


### 1.2. Data Visualization: Time Series <a class="anchor" id="12"></a>

In [None]:
# interactive line plot of the whole precipitation dataframe against its time index
px.line(data)

### 1.3. Data Visualization: Histograms <a class="anchor" id="13"></a>

In [None]:
# Side-by-side histograms of the precipitation series:
# raw counts (left) and probability density (right).
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=['Count', 'Probability Density'],
)

# add_trace(row=, col=) is used instead of Figure.append_trace, which is
# deprecated in plotly and only forwards to add_trace.
fig.add_trace(
    go.Histogram(x=data['Precipitation'], nbinsx=30),
    row=1, col=1,
)
fig.add_trace(
    go.Histogram(x=data['Precipitation'], nbinsx=30, histnorm='probability density'),
    row=1, col=2,
)

# a legend adds nothing here: each panel holds a single trace
fig.update_layout(showlegend=False)
fig.show()


## 2. Annual Maxima Extremes <a class="anchor" id="2"></a>

### 2.1. Calculate Annual Maxima <a class="anchor" id="21"></a>

In [None]:
# Hydrologic year labelling: a hydrologic year starts on October 1st (10-01)
# and is labelled with the calendar year in which it starts.
data_year = pd.DatetimeIndex(data.index).year
data_month = pd.DatetimeIndex(data.index).month

# October-December belong to the hydrologic year of their own calendar year,
# January-September to the one that started the previous October.
# The label is derived from each timestamp, so it does not depend on a
# hard-coded first year nor on the series starting exactly on an October 1st.
# Kept as float to match the dtype of the previous np.zeros-based labelling.
yh = np.where(data_month >= 10, data_year, data_year - 1).astype(float)

# NOTE: stored as an ad-hoc attribute of the index object because the annual
# maxima groupby reads `data.index.hydro`; the attribute is lost whenever the
# index is replaced or copied.
data.index.hydro = yh


In [None]:
# Annual maxima per hydrologic year (pandas groupby + named aggregation):
# Fmax is the index label (date) of the maximum, Pmax is its value
amax = data.groupby(by=[data.index.hydro]).agg(
    Fmax=('Precipitation', lambda serie: serie.idxmax()),
    Pmax=('Precipitation', lambda serie: serie.max()),
)

# Full series as a line, with the annual maxima overlaid as markers
fig = go.Figure(
    data=[
        go.Scatter(
            x=data.index, y=data['Precipitation'],
            mode='lines', name='Historic',
        ),
        go.Scatter(
            x=amax['Fmax'], y=amax['Pmax'],
            mode='markers', name='Annual Maxima',
        ),
    ]
)
fig.update_layout(
    xaxis_title="time",
    yaxis_title="Precipitation (mm/d)",
    yaxis=dict(rangemode='nonnegative'),
)
fig.show()


### 2.2. Data visualization - Daily and Annual precipitation <a class="anchor" id="22"></a>

In [None]:
# annual maxima values as a plain numpy array
var_max = amax['Pmax'].to_numpy()

# overlaid probability-density histograms: full series vs annual maxima
fig = go.Figure(
    data=[
        go.Histogram(
            x=data['Precipitation'], nbinsx=100,
            histnorm='probability density', name='Daily',
        ),
        go.Histogram(
            x=var_max, nbinsx=30,
            histnorm='probability density', name='Annual',
        ),
    ]
)
fig.update_layout(
    title='Probability Density Functions',
    xaxis_title="x",
    yaxis_title="P(x)",
    barmode='overlay',
    showlegend=True,
)
fig.show()

### 2.3. Fit Annual Maxima to Generalized Extreme Value <a class="anchor" id="23"></a>

In [None]:
# fit the annual maxima to a GEV; theta bundles (shape, loc, scale)
theta = tuple(genextreme.fit(var_max))
shape, loc, scale = theta
print(*theta)

# negative loglikelihood of the fitted parameters
nLogL = genextreme.nnlf(theta, var_max)

# frozen GEV and its PDF sampled on 1000 points between the
# 0.001 and 0.999 quantiles
rv = genextreme(*theta)
x = np.linspace(*rv.ppf([0.001, 0.999]), 1000)
y = rv.pdf(x)


In [None]:
# Fitted GEV PDF (black line) over the probability-density histogram of the
# annual maxima
fig = go.Figure(
    data=[
        go.Scatter(x=x, y=y, mode='lines', name='PDF', marker_color='black'),
        go.Histogram(
            x=var_max, nbinsx=30,
            histnorm='probability density', name='Historical',
        ),
    ]
)
fig.update_layout(
    title='GEV Probability Density Function',
    xaxis_title="x",
    yaxis_title="P(x)",
    yaxis=dict(rangemode='nonnegative'),
)
fig.show()


In [None]:
# draw a random sample from the fitted GEV
size_sim = 1000
var_sim = genextreme(shape, loc, scale).rvs(size=size_sim)

# Fitted GEV PDF (black line) over the probability-density histogram of the
# simulated sample
fig = go.Figure(
    data=[
        go.Scatter(x=x, y=y, mode='lines', name='PDF', marker_color='black'),
        go.Histogram(
            x=var_sim, nbinsx=100,
            histnorm='probability density', name='Simulation',
        ),
    ]
)
fig.update_layout(
    title='GEV Probability Density Function',
    xaxis_title="x",
    yaxis_title="P(x)",
    yaxis=dict(rangemode='nonnegative'),
)
fig.show()


### 2.4. Simulate GEV parameters <a class="anchor" id="24"></a>

In [None]:
def ACOV(f, theta, x, pm=0.00001):
    '''
    Returns the asymptotic variance-covariance matrix of the parameters,
    computed as the inverse of a finite-difference Fisher Information matrix.
    Generalized functions, parameters and data.

    f      - function to evaluate, called as f(theta, x): negative
             loglikelihood of GEV, GUMBEL, ...
    theta  - function parameters: for GEV (shape, location, scale).
             Any sequence or array; it is never modified.
    x      - data used for function evaluation
    pm     - relative perturbation applied to each parameter (default 1e-5).
             A parameter equal to zero is perturbed by pm itself.

    Second derivative evaluation - variance and covariance
    dxx = (f(x+dt_x) - 2f(x) + f(x-dt_x)) / (dt_x**2)
    dxy = (f(x,y) - f(x-dt_x,y) - f(x,y-dt_y) + f(x-dt_x,y-dt_y)) / (dt_x*dt_y)

    returns a (n, n) numpy array, n = len(theta). If f(theta, x) is infinite
    a message is printed and a matrix filled with 0.0001 is returned instead.
    Raises numpy.linalg.LinAlgError if the information matrix is singular.
    '''

    # float copy of the parameters: np.asarray would alias an ndarray input
    # (perturbing the caller's theta in place) and keep an integer dtype
    # (truncating the perturbations)
    params = np.array(theta, dtype=float)
    ss = len(params)

    # parameters differential: relative step, absolute step for zero-valued
    # parameters so the finite differences never divide by zero
    dt_p = np.where(params != 0, pm * params, pm)

    # function value at theta, reused by every finite difference
    f0 = f(theta, x)
    if np.isinf(f0):
        print ('ACOV error: nLogL = Inf. {0}'.format(theta))
        return np.ones((ss,ss))*0.0001

    def f_shift(*moves):
        # evaluate f with parameter k displaced by sign*dt_p[k] for each
        # (k, sign) pair, always starting from a fresh copy of params
        p = params.copy()
        for k, sign in moves:
            p[k] = p[k] + sign * dt_p[k]
        return f(tuple(p), x)

    # Fisher information matrix holder
    FI = np.full((ss, ss), np.nan)

    for i in range(ss):

        # variance: central second difference along parameter i
        FI[i,i] = (f_shift((i, 1)) - 2*f0 + f_shift((i, -1))) / (dt_p[i]**2)

        for j in range(i+1, ss):

            # covariance: backward mixed difference along parameters i and j
            cov = (f0 - f_shift((i, -1)) - f_shift((j, -1))
                   + f_shift((i, -1), (j, -1))) / (dt_p[i]*dt_p[j])
            FI[i,j] = cov
            FI[j,i] = cov

    # asymptotic variance covariance matrix
    acov = np.linalg.inv(FI)

    return acov


In [None]:
# how many GEV parameter sets to draw
n_sims = 1000

# fitted GEV parameters as an array: mean of the multivariate normal
theta_gen = np.array(theta)

# covariance of the parameters, from the GEV negative loglikelihood
acov = ACOV(genextreme.nnlf, theta, var_max)

# random GEV parameter sets, one row per simulation
theta_sim = multivariate_normal(mean=theta_gen, cov=acov, size=n_sims)


In [None]:
# simulated parameter sets followed by one last row holding the fitted
# parameters ('gen'), with per-row plotting attributes
df_theta = pd.DataFrame({
    'shape': np.append(theta_sim[:, 0], theta[0]),
    'loc': np.append(theta_sim[:, 1], theta[1]),
    'scale': np.append(theta_sim[:, 2], theta[2]),
    'gen': ['sim'] * len(theta_sim) + ['gen'],
    'color': [0.0] * len(theta_sim) + [1],
    'alpha': [0.7] * len(theta_sim) + [1],
})

# plot 3D simulated GEV distribution
fig = px.scatter_3d(
    df_theta,
    x='shape', y='loc', z='scale',
    color='gen',
    opacity=0.7,
)
fig.update_layout(
    title='Simulated GEV parameters',
    width=1000,
    height=600,
)
fig.update_traces(marker_size=4)
fig.show()

In [None]:
from scipy import stats


In [None]:
# gaussian kernel density estimate of the simulated GEV parameters
# (transposed so that each row holds one parameter), evaluated back at the
# simulated points themselves
kde = stats.gaussian_kde(theta_sim.T)
density = kde.evaluate(theta_sim.T)


In [None]:
# pairwise 2D scatter plots (lower triangle only) of the simulated GEV
# parameters; color and opacity come from the dataframe columns
fig = go.Figure(
    data=go.Splom(
        dimensions=[
            dict(label=label, values=df_theta[column])
            for label, column in (
                ('Shape', 'shape'),
                ('Location', 'loc'),
                ('Scale', 'scale'),
            )
        ],
        marker=dict(
            color=df_theta['color'],
            colorscale=['#636EFA', '#ff7f0e'],
            opacity=df_theta['alpha'],
        ),
        showupperhalf=False,
        diagonal_visible=False,
    )
)
fig.update_layout(
    title='Simulated GEV Parameters',
    width=700,
    height=700,
)
fig.show()


In [None]:
# scatter-plot matrix of the GEV parameters, grouped by the 'color' column,
# with histograms on the diagonal
fig = ff.create_scatterplotmatrix(
    df_theta.loc[:, ['shape', 'loc', 'scale', 'color']],
    index='color',
    diag='histogram',
    colormap='Blues',
    colormap_type='seq',
    width=800,
    height=800,
)

# hide the marker color bars
fig.update_traces(marker_showscale=False)
fig.show()

### 2.5. Uncertainty of shape parameter <a class="anchor" id="25"></a>

In [None]:
# display the fitted GEV parameters (shape, loc, scale) used as the mean of
# the multivariate normal simulation
theta_gen

In [None]:
# Monte Carlo: 2.5 and 97.5 percentiles of the simulated shape parameter
list(np.percentile(theta_sim[:, 0], [2.5, 97.5]))

In [None]:
# analytically: fitted shape -/+ 1.96 standard deviations, the standard
# deviation being the square root of the shape variance in acov
[theta[0] + side * 1.96 * np.sqrt(acov[0, 0]) for side in (-1, 1)]

## 3. Simulate Precipitation <a class="anchor" id="3"></a>

### 3.1. Use simulated GEVs to generate Precipitation Annual Maxima <a class="anchor" id="31"></a>

In [None]:
# number of years of precipitation to simulate
years_sim = 100

# one row of simulated annual maxima per simulated GEV parameter set
var_sim = np.full((theta_sim.shape[0], years_sim), np.nan)
for sim_row, sim_params in zip(var_sim, theta_sim):
    # sim_row is a view on var_sim, so this writes the output array in place
    sim_row[:] = genextreme.rvs(*sim_params, size=years_sim)

# yearly time array for the simulated data, starting in 1970 (end exclusive)
time_sim = np.arange('1970-10-01', '{0}-10-01'.format(1970+years_sim), dtype='datetime64[Y]')


In [None]:
# NOTE(review): this cell repeats the previous one line by line; running it
# draws a new random sample and overwrites var_sim and time_sim.
years_sim = 100  # years of precipitation to simulate

# use simulated GEVs
var_sim = np.zeros((theta_sim.shape[0], years_sim))*np.nan  # initialize output numpy array
for c, ts in enumerate(theta_sim):
    # one row of years_sim annual maxima drawn from the GEV defined by ts
    var_sim[c,:] = genextreme.rvs(*ts, size=years_sim)

# generate a time array for simulated data
time_sim = np.arange('1970-10-01', '{0}-10-01'.format(1970+years_sim), dtype='datetime64[Y]')


### 3.2. Plot Return Period <a class="anchor" id="32"></a>

In [None]:
# auxiliary function: return period times for a sample of yearly values
def t_rp(time_y):
    '''
    Returns a numpy array with the return period T = 1 / (1 - n/(ny+1)) of
    each rank n = 1..ny, ny being the length of the sample.

    time_y - sample (any sized object); only its length is used
    '''
    ny = len(time_y)
    rank = np.arange(1, ny + 1)
    return 1 / (1 - (rank / (ny + 1)))

# historical: return period times and annual maxima sorted in ascending order
trp_hist = t_rp(amax['Fmax'])
trp_hist_val = np.sort(amax['Pmax'])

# simulation: return period times and, for each simulation (row), its annual
# maxima sorted in ascending order
trp_sim = t_rp(time_sim)
trp_sim_val = np.sort(var_sim)

# percentiles across simulations for each sorted position.
# p95 and p05 hold the 97.5 and 2.5 percentiles respectively.
p95, p50, p05 = np.percentile(trp_sim_val, [100-5/2.0, 50, 5/2.0], axis=0)


In [None]:
# Return period plot: simulation percentile band and median (lines) against
# the historical annual maxima (markers), on a logarithmic return period axis

fig = go.Figure()

# upper bound first: the lower bound is filled up to the previous trace
fig.add_trace(go.Scatter(
    x=trp_sim, y=p95, mode='lines', name='P95',
    marker_color='mediumturquoise',
))
fig.add_trace(go.Scatter(
    x=trp_sim, y=p05, mode='lines', name='P05',
    marker_color='mediumturquoise',
    fill='tonexty', fillcolor='rgba(0, 181, 204, 0.10)',
))
fig.add_trace(go.Scatter(
    x=trp_sim, y=p50, mode='lines', name='P50',
    marker_color='black',
))
fig.add_trace(go.Scatter(
    x=trp_hist, y=trp_hist_val, mode='markers', name='Hist',
    marker_color='red',
))

fig.update_xaxes(type="log")
fig.update_layout(
    title='Annual Maxima',
    xaxis_title="Return Period (years)",
    yaxis_title="Precipitation (mm/d)",
    width=400*2.5, height=300*2.5,
)
fig.show()
