# Rivers Flow Data

In [1]:
# Imports
import numpy as np
from __future__ import division
%matplotlib inline
import matplotlib.pyplot as plt

import extreme_data as extr
import clef_algo as clf
import hill_estimator as hill
import peng_estimator as peng
import kappa_estimator as kap
import hydro_map as hm
import damex_algo as dmx
import generate_alphas as ga
import utilities as ut
import peng_estimator as pg
import kappa_estimator as kp

from sklearn.model_selection import ShuffleSplit

## Load data

In [2]:
X = np.load('data/hydro_data/raw_discharge.npy')
V = extr.rank_transformation(X)
n, d = V.shape

## Split train/test

In [3]:
rs = ShuffleSplit(n_splits=1, test_size=.1, random_state=1)
ind_train, ind_test = list(rs.split(V))[0]
V_train = V[ind_train]
n_train = V_train.shape[0]
print('nb train:', n_train)
V_test = V[ind_test]
n_test = V_test.shape[0]
print('nb test:', n_test)

nb train: 13149
nb test: 1461


### Extreme points

In [4]:
R = 100
k = int(n_train/R - 1)
feats_min = 0
print('k = ', k)
print('nb extreme points:', np.sum(np.sum(V > R, axis=1) > feats_min))
V_bin = 1.*(V > R)

V_bin_train = V_bin[ind_train]
V_bin_train = V_bin_train[np.sum(V_bin_train, axis=1) > feats_min]
print('nb extreme train points:', np.sum(np.sum(V_train > R, axis=1) > feats_min))

V_bin_test = V_bin[ind_test]
V_bin_test = V_bin_test[np.sum(V_bin_test, axis=1) > feats_min]
print('nb extreme test points:', np.sum(np.sum(V_test > R, axis=1) > feats_min))

k =  130
nb extreme points: 2674
nb extreme train points: 2404
nb extreme test points: 270


## CLEF

In [None]:
kappa_min = 0.2
faces_clf = clf.clef_0(V_bin_train, kappa_min)
#hm.map_visualisation(faces_clf, d)

In [None]:
plt.hist(list(map(len, faces_clf)))
print('nb faces:', len(faces_clf))
print(ut.dist_levenshtein_R(faces_clf, d, V_bin_dmx_test))

In [None]:
Rs = np.array([10, 50, 100, 150, 200, 400, 600, 800, 1000, 2000, 3000, 5000, 7500, 10000])
Ds_1, Ns = ut.dist_levenshtein_Rs(faces_clf, d, Rs, V_test, eps=0.3)
print(min(Ds_1), Rs[np.argmin(Ds_1)])
plt.plot(Rs, Ds_1)

## Hill

In [None]:
k = int(n_train/R - 1)
r_p = n_train/(k + int(k**(3./4)) + 1)
r_m = n_train/(k - int(k**(3./4)) + 1)
delta = 0.05
faces_hill = hill.hill_0(V_train, V_train > R, V_train > r_p, V_train > r_m, delta, k)#, var_max=2, verbose=1)
#hm.map_visualisation(faces_hill, d)

In [None]:
plt.hist(list(map(len, faces_hill)))
print('nb faces:', len(faces_hill))
print(ut.dist_levenshtein_R(faces_hill, d, V_bin_test))

In [None]:
Rs = np.array([10, 50, 100, 150, 200, 400, 600, 800, 1000, 2000, 3000, 5000, 7500, 10000])
Ds_1, Ns = ut.dist_levenshtein_Rs(faces_hill, d, Rs, V_test, eps=0.3)
print(min(Ds_1), Rs[np.argmin(Ds_1)])
plt.plot(Rs, Ds_1)

## DAMEX

In [None]:
eps = 0.3
V_bin_dmx = 1.*(V_train[np.max(V_train, axis=1) > R] > R*eps)
V_bin_dmx = V_bin_dmx[np.sum(V_bin_dmx, axis=1) > feats_min]
faces_dmx, mass = dmx.damex_0(V_bin_dmx)
faces_dmx = faces_dmx[:np.sum(mass > 1)]
#hm.map_visualisation(faces_dmx, d)

In [None]:
plt.hist(list(map(len, faces_dmx)))
print('nb faces:', len(faces_dmx))
V_bin_dmx_test = 1.*(V_test[np.max(V_test, axis=1) > R] > R*eps)
V_bin_dmx_test = V_bin_dmx_test[np.sum(V_bin_dmx_test, axis=1) > feats_min]
print(ut.dist_levenshtein_R(faces_dmx, d, V_bin_dmx_test))

In [None]:
Rs = np.array([10, 50, 100, 150, 200, 400, 600, 800, 1000, 2000, 3000, 5000, 7500, 10000])
Ds_1, Ns = ut.dist_levenshtein_Rs(faces_dmx, d, Rs, V_test, eps=0.3)
print(min(Ds_1), Rs[np.argmin(Ds_1)])
plt.plot(Rs, Ds_1)

## Peng

In [11]:
k = int(n_train/R - 1)
r_p = n_train/(k + int(k**(3./4)) + 1)
r_m = n_train/(k - int(k**(3./4)) + 1)
delta_p = 0.2
r_2 = n_train/(2*k + 1)
faces_peng = pg.peng_0(V_train > R,
                       V_train > r_2, V_train > r_p,
                       V_train > r_m, delta_p, k, rho_min=0.05)
#hm.map_visualisation(faces_peng, d)

2 : 551
3 : 491
4 : 139
5 : 17


In [10]:
print('nb faces:', len(faces_peng))
print(ut.dist_levenshtein_R(faces_peng, d, V_bin_train))

nb faces: 1963
0.5250510025832417


In [None]:
Rs = np.array([10, 50, 100, 150, 200, 400, 600, 800, 1000, 2000, 3000, 5000, 7500, 10000])
Ds_1, Ns = ut.dist_levenshtein_Rs(faces_peng, d, Rs, V_test, eps=0.3)
print(min(Ds_1), Rs[np.argmin(Ds_1)])
plt.plot(Rs, Ds_1)

## CLEF asymptotic

In [None]:
k = int(n_train/R - 1)
r_p = n_train/(k + int(k**(3./4)) + 1)
r_m = n_train/(k - int(k**(3./4)) + 1)
delta_k = 0.05
kappa_as = 0.3
faces_kapas = kp.kappa_as_0(V_train, V_train > R, V_train > r_p, V_train > r_m, delta_k, k, kappa_as)#, var_max=1.5)
#hm.map_visualisation(faces_kapas, d)

In [None]:
print('nb faces:', len(faces_kapas))
print(ut.dist_levenshtein_R(faces_kapas, d, V_bin_test))