In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import altair as alt
from time import time
import sompy
%matplotlib inline

CACHEDIR=C:\Users\hoek.HKV\.matplotlib
Using fontManager instance from C:\Users\hoek.HKV\.matplotlib\fontList.json
backend module://ipykernel.pylab.backend_inline version unknown
backend module://ipykernel.pylab.backend_inline version unknown
backend module://ipykernel.pylab.backend_inline version unknown


In [2]:
# Input locations: the stations shapefile and the CSV data table read further down.
# NOTE(review): absolute, machine-specific paths; consider deriving them from one
# configurable data directory so the notebook runs on another machine.
file_shp = r'D:\Projects\RO\Jong HKV\Toeleveringen\DominikPaprotny\ExtremeDischargesEUCatchments\Stations_total_v3.shp'
file_csv = r'D:\Projects\RO\Jong HKV\Toeleveringen\DominikPaprotny\ExtremeDischargesEUCatchments\BN_data_full.csv'

In [3]:
# Read and parse the data table (semicolon-separated, decimal comma).
df = pd.read_csv(file_csv, sep=';', decimal=',')
# Zero out every non-finite entry. The original only replaced +inf, so a -inf
# would have survived into the SOM feature matrix; NaN is handled in the same pass.
df = df.replace([np.inf, -np.inf, np.nan], 0)

# Keep the catchment key aside before it is removed from the feature columns;
# it is re-attached after training to join the results to the shapefile.
shp_key = df['WSO1_ID'].copy()
# Exclude identifier / bookkeeping columns so that only the remaining columns
# are used as features (note: Index.difference returns the names sorted).
df = df[df.columns.difference([
    'OBJECTID',
    'Station_ID',
    'WSO1_ID',
    'Catchment_ID',
    'Indicator_of_daily_discharge_availability',
])]
df = df.astype(np.float32)

In [None]:
# Feature matrix for the SOM. The explicit standardization below is left
# disabled; the SOM is built with normalization='var' in a later cell.
# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the same ndarray.
X = df.values
# _mu = np.nanmean(X, axis=0)
# _sigma = np.sqrt(np.nanmean((X - _mu) ** 2.0, axis=0))
# X = (X - _mu) / _sigma

In [None]:
# Go back to a pandas DataFrame and drop columns that are entirely NaN.
# NOTE(review): NaN values were already replaced by 0 when the CSV was parsed,
# so this only has an effect if the (currently disabled) standardization is re-enabled.
df = pd.DataFrame(data=X, columns=df.columns).dropna(axis=1, how='all')
# DataFrame.as_matrix() was removed in pandas 1.0; .values is the equivalent accessor.
X = df.values

In [None]:
#df.plot(subplots=True,figsize=(12,35))

In [None]:
# Train a Kohonen network (self-organizing map) on the feature matrix.
mapsize = [50, 60]
# Every option is spelled out so the initialization and neighborhood methods
# can be changed in one place.
som = sompy.SOMFactory.build(
    X,
    mapsize,
    mask=None,
    mapshape='planar',
    lattice='rect',
    normalization='var',
    initialization='pca',
    neighborhood='gaussian',
    training='batch',
    component_names=df.columns,
    name='sompy',
)
# verbose='debug' prints the most output; verbose=None prints nothing.
som.train(n_job=1, verbose='debug')

In [None]:
# Two quality measures of the trained map, reported on a single line.
topographic_error = som.calculate_topographic_error()
# mean of the second row of the stored BMU array
quantization_error = np.mean(som._bmu[1])
print(
    "Topographic error = %s; Quantization error = %s"
    % (topographic_error, quantization_error)
)

In [None]:
# Plot the trained map with sompy's View2D for all dimensions (which_dim="all").
# NOTE(review): the title string "rand data" looks like a leftover from an example.
# NOTE(review): `desnormalize` is the keyword spelling used by this call; presumably it
# matches sompy's own parameter name — confirm before "correcting" it.
from sompy.visualization.mapview import View2D
view2D  = View2D(10,10,"rand data",text_size=10)
view2D.show(som, col_sz=7, which_dim="all", desnormalize=True)

In [None]:
# Denormalize the SOM codebook matrix against the raw training data
# (presumably back to the original units of each variable — TODO confirm).
codebook = som._normalizer.denormalize_by(som.data_raw, som.codebook.matrix)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Grid dimensions of the trained map.
x_side, y_side = som.codebook.mapsize[0], som.codebook.mapsize[1]
# First codebook column laid out on the grid; colour scale limited to 100..1000.
im = plt.imshow(
    codebook[:, 0].reshape(x_side, y_side),
    vmin=100,
    vmax=1000,
)
plt.colorbar(im)
plt.show()

In [None]:
# Convert every flat node index (0 .. rows*cols - 1) with som.bmu_ind_to_xy.
msz = som.codebook.mapsize
cents = som.bmu_ind_to_xy(np.arange(0, msz[0] * msz[1]))

# NOTE(review): column meaning is assumed from the variable names — confirm against sompy.
yv = cents[:, 0]
xv = cents[:, 1]
xyv = cents[:, 2]  # presumably the flat node index (it is joined against BMU ids later), not a coordinate pair — TODO confirm

In [None]:
# The U-matrix makes the clusters easy to spot.
u = sompy.umatrix.UMatrixView(50, 50, 'umatrix', show_axis=True, text_size=8, show_text=True)

# U-matrix values for the trained map
UMAT = u.build_u_matrix(som, distance=1, row_normalized=False)
# NOTE(review): `distance2` and `contooor` are the keyword spellings used by this call;
# presumably they match sompy's own parameter names — confirm before renaming.
UMAT2 = u.show(som, distance2=1, row_normalized=False, show_data=True, contooor=False, blob=False)

In [None]:
# Sanity check: shape of the first codebook column once laid out on the grid.
codebook[:,0].reshape(x_side, y_side).shape

In [None]:
# Inspect the third column returned by bmu_ind_to_xy.
# NOTE(review): this displays the whole array; a slice such as xyv[:10] keeps the output short.
xyv

In [None]:
# Show how the values of xyv are laid out on the (x_side, y_side) grid.
plt.imshow(xyv.reshape(x_side, y_side))

In [None]:
# U-matrix flattened in C (row-major) order and reshaped to the grid, i.e. the same
# flattening that is stored in the 'U-matrix' column of the codebook table.
plt.imshow(UMAT.flatten('C').reshape(x_side, y_side))

In [None]:
# from itertools import product
# from string import ascii_lowercase
# keywords = [''.join(i) for i in product(ascii_lowercase, repeat = 2)]
# keywords = keywords[0:len(df.columns)]

In [None]:
# Codebook table: one row per SOM node, one column per variable
# (underscores in the column names are replaced by spaces).
df_cb = pd.DataFrame(data=codebook, columns=df.columns.str.replace('_',' '))
# Per-node extras: U-matrix value (row-major flattening) and the three
# columns taken from bmu_ind_to_xy.
df_cb['U-matrix'] = UMAT.flatten('C')
df_cb['X'] = xv
df_cb['Y'] = yv
df_cb['XY'] = xyv


In [None]:
# Preview the codebook table.
df_cb.head()

In [None]:
# Re-attach the catchment key saved before the feature selection, and store the
# first row of the SOM's BMU array (presumably the best-matching node index per
# input row — TODO confirm) so that each record can be linked to a map node.
# NOTE(review): this mutates `df` in place; re-running the matrix cells afterwards
# would feed these two columns into X as well.
df['WSO1_ID'] = shp_key.values
df['som_key'] = som._bmu[0]

In [None]:
# outjson_raw = r'D:\Projects\RO\Jong HKV\Toeleveringen\DominikPaprotny\ExtremeDischargesEUCatchments//ruwedata.json'
# outgzip_raw = r'D:\Projects\RO\Jong HKV\Toeleveringen\DominikPaprotny\ExtremeDischargesEUCatchments//ruwedata.gzip'
# df.to_json(outjson_raw, orient='records')
# df.to_json(outgzip_raw, orient='records', compression='gzip')

In [None]:
import geopandas as gpd

In [None]:
# Read the station locations.
# NOTE(review): this is the same path as `file_shp` defined at the top of the notebook.
shp_file_in = r'D:\Projects\RO\Jong HKV\Toeleveringen\DominikPaprotny\ExtremeDischargesEUCatchments\Stations_total_v3.shp'
gdf = gpd.read_file(shp_file_in)

In [None]:
# Coordinate reference system the shapefile was read with.
gdf.crs

In [None]:
# Reproject the whole layer to WGS84 (EPSG:4326).
# GeoDataFrame.to_crs returns a new frame, so `gdf` itself is left untouched, and
# it updates the CRS metadata together with the coordinates. Overwriting only the
# geometry column (as was done before) leaves the frame's `.crs` describing the
# old projection.
gdf_proj = gdf.to_crs(epsg=4326)

In [None]:
# Preview the reprojected layer.
gdf_proj.head()

In [None]:
# From here on `gdf` refers to the reprojected layer (the originally read frame is no longer bound to this name).
gdf = gdf_proj

In [None]:
# Join the station geometries with the data table (including 'som_key') on the
# catchment key, using merge's default join type.
gdf_merge_df = gdf.merge(df, on='WSO1_ID')

In [None]:
# Keep only the identifying columns, the SOM node key and the geometry.
gdf_sel = gdf_merge_df.loc[:,['WSO1_ID', 'station', 'som_key', 'Country', 'geometry']]

In [None]:
# All SOM node keys of the selected records, sorted ascending.
som_key_all = np.sort(gdf_sel['som_key'].values)

In [None]:
# Distinct node keys and how many records fall on each of them.
unique, counts = np.unique(som_key_all, return_counts=True)

In [None]:
# Two-column table: node key (cast to int) and the number of records on that node.
df_counts = pd.DataFrame(np.array((unique.astype(int), counts)).T, columns=['som_key_unique', 'Total stations'])

In [None]:
# Attach the per-node record counts to the codebook table. The left join keeps
# every node; nodes without any record get NaN in 'Total stations'. The helper
# key column from the right-hand table is dropped again.
df_cb_counts = pd.merge(
    df_cb,
    df_counts,
    left_on='XY',
    right_on='som_key_unique',
    how='left',
).drop('som_key_unique', axis=1)

In [None]:
# Preview the codebook table with the station counts attached.
df_cb_counts.head()

In [None]:
# Remove rows that repeat the same key / station / node / country combination
# (the geometry column is not part of the comparison).
gdf_dropdp = gdf_sel.drop_duplicates(subset = ['WSO1_ID', 'station', 'som_key', 'Country'])

In [None]:
# Store the SOM node key as a plain integer.
# assign() returns a new frame, which avoids SettingWithCopyWarning on the
# de-duplicated selection and guarantees the column really takes the integer
# dtype (`.loc[:, col] = ...` writes into the existing column and keeps its
# old dtype on pandas >= 2.0).
gdf_dropdp = gdf_dropdp.assign(som_key=gdf_dropdp['som_key'].astype(int))

In [None]:
# Write the de-duplicated stations as JSON text (GeoDataFrame.to_json) to disk.
# NOTE(review): absolute, machine-specific output path.
json_locs_out = r'D:\Projects\RO\Jong HKV\Toeleveringen\DominikPaprotny\ExtremeDischargesEUCatchments\stations_EU_discharge.json'
with open(json_locs_out, 'w') as f:
    f.write(gdf_dropdp.to_json())

In [None]:
# Write the codebook table (with station counts) to CSV, without the index column.
# NOTE(review): absolute, machine-specific output path.
csv_som_out = r'D:\Projects\RO\Jong HKV\Toeleveringen\DominikPaprotny\ExtremeDischargesEUCatchments\som_EU_discharge.csv'
df_cb_counts.to_csv(csv_som_out, index=False)

In [None]:
# TO DATAPORTAL
# Import the client class under its own name and keep `dp` for the instance,
# instead of rebinding the import alias to the object it creates.
from hkvportal.io.services import dataportal
dp = dataportal()

In [None]:
# Point the data portal client at the HKV data service endpoint.
dp.setDataservice(dataservice = 'https://data.hkvservices.nl/dataservices/')

In [None]:
# Create the database that the entries below are written to.
# NOTE(review): behaviour when the database already exists is not visible here — confirm before re-running.
dp.createDatabase(database = 'EU_extreme_discharge')

In [None]:
# Load the stations JSON written earlier back from file.
# (Despite its name, `vega_spec` holds the content of `json_locs_out`.)
import json
# Use a context manager so the file handle is closed deterministically;
# json.load(open(...)) leaves it open until garbage collection.
with open(json_locs_out) as f:
    vega_spec = json.load(f)

In [None]:
# Upload the JSON data to the data portal under the key 'stations'.
dp.setEntryDatabase(database = 'EU_extreme_discharge', key = 'stations', data = json.dumps(vega_spec), description = 'Stations EU extreme river discharges')

In [None]:
import pandas as pd
from io import StringIO

In [None]:
# Format every cell as a string with three decimals (all columns, including X, Y, XY).
# NOTE(review): not idempotent — the result holds strings, so running this cell a
# second time applies a float format to str values and fails.
df_cb_counts = df_cb_counts.applymap("{0:.3f}".format)

In [None]:
# Preview the formatted table (all values are now strings).
df_cb_counts.head()

In [None]:
# Turn the grid columns back into integers.
# After the formatting step these columns hold strings such as '12.000', and
# int('12.000') raises ValueError, so convert through float first.
# Plain column assignment (rather than `.loc[:, cols] = ...`) replaces the
# columns, so they actually take the integer dtype.
df_cb_counts[['X', 'Y', 'XY']] = df_cb_counts[['X', 'Y', 'XY']].astype(float).astype(int)

In [None]:
# Preview the table after restoring the integer grid columns.
df_cb_counts.head()

In [None]:
# Serialize the table to CSV text in memory (a text buffer, not a bytes buffer)
# so it can be uploaded without an intermediate file.
output = StringIO()
df_cb_counts.to_csv(output, index=False)

In [None]:
# Upload the in-memory CSV text to the data portal under the key 'som'.
dp.setEntryDatabase(database = 'EU_extreme_discharge', key = 'som', data = output.getvalue(), description = 'Self-organizing map EU extreme river discharges')

In [None]:
# Load the Vega specification from file.
import json
vega_spec_path = r'D:\Projects\RO\Jong HKV\Toeleveringen\DominikPaprotny\ExtremeDischargesEUCatchments\vega3_EU_discharge.json'
# Use a context manager so the file handle is closed deterministically;
# json.load(open(...)) leaves it open until garbage collection.
with open(vega_spec_path) as f:
    vega_spec_SOM = json.load(f)

In [None]:
# Upload the Vega specification to the data portal under the key 'vegaspec'.
dp.setEntryDatabase(database = 'EU_extreme_discharge', key = 'vegaspec', data = json.dumps(vega_spec_SOM), description = 'Vega3 specification EU extreme river discharges')

#### END