In [1]:
import geojson
import netCDF4 as nc
import os
import pandas as pd

In [2]:
# Folder containing the NetCDF input files (joined with `dataset` when opening).
folder = r'p:\11205479-coclico\data\01_storm_surge_jrc'

# Available scenario files. Switch scenario by changing `scenario` below
# instead of commenting/uncommenting assignment lines.
datasets = {
    'RCP45': 'CoastAlRisk_Europe_EESSL_RCP45.nc',
    'RCP85': 'CoastAlRisk_Europe_EESSL_RCP85.nc',
    'Historical': 'CoastAlRisk_Europe_EESSL_Historical.nc',
}
scenario = 'Historical'

# File name of the selected scenario; this is the name the later cells use.
dataset = datasets[scenario]

In [4]:
# Open the selected NetCDF file (netCDF4's default mode) and keep the handle in
# `ds` for the following cells; the trailing expression displays its metadata.
# NOTE(review): `ds` is not closed anywhere in the cells visible here.
nc_path = os.path.join(folder, dataset)
ds = nc.Dataset(nc_path)
ds

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF3_64BIT_OFFSET data model, file format NETCDF3):
    title: European extreme storm surge level
    Institution: Joint European Research Center, Institute of Environment and Sustainability, Via Enrico Fermi 2749, I-21027-Ispra
    Project Name: Prototype of a first Global Integrated Coastal Impact-based Flood Alert and Risk Assessment Tool
    Project Acronym: CoastAlRisk
    reference: Vousdoukas MI, Voukouvalas E, Annunziato A, Giardino A, Feyen L. Projections of extreme storm surge levels along Europe. Clim Dyn. February 2016. doi:10.1007/s00382-016-3019-5
    email: michail.vousdoukas@ec.europa.eu
    version: 1.0
    terms_for_use: European Union, 1995-2015.
Reuse is authorised, provided the source is acknowledged. The reuse policy of the European Commission is implemented by a Decision of 12 December 2011.
    disclaimer: Unless the following would not be permitted or valid under applicable law, the following applies to the data

In [5]:
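# write data to files (single): one GeoJSON file per return period.
# Disabled variant, kept for reference only. NOTE: the output file name below
# hard-codes RCP45; adjust it to the selected dataset before re-enabling.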
#for i, rp in enumerate(ds['RP']):
#    features = []
#    print(rp)
#    for j, (lon, lat) in enumerate(zip(ds['longitude'][:], ds['latitude'][:])):
#        point = geojson.Point((float(lon), float(lat)))
#        feature = geojson.Feature(geometry=point)
#        feature['properties']['locationId'] = j
#        feature['properties']['ssl'] = ds['ssl'][j][i]
#        features.append(feature) 
#
#    # store the features
#    collection = geojson.FeatureCollection(features)
#    with open(os.path.join(folder, 'platform', r'EU_EESSL_JRC_RCP45_%s.geojson'%(int(rp))), 'w') as f:
#        geojson.dump(collection, f)

In [6]:
# write data to files (multiple)
# Build one GeoJSON point feature per location; each feature carries the surge
# levels for all return periods.
rp = ds['RP'][:].filled()

# Read the whole surge array once instead of reading the NetCDF variable on
# every loop iteration; row j is the same data the per-row read returned.
ssl_all = ds['ssl'][:].filled()
# The return-period label is identical for every feature, so format it once.
rp_label = str(rp)

features = []
for j, (lon, lat) in enumerate(zip(ds['longitude'][:], ds['latitude'][:])):
    point = geojson.Point((float(lon), float(lat)))
    feature = geojson.Feature(geometry=point)
    feature['properties']['locationId'] = j
    # NOTE(review): the arrays are stored as numpy's string repr
    # (e.g. "[1.03412 1.06436 ...]"), not as JSON lists. Kept unchanged because
    # the already written files use this format.
    feature['properties']['ssl'] = str(ssl_all[j])
    feature['properties']['rp'] = rp_label
    features.append(feature)

# store the features
#collection = geojson.FeatureCollection(features)
#with open(os.path.join(folder, 'platform', r'EU_EESSL_JRC_Hist_RP.geojson'), 'w') as f:
#    geojson.dump(collection, f)

In [7]:
# check geojson
# Reload a previously written GeoJSON file and display its first feature as a
# spot check of the stored structure.
# NOTE(review): the file name is hard-coded to the RCP85 output, independent of
# the `dataset` selected in the configuration cell.
with open(os.path.join(folder, 'platform', 'EU_EESSL_JRC_RCP85_RP.geojson')) as f:
    check = geojson.load(f)

# Only the last expression of a cell is displayed, so the former bare
# `check.keys()` statement produced no output and has been dropped.
check['features'][0]

{"geometry": {"coordinates": [-0.1, 36.1], "type": "Point"}, "properties": {"locationId": 0, "rp": "[   5.   10.   20.   50.  100.  200.  500. 1000.]", "ssl": "[1.03412 1.06436 1.09628 1.14161 1.17879 1.21896 1.27761 1.32699]"}, "type": "Feature"}

In [8]:
# get minima and maxima for the colormap boundaries
# Read the surge array once (instead of twice per return period) and keep it
# masked: the array's own min/max skip masked entries, whereas .filled() would
# substitute the fill value and could distort the extrema.
ssl_masked = ds['ssl'][:]
for idx, rp_value in enumerate(rp):
    column = ssl_masked[:, idx]
    print(rp_value, column.min(), column.max())

5.0 0.23243000000000003 4.19467
10.0 0.23602 4.20914
20.0 0.23951999999999996 4.31781
50.0 0.24407999999999996 4.71357
100.0 0.24753999999999998 4.96707
200.0 0.25106000000000006 5.18857
500.0 0.25583 5.44121
1000.0 0.2595799999999999 5.60734


In [10]:
# parquet trial (https://arrow.apache.org/docs/python/parquet.html)
import pyarrow.parquet as pq

In [11]:
# multiple data files
# Assemble a table with one row per location: coordinates, a running location
# id, and one surge-level column per return period.

rp = ds['RP'][:].filled()

lon = ds['longitude'][:]
lat = ds['latitude'][:]
# Single read of the surge array; the per-return-period columns are sliced in
# memory below instead of re-reading the NetCDF variable for each one.
ssl_all = ds['ssl'][:].filled()

columns = {
    'lon': lon,
    'lat': lat,
    'locationId': range(len(lon)),
}
for j, period in enumerate(rp):
    columns['ssl_rp%s' % int(period)] = ssl_all[:, j]

# Build the frame in one step; the dict keeps insertion order, so the column
# order is lon, lat, locationId, then the return periods in file order.
df = pd.DataFrame(columns)

#df.to_parquet(os.path.join(folder, 'platform', r'EU_EESSL_JRC_Hist_RP.parquet'), compression='gzip')

In [12]:
# Re-read the stored parquet file for inspection.
# The folder is derived from `folder` (configuration cell) rather than repeating
# the absolute path, so it always matches the 'platform' sub-folder used for writing.
fol_check = os.path.join(folder, 'platform')
file_check = r'EU_EESSL_JRC_Hist_RP.parquet'
check = pd.read_parquet(os.path.join(fol_check, file_check), engine='pyarrow')

In [13]:
# Show a 1% random sample of the table; the fixed seed keeps the displayed rows
# the same across re-runs.
check.sample(frac=0.01, random_state=0)

Unnamed: 0,lon,lat,locationId,ssl_rp5,ssl_rp10,ssl_rp20,ssl_rp50,ssl_rp100,ssl_rp200,ssl_rp500,ssl_rp1000
1349,21.5,36.7,1349,1.71616,1.75768,1.79581,1.84153,1.87296,1.90194,1.93687,1.96101
2189,8.7,64.1,2189,1.81957,1.95754,2.07927,2.21861,2.30987,2.39057,2.48319,2.54402
1265,1.3,50.7,1265,2.22814,2.40461,2.5559,2.72504,2.83431,2.93065,3.04197,3.11636
2113,6.9,57.7,2113,1.81839,1.9516,2.07011,2.20739,2.29858,2.38037,2.47598,2.5401
272,-20.7,63.5,272,1.42429,1.49844,1.56583,1.64576,1.70009,1.74974,1.80899,1.84952
435,-3.9,35.5,435,0.9645,0.9866,1.0079,1.0354,1.05619,1.07742,1.10695,1.1311
212,-19.1,63.3,212,1.3952,1.45448,1.50653,1.56613,1.60544,1.6406,1.68181,1.70965
993,14.5,36.1,993,0.65718,0.66592,0.67335,0.68153,0.6867,0.69115,0.69611,0.69929
269,-20.1,63.1,269,1.4073,1.46712,1.51878,1.57674,1.61413,1.64694,1.68446,1.70916
442,-4.1,35.5,442,0.99549,1.01784,1.0403,1.07129,1.09681,1.12537,1.17039,1.21261
