In [37]:
import os
import re

import geopandas as gpd
import numpy as np
import pandas as pd

In [38]:
# ---- input files ----
# raw prioritisation table (semicolon separated)
csv_in = r'D:\OmsWaddenzee\trunk\fews\Modules\prioriteringExport\data//prioritering_ruw.csv'
# area table that is joined onto the raw table via its 'Objectcode' column
area = r'D:\OmsWaddenzee\trunk\fews\Config\MapLayerFiles\Baggervakken//oppervlak_baggervakken.csv'
# shapefile providing the geometries for the GeoJSON overlay
baggervakken_4326 = r'D:\OmsWaddenzee\trunk\fews\Config\MapLayerFiles\Baggervakken//Baggervakken_4326.shp'

# ---- output files ----
csv_out = r'D:\OmsWaddenzee\trunk\fews\Modules\prioriteringExport\data//prioritering_refined.csv'
json_out = r'D:\OmsWaddenzee\trunk\fews\Modules\prioriteringExport\data//prioritering_refined.geojson'

# load both tables; the area table uses the default comma separator
df_csv = pd.read_csv(csv_in, sep=";")
df_area = pd.read_csv(area)

# Join the raw prioritisation table with the area table on the object code.
# pd.merge defaults to an inner join, so rows without a matching 'Objectcode'
# in the area table are dropped from df_comb.
df_comb = pd.merge(df_csv, df_area, left_on='Baggervak', right_on='Objectcode')

# The total area is rounded up, while the cut areas (below) are rounded down.
df_comb['Oppervlak (m2)'] = np.ceil(df_comb['Oppervlak (m2)'].astype(float))

# The same set of derived columns is produced for both reference levels.
for ref in ('NGD', 'OHD'):
    cut_area_col = 'Oppervlakte cut tov {} (m2)'.format(ref)
    percentage_col = 'Percentage cut tov {} (%)'.format(ref)
    thickness_col = 'Gemiddelde dikte cut tov {} (cm)'.format(ref)
    volume_col = 'Volume cut tov {} (m3)'.format(ref)
    depth_col = '{} (-mNAP)'.format(ref)

    df_comb[cut_area_col] = np.floor(df_comb[cut_area_col].astype(float))

    # Percentage of the total area that is cut. The value is rounded to two
    # decimals *before* taking the ceiling, so e.g. 12.004 is reported as 12
    # rather than 13.
    # NOTE(review): astype(int) raises when the ratio is NaN or infinite
    # (e.g. an area of 0) - confirm the input never contains such rows.
    percentage = (df_comb[cut_area_col] / df_comb['Oppervlak (m2)'] * 100).round(2)
    df_comb[percentage_col] = np.ceil(percentage).astype(int)

    # Thickness is rounded to the nearest integer, volume is rounded up.
    df_comb[thickness_col] = np.round(df_comb[thickness_col], 0).astype(int)
    df_comb[volume_col] = np.ceil(df_comb[volume_col]).astype(int)

    # Replace the -999 sentinel (and any missing value) by the text
    # 'niet bekend'. The result is assigned back to the frame: calling
    # replace/fillna with inplace=True on df_comb[col] operates on a
    # temporary selection and does not update df_comb under pandas
    # Copy-on-Write.
    df_comb[depth_col] = df_comb[depth_col].replace(-999, np.nan).fillna('niet bekend')



# all column names of the combined table, as a plain list
c = list(df_comb.columns)

# columns to export, listed in the order they appear in the output
c_selection = [
    'Objectcode',
    'Objectnaam',
    'Activiteit',
    'NGD (-mNAP)',
    'OHD (-mNAP)',
    'Volume cut tov NGD (m3)',
    'Volume cut tov OHD (m3)',
    'Percentage cut tov NGD (%)',
    'Percentage cut tov OHD (%)',
    'Gemiddelde dikte cut tov NGD (cm)',
    'Gemiddelde dikte cut tov OHD (cm)',
]

# write the selected columns to the semicolon-separated csv, without the index
df_export = df_comb[c_selection]
df_export.to_csv(csv_out, sep=';', index=False)

# Prepare the GeoJSON overlay: read the shapefile into a GeoDataFrame.
gdf = gpd.read_file(baggervakken_4326)

# Attribute join of the geometries and the combined table on the object name.
df_gdf = gdf.merge(df_comb, left_on='OBJECTNAAM', right_on='Objectnaam')

# Extend the selection with the geometry column and keep only those columns.
c_selection.append('geometry')
df_gdf = df_gdf[c_selection]

# Rename the columns: drop any part in parentheses or square brackets, strip
# trailing spaces and replace the remaining spaces with underscores
# (e.g. 'Volume cut tov NGD (m3)' -> 'Volume_cut_tov_NGD').
# The pattern is a raw string so the backslash escapes reach the regex engine
# unchanged instead of being treated as (invalid) string escapes.
df_gdf.columns = [re.sub(r"[\(\[].*?[\)\]]", "", n).rstrip().replace(" ", "_") for n in c_selection]

# Save to GeoJSON. An existing output file is removed first to work around a
# fiona bug: https://github.com/geopandas/geopandas/issues/367
# A failure to remove the file (e.g. it does not exist yet) is ignored on
# purpose; any remaining problem surfaces in to_file below.
try:
    os.remove(json_out)
except OSError:
    pass
df_gdf.to_file(json_out, driver='GeoJSON')