In [61]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os 
import re
#%matplotlib inline

In [62]:
# for csv
csv_in = r'D:\OmsWaddenzee\trunk\fews\Modules\prioriteringExport\data//prioritering_ruw.csv'
csv_out = r'D:\OmsWaddenzee\trunk\fews\Modules\prioriteringExport\data//prioritering_refined.csv'
baggervakken = r'D:\OmsWaddenzee\trunk\fews\Config\MapLayerFiles\Baggervakken\Baggervakken.shp'

json_out = r'D:\OmsWaddenzee\trunk\fews\Modules\prioriteringExport\data//prioritering_refined.geojson'

In [63]:
# read the geodataframe
gdf = gpd.read_file(baggervakken)

# get list of duplicates (both the first and last [and middles if available])
duplicates_all = gdf[gdf['OBJECTCODE'].duplicated(keep=False)]
for obj in duplicates_all['OBJECTCODE'].unique():
    duplicates_single = duplicates_all.loc[duplicates_all['OBJECTCODE']==obj]
    rows2drop = duplicates_single.loc[duplicates_single['END'] != duplicates_single['END'].max()]
    gdf.drop(rows2drop.index.tolist(), axis=0, inplace=True)

In [64]:
gdf['AREA'] = gdf['geometry'].area
#gdf.to_crs(epsg=4326, inplace=True)

In [65]:
gdf_area = gdf[['OBJECTNAAM','OBJECTCODE','AREA','ACTIVITEIT','DIEPTE_MIN','DIEPTE_MAX','geometry']]
gdf_area.columns = ['Objectnaam', 'Objectcode', 'Oppervlak (m2)', 'Activiteit',
       'NGD (-mNAP)', 'OHD (-mNAP)','geometry']
gdf_area['Objectcode'] = 'bv.' + gdf_area['Objectcode'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [66]:
# read into dataframe
df_csv = pd.read_csv(csv_in, sep=';')
#df_area = pd.read_csv(area)

# join dataframes on objectcode
df_comb = pd.merge(df_csv, gdf_area, left_on='Baggervak', right_on='Objectcode')

# but keep geodataframe functionality
df_comb = gpd.GeoDataFrame(df_comb)
df_comb['Oppervlak (m2)'] = df_comb['Oppervlak (m2)'].astype(float).apply(np.ceil)
df_comb['Oppervlakte cut tov NGD (m2)'] = df_comb['Oppervlakte cut tov NGD (m2)'].astype(float).apply(np.floor)
df_comb['Oppervlakte cut tov OHD (m2)'] = df_comb['Oppervlakte cut tov OHD (m2)'].astype(float).apply(np.floor)

In [67]:
# calculate the prioritering
df_comb['Percentage cut tov NGD (%)'] = (df_comb['Oppervlakte cut tov NGD (m2)'] / df_comb['Oppervlak (m2)'] * 100).round(2)
df_comb['Percentage cut tov OHD (%)'] = (df_comb['Oppervlakte cut tov OHD (m2)'] / df_comb['Oppervlak (m2)'] * 100).round(2)

df_comb['Gemiddelde dikte cut tov NGD (cm)'] = np.round(df_comb['Gemiddelde dikte cut tov NGD (cm)'], 0).astype(int)
df_comb['Gemiddelde dikte cut tov OHD (cm)'] = np.round(df_comb['Gemiddelde dikte cut tov OHD (cm)'], 0).astype(int)
df_comb['Percentage cut tov NGD (%)'] = np.ceil(df_comb['Percentage cut tov NGD (%)']).astype(int)
df_comb['Percentage cut tov OHD (%)'] = np.ceil(df_comb['Percentage cut tov OHD (%)']).astype(int)
df_comb['Volume cut tov NGD (m3)'] = np.ceil(df_comb['Volume cut tov NGD (m3)']).astype(int)
df_comb['Volume cut tov OHD (m3)'] = np.ceil(df_comb['Volume cut tov OHD (m3)']).astype(int)

df_comb['NGD (-mNAP)'].replace(-999,np.nan, inplace=True)
df_comb['NGD (-mNAP)'].fillna('niet bekend', inplace=True)

df_comb['OHD (-mNAP)'].replace(-999,np.nan, inplace=True)
df_comb['OHD (-mNAP)'].fillna('niet bekend', inplace=True)

# reorder columns 
c = df_comb.columns.tolist()

c_selection = ['Objectcode',
               'Objectnaam',
               'Activiteit',
               'NGD (-mNAP)',
               'OHD (-mNAP)',               
               'Volume cut tov NGD (m3)',
               'Volume cut tov OHD (m3)',               
               'Percentage cut tov NGD (%)',
               'Percentage cut tov OHD (%)',
               'Gemiddelde dikte cut tov NGD (cm)',
               'Gemiddelde dikte cut tov OHD (cm)',
              ]

In [68]:
# # save as csv
df_comb[c_selection].to_csv(csv_out, sep=',', index=False)

In [69]:
# and prepare geojson for overlay
# append selection with geometry and change column names using a list comprehension to remove parentheses
# and trainling spaces while replacing the remaining spaces with underscores
c_selection.append('geometry')
df_comb = df_comb[c_selection]
df_comb.columns = [re.sub("[\(\[].*?[\)\]]", "", n).rstrip().replace(" ", "_") for n in c_selection]

# save to json
# overwrite bug in fiona https://github.com/geopandas/geopandas/issues/367   
try: 
    os.remove(json_out)
except OSError:
    pass

In [70]:
df_comb.to_file(json_out, driver='GeoJSON')