In [None]:
import geopandas as gpd
import shapely
import glob

from shapely.geometry import Polygon
from shapely.ops import unary_union
import pandas as pd
import numpy as np
%matplotlib inline

In [None]:
# PROJ.4 definition used as CRS for the delivered shapefiles, one parameter per entry.
# NOTE(review): presumably the Dutch RD New / Amersfoort system (EPSG:28992) — confirm.
rd_string = " ".join([
    "+proj=sterea",
    "+lat_0=52.15616055555555",
    "+lon_0=5.38763888888889",
    "+k=0.999908",
    "+x_0=155000",
    "+y_0=463000",
    "+ellps=bessel",
    "+units=m",
    "+towgs84=565.2369,50.0087,465.658,-0.406857330322398,0.350732676542563,-1.8703473836068,4.0812",
    "+no_defs",
    "+no_defs",  # appears twice in the original string; kept so the value is unchanged
])

In [None]:
# root folder holding the delivered baggervak updates
path = r'D:\Projects\Pr\3317.50\Toeleveringen\20191203_update_baggervakken'

# every *.shp below the root folder, at any depth
shp_pattern = path + r'\**\*.shp'
shp_files = glob.glob(shp_pattern, recursive=True)
print(shp_files, len(shp_files))

In [None]:
gdf_comb = gpd.GeoDataFrame()

In [None]:
# Fragment of the shapefile path -> OBJECTCODE of the baggervak that file updates.
# Mappings of earlier deliveries are kept (commented out) as a record.
objectcode_by_name = {
    '191202_Harlingen-Noordzee': '1.7.2.3.1.A',  # 1.7.1.3.2.2
    '191202_GL-GS': '1.7.4.1.1.B',
    # '190501_Terschelling, Schuitengat': '1.7.2.1.1.B',
    # '190501_Va4-RG': '1.7.3.1.C',
    # '190501_RG': '1.7.3.1.3',
    # '181105_Terschelling, Schuitengat': '1.7.2.1.1.B',
    # '181105_Va6-Va9': '1.7.3.1.B',
    # '181105_Va9-Va13': '1.7.3.1.2.B',
}

# validity period written to every new feature (YYYYMMDD)
end = 21000101
# bulk start date
start = 20191128

# collect the frames in a list and concatenate once: re-running this cell then
# rebuilds gdf_comb from scratch instead of appending to the previous result
frames = []
for idx, shp in enumerate(shp_files):
    print(idx, shp)

    # Resolve the objectcode for this file only. Without this, a file that matches
    # no fragment would silently reuse the objectcode of the previous file.
    # The last matching fragment wins, like a sequence of independent `if` statements.
    matches = [code for name, code in objectcode_by_name.items() if name in shp]
    if not matches:
        raise ValueError('no OBJECTCODE mapping for shapefile: {}'.format(shp))
    objectcode = matches[-1]

    gdf = gpd.read_file(shp)
    gdf.crs = rd_string

    # only the first feature of the file is rebuilt as a Polygon
    gdf.loc[0, 'geometry'] = Polygon(gdf.geometry[0])

    # scalar assignment fills every row, whatever the number of features
    gdf['START'] = start
    gdf['END'] = end
    gdf['OBJECTCODE'] = objectcode

    frames.append(gdf)

# the original row labels are kept (no ignore_index), as before
gdf_comb = pd.concat(frames) if frames else gpd.GeoDataFrame()

In [None]:
# reset_index returns a new frame; assign it so the reset is actually kept
gdf_comb = gdf_comb.reset_index(drop=True)
gdf_comb

In [None]:
gdf_comb.plot()

In [None]:
g1 = gdf_comb['geometry'].iloc[0]

### read current valid shapefile

In [None]:
shp_baggervakken = r"D:\Projects\Pr\3317.50\Toeleveringen\current\Baggervakken.shp"
df_bgv = gpd.read_file(shp_baggervakken)
df_bgv.head()

## Append the new baggervakken shapes

In [None]:
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the current valid
# features and the new ones in the same way (row labels are kept as they are)
df_bg_new = pd.concat([df_bgv, gdf_comb])

In [None]:
# reset and drop the index
df_bg_new.reset_index(drop=True, inplace=True)

### Change the END date of the last valid feature
### and copy over the other metadata to the new valid feature

In [None]:
# For every new baggervak: take over the metadata of the previously valid feature
# with the same OBJECTCODE and close that previous feature at the new START date.

# copy all column values except column START, END and geometry
unwanted = {'START', 'END', 'geometry'}
item_list = [e for e in df_bg_new.columns if e not in unwanted]

for obj_code in gdf_comb['OBJECTCODE']:
    print(obj_code)

    # explicit copy: the edits below must not act on a view of df_bg_new
    df_sel = df_bg_new.loc[df_bg_new['OBJECTCODE'] == obj_code].copy()
    df_sel['START'] = df_sel['START'].astype(int)

    # only select latest two known baggervakken (newest first)
    df_iter_two_row = df_sel.sort_values(by=['START'], ascending=False).iloc[:2].copy()

    if len(df_iter_two_row) < 2:
        # no earlier feature with this OBJECTCODE: nothing to copy from or to close
        print('  no previous baggervak found for this OBJECTCODE, skipped')
        continue

    newest, previous = df_iter_two_row.index

    # metadata of the previous feature goes to the newest one
    df_iter_two_row.loc[newest, item_list] = df_iter_two_row.loc[previous, item_list].values

    # change END date of former valid feature to START date of the newest baggervak
    # (label-based .loc instead of chained `.END.iloc[1] = ...`, which does not
    # write through when pandas copy-on-write is active)
    df_iter_two_row.loc[previous, 'END'] = df_iter_two_row.loc[newest, 'START']
    print(df_iter_two_row.loc[:, ['END', 'START', 'OBJECTCODE', 'OBJECTNAAM']])

    # set adapted rows in the bigger geodataframe, matched on index label
    df_bg_new.loc[df_iter_two_row.index] = df_iter_two_row

# update 'AREA_REAL' column for new area in m2

In [None]:
# polygon area rounded to whole units, stored for every feature
area_rounded = df_bg_new['geometry'].area.round(0)
df_bg_new.loc[:, 'AREA_REAL'] = area_rounded

#### save to new shapefile

In [None]:
new_path = r'D:\Projects\Pr\3317.50\Toeleveringen\new\Baggervakken_20191128.shp'
df_bg_new.to_file(new_path)

In [None]:
df_bg_new.plot()

In [None]:
df_bg_new.columns

In [None]:
df_selection = df_bg_new[df_bg_new['OBJECTCODE'] == '1.7.3.1.2.B']

In [None]:
df_custom = df_selection[['OBJECTNAAM', 'geometry']].reset_index(drop=True)

In [None]:
# NOTE(review): the output path is an empty string — presumably a placeholder that
# still needs a real target file name before this cell can write anything; confirm.
df_custom.to_file('')

In [None]:
duplicates_all = df_bg_new[df_bg_new['OBJECTCODE'].duplicated(keep=False)]

In [None]:
duplicates_all.index

In [None]:
# EDIT: 2018-09-21 this is properly handled in above code
## FURTHER CHANGES APPLIED IN QGIS (FILL COLUMN AND CHANGING END DATE OF PREVIOUS VALID BAGGERVAK)
# /EDIT

In [None]:
## VALIDATE  NEW BAGGERVAKKEN

In [None]:
# PROJ.4 definition handed to the validation step below, one parameter per entry.
rd_string = " ".join([
    "+proj=sterea",
    "+lat_0=52.15616055555555",
    "+lon_0=5.38763888888889",
    "+k=0.999908",
    "+x_0=155000",
    "+y_0=463000",
    "+ellps=bessel",
    "+units=m",
    "+towgs84=565.2369,50.0087,465.658,-0.406857330322398,0.350732676542563,-1.8703473836068,4.0812",
    "+no_defs",
    "+no_defs",  # appears twice in the original string; kept so the value is unchanged
])

In [None]:
def baggervak_to_current(path_shp, crs_string):
    """
    Read a baggervakken shapefile and return only the currently valid features.

    Parameters
    ----------
    path_shp : str
        Path of the shapefile; it must contain the columns 'OBJECTCODE' and 'END'.
    crs_string : str or None
        CRS definition assigned to the result only when the file itself carries
        no CRS; an existing CRS is never overwritten.

    Returns
    -------
    GeoDataFrame
        For every OBJECTCODE that occurs more than once, rows that do not carry
        the highest END of that code are dropped; of the remaining rows only
        those with END equal to 21000101 are returned.
    """
    open_end = 21000101  # END value that marks a feature as still valid

    gdf = gpd.read_file(path_shp)
    if crs_string and not gdf.crs:
        gdf.crs = crs_string

    # END can be text or a number depending on the field type in the file;
    # compare numerically so both are handled (unparsable values become NaN)
    end_num = pd.to_numeric(gdf['END'], errors='coerce')

    # per duplicated OBJECTCODE, collect every row that lacks the highest END
    is_duplicated = gdf['OBJECTCODE'].duplicated(keep=False)
    rows2drop = []
    for obj in gdf.loc[is_duplicated, 'OBJECTCODE'].unique():
        end_of_obj = end_num[gdf['OBJECTCODE'] == obj]
        rows2drop.extend(end_of_obj.index[end_of_obj != end_of_obj.max()])

    # one drop on a new frame instead of repeated in-place drops
    gdf = gdf.drop(rows2drop, axis=0)

    # only get currently valid shapes
    return gdf[end_num.loc[gdf.index] == open_end]

In [None]:
#shp_baggervakken = r"D:\FEWSProjecten\OmsWaddenzee\trunk\fews\Config\MapLayerFiles\Baggervakken\Baggervakken_20180921.shp"
shp_baggervakken = r"D:\Projects\Pr\3317.50\Toeleveringen\new\Baggervakken_20191128.shp"

In [None]:
bgf_val = baggervak_to_current(shp_baggervakken, rd_string)

In [None]:
len(bgf_val['OBJECTCODE'].unique())

In [None]:
len(bgf_val['OBJECTCODE'])

In [None]:
np.sort(bgf_val['OBJECTCODE'].unique())#.sort()

In [None]:
# NOTE(review): the file name says "vanaf-11-november-2019" while the bulk START date
# used for the new features in this notebook is 20191128 — confirm which date is meant.
# The path also mixes backslashes with a '//' separator.
bgf_val.to_file(r'D:\Projects\Pr\3317.50\Verzending\20191203_update_baggervakken//Baggervakken-vanaf-11-november-2019.shp')