In [1]:
import geopandas as gpd
import pandas as pd
from fiona.crs import from_epsg
from shapely.geometry import Point
import os
import subprocess as sp
import re
from difflib import SequenceMatcher
%matplotlib inline

In [2]:
def cmd(command):
    """Run an external command through the shell and capture its stdout.

    The argument list is flattened into a single command-line string with
    ``subprocess.list2cmdline``; that string is printed and then executed
    with ``shell=True``.

    Parameters
    ----------
    command : sequence of str
        Program name followed by its arguments.

    Returns
    -------
    tuple
        The ``(stdout, stderr)`` pair returned by ``Popen.communicate()``.
        Only stdout is piped, so the second element is ``None``.
    """
    command_line = sp.list2cmdline(command)
    print(command_line)
    process = sp.Popen(command_line, shell=True, stdout=sp.PIPE)
    return process.communicate()

In [3]:
def similar(a, b):
    """Return the ``difflib.SequenceMatcher`` similarity ratio of ``a`` and ``b``.

    The ratio is a float between 0 and 1; identical sequences give 1.0.
    """
    return SequenceMatcher(None, a, b).ratio()


def replace_similar_color(color, idx, item, df, column, threshold=0.5):
    """Overwrite one cell with ``color`` when ``item`` resembles it closely enough.

    Parameters
    ----------
    color : str
        Canonical color name to write.
    idx : index label
        Row label of the cell to update (used with ``df.loc``).
    item : str
        Raw value that is compared against ``color``.
    df : pandas.DataFrame
        Frame that is modified in place.
    column : str
        Column label of the cell to update.
    threshold : float, optional
        Minimum similarity ratio (inclusive) needed to overwrite the cell.
        Defaults to 0.5, the value that used to be hard-coded.

    Returns
    -------
    None
        The cell is left untouched when the ratio is below ``threshold``.
    """
    if similar(color, item) >= threshold:
        df.loc[idx, column] = color

In [4]:
def urlify(s):
    """Turn free text into a dash-separated token.

    Characters that are neither word characters (letters, digits,
    underscore) nor whitespace are dropped first; every remaining run of
    whitespace is then collapsed into a single ``-``. Leading or trailing
    whitespace therefore ends up as a leading or trailing dash.
    """
    without_punctuation = re.sub(r"[^\w\s]", '', s)
    return re.sub(r"\s+", '-', without_punctuation)

In [5]:
def parse_ton_file_to_gdf(ton_in, crs=None):
    """Read an Excel sheet of point records and return them in EPSG:4326.

    Parameters
    ----------
    ton_in : str
        Path of the Excel workbook. It must provide the columns
        ``POINT_X``, ``POINT_Y``, ``NAAM`` and ``KLEUR``.
    crs : str or dict, optional
        Coordinate reference system of ``POINT_X`` / ``POINT_Y``. When
        omitted, the notebook-level ``rd_string`` definition is used.

    Returns
    -------
    geopandas.GeoDataFrame
        The columns ``NAAM``, ``KLEUR`` and ``geometry``, with the geometry
        reprojected to EPSG:4326.
    """
    if crs is None:
        # Resolved at call time: the cell that defines rd_string must have
        # been executed before this function is called.
        crs = rd_string

    df = pd.read_excel(ton_in)
    geometry = [Point(xy) for xy in zip(df.POINT_X, df.POINT_Y)]
    df = df.drop(['POINT_X', 'POINT_Y'], axis=1)
    gdf = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)

    # Reproject the frame as a whole so the geometry and its CRS metadata
    # are updated together, instead of overwriting the geometry column and
    # patching gdf.crs by hand afterwards.
    gdf = gdf.to_crs(epsg=4326)

    return gdf[['NAAM', 'KLEUR', 'geometry']]

In [6]:
def ton_gdf_to_label_file(gdf, txt_out):
    """Write a ``lon,lat,label`` text file with one cleaned-up name per point.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Needs a point ``geometry`` column (read through ``.x`` / ``.y``)
        and a ``NAAM`` column holding the names.
    txt_out : str or file-like
        Destination passed to ``DataFrame.to_csv``; written comma-separated
        without header and without index.

    Returns
    -------
    pandas.DataFrame
        The frame that was written, with columns ``lon``, ``lat``, ``nme``.

    Notes
    -----
    Missing values in any column are replaced by the string ``'NAN'``.
    Names are passed through ``urlify``; a name that is not text (so the
    regex substitution raises ``TypeError``) is also labelled ``'NAN'``.
    """
    geometry = gdf['geometry']
    df_label = pd.DataFrame({
        'lon': geometry.x.values,
        'lat': geometry.y.values,
        'nme': gdf['NAAM'].values,
    })
    df_label = df_label.fillna('NAN')

    # Build the whole label column in one pass instead of writing it back
    # cell by cell.
    cleaned_names = []
    for name in df_label['nme']:
        try:
            cleaned_names.append(urlify(name))
        except TypeError:
            # Only the "not a string" failure is expected here; anything
            # else should surface instead of being silently swallowed.
            cleaned_names.append('NAN')
    df_label['nme'] = cleaned_names

    df_label.to_csv(txt_out, sep=',', header=False, index=False)
    return df_label

In [7]:
# Canonical color names in the order they are tried. When several of them
# reach the similarity threshold, the last one tried is kept.
_BASE_COLORS = ('groen', 'rood', 'geel', 'wit', 'zwart')

# Exact spellings whose color is fixed by hand; these take precedence over
# the similarity match.
_COLOR_OVERRIDES = {
    'zwart/geel/zwart': 'zwart',
    'F(5) 20s': 'zwart',
    'groen-rood-groen': 'groen',
    'gr/rd': 'groen',
    'groen/rood/groen': 'groen',
    'rd gr rd gr': 'rood',
    'Rood/Groen/Rood': 'rood',
    'rood/wit/vert': 'rood',
}

# Integer code written to the file for every canonical color name.
_COLOR_CODES = {
    'groen': 0,  # corresponds to #78CA7B in the GMT command
    'zwart': 1,  # corresponds to #2F2F2F in the GMT command
    'geel': 2,   # corresponds to #FFEE8C in the GMT command
    'rood': 3,   # corresponds to #D80A0A in the GMT command
    'wit': 4,    # corresponds to #CBD4D8 in the GMT command
}


def _color_code(raw):
    """Translate one raw color description into its integer code.

    Values that are not text, and text that matches no known color, are
    returned unchanged.
    """
    if not isinstance(raw, str):
        return raw
    name = raw
    for color in _BASE_COLORS:
        # Same measure and 0.5 threshold as replace_similar_color.
        if SequenceMatcher(None, color, raw).ratio() >= 0.5:
            name = color
    name = _COLOR_OVERRIDES.get(raw, name)
    return _COLOR_CODES.get(name, name)


def ton_gdf_to_color_file(gdf, txt_out):
    """Write a ``lon,lat,code`` text file with one color code per point.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Needs a point ``geometry`` column (read through ``.x`` / ``.y``)
        and a ``KLEUR`` column holding color descriptions.
    txt_out : str or file-like
        Destination passed to ``DataFrame.to_csv``; written comma-separated
        without header and without index.

    Returns
    -------
    pandas.DataFrame
        The frame that was written, with columns ``lon``, ``lat``, ``clr``.

    Notes
    -----
    * A missing ``KLEUR`` value is treated as ``'zwart'`` (code 1).
    * Only ``KLEUR`` is filled. A missing coordinate stays NaN in the
      returned frame and is written as ``NAN``, the same marker
      ``ton_gdf_to_label_file`` uses for missing values; the color name is
      no longer written into the coordinate columns.
    * ``KLEUR`` values that are not text are passed through unchanged
      instead of raising ``TypeError`` in the similarity check.
    """
    geometry = gdf['geometry']
    raw_colors = gdf['KLEUR'].values

    codes = [
        _COLOR_CODES['zwart'] if missing else _color_code(value)
        for value, missing in zip(raw_colors, pd.isna(raw_colors))
    ]

    df_color = pd.DataFrame({
        'lon': geometry.x.values,
        'lat': geometry.y.values,
        'clr': codes,
    })

    # store color codes to GMT readable file
    df_color.to_csv(txt_out, sep=',', header=False, index=False, na_rep='NAN')
    return df_color

In [8]:
# PROJ-style parameter string (+proj=sterea on the Bessel ellipsoid, metres)
# that parse_ton_file_to_gdf uses as the CRS of the input coordinates.
# The trailing "+no_defs" appears twice; it is kept exactly as it was.
rd_string = " ".join([
    "+proj=sterea",
    "+lat_0=52.15616055555555",
    "+lon_0=5.38763888888889",
    "+k=0.999908",
    "+x_0=155000",
    "+y_0=463000",
    "+ellps=bessel",
    "+units=m",
    "+towgs84=565.2369,50.0087,465.658,-0.406857330322398,0.350732676542563,-1.8703473836068,4.0812",
    "+no_defs",
    "+no_defs",
])

In [9]:
# Machine-specific absolute Windows paths; adjust them before running this
# notebook on another machine.

# path to the temporary data directory
tmpDataDir = r'D:\Projects\Pr\3317.20\PTS2PDF_usingGMT\data_dir_tmp'

# path to the buoyage ("betonningen") data directory: the input workbook is
# read from here and the label/color text files are written here
tonDataDir = r'D:\Projects\Pr\3317.20\PTS2PDF_usingGMT\data_dir_ton'

# path to the GDAL/OGR installation root and the ogr2ogr executable inside it
rootOgrGdal = r'C:\Python35\Lib\site-packages\osgeo'
ogr2ogr = os.path.join(rootOgrGdal, 'ogr2ogr.exe')

In [10]:
# Input workbook, parsed into a GeoDataFrame in EPSG:4326.
ton_in = os.path.join(tonDataDir, 'TON_07sep2017.xlsx')
gdf = parse_ton_file_to_gdf(ton_in)

# Output text files for the labels and the colors. The file stem is derived
# from the workbook name so that the two cannot drift apart.
file_tmp = os.path.splitext(os.path.basename(ton_in))[0]
labels = os.path.join(tonDataDir, '{}_ton_label.txt'.format(file_tmp))
colors = os.path.join(tonDataDir, '{}_ton_color.txt'.format(file_tmp))

In [14]:
# Write the color and label text files; each call also returns the frame
# it wrote so it can be inspected below.
df_color = ton_gdf_to_color_file(gdf, colors)
df_label = ton_gdf_to_label_file(gdf, labels)

In [15]:
# Preview the first five label records (lon, lat, cleaned name).
df_label.head(n=5)

Unnamed: 0,lon,lat,nme
0,5.571823,53.330043,ABT
1,6.68661,53.52873,Mine
2,6.666287,53.536983,A-5
3,6.753172,53.49547,Tanker-reede-1
4,6.736955,53.50018,Preede-1


In [16]:
# Preview the first five color records (lon, lat, color code).
df_color.head(n=5)

Unnamed: 0,lon,lat,clr
0,5.571823,53.330043,0
1,6.68661,53.52873,1
2,6.666287,53.536983,0
3,6.753172,53.49547,1
4,6.736955,53.50018,1
