In [None]:
import seaconex

import camelot
import requests
import time
import lxml
import re
import json
import os
import io
import fiona
import zipfile

import pandas as pd
import numpy as np
import geopandas as gpd
import networkx as nx

from bs4 import BeautifulSoup
from shapely.geometry import Point, LineString

## SMDG Terminal Code List (TCL)

https://smdg.org/documents/smdg-code-lists/smdg-terminal-code-list/  
https://github.com/smdg-org/Terminal-Code-List  
https://www.google.com/maps/d/viewer?mid=1nbJI9NkpAayVpSJqy13qJ2xhatcIKQJK&ll=-3.81666561775622e-14%2C-4.5703125&z=2  

In [None]:
# Local cache of the SMDG Terminal Code List CSV (fetched from a pinned upstream commit).
smdg_tcl_path = '../data/raw/smdg-tcl-v20210401.csv'

# Download only when the cached copy is missing, so re-running the notebook stays offline.
if not os.path.exists(smdg_tcl_path):
    url = 'https://raw.githubusercontent.com/smdg-org/Terminal-Code-List/8886d86445d0a53e5495a4aeb11beedc92bd20cb/SMDG%20Terminal%20Code%20List.csv'
    # alt url in .xlsx
    # https://smdg.org/wp-content/uploads/Codelists/Terminals/SMDG-Terminal-Code-List-v20210401.xlsx
    r = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error: otherwise the error page would be written to disk and,
    # because of the exists() guard above, reused as "data" on every later run.
    r.raise_for_status()
    # Make sure the target directory exists on a fresh checkout.
    os.makedirs(os.path.dirname(smdg_tcl_path), exist_ok=True)
    with open(smdg_tcl_path, 'wb') as f:
        f.write(r.content)

In [None]:
# Columns of the SMDG list that are discarded on load.
# 'Remarks' is intentionally kept; it is renamed to 'remarks' below.
smdg_dropped_columns = [
    'Latitude (DMS)',
    'Longitude (DMS)',
    'Last change',
    'Valid from',
    'Valid until',
]

# Original SMDG header -> snake_case name used in the rest of the notebook.
smdg_column_names = {
    'UNLOCODE': 'port_unlocode',
    'Alternative UNLOCODEs': 'port_unlocode_alt',
    'Terminal Code': 'terminal',
    'Latitude': 'latitude',
    'Longitude': 'longitude',
    'Terminal Facility Name': 'terminal_name',
    'Terminal Company Name': 'terminal_operator',
    'Terminal Website': 'terminal_website',
    'Terminal Address': 'terminal_address',
    'Remarks': 'remarks',
}

# Load the cached CSV, trim and rename its columns, and replace every missing
# value (in all columns) with an empty string.
df_smdg = (
    pd.read_csv(smdg_tcl_path)
    .drop(columns=smdg_dropped_columns)
    .rename(columns=smdg_column_names)
    .fillna("")
)

In [None]:
df_smdg.columns.tolist()

In [None]:
df_smdg

## UNLOCODE

https://github.com/tadziqusky/unlocode-ports/blob/master/extract%20ports.ipynb

In [None]:
# Local cache of the UN/LOCODE CSV archive published by UNECE.
unlocode_path = '../data/raw/unlocode-v20201218.zip'

# Download only when the cached copy is missing, so re-running the notebook stays offline.
if not os.path.exists(unlocode_path):
    url = 'https://unece.org/sites/default/files/2020-12/loc202csv.zip'
    r = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error: otherwise the error page would be saved as the
    # .zip and only surface later as a confusing BadZipFile error.
    r.raise_for_status()
    # Make sure the target directory exists on a fresh checkout.
    os.makedirs(os.path.dirname(unlocode_path), exist_ok=True)
    with open(unlocode_path, 'wb') as f:
        f.write(r.content)

In [None]:
# Parse every CSV member of the UN/LOCODE archive into its own header-less
# DataFrame; `li` keeps them in archive order.
li = list()

with zipfile.ZipFile(unlocode_path, 'r') as zf:
    for member in zf.namelist():
        if not member.endswith('.csv'):
            continue
        # Echo the member name so the position of each file in `li` is visible.
        print(member)
        # Open the member in a context manager so its handle is closed right
        # after parsing instead of being left open.
        with zf.open(member) as fh:
            # NOTE(review): 'unicode_escape' also interprets backslash sequences in
            # the data; ISO-8859-1 was tried previously -- confirm which is intended.
            df = pd.read_csv(
                fh,
                sep=',',
                encoding='unicode_escape',
                header=None
            )
        li.append(df)

In [None]:
li[0]

In [None]:
# li[1]

In [None]:
# li[2]

In [None]:
# li[3]

In [None]:
# Stack every parsed CSV except the first into one frame (original per-file row
# indexes are kept, so index values repeat across parts).
# NOTE(review): li[0] is excluded -- presumably it is not a code-list part; confirm
# against the member names printed by the loading cell.
df_unlocode = pd.concat(li[1:])

In [None]:
# Label the header-less UN/LOCODE columns by position.
# NOTE(review): the UN/LOCODE manual lists "Function" before "Status"; if the CSV
# follows that order, the 'status' and 'function' labels here are swapped.
# Downstream code filters on 'status' starting with "1", so rename only together
# with that filter -- confirm against the raw file.
df_unlocode.columns = [
    'change', 
    'country', 
    'location', 
    'name', 
    'name_wo_diacritics', 
    'subdivision', 
    'status', 
    'function', 
    'date', 
    'iata', 
    'coordinates', 
    'remarks' 
]

In [None]:
df_unlocode.head()

In [None]:
# Rows with neither a location code nor a change marker are treated as the
# per-country header rows of the code list.
country_condition = df_unlocode['location'].isnull() & df_unlocode['change'].isnull()

# Build a country-code -> country-name lookup. The first character of the name is
# cut off (presumably a leading marker character in the raw data -- confirm).
countries_df = (
    df_unlocode.loc[country_condition, ["country", "name"]]
    .set_index('country')
    .assign(name=lambda frame: frame['name'].str.slice(start=1))
    .rename(columns={"name": "country_name"})
)

In [None]:
countries_df.head(10)

In [None]:
countries_df.query('country == "BM"')

In [None]:
# Keep the rows whose 'status' value begins with "1" and strip the bookkeeping
# columns that are not needed for the port lookup.
# NOTE(review): in UN/LOCODE a leading "1" is a *function* classifier value, while
# status codes are two letters. This filter therefore relies on the column that is
# labelled 'status' actually holding the function field -- confirm the labelling.
ports_unused_columns = [
    'change',
    'status',
    'date',
    'remarks',
    'function',
    'iata',
    'subdivision',
]

ports_df = (
    df_unlocode[df_unlocode["status"].str.startswith('1', na=False)]
    .drop(columns=ports_unused_columns)
)

# ports_df = ports_df.dropna(subset=['coordinates']) 

# ports_df.loc[:, "latitude"] = ports_df.coordinates.str.split(expand=True).loc[:, 0]
# ports_df.loc[:, "longitude"] = ports_df.coordinates.str.split(expand=True).loc[:, 1]

# ports_df["latitude"] = (ports_df["latitude"].str.slice(0, 2).astype("double") + ports_df["latitude"].str.slice(2, 4).astype("double") / 60) * ports_df["latitude"].str.slice(4, 5).map(lambda x: 1 if x == "N" else -1)
# ports_df["longitude"] = (ports_df["longitude"].str.slice(0, 3).astype("double") + ports_df["longitude"].str.slice(3, 5).astype("double") / 60) * ports_df["longitude"].str.slice(5, 6).map(lambda x: 1 if x == "E" else -1)

# ports_df = ports_df.drop(columns=["coordinates"])

In [None]:
# Build the combined code by concatenating the 'country' and 'location' columns
# (e.g. "BM" + "BDA" -> "BMBDA"), matching the 'port_unlocode' column name used
# for the SMDG frame.
ports_df['port_unlocode'] = ports_df['country'] + ports_df['location']

In [None]:
ports_df

In [None]:
ports_df.query('country == "BM"')

In [None]:
# ports_df.port_unlocode == 'BMBDA'
# ports_df[ports_df.port_unlocode.isin(['BMBDA'])]

df_unlocode[df_unlocode.country.isin(['BM'])]

In [None]:
ports_df

In [None]:
# Attach the country name to every port via the country code, then renumber the
# rows 0..n-1 (the old index is discarded rather than kept as a column).
ports_with_country_names_df = (
    ports_df
    .join(countries_df, on="country", how='left')
    .reset_index(drop=True)
)

ports_with_country_names_df

In [None]:
# ports_with_country_names_df['port_unlocode'] = ports_with_country_names_df['country'] + ports_with_country_names_df['location']

In [None]:
ports_with_country_names_df

In [None]:
# Load the processed terminal records; the next cell renames and trims its columns.
# NOTE(review): this file is not produced anywhere in the cells shown here --
# confirm which notebook/script writes ../data/processed/terminal.json.
terminals_df = pd.read_json('../data/processed/terminal.json')

In [None]:
# Map the terminal.json field names onto the column names used for the SMDG frame.
terminal_column_names = {
    'id': 'terminal',
    'terminal_port': 'port_name',
    'terminal_unlocode': 'port_unlocode',
    'terminal_unlocode_alt': 'port_unlocode_alt',
    'terminal_lat': 'latitude',
    'terminal_lon': 'longitude',
    'terminal_facility_name': 'terminal_name',
    'terminal_company_name': 'terminal_operator',
    'Terminal Website': 'terminal_website',
}

# Columns removed from the terminal frame.
terminal_dropped_columns = ['TAC1', 'A', 'P1/AL3', 'BI', 'SI', 'O']

# Note: this rebinds `terminals_df`, so running the cell a second time raises a
# KeyError (the dropped columns are already gone). Re-run the loading cell first.
terminals_df = (
    terminals_df
    .rename(columns=terminal_column_names)
    .drop(columns=terminal_dropped_columns)
)

In [None]:
# terminals_df

In [None]:
# Lookup keyed by short terminal code with an integer port number as value.
# NOTE(review): the name suggests SMDG terminal code -> World Port Index number;
# confirm. Several terminals deliberately share one number.
# 'PSAP', 'ACOT' and 'RDT' used to be listed twice (with identical values); a dict
# literal silently keeps only the last duplicate, so each key now appears once.
wpi_smdg = {
    'PSAP': 8080,
    'NCSPA': 8474,
    'ACOT': 31250,
    'DPWS': 35580,
    'RDT': 34350,
    'BCMT': 9170,
    'FIT': 8630,
    'OHPANC': 8560,
    'GCMT': 8530,
    'NCMT': 8500,
    'NIT': 8280,
    'SMT': 8210,
    'DMT': 8210,
    'BIT': 8025,
    'FAPS': 7810,
    'MAHER': 7810,
    'PNCT': 7810,
    'FCOVE': 6340,
    'CTA': 30780,
    'UCT': 30780,
    'AET': 31250,
    'K869': 31250,
    'RSCT': 34690,
    'DPWLG': 31420,
}



In [None]:
# Export the terminals as point features to GeoJSON.
# The points are built from the longitude/latitude columns, i.e. geographic degrees,
# so the frame is tagged EPSG:4326 (WGS 84). It was previously tagged EPSG:3857,
# which would make readers interpret the degree values as Web-Mercator metres.
gpd.GeoDataFrame(
    terminals_df,
    crs='EPSG:4326',
    geometry=gpd.points_from_xy(terminals_df.longitude, terminals_df.latitude)
).drop(
    # The coordinates now live in the geometry column.
    columns=['latitude', 'longitude']
).to_file(
    '../data/interim/terminals.geojson',
    driver='GeoJSON',
)

In [None]:
# Export the terminals as point features to a GeoPackage.
# The points are built from the longitude/latitude columns, i.e. geographic degrees,
# so the frame is tagged EPSG:4326 (WGS 84). It was previously tagged EPSG:3857,
# which would make readers interpret the degree values as Web-Mercator metres.
gpd.GeoDataFrame(
    terminals_df,
    crs='EPSG:4326',
    geometry=gpd.points_from_xy(terminals_df.longitude, terminals_df.latitude)
).drop(
    # The coordinates now live in the geometry column.
    columns=['latitude', 'longitude']
).to_file(
    '../data/interim/terminals.gpkg',
    driver='GPKG',
)

In [None]:
# Load the schedule edges; the following cells keep only the edge number and the
# two terminal-call columns and join terminal coordinates onto them.
routes = gpd.read_file('../data/interim/master_schedules_edges.json')

In [None]:
# Keep only the edge identifier and the two terminal columns used as merge keys
# below; every other column of the edges file is discarded.
routes = routes[['transport_edge_no', 'terminal_call_facility_1', 'terminal_call_facility_2']]

In [None]:
# Coordinates of each terminal with a "_1" suffix, used for the first terminal call.
terminal_coords_1 = terminals_df[['terminal', 'latitude', 'longitude']].add_suffix('_1')

# Left-join so edges whose first terminal is unknown are kept (with missing coordinates).
routes2 = pd.merge(
    left=routes,
    right=terminal_coords_1,
    how='left',
    left_on=['terminal_call_facility_1'],
    right_on=['terminal_1']
)
# The join key from the right-hand side duplicates terminal_call_facility_1.
routes2 = routes2.drop(columns=['terminal_1'])
routes2 = routes2.reset_index(drop=True)

In [None]:
# Coordinates of each terminal with a "_2" suffix, used for the second terminal call.
terminal_coords_2 = terminals_df[['terminal', 'latitude', 'longitude']].add_suffix('_2')

# Left-join so edges whose second terminal is unknown are kept (with missing coordinates).
routes3 = pd.merge(
    left=routes2,
    right=terminal_coords_2,
    how='left',
    left_on=['terminal_call_facility_2'],
    right_on=['terminal_2']
)
# The join key from the right-hand side duplicates terminal_call_facility_2.
routes3 = routes3.drop(columns=['terminal_2'])
routes3 = routes3.reset_index(drop=True)

In [None]:
# The terminal codes were only needed as merge keys; keep the edge number and coordinates.
routes3 = routes3.drop(
    columns=[
        'terminal_call_facility_1',
        'terminal_call_facility_2',
    ]
)

In [None]:
# Rename by column name, not by position. After the two merges the coordinate
# columns are ordered latitude_1, longitude_1, latitude_2, longitude_2, so the
# previous positional relabelling to ['route name','olon','olat','dlon','dlat']
# put latitudes under the *lon headers and longitudes under the *lat headers.
routes3 = routes3.rename(
    columns={
        'transport_edge_no': 'route name',
        'latitude_1': 'olat',
        'longitude_1': 'olon',
        'latitude_2': 'dlat',
        'longitude_2': 'dlon',
    }
)[['route name', 'olon', 'olat', 'dlon', 'dlat']]  # same column order as before

In [None]:
routes3

In [None]:
# Write the origin/destination table without the row index.
# Note: a later cell in this notebook writes the same path again and overwrites this file.
routes3.to_csv('../data/searoute/data/in/searoutes.csv', index=False)

In [None]:
# Single-cell version of the route export: edge number plus the coordinates of
# both terminal calls, written as the searoutes.csv input file.
searoutes = gpd.read_file('../data/interim/master_schedules_edges.json')[
    ['transport_edge_no', 'terminal_call_facility_1', 'terminal_call_facility_2']
]

# Terminal code with its coordinates; suffixed per side of the edge below.
terminal_coords = terminals_df[['terminal', 'latitude', 'longitude']]

# Left joins keep edges whose terminal is unknown (coordinates stay missing).
searoutes_with_origin = searoutes.merge(
    right=terminal_coords.add_suffix('_1'),
    how='left',
    left_on=['terminal_call_facility_1'],
    right_on=['terminal_1']
)
searoutes_with_both = searoutes_with_origin.merge(
    right=terminal_coords.add_suffix('_2'),
    how='left',
    left_on=['terminal_call_facility_2'],
    right_on=['terminal_2']
)

# Drop the merge keys, rename by column name, and fix the output column order.
searoute_input = searoutes_with_both.drop(
    columns=[
        'terminal_1',
        'terminal_2',
        'terminal_call_facility_1',
        'terminal_call_facility_2'
    ]
).rename(
    columns={
        'transport_edge_no': 'route name',
        'latitude_1': 'olat',
        'longitude_1': 'olon',
        'latitude_2': 'dlat',
        'longitude_2': 'dlon'
    }
)[['route name', 'olon', 'olat', 'dlon', 'dlat']]

searoute_input.to_csv(
    path_or_buf='../data/searoute/data/in/searoutes.csv',
    index=False
)

In [None]:
# Collect every distinct, non-empty UN/LOCODE referenced by the terminals:
# primary codes first, then alternative codes, each in order of first appearance.
# The columns are read under their renamed labels ('port_unlocode',
# 'port_unlocode_alt'): the rename cell above replaces 'terminal_unlocode' and
# 'terminal_unlocode_alt', so the old labels raise KeyError on a top-to-bottom run.
codes = list(dict.fromkeys(
    c
    for column in ('port_unlocode', 'port_unlocode_alt')
    for c in terminals_df[column].unique().tolist()
    if c != ''
))

codes

In [None]:
# ports_with_country_names_df[ports_with_country_names_df.port_unlocode.isin(codes)]

In [None]:
ports_df[ports_df.port_unlocode.isin(codes)]

In [None]:
# SMDG rows whose terminal code also appears in terminals_df. The terminal code
# column of terminals_df is 'terminal' (renamed from 'id' in the rename cell above),
# so looking up 'id' raises KeyError on a top-to-bottom run.
df_smdg[df_smdg.terminal.isin(terminals_df['terminal'].unique().tolist())]

In [None]:
# geopandas is imported as `gpd` in this notebook; the bare name `geopandas` is
# not bound and raised NameError.
# NOTE(review): `nga` and `geometry` are not defined in any cell visible here --
# confirm the cell that builds them runs before this one on a fresh kernel.
gdf = gpd.GeoDataFrame(nga, geometry=geometry, crs="EPSG:4326")

In [None]:
gdf.head()

In [None]:
gdf.crs

In [None]:
# Write the `gdf` frame to the interim data folder as GeoJSON.
gdf.to_file(
    os.path.join('../data/interim/', 'nga-wpi_v1.json'),
    driver='GeoJSON',
)

In [None]:
# Write the `gdf` frame to the interim data folder as a GeoPackage.
gdf.to_file(
    os.path.join('../data/interim/', 'nga-wpi_v1.gpkg'),
    driver='GPKG',
)

In [None]:
# TODO: open question -- are these ports missing from the data?
# NOTE(review): presumably these are port index numbers for Salem, Cork and London.
# They are stored as strings, whereas the values in `wpi_smdg` are ints (e.g. 8025),
# so a direct comparison needs a cast -- confirm intended use; nothing reads them yet.
salem = '8025'
cork = '34360'
london = '31470'