In [None]:
import seaconex

import camelot
import requests
import time
import lxml
import re
import json
import os
import fiona

import pandas as pd
import numpy as np
import geopandas as gpd
import networkx as nx

from bs4 import BeautifulSoup
from shapely.geometry import Point, LineString

## SMDG Terminal Code List (TCL)

https://smdg.org/documents/smdg-code-lists/smdg-terminal-code-list/  
https://github.com/smdg-org/Terminal-Code-List  
https://www.google.com/maps/d/viewer?mid=1nbJI9NkpAayVpSJqy13qJ2xhatcIKQJK&ll=-3.81666561775622e-14%2C-4.5703125&z=2  

In [None]:
smdg_tcl_path = '../data/raw/smdg-tcl-v20210401.csv'

# Download the SMDG Terminal Code List once; later runs reuse the local copy.
if not os.path.exists(smdg_tcl_path):
    # The URL pins a specific commit of the SMDG repository.
    url = 'https://raw.githubusercontent.com/smdg-org/Terminal-Code-List/8886d86445d0a53e5495a4aeb11beedc92bd20cb/SMDG%20Terminal%20Code%20List.csv'
    # alt url in .xlsx
    # https://smdg.org/wp-content/uploads/Codelists/Terminals/SMDG-Terminal-Code-List-v20210401.xlsx
    r = requests.get(url, timeout=60)
    # Stop on HTTP errors: otherwise the error body would be saved under
    # smdg_tcl_path and the existence check above would never retry.
    r.raise_for_status()
    # Make sure the target directory exists (e.g. on a fresh checkout).
    os.makedirs(os.path.dirname(smdg_tcl_path), exist_ok=True)
    with open(smdg_tcl_path, 'wb') as f:
        f.write(r.content)

In [None]:
# Load the terminal code list, then give the columns used later in the notebook
# snake_case names. Columns not listed keep their original SMDG headers.
df_smdg = pd.read_csv(smdg_tcl_path)
df_smdg = df_smdg.rename(
    {
        'UNLOCODE': 'port_unlocode',
        'Alternative UNLOCODEs': 'port_unlocode_alt',
        'Terminal Facility Name': 'terminal_name',
        'Terminal Company Name': 'terminal_operator',
        'Terminal Website': 'terminal_website',
        'Terminal Address': 'terminal_address',
    },
    axis='columns',
)

In [None]:
# List the column names of the loaded terminal code list.
df_smdg.columns.tolist()

In [None]:
# Preview the first rows of the terminal code list.
df_smdg.head()

# ICL

In [None]:
icl_schedule_path = '../data/raw/icl-schedule-v030521.pdf'

# Download the ICL sailing schedule PDF once; later runs reuse the local copy.
if not os.path.exists(icl_schedule_path):
    url = 'https://www.icl-ltd.com/media/2305/sailing-schedule-updated-3-5-2021-web.pdf'
    r = requests.get(url, timeout=60)
    # Stop on HTTP errors: otherwise the error body would be saved as the PDF
    # and the existence check above would never retry the download.
    r.raise_for_status()
    # Make sure the target directory exists (e.g. on a fresh checkout).
    os.makedirs(os.path.dirname(icl_schedule_path), exist_ok=True)
    with open(icl_schedule_path, 'wb') as f:
        f.write(r.content)

In [None]:
# Extract the tables from the ICL schedule PDF using camelot's default options.
tables = camelot.read_pdf(icl_schedule_path)

In [None]:
# tables[0]

In [None]:
# Show camelot's parsing report for the first extracted table.
tables[0].parsing_report

In [None]:
# tables[0].df.head()

In [None]:
# Lift the column limit so wide tables are displayed in full.
pd.set_option('display.max_columns', None)

In [None]:
# tables[0].df.head(2)

In [None]:
# Keep only the first two rows of the first extracted table; the following
# cells transpose them and name them 'lane' and 'carrier_port_call_ref'.
df_tac1 = tables[0].df.head(2)

In [None]:
# df_tac1

In [None]:
# df_tac1.info()

In [None]:
# df_tac1.notna()

In [None]:
# print(df_tac1)

In [None]:
# df_tac1.columns

In [None]:
# Swap rows and columns: the two extracted rows become two columns and each
# column of the extracted table becomes one row.
df_tac1 = df_tac1.T

In [None]:
# df_tac1.info()

In [None]:
# df_tac1.columns

In [None]:
# Give the two columns (labelled 0 and 1) descriptive names.
df_tac1.rename({0: 'lane', 1: 'carrier_port_call_ref'}, axis='columns', inplace=True)

In [None]:
# Remove the rows labelled 0, 1, 12 and 13
# (presumably table columns that are not port calls — TODO confirm against the PDF).
df_tac1.drop([0, 1, 12, 13], axis='index', inplace=True)

In [None]:
# Treat empty strings as missing values, then discard rows that are entirely missing.
df_tac1.replace(to_replace='', value=np.nan, inplace=True)
df_tac1.dropna(how='all', inplace=True)

In [None]:
# Carry each lane label down to the following rows that have no lane of their own.
# The result is assigned back to the column: calling ffill(inplace=True) on
# `df_tac1.lane` operates on a Series selected from the frame, which newer pandas
# warns about and, with Copy-on-Write enabled, leaves df_tac1 unchanged.
df_tac1['lane'] = df_tac1['lane'].ffill()

In [None]:
# Drop every row that still has a missing value in any column
# (dropna defaults: axis=0, how='any').
df_tac1.dropna(inplace=True)

In [None]:
# Reduce the lane label to its first character and the port-call reference to
# its second space-separated token.
df_tac1['lane'] = [label[0] for label in df_tac1['lane']]
df_tac1['carrier_port_call_ref'] = [
    ref.split(' ')[1] for ref in df_tac1['carrier_port_call_ref']
]

# Constant attributes shared by every call in this schedule.
df_tac1['service'] = 'TAC1'
df_tac1['trade'] = 'Trans-Atlantic'
df_tac1['carrier'] = 'ICL'

In [None]:
# df_tac1.carrier_port_call_ref.unique().tolist()

In [None]:
# ICL port-call reference -> value stored in the 'terminal' column
# (later joined against the SMDG 'Terminal Code' column).
icl_mto_smdg = dict(
    CHE='PSAP',
    WIL='NCSPA',
    ANT='ACOT',
    SOU='DPWS',
    CORK='RDT',
)

# ICL port-call reference -> value stored in the 'wpi' column.
icl_port_wpi = dict(
    CHE=8080,
    WIL=8470,
    ANT=31250,
    SOU=35580,
    CORK=34350,
)

In [None]:
# Translate each ICL port-call reference through the two lookup tables;
# references missing from a table yield NaN in the corresponding column.
port_call_refs = df_tac1['carrier_port_call_ref']
df_tac1['terminal'] = port_call_refs.map(icl_mto_smdg)
df_tac1['wpi'] = port_call_refs.map(icl_port_wpi)

In [None]:
# The raw carrier reference has been mapped to 'terminal' and 'wpi'; remove it.
df_tac1.drop('carrier_port_call_ref', axis='columns', inplace=True)

In [None]:
# Split the calls by lane so each lane can be renumbered separately.
grouped = df_tac1.groupby(by='lane')

In [None]:
# grouped.get_group('E').reset_index(drop=True)
# grouped.get_group('W').reset_index(drop=True)

In [None]:
# Stack the 'E' lane on top of the 'W' lane. Each lane's index is reset to
# 0..n-1 first and kept as-is in the result, so the index restarts per lane.
df_tac1 = pd.concat(
    [grouped.get_group(lane).reset_index(drop=True) for lane in ('E', 'W')],
    ignore_index=False,
)

In [None]:
# Both sequence numbers are taken from the current index of df_tac1.
df_tac1['port_call_seq_no'] = df_tac1['terminal_call_seq_no'] = df_tac1.index

# Constant transport and cargo attributes for every call.
df_tac1['transport_type'] = 'vessel'
df_tac1['transport_connection'] = 'direct'
df_tac1['cargo_type_lolo'] = 1
df_tac1['cargo_type_roro'] = 0

In [None]:
# Inspect the prepared TAC1 call table.
df_tac1

In [None]:
# Attach the SMDG terminal details to each call. The left join keeps calls whose
# 'terminal' value has no matching 'Terminal Code'. A fixed set of SMDG columns
# is then dropped, missing values are replaced by empty strings, and every row
# is tagged with its object type.
terminal_calls = (
    df_tac1
    .merge(df_smdg, how='left', left_on='terminal', right_on='Terminal Code')
    .drop(columns=[
        'Terminal Code',
        'Latitude (DMS)',
        'Longitude (DMS)',
        'Last change',
        'Valid from',
        'Valid until',
        'Remarks',
    ])
    .fillna("")
    .assign(obj_type='master_schedules_terminal_call_info')
)

In [None]:
# Inspect the terminal calls after joining the SMDG terminal details.
terminal_calls

In [None]:
# terminal_calls.columns.tolist()

In [None]:
# Export the terminal calls as point features (.geojson extension).
# The points are built from the 'Longitude' / 'Latitude' columns, i.e. geographic
# coordinates, so they are tagged EPSG:4326. Tagging them EPSG:3857 (Web Mercator,
# metres) would mislabel the degree values as metres.
gpd.GeoDataFrame(
    terminal_calls,
    crs='EPSG:4326',
    geometry=gpd.points_from_xy(terminal_calls.Longitude, terminal_calls.Latitude)
).drop(
    columns=['Latitude', 'Longitude']
).to_file(
    '../data/interim/master_schedules_terminal_call_info.geojson',
    driver='GeoJSON',
)

In [None]:
# Export the terminal calls as point features in GeoJSON format (.json extension).
# The points are built from the 'Longitude' / 'Latitude' columns, i.e. geographic
# coordinates, so they are tagged EPSG:4326. Tagging them EPSG:3857 (Web Mercator,
# metres) would mislabel the degree values as metres.
gpd.GeoDataFrame(
    terminal_calls,
    crs='EPSG:4326',
    geometry=gpd.points_from_xy(terminal_calls.Longitude, terminal_calls.Latitude)
).drop(
    columns=['Latitude', 'Longitude']
).to_file(
    '../data/interim/master_schedules_terminal_call_info.json',
    driver='GeoJSON',
)

In [None]:
def transport_calls_to_edges(transport_calls_df, crs='EPSG:4326'):
    """Link consecutive terminal calls into LineString edges.

    Calls are grouped by ('lane', 'service', 'trade', 'carrier'). Within each
    group, every row is paired with the row that follows it (in the row order
    of ``transport_calls_df``) and one edge is emitted per pair. A group with
    fewer than two calls contributes no edges.

    Parameters
    ----------
    transport_calls_df : pandas.DataFrame
        One row per terminal call. Must provide the grouping columns above plus
        'transport_type', 'transport_connection', 'terminal', 'port_unlocode',
        'wpi', 'port_call_seq_no', 'terminal_call_seq_no', 'Longitude' and
        'Latitude'.
    crs : str, optional
        Coordinate reference system assigned to the call points and to the
        returned edges. Defaults to 'EPSG:4326' (geographic longitude/latitude)
        because the points are built from the 'Longitude' and 'Latitude'
        columns; this assumes those columns hold decimal degrees.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per edge with the columns listed in ``edge_columns``. The
        shared attributes (lane, service, trade, carrier, transport type and
        connection) are copied from the first call of the pair; the ``*_1`` and
        ``*_2`` columns hold the first and second call's values respectively.
        ``transport_edge_no`` is '<service>_<lane>_<n>', with n counting the
        edges of a group from 1.
    """
    gdf = gpd.GeoDataFrame(
        transport_calls_df,
        crs=crs,
        geometry=gpd.points_from_xy(transport_calls_df.Longitude, transport_calls_df.Latitude)
    ).drop(
        columns=['Latitude', 'Longitude']
    )

    edge_columns = [
        'lane',
        'service',
        'trade',
        'carrier',
        'transport_edge_no',
        'transport_type',
        'transport_connection',
        'terminal_call_facility_1',
        'terminal_call_facility_2',
        'port_call_unlocode_1',
        'port_call_unlocode_2',
        'port_call_wpi_1',
        'port_call_wpi_2',
        'port_call_seq_no_1',
        'port_call_seq_no_2',
        'terminal_call_seq_no_1',
        'terminal_call_seq_no_2',
        'obj_type',
        'geometry'
    ]

    # Collect plain dicts and build the frame once at the end: DataFrame.append
    # was removed in pandas 2.0 and re-copied the whole frame on every call.
    edge_records = []

    for _, calls in gdf.groupby(['lane', 'service', 'trade', 'carrier']):
        rows = [row for _, row in calls.iterrows()]

        # zip(rows, rows[1:]) yields each call with its successor and is empty
        # for a single-call group, so such groups are skipped instead of failing.
        for edge_no, (first_call, second_call) in enumerate(zip(rows, rows[1:]), start=1):
            edge_records.append({
                'lane': first_call['lane'],
                'service': first_call['service'],
                'trade': first_call['trade'],
                'carrier': first_call['carrier'],
                'transport_edge_no': first_call['service'] + '_' + first_call['lane'] + '_' + str(edge_no),
                'transport_type': first_call['transport_type'],
                'transport_connection': first_call['transport_connection'],
                'terminal_call_facility_1': first_call['terminal'],
                'terminal_call_facility_2': second_call['terminal'],
                'port_call_unlocode_1': first_call['port_unlocode'],
                'port_call_unlocode_2': second_call['port_unlocode'],
                'port_call_wpi_1': first_call['wpi'],
                'port_call_wpi_2': second_call['wpi'],
                'port_call_seq_no_1': first_call['port_call_seq_no'],
                'port_call_seq_no_2': second_call['port_call_seq_no'],
                'terminal_call_seq_no_1': first_call['terminal_call_seq_no'],
                'terminal_call_seq_no_2': second_call['terminal_call_seq_no'],
                'obj_type': 'master_schedules_terminal_call_edge',
                'geometry': LineString([first_call['geometry'], second_call['geometry']])
            })

    # Passing `columns` keeps the full column set even when no edge was produced.
    edges_df = gpd.GeoDataFrame(
        pd.DataFrame(edge_records, columns=edge_columns),
        geometry='geometry',
        crs=crs
    )
    return edges_df

In [None]:
# Build the edges from the terminal calls and write them in GeoJSON format
# under a .json extension.
transport_calls_to_edges(terminal_calls).to_file(
    filename='../data/interim/master_schedules_edges.json', driver='GeoJSON'
)

In [None]:
# Build the edges from the terminal calls and write them in GeoJSON format
# under a .geojson extension.
transport_calls_to_edges(terminal_calls).to_file(
    filename='../data/interim/master_schedules_edges.geojson', driver='GeoJSON'
)

In [None]:
# transport_calls_to_edges(terminal_calls).to_json(
# #     'master_schedules2.json',
# #     driver='GeoJSON',
# )

In [None]:
# Keep the edges in memory for inspection (recomputed from terminal_calls).
routes = transport_calls_to_edges(terminal_calls)

In [None]:
# Preview the first edges.
routes.head()

In [None]:
# Path to the ACL schedule PDF. No download step is active for this file, so it
# must already exist locally before it is parsed.
acl_schedule_path = '../data/raw/acl-schedule.pdf'

# NOTE(review): the disabled block below is a copy of the ICL download (it uses
# the ICL URL and icl_schedule_path); re-enabling it as-is would not fetch the
# ACL schedule.
# if not os.path.exists(icl_schedule_path):
#     url = 'https://www.icl-ltd.com/media/2305/sailing-schedule-updated-3-5-2021-web.pdf'
#     r = requests.get(url)
#     with open(icl_schedule_path, 'wb') as f:
#         f.write(r.content)

In [None]:
# Extract the tables from the ACL schedule PDF using camelot's default options.
# NOTE: this rebinds `tables`, replacing the ICL tables parsed earlier.
tables = camelot.read_pdf(acl_schedule_path)

In [None]:
# tables[0]

In [None]:
# Show camelot's parsing report for the first table of the ACL schedule.
tables[0].parsing_report

In [None]:
# tables[0].df.head()

In [None]:
# Lift the column limit for displayed tables (same option as set in the ICL section).
pd.set_option('display.max_columns', None)

In [None]:
# tables[0].df.head(2)

In [None]:
# Keep only the first two rows of the first ACL table.
df_a = tables[0].df.head(2)

In [None]:
# df_tac1