In [1]:
import pandas as pd
from typing import List
import geopy.distance

# Arrival

In [2]:
# Read the arrivals workbook; an integer sheet_name selects a sheet by
# zero-based position, so 1 is the second worksheet.
arrival_df = pd.read_excel(
    io='Data/arrivals.xlsx',
    sheet_name=1,
)
# Preview a single record to check the raw column names.
arrival_df.head(n=1)

Unnamed: 0,Vessel Name,Port Call Type,Port Type,Port At Call,Port At Call Country,Latitude,Longitude,Ata/atd,Time At Port,Port Country,Port Location,Type,Remarks,Latitude (generated),Longitude (generated),Total Capcity of Methanol Storage ( In Metric Tonnes)
0,SEABOURN VENTURE,DEPARTURE,Port,REYKJAVIK,IS,64.1265,-21.8174,2023-06-04 20:07:00,14h 19m,China,Zhuhai,Confirmed Methanol Supply / Storage,,22.2667,113.5667,3957.0


In [48]:
# Preprocess Arrivals df
# Built as a single non-mutating chain so the frame loaded above is never
# modified in place.
arrival_df = (
    arrival_df
    # Normalise the column names to the short schema used by compute_distance.
    .rename(columns={
        'Port At Call': 'location',
        'Port At Call Country': 'country',
        'Latitude': 'lat',
        'Longitude': 'long',
        'Ata/atd': 'time',
    })
    # .str.title() leaves missing port names as NaN instead of raising the
    # AttributeError that calling .title() on a float NaN would produce.
    .assign(location=lambda frame: frame['location'].str.title())
    # Chronological order: the resulting positional index is later used as
    # the arrival sequence number (arrival_idx).
    .sort_values('time', ascending=True)
    .reset_index(drop=True)
    .loc[:, ['location', 'country', 'lat', 'long', 'time']]
)
arrival_df.head(1)

Unnamed: 0,location,country,lat,long,time
0,Genova,IT,44.4056,8.9463,2022-07-01 16:45:00


# Methanol

In [4]:
# Read the e-methanol workbook (default sheet).
methanol_df = pd.read_excel(io='Data/emethanol.xlsx')
# Preview a single record to check the raw column names.
methanol_df.head(n=1)

Unnamed: 0,Latitude,Longtitude,Capacity (t/y),City,Company,Country,Feedstock
0,63.868877,20.411105,100000.0,Umeå,Liquid Wind and Umeå Energi,Sweden,CO2 and H2 from water electrolysis


In [5]:
# Preprocess methanol df
# Non-mutating chain: rename to the short schema shared with arrival_df, then
# keep only the columns needed for the distance computation.
# NOTE: 'Longtitude' is the (misspelled) header as it appears in the workbook.
methanol_df = (
    methanol_df
    .rename(columns={
        'City': 'location',
        'Country': 'country',
        'Latitude': 'lat',
        'Longtitude': 'long',
    })
    .loc[:, ['location', 'country', 'lat', 'long']]
)
methanol_df.head(1)

Unnamed: 0,location,country,lat,long
0,Umeå,Sweden,63.868877,20.411105


# Compute

In [40]:
def compute_distance(A: pd.DataFrame, B: pd.DataFrame, max_distance: int) -> pd.DataFrame:
    """Pair every row of ``A`` with every row of ``B`` lying within ``max_distance`` km.

    Distances are geodesic distances computed with ``geopy.distance.geodesic``
    from the ``(lat, long)`` pairs of both frames.

    Args:
        A: Arrivals; must provide the columns ``lat``, ``long``, ``time``,
            ``location`` and ``country``. Its index labels are reported as
            ``arrival_idx``.
        B: Methanol sites; must provide the columns ``lat``, ``long``,
            ``location`` and ``country``.
        max_distance: Inclusive distance threshold in kilometres.

    Returns:
        A DataFrame with a fresh 0..n-1 index and the columns ``arrival_idx``,
        ``arrival_time``, ``arrival_location``, ``arrival_country``,
        ``methanol_location``, ``methanol_country``, ``km_distance`` and
        ``nm_distance`` (both rounded to one decimal). The columns are present
        even when no pair matches, so downstream column lookups do not fail.
    """
    columns = [
        'arrival_idx',
        'arrival_time',
        'arrival_location',
        'arrival_country',
        'methanol_location',
        'methanol_country',
        'km_distance',
        'nm_distance',
    ]

    # B is loop-invariant: materialise its coordinates and labels once instead
    # of rebuilding them for every row of A.
    sites = [
        ((b_lat, b_long), b_location, b_country)
        for b_lat, b_long, b_location, b_country
        in zip(B['lat'], B['long'], B['location'], B['country'])
    ]

    # Collect plain dicts and build the frame once at the end; concatenating
    # one-row frames inside the loop copies the whole result on every match.
    records = []
    arrivals = zip(A.index, A['lat'], A['long'], A['time'], A['location'], A['country'])
    for a_index, a_lat, a_long, a_time, a_location, a_country in arrivals:
        arrival_coords = (a_lat, a_long)
        for site_coords, b_location, b_country in sites:
            dist = geopy.distance.geodesic(arrival_coords, site_coords)
            if dist.km <= max_distance:
                records.append({
                    'arrival_idx': a_index,
                    'arrival_time': a_time,
                    'arrival_location': a_location,
                    'arrival_country': a_country,
                    'methanol_location': b_location,
                    'methanol_country': b_country,
                    'km_distance': round(dist.km, 1),
                    'nm_distance': round(dist.nautical, 1),
                })

    return pd.DataFrame(records, columns=columns)

In [41]:
def remove_duplicates(df: pd.DataFrame, columns: List[str], subset: str = 'arrival_location') -> pd.DataFrame:
    """Keep one row per ``subset`` value, chosen by the ordering of ``columns``.

    The frame is sorted ascending by ``columns``; for each distinct value of
    ``subset`` only the first row in that order is kept. The survivors are
    then re-ordered by ``arrival_idx`` and given a fresh 0..n-1 index.

    Args:
        df: Frame to de-duplicate; must contain ``arrival_idx``, every column
            named in ``columns`` and the ``subset`` column(s).
        columns: Sort keys deciding which duplicate wins (first after an
            ascending sort).
        subset: Column (or list of columns) identifying duplicates. Defaults
            to ``'arrival_location'``, i.e. one row per distinct location;
            pass ``'arrival_idx'`` to keep one row per individual arrival.

    Returns:
        The de-duplicated frame ordered by ``arrival_idx``.
    """
    ranked = df.sort_values(by=columns, ascending=True)
    winners = ranked.drop_duplicates(subset=subset)
    return winners.sort_values(by='arrival_idx', ascending=True).reset_index(drop=True)

In [50]:
def compute_beetween_stops_analytics(df: pd.DataFrame) -> pd.DataFrame:
    """Add the gap between consecutive rows, in skipped stops and elapsed time.

    Rows are compared positionally with the row directly above them:

    * ``stops_in_between`` is ``arrival_idx - previous arrival_idx - 1``; the
      first row is compared against a virtual previous index of ``-1``, so it
      receives its own ``arrival_idx``.
    * ``time_in_between`` is ``arrival_time - previous arrival_time``; the
      first row is compared against itself and therefore receives a zero
      duration.

    The input frame is not modified; the computation runs on a copy.

    Args:
        df: Frame with ``arrival_idx`` and ``arrival_time`` columns, already
            in the desired row order.

    Returns:
        A new frame with ``stops_in_between`` (int) and ``time_in_between``
        appended and ``arrival_idx`` removed. A frame without rows yields an
        empty result carrying the two new columns.
    """
    out = df.copy()

    if len(out) == 0:
        # Nothing to compare; df.iloc[0] would raise on an empty frame, so
        # return the expected (empty) schema instead.
        out['stops_in_between'] = pd.Series(index=out.index, dtype='int64')
        out['time_in_between'] = pd.Series(index=out.index, dtype='timedelta64[ns]')
        return out.drop(columns=['arrival_idx'], errors='ignore')

    arrival_idx = out['arrival_idx']
    arrival_time = out['arrival_time']

    # shift() is positional, matching the original row-by-row walk; the fill
    # values reproduce the seeds used for the very first row.
    previous_idx = arrival_idx.shift(1, fill_value=-1)
    previous_time = arrival_time.shift(1, fill_value=arrival_time.iloc[0])

    out['stops_in_between'] = (arrival_idx - previous_idx - 1).astype(int)
    out['time_in_between'] = arrival_time - previous_time
    return out.drop(columns=['arrival_idx'])

In [53]:
# Search radius: one degree of latitude (an arc degree) spans roughly 111 km.
res = compute_distance(A=arrival_df, B=methanol_df, max_distance=111)
# For each arrival location keep only the closest methanol site.
res1 = remove_duplicates(df=res, columns=['arrival_location', 'km_distance'])
# Add the number of stops and the elapsed time between consecutive matches.
res2 = compute_beetween_stops_analytics(df=res1)
res2

Unnamed: 0,arrival_idx,arrival_time,arrival_location,arrival_country,methanol_location,methanol_country,km_distance,nm_distance
0,0,2022-07-01 16:45:00,Genova,IT,Genoa,Italy,1.0,0.5
1,5,2022-07-18 15:26:00,Gibraltar,GI,Algeciras,Spain,9.1,4.9
2,6,2022-07-23 13:17:00,Rotterdam Centrum,NL,Rotterdam,Netherlands,4.4,2.4
3,15,2022-08-21 19:35:00,Reykjavik,IS,Grindavík,Iceland,41.1,22.2
4,18,2022-08-31 21:41:00,Akureyri,IS,Húsavík,Iceland,53.1,28.7
5,109,2023-05-17 13:45:00,Clydeport Greenock,GB,Grangemouth,UK,65.8,35.5
6,114,2023-05-24 17:00:00,Leith,GB,Grangemouth,UK,35.0,18.9
7,119,2023-06-01 18:26:00,Siglufjordur,IS,Húsavík,Iceland,72.0,38.9
