In [2]:
import pandas as pd

# Read the three core datasets from disk, in the same order as before.
ais_data, distance_data, port_call_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "dataset/ais_dataset.csv",
        "dataset/distance_dataset.csv",
        "dataset/port_call_dataset.csv",
    )
)

# Read the auxiliary-engine and boiler emission factor lookup tables.
aux_emission_factors, boiler_emission_factors = (
    pd.read_csv(csv_path)
    for csv_path in (
        "supplementary_tables/auxiliary_engine_emission_factors.csv",
        "supplementary_tables/boiler_engine_emission_factors.csv",
    )
)


In [8]:
# Preview the first few rows and the column summary of each dataset.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line
# after every report.
for dataset in (ais_data, distance_data, port_call_data):
    print(dataset.head())
    dataset.info()


       imo       mmsi     vessel_name             date_of_build   vessel_type  \
0  9984730  636023994  SEACON ANTWERP  2024-06-06T00:00:00.000Z  Bulk Carrier   
1  9984730  636023994  SEACON ANTWERP  2024-06-06T00:00:00.000Z  Bulk Carrier   
2  9984730  636023994  SEACON ANTWERP  2024-06-06T00:00:00.000Z  Bulk Carrier   
3  9984730  636023994  SEACON ANTWERP  2024-06-06T00:00:00.000Z  Bulk Carrier   
4  9984730  636023994  SEACON ANTWERP  2024-06-06T00:00:00.000Z  Bulk Carrier   

          group                 timestamp         lon       lat  nav_stat  \
0  Bulk Carrier  2024-07-28T00:23:32.000Z  104.029630  1.283717         0   
1  Bulk Carrier  2024-07-28T00:25:02.000Z  104.029755  1.285643         0   
2  Bulk Carrier  2024-07-28T00:30:01.000Z  104.029420  1.291690         0   
3  Bulk Carrier  2024-07-28T00:32:51.000Z  104.028160  1.294758         0   
4  Bulk Carrier  2024-07-28T00:35:02.000Z  104.026474  1.296387         0   

   speed  sfc_me  sfc_ae  sfc_ab    ael    abl  di

In [None]:

# Small in-memory stand-ins for the three datasets (five rows each).
N_ROWS = 5


def _repeat(value):
    """Return a list holding `value` once per dummy row."""
    return [value] * N_ROWS


def _frame_from_rows(fields, rows):
    """Build a DataFrame from row tuples, keeping `fields` as the column order."""
    return pd.DataFrame(
        {field: list(values) for field, values in zip(fields, zip(*rows))}
    )


# AIS positions: one vessel, five consecutive position reports.
ais_data = pd.DataFrame({
    'imo': _repeat(9984730),
    'mmsi': _repeat(636023994),
    'vessel_name': _repeat('SEACON ANTWERP'),
    'date_of_build': _repeat('2024-06-06T00:00:00.000Z'),
    'vessel_type': _repeat('Bulk Carrier'),
    'group': _repeat('Bulk Carrier'),
    'timestamp': [
        '2024-07-28T00:23:32.000Z',
        '2024-07-28T00:25:02.000Z',
        '2024-07-28T00:30:01.000Z',
        '2024-07-28T00:32:51.000Z',
        '2024-07-28T00:35:02.000Z',
    ],
    'lon': [104.02963, 104.029755, 104.02942, 104.02816, 104.026474],
    'lat': [1.283717, 1.285643, 1.29169, 1.294758, 1.296387],
    'nav_stat': _repeat(0),
    'speed': _repeat(14.1),
    'sfc_me': _repeat(167.1),
    'sfc_ae': _repeat(209.1),
    'sfc_ab': _repeat(300.0),
    'ael': _repeat(316.0),
    'abl': _repeat(138.0),
    'distance': [6.221528, 0.11591, 0.363603, 0.199119, 0.14072],
    'berth': _repeat(None),
    'terminal': _repeat(None),
    'maneuvering_zone': _repeat(None),
})

# Distance records, written one row per snapshot.
distance_data = _frame_from_rows(
    [
        'snapshot_date_utc', 'imo', 'distance_nm', 'duration_minutes',
        'avg_speed_kt', 'min_speed_kt', 'max_speed_kt',
    ],
    [
        ('2024-07-24', 1013327, 29.84, 1184.43, 2.19, 0.03, 7.83),
        ('2024-07-20', 1015820, 65.42, 1312.65, 3.18, 0.02, 11.91),
        ('2024-07-13', 1028437, 25.46, 178.77, 6.75, 4.37, 10.61),
        ('2024-07-12', 1028437, 0.66, 1253.67, 0.03, 0.03, 0.03),
        ('2024-07-08', 1043011, 79.17, 1112.48, 4.45, 0.03, 14.48),
    ],
)

# Port calls, written one row per call.
port_call_data = _frame_from_rows(
    [
        'current_snapshot_date_utc', 'imo', 'current_port_name',
        'current_port_country_name', 'current_earliest_visit_time_utc',
        'prev_port_name', 'prev_country_name',
    ],
    [
        ('24/7/2024', 1013327, 'Singapore', 'Singapore',
         '2024-07-24T12:03:26.000+0000', 'TanjungPelepas', 'Malaysia'),
        ('20/7/2024', 1015820, 'Singapore', 'Singapore',
         '2024-07-20T08:10:02.000+0000', 'PasirGudang', 'Malaysia'),
        ('13/7/2024', 1028437, 'Singapore', 'Singapore',
         '2024-07-13T22:51:23.000+0000', 'PortKlang', 'Malaysia'),
        ('8/7/2024', 1043011, 'Singapore', 'Singapore',
         '2024-07-08T11:10:58.000+0000', 'Shanghai', 'China'),
        ('2/7/2024', 8353245, 'Singapore', 'Singapore',
         '2024-07-02T19:50:23.000+0000', 'MYMUA', 'Malaysia'),
    ],
)

# AIS: drop the three all-empty location columns and parse both date columns.
ais_data_cleaned = (
    ais_data
    .drop(columns=['berth', 'terminal', 'maneuvering_zone'])
    .assign(
        date_of_build=lambda positions: pd.to_datetime(positions['date_of_build']),
        timestamp=lambda positions: pd.to_datetime(positions['timestamp']),
    )
)

# Distance: parse the snapshot date; the source frame is left untouched.
distance_data_cleaned = distance_data.assign(
    snapshot_date_utc=lambda distances: pd.to_datetime(distances['snapshot_date_utc'])
)

# Port calls: the snapshot date is day/month/year, so its format is given explicitly.
port_call_data_cleaned = port_call_data.assign(
    current_snapshot_date_utc=lambda calls: pd.to_datetime(
        calls['current_snapshot_date_utc'], format='%d/%m/%Y'
    ),
    current_earliest_visit_time_utc=lambda calls: pd.to_datetime(
        calls['current_earliest_visit_time_utc']
    ),
)

# Show the head of each cleaned dataset as the cell output.
(
    ais_data_cleaned.head(),
    distance_data_cleaned.head(),
    port_call_data_cleaned.head(),
)


(       imo       mmsi     vessel_name             date_of_build   vessel_type  \
 0  9984730  636023994  SEACON ANTWERP 2024-06-06 00:00:00+00:00  Bulk Carrier   
 1  9984730  636023994  SEACON ANTWERP 2024-06-06 00:00:00+00:00  Bulk Carrier   
 2  9984730  636023994  SEACON ANTWERP 2024-06-06 00:00:00+00:00  Bulk Carrier   
 3  9984730  636023994  SEACON ANTWERP 2024-06-06 00:00:00+00:00  Bulk Carrier   
 4  9984730  636023994  SEACON ANTWERP 2024-06-06 00:00:00+00:00  Bulk Carrier   
 
           group                 timestamp         lon       lat  nav_stat  \
 0  Bulk Carrier 2024-07-28 00:23:32+00:00  104.029630  1.283717         0   
 1  Bulk Carrier 2024-07-28 00:25:02+00:00  104.029755  1.285643         0   
 2  Bulk Carrier 2024-07-28 00:30:01+00:00  104.029420  1.291690         0   
 3  Bulk Carrier 2024-07-28 00:32:51+00:00  104.028160  1.294758         0   
 4  Bulk Carrier 2024-07-28 00:35:02+00:00  104.026474  1.296387         0   
 
    speed  sfc_me  sfc_ae  sfc_ab   

How much time do ships spend at port before berthing?

In [11]:
# Parse the AIS timestamps in place, then take the maximum (latest) timestamp
# for each `imo` value as a flat two-column frame.
ais_data["timestamp"] = pd.to_datetime(ais_data["timestamp"])
latest_timestamps = ais_data.groupby("imo", as_index=False)["timestamp"].max()

print(latest_timestamps)

       imo                 timestamp
0  9984730 2024-07-28 00:35:02+00:00


In [16]:
# Re-read the port call dataset, asking pandas to parse the two visit-time
# columns of the current port call as datetimes while loading.
visit_time_columns = [
    "current_earliest_visit_time_utc",
    "current_latest_visit_time_utc",
]
port_call_data = pd.read_csv(
    "dataset/port_call_dataset.csv",
    parse_dates=visit_time_columns,
)

print(port_call_data.columns)

Index(['current_snapshot_date_utc', 'imo', 'current_port_name',
       'current_port_country_name', 'current_earliest_visit_time_utc',
       'current_latest_visit_time_utc', 'current_zone_polygon_name',
       'current_zone_type', 'next_port_name', 'next_country_name',
       'next_country_earliest_visit_time_utc',
       'next_country_latest_visit_time_utc', 'next_zone_polygon_name',
       'next_snapshot_date', 'next_zone_type', 'prev_port_name',
       'prev_country_name', 'prev_country_earliest_visit_time_utc',
       'prev_country_latest_visit_time_utc', 'prev_zone_polygon_name',
       'prev_snapshot_date', 'prev_zone_type'],
      dtype='object')


How much time do vessels spend waiting before berthing?

Waiting time = `current_latest_visit_time_utc` − `current_earliest_visit_time_utc`


In [17]:
# Span between the earliest and latest recorded visit time of each port call.
visit_span = (
    pd.to_datetime(port_call_data["current_latest_visit_time_utc"])
    - pd.to_datetime(port_call_data["current_earliest_visit_time_utc"])
)
# Store the span on the frame, converted from seconds to minutes.
port_call_data["waiting_time_minutes"] = visit_span.dt.total_seconds() / 60

# Preview the per-call waiting times.
preview_columns = ["imo", "current_port_name", "waiting_time_minutes"]
print(port_call_data[preview_columns].head())

# Mean waiting time for each port.
avg_waiting_time = (
    port_call_data
    .groupby("current_port_name")["waiting_time_minutes"]
    .mean()
)
print(avg_waiting_time)


       imo current_port_name  waiting_time_minutes
0  1013327         Singapore             11.833333
1  1015820         Singapore            947.700000
2  1028437         Singapore              0.000000
3  1043011         Singapore            766.633333
4  8353245         Singapore            246.000000
current_port_name
Singapore    235.794299
Name: waiting_time_minutes, dtype: float64


At what speed should the vessel be travelling to be able to arrive at the port in time to reduce waiting for berthing?

Required speed = distance / time available, where time available = waiting time + remaining voyage duration.

In [21]:
# Attach the distance records to the port calls, matching rows on `imo`.
# NOTE(review): `imo` is the only join key, so a vessel with several rows in
# distance_data produces one output row per (port call, distance row) pair.
# Confirm whether the snapshot date should also be part of the key.
merged_data = port_call_data.merge(distance_data, on="imo", how="left")

# Time available before arrival, in hours (both inputs are in minutes).
time_available_hours = (
    merged_data["waiting_time_minutes"] + merged_data["duration_minutes"]
) / 60
merged_data["time_available_hours"] = time_available_hours

# Required speed in knots = distance (nm) / time available (hours).
# A zero or negative time budget would give +/-inf or a negative speed, and a
# single inf makes the per-port mean below inf as well. Such rows are masked
# to NaN, which mean() skips.
merged_data["required_speed_knots"] = (
    merged_data["distance_nm"]
    / time_available_hours.where(time_available_hours > 0)
)

# view the recommended speeds
print(merged_data[["imo", "current_port_name", "required_speed_knots"]].head())

# average speed needed per port
avg_speed_needed = merged_data.groupby("current_port_name")["required_speed_knots"].mean()
print(avg_speed_needed)


       imo current_port_name  required_speed_knots
0  1013327         Singapore              1.496660
1  1015820         Singapore              1.736545
2  1028437         Singapore              8.545058
3  1028437         Singapore              0.031587
4  1043011         Singapore              2.527894
current_port_name
Singapore    2.867549
Name: required_speed_knots, dtype: float64


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Sample waiting times dataset (replace with actual data)
waiting_times = pd.DataFrame({
    'imo': [1013327, 1015820, 1028437, 1043011, 8353245],
    'current_port_name': ['Singapore']*5,
    'waiting_time_minutes': [11.83, 947.7, 0.0, 766.63, 246.0]
})

# Sample required speed dataset (replace with actual data)
required_speeds = pd.DataFrame({
    'imo': [1013327, 1015820, 1028437, 1028437, 1043011],
    'current_port_name': ['Singapore']*5,
    'required_speed_knots': [1.49, 1.73, 8.54, 0.03, 2.52]
})

# Merge datasets. The inner join keeps only (imo, port) pairs present in both
# frames, and an imo listed twice in required_speeds yields two rows.
jit_data = pd.merge(waiting_times, required_speeds, on=['imo', 'current_port_name'], how='inner')

# Waiting-time thresholds in minutes, shared by the categorisation and the
# speed adjustment so the two cannot drift apart.
SHORT_WAIT_MAX_MINUTES = 60    # up to 1 hour
MEDIUM_WAIT_MAX_MINUTES = 240  # up to 4 hours

# Categorize waiting times. The lower edge of -1 puts a wait of exactly 0 in
# the first (right-closed) bin. The top edge is open-ended: a finite cap of
# 1440 would leave waits longer than 24 h with a NaN category even though the
# label says "4+ hrs".
jit_data['category'] = pd.cut(
    jit_data['waiting_time_minutes'],
    bins=[-1, SHORT_WAIT_MAX_MINUTES, MEDIUM_WAIT_MAX_MINUTES, np.inf],
    labels=['Short (0-1 hr)', 'Medium (1-4 hrs)', 'Long (4+ hrs)']
)

# Adjust speeds based on waiting times: reduce by 20% for long waits, by 10%
# for medium waits, and leave unchanged otherwise. np.select uses the first
# matching condition, so the long-wait test must come first.
wait_minutes = jit_data['waiting_time_minutes']
speed_factor = np.select(
    [wait_minutes > MEDIUM_WAIT_MAX_MINUTES, wait_minutes > SHORT_WAIT_MAX_MINUTES],
    [0.8, 0.9],
    default=1.0,
)
jit_data['adjusted_speed_knots'] = jit_data['required_speed_knots'] * speed_factor

# Visualization: Waiting times vs. required speeds
plt.figure(figsize=(10, 6))
sns.scatterplot(
    data=jit_data,
    x='waiting_time_minutes',
    y='required_speed_knots',
    hue='category',
    size='adjusted_speed_knots',
    sizes=(20, 200),
)
plt.xlabel('Waiting Time (minutes)')
plt.ylabel('Required Speed (knots)')
plt.title('Just-In-Time Arrival Analysis: Waiting Time vs. Required Speed')
plt.legend(title='Wait Time Category')
plt.show()

# Save adjusted data
jit_data.to_csv('jit_optimized_speeds.csv', index=False)
print(jit_data)
