In [4]:
#This python script will try to predict the amount of ships at a specific location
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import os
import json
from datetime import datetime
from collections import defaultdict

# Set the folder path where your files are stored
ml_path = os.getcwd() + r'/Git_projects/ME44312-Machine-Learning'
folder_path = os.getcwd() + r'/Git_projects/ME44312-Machine-Learning/raw_data_rotterdam'

#ml_path = os.getcwd()
#folder_path = os.getcwd() + r'/raw_data_rotterdam'

print('path = ',folder_path)

# Get a list of all JSON files in the folder
file_list = glob.glob(os.path.join(folder_path, "raw_ais_data_*.json"))

# Initialize an empty list to store DataFrames
dfs = []

# Loop through each file and read the JSON data
for file in file_list:
    df = pd.read_json(file)  # Read the JSON file into a DataFrame
    dfs.append(df)  # Add it to the list


# Combine all DataFrames into one
full_data = pd.concat(dfs, ignore_index=True)

'''
Every file contains the ais data of different ships in 1 day!
Dict summary:
- navigation
    - draught
    - time
    - speed
    - heading
    - location
        - long
        - lat
    - course
    - destination
        - name
        - eta
    - status
- device
    - dimensions
        - to_port
        - to_bow
        - to_stern
        - to_starboard
    - mmsi
- vessel
    - callsign
    - subtype
    - type
    - imo
    - name
'''

# We want this structure:
'''
- boats
    - mmsi(primary key)
    - name
    - to_port
    - to_bow
    - to_stern
    - to_starboard
    - callsign
    - subtype
    - type
    - imo
    - trips
        - eta
        - arrival
            - long
            - lat
        - departure
            - long
            - lat
        - departure_time
        - arrival_time
        - elapsed_time
        - recordings
            - draught
            - time
            - speed
            - heading
            - location
                - long
                - lat
            - course
'''

boats = {}
unique_mmsi = []

#First, create a list which contains all the unique boats
for index in range(0, full_data.shape[0]):
    if(full_data.iloc[index, 0]['device']['mmsi'] not in unique_mmsi):
        if(full_data.iloc[index, 0]['vessel']['name'] == ''):
            continue

        if(full_data.iloc[index, 0]['device']['dimensions']['to_port'] == 0):
            continue

        if(full_data.iloc[index, 0]['device']['dimensions']['to_bow'] == 0):
            continue

        if(full_data.iloc[index, 0]['device']['dimensions']['to_stern'] == 0):
            continue

        if(full_data.iloc[index, 0]['vessel']['type'] != 'cargo' and full_data.iloc[index, 0]['vessel']['type'] != 'tanker'):
            continue

        unique_mmsi.append(full_data.iloc[index, 0]['device']['mmsi'])
        boats[full_data.iloc[index, 0]['device']['mmsi']] = {
            'name': full_data.iloc[index, 0]['vessel']['name'],
            'to_port': full_data.iloc[index, 0]['device']['dimensions']['to_port'],
            'to_bow': full_data.iloc[index, 0]['device']['dimensions']['to_bow'],
            'to_stern': full_data.iloc[index, 0]['device']['dimensions']['to_stern'],
            'to_starboard': full_data.iloc[index, 0]['device']['dimensions']['to_starboard'],
            'callsign': full_data.iloc[index, 0]['vessel']['callsign'],
            'subtype': full_data.iloc[index, 0]['vessel']['subtype'],
            'type': full_data.iloc[index, 0]['vessel']['type'],
            'imo': full_data.iloc[index, 0]['vessel']['imo'],
            'trips': []
        }

print(unique_mmsi)

#function to determine if a new trip starts
def is_departing(prev_status, curr_status, prev_speed, curr_speed):
    return prev_status == "moored" and curr_status != "moored" and curr_speed > 2

# Function to determine if a trip ends
def is_arriving(prev_speed, curr_speed, curr_status, timestamp, eta):
    if eta == "unknown":
        return prev_speed > 2 and curr_speed < 1 and curr_status == "moored"
    else:
        possible_to_have_arrived = True
        #check of de ETA ook zegt dat nu ongeveer de trip zou moeten eindigen, anders is het schip wss gewoon ergens aangemeerd en nog niet aangekomen op bestemming
        if np.abs((timestamp - eta).days) > 10:
            possible_to_have_arrived = False #als het verschil tussen de eta en huidige tijd groter is dan 10 dagen, dan is het onwaarschijnlijk dat het schip nu aankomt
    return prev_speed > 2 and curr_speed < 1 and curr_status == "moored" and prev_status != "moored" and possible_to_have_arrived

#Now, add the trips to the boats
ship_logs = defaultdict(list)
for i in range(full_data.shape[0]):    
    mmsi = full_data.iloc[i, 0]['device']['mmsi']
    timestamp = datetime.fromisoformat(full_data.iloc[i, 0]['navigation']['time'].replace("Z", "+00:00"))
    location = (full_data.iloc[i, 0]['navigation']['location']['long'], full_data.iloc[i, 0]['navigation']['location']['lat'])
    speed = full_data.iloc[i, 0]['navigation']['speed']
    status = full_data.iloc[i, 0]['navigation']['status']
    draught = full_data.iloc[i, 0]['navigation']['draught']
    heading = full_data.iloc[i, 0]['navigation']['heading']
    course = full_data.iloc[i, 0]['navigation']['course']
    destination = full_data.iloc[i, 0]['navigation']['destination']['name']
    # print(full_data.iloc[i, 0]['navigation']['destination']['eta'])
    eta = "unknown" if full_data.iloc[i, 0]['navigation']['destination']['eta'] is None else datetime.fromisoformat(full_data.iloc[i, 0]['navigation']['destination']['eta'].replace("Z", "+00:00"))
    ship_logs[mmsi].append((timestamp, location, speed, status, draught, heading, course, destination, eta))

for mmsi, logs in ship_logs.items():
    if mmsi in unique_mmsi:
            
        logs.sort() # Sort the logs by timestamp
        # initiate logs
        prev_status, prev_speed = "moored", 0
        trip_start, start_location, start_time, trip_destination = None, None, None, None
        currently_on_trip = False
        for timestamp, location, speed, status, draught, heading, course, destination, eta in logs:

            if status in ["under-way-using-engine", "under-way-sailing"]:
                currently_on_trip = True
                if boats[mmsi]['trips'] == []:
                    boats[mmsi]['trips'].append({
                        'departure_time': "unknown",
                        'depature': "unknown",
                        'arrival' : "Not yet arrived",
                        'arrival_time': "Not yet arrived",
                        'elapsed_time': "Not yet arrived",
                        'recordings': []
                    })
         
            if is_departing(prev_status, status, prev_speed, speed) and not currently_on_trip:
                trip_start = location
                start_time = timestamp
                trip_destination = destination
                currently_on_trip = True
                boats[mmsi]['trips'].update({
                    'departure': trip_start,
                    'departure_time': start_time,
                    'arrival' : "Not yet arrived",
                    'arrival_time': "Not yet arrived",
                    'elapsed_time': "Not yet arrived",
                    'recordings': []
                })

                
            if currently_on_trip and is_arriving(speed, prev_speed, status, timestamp, eta):
                trip_end = location
                end_time = timestamp
                elapsed_time = end_time - start_time
                currently_on_trip = False
                boats[mmsi]['trips'][-1].update({
                    'arrival' : trip_end,
                    'arrival_time': end_time,
                    'elapsed_time': elapsed_time,
                })
                trip_start = None

            if currently_on_trip:
                if not status in ["moored", "fishing", "at anchor"]:
                    boats[mmsi]['trips'][-1]['recordings'].append({
                        'draught': draught,
                        'time': timestamp,
                        'speed': speed,
                        'heading': heading,
                        'location' : location,
                        'course': course,
                        'eta' : eta
                    })

            prev_status, prev_speed = status, speed


output_name = ml_path + r'/output.txt'
with open(output_name, "w") as file:
    json.dump(boats, file, indent=4, default=str)



path =  C:\Users\stefh\PycharmProjects\ME44312-project/raw_data_rotterdam


Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x000001EE53559710>>
Traceback (most recent call last):
  File "C:\Users\stefh\PycharmProjects\ME44312-project\.venv\Lib\site-packages\ipykernel\ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 


[211560210, 246046000, 244010773, 244650759, 244750043, 309937000, 244630718, 244700820, 219578000, 219019094]


In [5]:
import folium

In [28]:
key = list(cleaned_data.keys())[0]
for key in cleaned_data.keys():
   print(key, 'nr of trips', len(cleaned_data[key]['trips']))

#print(cleaned_data['211560210']['trips'])
print('first one: ', cleaned_data['211560210']['trips'][0]['recordings'][5130])

211560210 nr of trips 1
246046000 nr of trips 1
244010773 nr of trips 1
244650759 nr of trips 0
244750043 nr of trips 1
309937000 nr of trips 1
244630718 nr of trips 1
244700820 nr of trips 0
219578000 nr of trips 1
219019094 nr of trips 1


IndexError: list index out of range

In [6]:

with open(output_name, "r") as file:
    cleaned_data = json.load(file)

In [9]:
# Create a map
m = folium.Map(location=[51.9225, 4.47917], zoom_start=10)


colors = ['blue', 'red', 'green', 'purple', 'orange', 'darkred', 'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue', 'darkpurple', 'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray']
# Plot ship tracks
index = 0
for mmsi, items in cleaned_data.items():
    for i, trip in enumerate(items['trips']):
        track = [[rec['location'][1], rec['location'][0]] for rec in trip['recordings']] # lat, lon ipv lon, lat
        folium.PolyLine(track, color=colors[index], weight=2.5, opacity=0.7).add_to(m)
        index += 1


m.save("ship_routes.html")
#m