In [8]:
import os
import json
import pandas as pd
import matplotlib.pyplot  as plt
import folium
from folium.plugins import MarkerCluster

# Directory holding the raw AIS exports; every regular file in it is parsed
# as one JSON document. Built with os.path.join so the relative path works on
# every OS (the literal r'.\Data' is only a directory path on Windows).
AIS_path = os.path.join('.', 'Data')
AIS_data = []

# os.listdir() returns entries in arbitrary order; sorting keeps the row order
# of the resulting DataFrame reproducible between runs and machines.
for file_name in sorted(os.listdir(AIS_path)):
    file_path = os.path.join(AIS_path, file_name)

    # Only regular files are parsed; sub-directories are skipped.
    if not os.path.isfile(file_path):
        continue

    # Open in binary mode so the json module detects the encoding itself
    # (UTF-8/16/32) instead of relying on the platform's locale encoding.
    with open(file_path, 'rb') as f:
        AIS_data.append(json.load(f))

# Flatten the records stored under each document's 'data' key into one row
# per record; nested fields become dotted column names.
data_normalized = pd.json_normalize(AIS_data, 'data')

# Preview the first rows of the combined frame.
data_normalized.head(5)

Unnamed: 0,navigation.draught,navigation.time,navigation.speed,navigation.heading,navigation.location.long,navigation.location.lat,navigation.course,navigation.destination.name,navigation.destination.eta,navigation.status,device.dimensions.to_port,device.dimensions.to_bow,device.dimensions.to_stern,device.dimensions.to_starboard,device.mmsi,vessel.callsign,vessel.subtype,vessel.type,vessel.imo,vessel.name
0,3.0,2021-01-01T00:04:34+00:00,0.0,,4.59392,51.68819,16.8,"MOERDIJKE, ROTTERDAM",2021-11-25T14:00:00+00:00,moored,0,0,0,0,245503000,PBTD,,dredging-underwater-ops,9516650,JAN LEEGHWATER
1,3.0,2021-01-01T00:10:34+00:00,0.0,,4.59381,51.68838,8.7,"MOERDIJKE, ROTTERDAM",2021-11-25T14:00:00+00:00,moored,0,0,0,0,245503000,PBTD,,dredging-underwater-ops,9516650,JAN LEEGHWATER
2,3.0,2021-01-01T00:16:34+00:00,0.0,,4.59402,51.68827,11.6,"MOERDIJKE, ROTTERDAM",2021-11-25T14:00:00+00:00,moored,0,0,0,0,245503000,PBTD,,dredging-underwater-ops,9516650,JAN LEEGHWATER
3,3.0,2021-01-01T00:16:34+00:00,0.0,,4.59402,51.68827,11.6,"MOERDIJKE, ROTTERDAM",2021-11-25T14:00:00+00:00,moored,0,0,0,0,245503000,PBTD,,dredging-underwater-ops,9516650,JAN LEEGHWATER
4,3.0,2021-01-01T00:22:33+00:00,0.0,,4.59388,51.68833,2.1,"MOERDIJKE, ROTTERDAM",2021-11-25T14:00:00+00:00,moored,0,0,0,0,245503000,PBTD,,dredging-underwater-ops,9516650,JAN LEEGHWATER


In [17]:
# Exploration helpers (uncomment to inspect the distinct values of a column):
# data_normalized['vessel.type'].unique()
# data_normalized['vessel.name'].unique()
# data_normalized['navigation.status'].unique()
# data_normalized['navigation.destination.name'].unique()

# --- 1. Remove duplicate position reports -----------------------------------
# A report is a duplicate when the same vessel has more than one row for the
# same timestamp. The vessel is identified by 'device.mmsi' rather than
# 'vessel.imo': in this data set several different vessels carry imo == 0, so
# keying on the IMO number would merge reports of unrelated ships that happen
# to share a timestamp.
# The sort is stable (mergesort) so that 'keep="last"' deterministically keeps
# the row that appeared last in the loaded data.
data_normalized = (
    data_normalized
    .assign(**{'navigation.time': pd.to_datetime(data_normalized['navigation.time'])})
    .sort_values(by='navigation.time', kind='mergesort')
    .drop_duplicates(subset=['device.mmsi', 'navigation.time'], keep='last')
)

# --- 2. Keep only positions inside the Rotterdam port bounding box ----------
# Boundaries of the area of interest (inclusive on both ends).
min_longitude = 3.8
max_longitude = 4.6
min_latitude = 51
max_latitude = 52.5

# Series.between is inclusive by default; rows with a missing coordinate
# compare as False and are therefore dropped as well.
in_port_area = (
    data_normalized['navigation.location.long'].between(min_longitude, max_longitude)
    & data_normalized['navigation.location.lat'].between(min_latitude, max_latitude)
)
data_normalized = data_normalized[in_port_area]

# --- 3. Keep only stationary vessels (moored or at anchor) ------------------
data_normalized = data_normalized[
    data_normalized['navigation.status'].isin(['moored', 'at-anchor'])
]

# Number of remaining reports per navigation status.
print(data_normalized['navigation.status'].value_counts())

# --- 4. Drop vessels with too few reports -----------------------------------
# Count the reports per vessel name and keep only vessels that occur MORE than
# two times (names with two or fewer reports are removed).
vessel_name_counts = data_normalized['vessel.name'].value_counts()
frequent_vessel_names = vessel_name_counts.index[vessel_name_counts > 2]
data_normalized = data_normalized[data_normalized['vessel.name'].isin(frequent_vessel_names)]

# Number of remaining reports per vessel.
print(data_normalized['vessel.name'].value_counts())

# Show the cleaned frame (pandas truncates the display for large frames).
data_normalized

navigation.status
moored    19495
Name: count, dtype: int64
vessel.name
JORDY-M           5555
JAN LEEGHWATER    5057
ROMEE             2936
CAAN              1794
HELENA            1744
AARBURG            912
MARIBO MAERSK      649
ORION              559
SUNMI              289
Name: count, dtype: int64


Unnamed: 0,navigation.draught,navigation.time,navigation.speed,navigation.heading,navigation.location.long,navigation.location.lat,navigation.course,navigation.destination.name,navigation.destination.eta,navigation.status,device.dimensions.to_port,device.dimensions.to_bow,device.dimensions.to_stern,device.dimensions.to_starboard,device.mmsi,vessel.callsign,vessel.subtype,vessel.type,vessel.imo,vessel.name
994,2.5,2021-01-01 00:00:11+00:00,0.0,,4.01250,51.95848,0.0,RDAM MAASVLAKTE RDAM,2021-06-18T20:00:00+00:00,moored,6,111,0,6,244630718,,,cargo,0,JORDY-M
0,3.0,2021-01-01 00:04:34+00:00,0.0,,4.59392,51.68819,16.8,"MOERDIJKE, ROTTERDAM",2021-11-25T14:00:00+00:00,moored,0,0,0,0,245503000,PBTD,,dredging-underwater-ops,9516650,JAN LEEGHWATER
995,2.5,2021-01-01 00:06:09+00:00,0.0,,4.01250,51.95848,0.0,RDAM MAASVLAKTE RDAM,2021-06-18T20:00:00+00:00,moored,6,111,0,6,244630718,,,cargo,0,JORDY-M
101,2.5,2021-01-01 00:06:28+00:00,0.0,,4.00828,51.95784,252.0,MAASVLAKTE,2021-12-31T16:59:00+00:00,moored,7,120,15,4,211560210,DB4165,,cargo,0,AARBURG
996,2.5,2021-01-01 00:09:10+00:00,0.0,,4.01250,51.95848,0.0,RDAM MAASVLAKTE RDAM,2021-06-18T20:00:00+00:00,moored,6,111,0,6,244630718,,,cargo,0,JORDY-M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78873,2.6,2021-01-31 23:48:49+00:00,0.0,66.0,4.42495,51.69706,21.9,BASEL,2021-11-16T15:05:00+00:00,moored,5,120,15,6,244650759,PC5409,,cargo,0,HELENA
77535,15.5,2021-01-31 23:49:57+00:00,0.0,216.0,4.00390,51.95580,216.0,EGSUC>NLROT,2021-01-29T12:30:00+00:00,moored,30,146,253,30,219019094,OWJJ2,hazardous-cat-a,cargo,9619969,MARIBO MAERSK
77536,15.5,2021-01-31 23:55:57+00:00,0.0,216.0,4.00390,51.95580,216.0,EGSUC>NLROT,2021-01-29T12:30:00+00:00,moored,30,146,253,30,219019094,OWJJ2,hazardous-cat-a,cargo,9619969,MARIBO MAERSK
78592,2.5,2021-01-31 23:56:15+00:00,0.0,,4.03464,51.95739,0.0,RDAM MAASVLAKTE RDAM,2021-06-18T20:00:00+00:00,moored,6,111,0,6,244630718,,,cargo,0,JORDY-M


In [None]:
# Scatter plot of the vessel positions in data_normalized (longitude on x,
# latitude on y). Axis labels and a title are set so the figure can be read
# on its own; 'locations' still refers to the scatter artist.
fig, ax = plt.subplots()
locations = ax.scatter(
    data_normalized['navigation.location.long'],
    data_normalized['navigation.location.lat'],
)
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('AIS vessel positions')

In [None]:
# Build an interactive folium map centred on the mean latitude/longitude of
# the positions in data_normalized.
map_center = [
    data_normalized['navigation.location.lat'].mean(),
    data_normalized['navigation.location.long'].mean(),
]
m = folium.Map(location=map_center, zoom_start=10)

# All markers go into a single cluster layer so nearby positions are grouped.
marker_cluster = MarkerCluster().add_to(m)

# Iterate over the two coordinate columns directly: iterrows() would build a
# full Series for every row only to read these two values from it.
for lat, lon in zip(data_normalized['navigation.location.lat'],
                    data_normalized['navigation.location.long']):
    folium.Marker(location=[lat, lon]).add_to(marker_cluster)

# Optionally persist the map as a standalone HTML page:
# m.save('ais_map_with_scatterplot.html')

# Last expression of the cell: renders the map in the notebook.
m
