### Data Import

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Locations of the raw CSV exports, relative to the notebook directory.
ais_tracks_path = '../data/tracks_ais.csv'
radar_tracks_path = '../data/tracks_radar.csv'
radar_detections_path = '../data/detections_radar.csv'
tagged_detections_path = '../data/detections_tagged.csv'
tracks_tagged_path = '../data/tracks_tagged.csv'

# Load each table into its own DataFrame (track-level tables first, then detections).
ais_tracks = pd.read_csv(ais_tracks_path)
radar_tracks = pd.read_csv(radar_tracks_path)
radar_detections = pd.read_csv(radar_detections_path)
tagged_detections = pd.read_csv(tagged_detections_path)
tracks_tagged = pd.read_csv(tracks_tagged_path)

# Report the row count of every table as a quick sanity check on the load.
for _label, _table in (
    ("Length of ais_tracks:", ais_tracks),
    ("Length of radar_tracks:", radar_tracks),
    ("Length of radar_detections:", radar_detections),
    ("Length of tagged_detections:", tagged_detections),
    ("Length of tracks_tagged:", tracks_tagged),
):
    print(_label, len(_table))

Length of ais_tracks: 27298
Length of radar_tracks: 21725
Length of radar_detections: 7387790
Length of tagged_detections: 6756272
Length of tracks_tagged: 9013


In [36]:
# Summarize the radar track table: column names, non-null counts and dtypes.
radar_tracks.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21725 entries, 0 to 21724
Data columns (total 27 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   id_track      21725 non-null  int64  
 1   id_site       21725 non-null  int64  
 2   id_m2         21725 non-null  object 
 3   source        21725 non-null  object 
 4   duration      21529 non-null  float64
 5   alarm         21725 non-null  int64  
 6   min_speed     21725 non-null  float64
 7   max_speed     21725 non-null  float64
 8   avg_speed     21529 non-null  float64
 9   curviness     21529 non-null  float64
 10  heading_mean  21529 non-null  float64
 11  heading_std   21529 non-null  float64
 12  turning_mean  21529 non-null  float64
 13  turning_std   21529 non-null  float64
 14  duration_z    21725 non-null  int64  
 15  distance      21725 non-null  float64
 16  distance_o    21725 non-null  float64
 17  assoc_str     21725 non-null  int64  
 18  assoc_id      21725 non-nu

In [None]:
# Summarize the AIS track table: column names, non-null counts and dtypes.
ais_tracks.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27298 entries, 0 to 27297
Data columns (total 40 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   id_track      27298 non-null  int64  
 1   id_site       27298 non-null  int64  
 2   id_m2         27298 non-null  object 
 3   source        27298 non-null  object 
 4   duration      27150 non-null  float64
 5   alarm         27298 non-null  int64  
 6   min_speed     27298 non-null  float64
 7   max_speed     27298 non-null  float64
 8   avg_speed     27150 non-null  float64
 9   curviness     27150 non-null  float64
 10  heading_mean  27150 non-null  float64
 11  heading_std   27150 non-null  float64
 12  turning_mean  27150 non-null  float64
 13  turning_std   27150 non-null  float64
 14  duration_z    27298 non-null  int64  
 15  distance      27298 non-null  float64
 16  distance_o    27298 non-null  float64
 17  assoc_str     27298 non-null  int64  
 18  assoc_id      27298 non-nu

In [80]:
# Summarize the tagged track table: column names, non-null counts and dtypes.
tracks_tagged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9013 entries, 0 to 9012
Data columns (total 47 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   id_track      9013 non-null   int64  
 1   id_site       9013 non-null   int64  
 2   id_m2         9013 non-null   object 
 3   source        9013 non-null   object 
 4   duration      9013 non-null   int64  
 5   alarm         9013 non-null   int64  
 6   min_speed     9013 non-null   float64
 7   max_speed     9013 non-null   float64
 8   avg_speed     9013 non-null   float64
 9   curviness     9013 non-null   float64
 10  heading_mean  9013 non-null   float64
 11  heading_std   9013 non-null   float64
 12  turning_mean  9013 non-null   float64
 13  turning_std   9013 non-null   float64
 14  duration_z    9013 non-null   int64  
 15  distance      9013 non-null   float64
 16  distance_o    9013 non-null   float64
 17  assoc_str     191 non-null    float64
 18  assoc_id      191 non-null  

There might be a many-to-one relationship in the radar-AIS matching:

In [163]:
# Ids on each side, plus the ids each side references through its assoc_id column.
ais_track_id_set = set(ais_tracks["id_track"])
radar_track_id_set = set(radar_tracks["id_track"])
ids_referenced_by_radar = set(radar_tracks["assoc_id"])
ids_referenced_by_ais = set(ais_tracks["assoc_id"])

# AIS tracks referenced by at least one radar track, and radar tracks referenced
# by at least one AIS track.
matched_ais_ids = list(ais_track_id_set & ids_referenced_by_radar)
matched_radar_ids = list(radar_track_id_set & ids_referenced_by_ais)
print(len(matched_ais_ids), len(matched_radar_ids))

16892 16796


In [None]:
# Investigate if multiple radar records are matched to the same ais record.
# Every AIS track id starts with zero referencing radar tracks.
AIS_matchings = {ais_track_id: 0 for ais_track_id in set(ais_tracks["id_track"])}
num_radar_tracks_no_match = 0
for referenced_id in radar_tracks["assoc_id"]:
    if referenced_id in AIS_matchings:
        AIS_matchings[referenced_id] += 1
    else:
        # assoc_id does not correspond to any AIS id_track.
        num_radar_tracks_no_match += 1
print("Number of radar tracks with no matching: ", num_radar_tracks_no_match)

# Histogram: match count -> number of AIS tracks referenced exactly that many times.
AIS_match_frequency = {}
for match_count in AIS_matchings.values():
    AIS_match_frequency.setdefault(match_count, 0)
    AIS_match_frequency[match_count] += 1
for match_count, num_records in sorted(AIS_match_frequency.items()):
    print("Number of AIS records with {:<2} matching radar records: {:<8}".format(match_count, num_records))

Number of radar tracks with no matching:  1449
Number of AIS records with 0  matching radar records: 10406   
Number of AIS records with 1  matching radar records: 14517   
Number of AIS records with 2  matching radar records: 1805    
Number of AIS records with 3  matching radar records: 320     
Number of AIS records with 4  matching radar records: 159     
Number of AIS records with 5  matching radar records: 50      
Number of AIS records with 6  matching radar records: 24      
Number of AIS records with 7  matching radar records: 4       
Number of AIS records with 8  matching radar records: 5       
Number of AIS records with 9  matching radar records: 5       
Number of AIS records with 10 matching radar records: 1       
Number of AIS records with 11 matching radar records: 1       
Number of AIS records with 25 matching radar records: 1       


In [125]:
# Investigate if multiple ais records are matched to the same radar records.
# Every radar track id starts with zero referencing AIS tracks.
radar_matchings = {radar_track_id: 0 for radar_track_id in set(radar_tracks["id_track"])}
num_ais_tracks_no_match = 0
for referenced_id in ais_tracks["assoc_id"]:
    if referenced_id in radar_matchings:
        radar_matchings[referenced_id] += 1
        continue
    # assoc_id does not correspond to any radar id_track.
    num_ais_tracks_no_match += 1
print("Number of ais tracks with no matching: ", num_ais_tracks_no_match)

# Histogram: match count -> number of radar tracks referenced exactly that many times.
radar_match_frequency = {}
for match_count in radar_matchings.values():
    radar_match_frequency[match_count] = 1 + radar_match_frequency.get(match_count, 0)
for match_count, num_records in sorted(radar_match_frequency.items()):
    print("Number of radar records with {:<2} matching AIS records: {:<8}".format(match_count, num_records))

Number of ais tracks with no matching:  9973
Number of radar records with 0  matching AIS records: 4929    
Number of radar records with 1  matching AIS records: 16334   
Number of radar records with 2  matching AIS records: 416     
Number of radar records with 3  matching AIS records: 34      
Number of radar records with 4  matching AIS records: 8       
Number of radar records with 5  matching AIS records: 1       
Number of radar records with 6  matching AIS records: 2       
Number of radar records with 8  matching AIS records: 1       


In [138]:
# Investigate matching from the tagged tracking data
def _tally_associations(known_ids, assoc_ids):
    """Count how often each id in ``known_ids`` is referenced by ``assoc_ids``.

    Parameters
    ----------
    known_ids : iterable of hashable
        Ids that can be referenced; duplicates are collapsed.
    assoc_ids : iterable
        One association reference per record.

    Returns
    -------
    matchings : dict
        Maps every distinct id in ``known_ids`` to the number of entries of
        ``assoc_ids`` equal to it.
    num_no_match : int
        Number of entries of ``assoc_ids`` that are not in ``known_ids``.
    frequency : dict
        Maps a reference count to the number of known ids with exactly that count.
    """
    matchings = dict.fromkeys(set(known_ids), 0)
    num_no_match = 0
    for assoc_id in assoc_ids:
        if assoc_id in matchings:
            matchings[assoc_id] += 1
        else:
            # Anything that is not a known id lands here; a NaN reference is
            # expected to as well, since NaN does not compare equal to any key.
            num_no_match += 1
    frequency = {}
    for count in matchings.values():
        frequency[count] = frequency.get(count, 0) + 1
    return matchings, num_no_match, frequency


# AIS side: how many tagged tracks reference each AIS track through assoc_id.
# NOTE: the counter keeps its historical name although it counts tagged tracks.
AIS_matchings, num_radar_tracks_no_match, AIS_match_frequency = _tally_associations(
    ais_tracks["id_track"], tracks_tagged["assoc_id"]
)
print("Number of tagged tracks with no matching: ", num_radar_tracks_no_match)
for k in sorted(AIS_match_frequency.keys()):
    print("Number of AIS records with {:<2} matching radar records: {:<8}".format(k, AIS_match_frequency[k]))

# Tagged side: how many AIS tracks reference each tagged track through assoc_id.
radar_matchings, num_ais_tracks_no_match, radar_match_frequency = _tally_associations(
    tracks_tagged["id_track"], ais_tracks["assoc_id"]
)
print("Number of ais tracks with no matching: ", num_ais_tracks_no_match)
for k in sorted(radar_match_frequency.keys()):
    print("Number of radar records with {:<2} matching AIS records: {:<8}".format(k, radar_match_frequency[k]))

Number of tagged tracks with no matching:  9000
Number of AIS records with 0  matching radar records: 27285   
Number of AIS records with 1  matching radar records: 13      
Number of ais tracks with no matching:  27162
Number of radar records with 0  matching AIS records: 8885    
Number of radar records with 1  matching AIS records: 120     
Number of radar records with 2  matching AIS records: 8       


In [155]:
# Compute the number of records with valid activity tag:
# a record counts when its activity-tag columns sum to a non-zero value.
activity_tag_columns = [
    "transit", "overnight", "loiter", "cleanup", "fishing_c", "fishing_r",
    "research", "diving", "repairs", "distress", "other",
]
tag_sum_per_track = tracks_tagged[activity_tag_columns].to_numpy().sum(axis=1)
np.count_nonzero(tag_sum_per_track)

591

Trajectory Detection Data

In [161]:
radar_detections.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7387790 entries, 0 to 7387789
Data columns (total 14 columns):
 #   Column      Dtype  
---  ------      -----  
 0   id_detect   int64  
 1   id_track    int64  
 2   id_site     int64  
 3   id_m2       object 
 4   source      object 
 5   speed       float64
 6   course      float64
 7   assoc_str   int64  
 8   assoc_id    int64  
 9   confidence  float64
 10  cdate       object 
 11  ctime       object 
 12  longitude   float64
 13  latitude    float64
dtypes: float64(5), int64(5), object(4)
memory usage: 789.1+ MB


In [162]:
tagged_detections.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6756272 entries, 0 to 6756271
Data columns (total 14 columns):
 #   Column      Dtype  
---  ------      -----  
 0   id_detect   int64  
 1   id_track    int64  
 2   id_site     int64  
 3   id_m2       object 
 4   source      object 
 5   speed       float64
 6   course      float64
 7   assoc_str   float64
 8   assoc_id    float64
 9   confidence  float64
 10  cdate       object 
 11  ctime       object 
 12  longitude   float64
 13  latitude    float64
dtypes: float64(7), int64(3), object(4)
memory usage: 721.6+ MB


In [166]:
# Number of unique tracks in detection:
# distinct id_track values in the radar detections, then in the tagged detections.
radar_detection_track_ids = radar_detections["id_track"].unique()
print(len(radar_detection_track_ids))
tagged_detection_track_ids = tagged_detections["id_track"].unique()
print(len(tagged_detection_track_ids))

19947
9020


In [167]:
# Count the track ids that appear both in the radar detections and in the radar track table.
track_ids_in_radar_detections = set(radar_detections["id_track"])
track_ids_in_radar_tracks = set(radar_tracks["id_track"])
print(len(track_ids_in_radar_detections & track_ids_in_radar_tracks))

19947


In [176]:
# Count the track ids that appear both in the tagged detections and in the AIS track table.
track_ids_in_tagged_detections = set(tagged_detections["id_track"])
track_ids_in_ais_tracks = set(ais_tracks["id_track"])
print(len(track_ids_in_tagged_detections & track_ids_in_ais_tracks))

5


In [None]:
# Check for high confidence points: tagged detections whose confidence exceeds 0.5.
high_confidence_mask = tagged_detections["confidence"] > 0.5
len(tagged_detections[high_confidence_mask])

6483431

In [202]:
# Preview the first 100 values of the tagged tracks' "stime" column.
tracks_tagged["stime"].head(100)

0     10:00:06
1     09:50:48
2     10:32:35
3     10:40:08
4     10:47:46
        ...   
95    22:11:02
96    10:54:55
97    10:56:45
98    11:06:43
99    11:07:52
Name: stime, Length: 100, dtype: object

In [203]:
# Preview the first 100 values of the tagged tracks' "sdate" column.
tracks_tagged["sdate"].head(100)

0     2023-07-04
1     2023-07-14
2     2023-07-14
3     2023-07-14
4     2023-07-14
         ...    
95    2023-09-07
96    2023-09-12
97    2023-09-12
98    2023-09-12
99    2023-09-12
Name: sdate, Length: 100, dtype: object