In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Define file paths
ais_tracks_path = '../data/tracks_ais.csv'
radar_tracks_path = '../data/tracks_radar.csv'
radar_detections_path = '../data/detections_radar.csv'

ais_tracks = pd.read_csv(ais_tracks_path)
radar_tracks = pd.read_csv(radar_tracks_path)
radar_detections = pd.read_csv(radar_detections_path)

In [2]:
features_subset = ['min_speed', 'max_speed', 'avg_speed', 'curviness', 'heading_mean', 'heading_std', 
 'turning_mean', 'turning_std', 'duration_z', 'distance', 'distance_o']

In [53]:
#datetime conversion
radar_detections['datetime'] = pd.to_datetime(radar_detections['cdate'] + ' ' + radar_detections['ctime'])

### Speed

In [54]:
import numpy as np

def distance_between_points(lat1, lon1, lat2, lon2, r = 6371):
    """
    Calculate the distance between two points on the Earth's surface
    using the Haversine formula.

    Args:
        lat1 (float): Latitude of the first point
        lon1 (float): Longitude of the first point
        lat2 (float): Latitude of the second point
        lon2 (float): Longitude of the second point
        r (float): Radius of the Earth in kilometers
    """
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)
    a = np.sin(delta_phi / 2) ** 2 + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(delta_lambda / 2) ** 2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return r * c

In [110]:
def compute_track_stats(group, KMPS_TO_KNOTS = 1943.84449):
    group = group.sort_values(by='datetime').copy()
    
    group['latitude_prev'] = group['latitude'].shift(1)
    group['longitude_prev'] = group['longitude'].shift(1)
    group['time_prev'] = group['datetime'].shift(1)
    group['distance_travelled'] = distance_between_points(
        group['latitude_prev'], group['longitude_prev'],
        group['latitude'], group['longitude']
    )

    group['time_diff'] = (group['datetime'] - group['time_prev']).dt.total_seconds()
    group['instant_speed'] = group['distance_travelled'] / group['time_diff']  # in km/s

    total_distance = group['distance_travelled'].sum()
    total_time = (group['datetime'].iloc[-1] - group['datetime'].iloc[0]).total_seconds()

    if total_time > 0:
        avg_speed_kmps = total_distance / total_time
    else:
        avg_speed_kmps = 0

    avg_speed = avg_speed_kmps * KMPS_TO_KNOTS
    max_speed = group['instant_speed'].max() * KMPS_TO_KNOTS
    min_speed = group['instant_speed'].min() * KMPS_TO_KNOTS

    return pd.Series({
        'assoc_id': group['assoc_id'].iloc[0],
        'avg_speed_knots': avg_speed,
        'max_speed_knots': max_speed,
        'min_speed_knots': min_speed
    })


In [96]:
common_ids = set(radar_tracks['id_track']) & set(radar_detections['assoc_id'])
common_radar_detections = radar_detections[radar_detections['assoc_id'].isin(common_ids)]
common_radar_detections.head(3)

Unnamed: 0,id_detect,id_track,id_site,id_m2,source,speed,course,assoc_str,assoc_id,confidence,cdate,ctime,longitude,latitude,datetime
0,1313152737,29941750,10,83-0702-1140,radar,0.4,66.0,20,29941830,0.996607,2023-07-02,11:40:11,-117.302575,32.81512,2023-07-02 11:40:11
1,1313152739,29941750,10,83-0702-1140,radar,0.5,46.9,20,29941830,0.996607,2023-07-02,11:40:17,-117.302553,32.815148,2023-07-02 11:40:17
2,1313152741,29941750,10,83-0702-1140,radar,0.5,53.3,20,29941830,0.996607,2023-07-02,11:40:24,-117.302543,32.81515,2023-07-02 11:40:24


In [116]:
#testing the distance function
track_1 = radar_detections[radar_detections['id_track'] == 28339098].copy()
track_1[['id_track', 'assoc_id']]

Unnamed: 0,id_track,assoc_id


In [105]:
track_1.loc[:, 'longitude_prev'] = track_1 ['longitude'].shift(1)
track_1.loc[:, 'latitude_prev'] = track_1 ['latitude'].shift(1)
track_1.loc[:, 'distance_travelled'] = distance_between_points(track_1['latitude_prev'], track_1['longitude_prev'], track_1['latitude'], track_1['longitude'])
track_1.loc[:, 'time_prev'] = track_1['datetime'].shift(1)
track_1.loc[:, 'time_diff'] = (track_1['datetime'] - track_1['time_prev']).dt.total_seconds()
track_1.loc[:, 'instant_speed'] = track_1['distance_travelled'] / track_1['time_diff']

total_distance = track_1['distance_travelled'].sum()
total_time = (track_1['datetime'].iloc[-1] - track_1['datetime'].iloc[0]).total_seconds() # in seconds
avg_speed_kmps = total_distance / total_time
avg_speed = avg_speed_kmps * 1943.84449 #in knots
max_speed = track_1['instant_speed'].max() * 1943.84449
min_speed = track_1['instant_speed'].min() * 1943.84449
print(total_distance, total_time)
print(avg_speed, max_speed, min_speed)

1.4641396804438611 755.0
3.7696156959220666 10.989409736102735 0.07204854856538934


In [99]:
debug_columns = ['id_track', 'datetime', 'longitude', 'latitude', 'longitude_prev', 'latitude_prev', 'distance_travelled', 'time_prev', 'time_diff', 'instant_speed']
track_1[debug_columns]

Unnamed: 0,id_track,datetime,longitude,latitude,longitude_prev,latitude_prev,distance_travelled,time_prev,time_diff,instant_speed
0,29941750,2023-07-02 11:40:11,-117.302575,32.815120,,,,NaT,,
1,29941750,2023-07-02 11:40:17,-117.302553,32.815148,-117.302575,32.815120,0.003731,2023-07-02 11:40:11,6.0,0.000622
2,29941750,2023-07-02 11:40:24,-117.302543,32.815150,-117.302553,32.815148,0.000961,2023-07-02 11:40:17,7.0,0.000137
3,29941750,2023-07-02 11:40:30,-117.302525,32.815115,-117.302543,32.815150,0.004240,2023-07-02 11:40:24,6.0,0.000707
4,29941750,2023-07-02 11:40:36,-117.302515,32.815090,-117.302525,32.815115,0.002933,2023-07-02 11:40:30,6.0,0.000489
...,...,...,...,...,...,...,...,...,...,...
115,29941750,2023-07-02 11:52:22,-117.313735,32.820325,-117.313532,32.820215,0.022571,2023-07-02 11:52:15,7.0,0.003224
116,29941750,2023-07-02 11:52:28,-117.313870,32.820398,-117.313735,32.820325,0.015001,2023-07-02 11:52:22,6.0,0.002500
117,29941750,2023-07-02 11:52:34,-117.314072,32.820508,-117.313870,32.820398,0.022492,2023-07-02 11:52:28,6.0,0.003749
118,29941750,2023-07-02 11:52:40,-117.314207,32.820582,-117.314072,32.820508,0.015061,2023-07-02 11:52:34,6.0,0.002510


In [103]:
ais_tracks[ais_tracks['id_track'] == 29941830]

Unnamed: 0,id_track,id_site,id_m2,source,duration,alarm,min_speed,max_speed,avg_speed,curviness,...,dest,eta_month,eta_day,eta_hour,eta_minute,type_m2,sdate,stime,ldate,ltime
371,29941830,10,338417665-0702-1143,ais,2602.0,0,2.6,27.4,16.764286,2.066154,...,,,,,,class_b,2023-07-02,11:43:18,2023-07-02,12:26:40


In [111]:
summary_df = common_radar_detections.groupby('id_track').apply(compute_track_stats).reset_index()

  summary_df = common_radar_detections.groupby('id_track').apply(compute_track_stats).reset_index()


In [126]:
comparison = pd.merge(radar_tracks, summary_df, left_on='id_track', right_on='assoc_id', how='inner')
comparison = comparison[['id_track_x','min_speed_knots', 'max_speed_knots', 'avg_speed_knots', 'min_speed', 'max_speed', 'avg_speed']]

In [128]:
comparison.head(20)

Unnamed: 0,id_track_x,min_speed_knots,max_speed_knots,avg_speed_knots,min_speed,max_speed,avg_speed
0,30050002,4.801705,12.257361,7.76553,2.0,7.6,6.756667
1,30050213,0.140966,10.783325,4.926591,0.0,7.3,4.527027
2,30151672,2.262837,11.380435,6.978428,0.2,7.4,4.495652
3,30543761,6.914489,17.547641,11.530284,0.0,12.0,9.667442
4,31267344,5.016439,24.535403,9.859016,8.6,10.1,9.444615
5,31323782,3.842754,37.295028,12.113187,4.1,13.7,11.056923
6,32411282,4.620737,47.907478,10.057707,1.9,10.9,9.195455
7,33292288,0.418002,24.654663,5.129777,4.5,5.5,4.9375
8,34794064,2.067787,17.24343,8.91247,8.3,9.4,8.735821
9,34861018,1.504141,26.969377,7.351334,6.7,7.7,7.033333
