### Data Combination and Filtering

Output: `full_non_transit_stopped_radar_labels.csv`, written to the `data/labels` folder.

Combined labels from two sources: radar tracks that were not predicted as transit or stopped with a confidence score >= 0.5, and tagged tracks whose manually assigned activity is neither transit nor stopped.

Currently have 3043 rows

#### Manual Assignment Logic

* If activity == lobster/seine/trawl/hook, assign type_m2_agg = fishing_boat for tagged tracks
* For radar tracks: infer on activity based on type might be a bit problematic ...



In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tqdm
import sys
import os

# Put the parent directory on the import path so that modules living one
# level above this notebook can be imported from later cells.
sys.path.append(os.path.abspath('..'))

# --- Detection tables (preprocessed) ---
radar_detections = pd.read_csv(
    '../../data/cleaned_data/preprocessed_radar_detections.csv'
)
tagged_detections = pd.read_csv(
    '../../data/cleaned_data/preprocessed_tagged_detections.csv'
)

# --- Label tables ---
# Activity labels for tagged tracks.
activity_labels_from_tagged = pd.read_csv(
    '../../data/labels/activity_labels_from_tagged.csv'
)
# Inferred activity labels for radar tracks.
inferred_activity_label = pd.read_csv(
    '../../data/labels/radar_activity_labels_inferred.csv'
)
# Vessel type labels (file name suggests they come from AIS).
ais_type_labels = pd.read_csv(
    '../../data/labels/ais_type_labels.csv'
)

In [6]:
# A prediction counts as confident when its confidence score is >= 0.5.
# Tracks confidently predicted as 'transit' or 'stopped' are excluded;
# every other inferred-label row is kept.
is_confident = inferred_activity_label['activity_confidence_score'] >= 0.5
is_transit = inferred_activity_label['activity_inferred'] == 'transit'
is_stopped = inferred_activity_label['activity_inferred'] == 'stopped'

transit_inferred = inferred_activity_label[is_transit & is_confident]
stopped_inferred = inferred_activity_label[is_stopped & is_confident]

# Track ids that have at least one confident transit or stopped prediction.
transit_stopped_ids = set(transit_inferred['id_track']).union(
    stopped_inferred['id_track']
)

# Keep only rows whose track id is not in the excluded set.
keep_mask = ~inferred_activity_label['id_track'].isin(transit_stopped_ids)
non_transit_stopped_inferred = inferred_activity_label[keep_mask]

print(f'Total number of transit tracks: {len(transit_inferred)}')
print(f'Total number of non-transit & non_stopped tracks: {len(non_transit_stopped_inferred)}')


Total number of transit tracks: 12885
Total number of non-transit & non_stopped tracks: 1299


In [7]:
# Radar labels for the non-transit / non-stopped tracks: inner-join the type
# labels with the filtered inferred-activity rows on id_track, then keep the
# first row for each id_track.
filtered_radar_labels = (
    ais_type_labels
    .merge(non_transit_stopped_inferred, on='id_track', how='inner')
    .drop_duplicates(subset=['id_track'])
)

In [13]:
# Tagged tracks whose activity is neither 'transit' nor 'stopped'.
excluded_activities = ['transit', 'stopped']
non_transit_stopped_tagged = activity_labels_from_tagged[
    ~activity_labels_from_tagged['activity'].isin(excluded_activities)
]


In [14]:
# Stack the tagged labels on top of the radar labels with a fresh 0..n-1 index.
# Columns present in only one source are filled with NaN for the other.
label_frames = [non_transit_stopped_tagged, filtered_radar_labels]
full_non_transit_stopped_labels = pd.concat(label_frames, ignore_index=True)
full_non_transit_stopped_labels

Unnamed: 0,id_track,activity,type_m2,activity_inferred,activity_confidence_score
0,30278317,drifting,,,
1,31250348,hook,,,
2,32095898,drifting,,,
3,32096807,drifting,,,
4,32099745,drifting,,,
...,...,...,...,...,...
1875,39018416,,pleasure_craft,drifting,0.808124
1876,38170241,,pleasure_craft,drifting,0.776164
1877,39009659,,pleasure_craft,lobster,0.884421
1878,36223372,,passenger_ship,hook,0.997884


In [15]:
from core.DICT import TYPES2AGG

# Derive the aggregated type column by mapping type_m2 through TYPES2AGG
# (presumably a mapping from raw vessel type to aggregated category; confirm
# in core.DICT). Rows with a missing type_m2 stay missing.
full_non_transit_stopped_labels['type_m2_agg'] = (
    full_non_transit_stopped_labels['type_m2'].map(TYPES2AGG)
)

# Row count for each aggregated type.
full_non_transit_stopped_labels['type_m2_agg'].value_counts()


type_m2_agg
class_b           579
other             329
passenger_ship    221
tug/tow           102
cargo/tanker       30
military_ship      22
fishing_boat       16
Name: count, dtype: int64

In [16]:
# Infer the aggregated type from the activity: any row whose `activity` is one
# of the fishing activities below is labelled as a fishing boat. Only the
# `activity` column is checked here, not `activity_inferred`.
fishing_activities = ['lobster', 'seine', 'trawl', 'hook']
is_fishing = full_non_transit_stopped_labels['activity'].isin(fishing_activities)
full_non_transit_stopped_labels.loc[is_fishing, 'type_m2_agg'] = 'fishing_boat'


In [18]:
# Persist the combined label table as CSV, without the index column.
full_non_transit_stopped_labels.to_csv(
    '../../data/labels/full_non_transit_stopped_radar_labels.csv',
    index=False,
)

In [19]:
# Sanity check: number of distinct track ids in the combined label table.
track_ids = full_non_transit_stopped_labels['id_track']
track_ids.nunique()

1880