## Function to see detection overlap in space and time

In [30]:
import os
import pickle
import pandas as pd
from glob import glob
from datetime import datetime, timedelta
from geopy.distance import geodesic

### station metadata list

In [41]:
station_metadata = [{"id": "J18B", "lat": 44.0, "lon": -125.5},
            {"id": "J10B", "lat": 43.3, "lon": -125.5},
            {"id": "G34B", "lat": 42.6, "lon": -125.2},
            {"id": "G26B", "lat": 41.9, "lon": -125.2},
            {"id": "J67C", "lat": 48.2, "lon": -127.1},
            {"id": "J26C", "lat": 44.7, "lon": -125.5},
            {"id": "J34C", "lat": 45.3, "lon": -125.4},
            {"id": "J51C", "lat": 46.8, "lon": -126.2},
            {"id": "J10D", "lat": 43.3, "lon": -125.5},
            {"id": "J26D", "lat": 44.7, "lon": -125.5},
            {"id": "J18D", "lat": 43.9, "lon": -125.5},
            {"id": "G34D", "lat": 42.6, "lon": -125.4},
            {"id": "G26D", "lat": 41.9, "lon": -125.3},
            {"id": "G18D", "lat": 41.3, "lon": -125.3},
            {"id": "NCBC", "lat": 48.4, "lon": -126.2},
            {"id": "NC89", "lat": 48.75, "lon": -126.9},
            {"id": "M01C", "lat": 49.2, "lon": -126.7},
            {"id": "J73C", "lat": 48.8, "lon": -126.2},
            {"id": "M02C", "lat": 48.3, "lon": -125.6},
            {"id": "M03C", "lat": 47.9, "lon": -125.1},
            {"id": "M04C", "lat": 47.6, "lon": -125.2},
            {"id": "J65C", "lat": 48.0, "lon": -125.1},
            {"id": "J59C", "lat": 47.5, "lon": -126.4},
            {"id": "J58C", "lat": 47.3, "lon": -125.5},
            {"id": "FN16C", "lat": 46.8, "lon": -125.5},
            {"id": "FN11C", "lat": 46.8, "lon": -125.1},
            {"id": "FN17C", "lat": 46.7, "lon": -125.0},
            {"id": "J49C", "lat": 46.4, "lon": -124.4},
            {"id": "M05C", "lat": 46.2, "lon": -124.9},
            {"id": "J42C", "lat": 45.9, "lon": -125.3},
            {"id": "J41C", "lat": 45.8, "lon": -124.5},
            {"id": "M06C", "lat": 45.5, "lon": -124.9},
            {"id": "J33B", "lat": 45.1, "lon": -124.6},
            {"id": "M07C", "lat": 44.9, "lon": -125.1},
            {"id": "M18B", "lat": 44.9, "lon": -125.0},
            {"id": "FC03D", "lat": 44.8, "lon": -124.7},
            {"id": "J25C", "lat": 44.5, "lon": -124.6},
            {"id": "J25B", "lat": 44.5, "lon": -124.6},
            {"id": "J25D", "lat": 44.5, "lon": -124.6},
            {"id": "M09B", "lat": 44.2, "lon": -125.1},
            {"id": "M08C", "lat": 44.1, "lon": -124.9},
            {"id": "J17D", "lat": 43.8, "lon": -124.6},
            {"id": "M10B", "lat": 43.6, "lon": -125.0},
            {"id": "M13D", "lat": 43.6, "lon": -125.0},
            {"id": "J09B", "lat": 43.2, "lon": -124.7},
            {"id": "M11B", "lat": 42.9, "lon": -125.0},
            {"id": "M14D", "lat": 42.6, "lon": -125.0},
            {"id": "G33D", "lat": 42.7, "lon": -124.8},
            {"id": "G33B", "lat": 42.5, "lon": -124.7},
            {"id": "M15D", "lat": 42.2, "lon": -124.9},
            {"id": "M12B", "lat": 42.2, "lon": -124.9},
            {"id": "G25B", "lat": 41.9, "lon": -124.6},
            {"id": "M13B", "lat": 41.7, "lon": -124.9},
            {"id": "M16D", "lat": 41.7, "lon": -124.8},
            {"id": "G18B", "lat": 41.3, "lon": -124.9},
            {"id": "G17D", "lat": 41.4, "lon": -124.4},
            {"id": "G17B", "lat": 41.3, "lon": -124.3},
            {"id": "M17D", "lat": 41.0, "lon": -124.6},
            {"id": "M14B", "lat": 41.0, "lon": -124.6},
            {"id": "BACME", "lat": 48.3, "lon": -126.1},
            {"id": "BACND", "lat": 48.3, "lon": -126.2},
            {"id": "CQS64", "lat": 48.7, "lon": -126.9},
            {"id": "FN13C", "lat": 47.0, "lon": -125.3},
            {"id": "FN12C", "lat": 46.9, "lon": -125.1},
            {"id": "FN14C", "lat": 47.0, "lon": -125.0},
            {"id": "FN10C", "lat": 46.9, "lon": -125.0},
            {"id": "FN08C", "lat": 46.9, "lon": -124.9},
            {"id": "FN09C", "lat": 46.8, "lon": -124.9},
            {"id": "FN07C", "lat": 46.9, "lon": -124.8},
            {"id": "FN06C", "lat": 46.9, "lon": -124.7},
            {"id": "FN04C", "lat": 46.9, "lon": -124.6},
            {"id": "FN03C", "lat": 46.9, "lon": -124.5},
            {"id": "FN05C", "lat": 46.9, "lon": -124.7},
            {"id": "J57C", "lat": 47.1, "lon": -124.5},
            {"id": "FN02C", "lat": 46.9, "lon": -124.4},
            {"id": "FN01C", "lat": 46.9, "lon": -124.3},
            {"id": "FN19C", "lat": 46.7, "lon": -124.4},
            {"id": "FN17C", "lat": 46.7, "lon": -125.0},
            {"id": "FN18C", "lat": 46.7, "lon": -124.7}]


### define functions

In [42]:
def extract_detections(path, peaks=1, threshold=100):
    with open(path, 'rb') as f:
        raw = pickle.load(f)

    flattened = []
    for entry in raw:
        if isinstance(entry, list) and len(entry) > 0 and isinstance(entry[0], tuple):
            flattened.append(entry)
        elif isinstance(entry, tuple):
            flattened.append(entry)

    filtered = [
        e for e in flattened
        if len(e) > 4
        and isinstance(e[2], (int, float)) and e[2] > peaks
        and isinstance(e[4], (int, float)) and e[4] > threshold
    ]
    return filtered


In [43]:
def match_detections_with_metadata(classifications_dir, station_metadata):
    raw_files = glob(os.path.join(classifications_dir, '*_raw.pickle'))
    matched = []

    for file_path in raw_files:
        station_id = os.path.basename(file_path).split('_')[0]
        detections = extract_detections(file_path)

        meta = next((s for s in station_metadata if s['id'] == station_id), None)
        if meta and detections:
            matched.append({
                'station': station_id,
                'metadata': meta,
                'detections': detections
            })

    return matched

In [46]:
results = match_detections_with_metadata("classifications", station_metadata)
display(results)

[{'station': 'M11B',
  'metadata': {'id': 'M11B', 'lat': 42.9, 'lon': -125.0},
  'detections': [[(2012-09-02T11:47:52.208283Z, 2012-09-02T11:48:22.213136Z),
    'M11B',
    2,
    0,
    963.3839308935479,
    1.027834629295476e-06],
   [(2012-09-02T11:51:52.247111Z, 2012-09-02T11:53:02.258436Z),
    'M11B',
    4,
    1,
    411.2363054811347,
    3.48478682587718e-07],
   [(2012-09-02T11:53:22.261672Z, 2012-09-02T11:54:22.271379Z),
    'M11B',
    3,
    0,
    1597.4249263888266,
    2.130247690658645e-07],
   [(2012-09-02T11:54:52.276233Z, 2012-09-02T11:56:42.294029Z),
    'M11B',
    3,
    0,
    780.725268097932,
    3.438987976418449e-07],
   [(2012-09-02T11:57:22.300501Z, 2012-09-02T11:57:52.305354Z),
    'M11B',
    2,
    0,
    238.85535584774914,
    4.46788316078964e-07],
   [(2012-09-02T12:07:52.402426Z, 2012-09-02T12:08:32.408897Z),
    'M11B',
    2,
    0,
    167.50790024625226,
    1.8482398829771424e-07],
   [(2012-09-02T12:38:52.703348Z, 2012-09-02T12:39:32.709820

In [51]:
buffer_minutes=10
distance_km=100
overlaps = []
buffer = pd.Timedelta(minutes=buffer_minutes)

for i, entry1 in enumerate(results):
    station1 = entry1['station']
    coords1 = (entry1['metadata']['lat'], entry1['metadata']['lon'])

    for det1 in entry1['detections']:
        try:
            start1 = pd.to_datetime(det1[0]) - buffer
            end1 = pd.to_datetime(det1[1]) + buffer
        except Exception:
            continue

        for j, entry2 in enumerate(results):
            if i == j:
                continue

            station2 = entry2['station']
            coords2 = (entry2['metadata']['lat'], entry2['metadata']['lon'])
            dist = geodesic(coords1, coords2).km
            if dist > distance_km:
                continue

            for det2 in entry2['detections']:
                try:
                    start2 = pd.to_datetime(det2[0]) - buffer
                    end2 = pd.to_datetime(det2[1]) + buffer
                except Exception:
                    continue

                if start1 <= end2 and start2 <= end1:
                    overlaps.append({
                        'station1': station1,
                        'station2': station2,
                        'start1': start1,
                        'end1': end1,
                        'start2': start2,
                        'end2': end2,
                        'distance_km': round(dist, 2)
                    })


{'station': 'FN04C',
 'metadata': {'id': 'FN04C', 'lat': 46.9, 'lon': -124.6},
 'detections': [[(2013-09-02T02:30:11.160099Z, 2013-09-02T02:31:11.167044Z),
   'FN04C',
   2,
   0,
   107.95706299327406,
   7.682205415353158e-07],
  [(2013-09-02T02:59:21.362669Z, 2013-09-02T02:59:51.366141Z),
   'FN04C',
   2,
   0,
   241.46548062880345,
   1.6946493798374608e-06],
  [(2013-09-02T05:58:02.603553Z, 2013-09-02T06:01:02.624389Z),
   'FN04C',
   3,
   0,
   115.01740741762515,
   2.3284987953607676e-06],
  [(2013-09-02T06:16:52.734355Z, 2013-09-02T06:19:32.752876Z),
   'FN04C',
   3,
   1,
   107.29619051716881,
   1.3801248732610198e-05],
  [(2013-09-02T06:28:42.816541Z, 2013-09-02T06:30:32.829274Z),
   'FN04C',
   2,
   0,
   223.88809876339238,
   2.018881070573816e-06],
  [(2013-09-02T20:42:18.744310Z, 2013-09-02T20:43:08.750098Z),
   'FN04C',
   3,
   1,
   306.223315667809,
   1.4717217967996553e-06],
  [(2013-09-03T16:58:57.194405Z, 2013-09-03T16:59:37.199035Z),
   'FN04C',
   2,
  

In [52]:
print(i)

21


In [33]:
def find_spatiotemporal_overlaps(results, buffer_minutes=10, distance_km=100):
    overlaps = []
    buffer = pd.Timedelta(minutes=buffer_minutes)

    for i, entry1 in enumerate(results):
        station1 = entry1['station']
        coords1 = (entry1['metadata']['lat'], entry1['metadata']['lon'])

        for det1 in entry1['detections']:
            try:
                start1 = pd.to_datetime(det1[0]) - buffer
                end1 = pd.to_datetime(det1[1]) + buffer
            except Exception:
                continue

            for j, entry2 in enumerate(results):
                if i == j:
                    continue

                station2 = entry2['station']
                coords2 = (entry2['metadata']['lat'], entry2['metadata']['lon'])
                dist = geodesic(coords1, coords2).km
                if dist > distance_km:
                    continue

                for det2 in entry2['detections']:
                    try:
                        start2 = pd.to_datetime(det2[0]) - buffer
                        end2 = pd.to_datetime(det2[1]) + buffer
                    except Exception:
                        continue

                    if start1 <= end2 and start2 <= end1:
                        overlaps.append({
                            'station1': station1,
                            'station2': station2,
                            'start1': start1,
                            'end1': end1,
                            'start2': start2,
                            'end2': end2,
                            'distance_km': round(dist, 2)
                        })

    return overlaps

In [44]:
results = match_detections_with_metadata("classifications", stations)
overlaps = find_spatiotemporal_overlaps(results)

print(f"Found {len(overlaps)} spatiotemporal overlaps.")

Found 0 spatiotemporal overlaps.
