In [2]:
# Number of scans
import os
from collections import defaultdict

DIR = "/scratch2/wenlongzhao/roostui/data/all_stations_v2_screened"

# Collect the (station, year) pair of every roost_labels file found in DIR.
# The station is the third "_"-separated part of the file name and the year is
# the first four characters of the fourth part.
valid_station_year = set()
for label_name in sorted(os.listdir(DIR)):
    if not label_name.startswith("roost_labels"):
        continue
    name_parts = label_name.split("_")
    valid_station_year.add((name_parts[2], name_parts[3][:4]))
# Sanity check on the number of distinct station-years in the directory.
assert len(valid_station_year) == 246

# Count scans per station and overall, using only "scans" files whose
# (station, year) also appears in valid_station_year.
n_scans = defaultdict(int)
all_n_scans = 0
for file in sorted(os.listdir(DIR)):
    if not file.startswith("scans"):
        continue
    # Station is the second "_"-separated part of the name; the year is the
    # first four characters of the third part.
    name_parts = file.split("_")
    if (name_parts[1], name_parts[2][:4]) not in valid_station_year:
        continue

    # Use a context manager so the handle is closed right away instead of
    # leaking one open file per scans file.
    with open(os.path.join(DIR, file), "r") as scan_file:
        lines = scan_file.readlines()

    n = 0
    for line in lines[1:]:  # skip the header row
        # Characters 4:6 of the second CSV field are parsed as a number and
        # must be < 11 — presumably the month of a YYYYMMDD-style key; TODO confirm.
        # NOTE(review): the detection statistics cell keeps months 6..10 only,
        # while this keeps everything below 11 — confirm the two are meant to match.
        if int(line.split(",")[1][4:6]) < 11:
            n += 1
    n_scans[name_parts[1]] += n
    all_n_scans += n

for station in n_scans:
    print(station, n_scans[station])
print("Total", all_n_scans)

KAPX 51390
KBUF 55402
KCLE 56197
KDLH 49599
KDTX 51024
KGRB 47249
KGRR 52060
KIWX 53534
KLOT 52761
KMKX 50476
KMQT 49611
KTYX 43483
Total 612786


In [5]:
# fields are:
#       0 track_id, 1 filename, 2 from_sunrise, 3 det_score, 4 x, 5 y, 6 r, 7 lon, 8 lat, 9 radius,
#       10 local_time, 11 station, 12 date, 13 time, 14 local_date, 15 length,
#       16 tot_score, 17 avg_score, 18 viewed, 19 user_labeled, 20 label, 21 original_label,
#       22 notes: 'LARGE', 'nr', 'long', 'large', 'rn', 'shrinks', 'shrink',
#       23 day_notes: 'pap', 'psp', 'weather', '2', 'ap', 'AP', 'miss', 'cluster', 'clusters',
import copy

# Years a roost_labels file is allowed to belong to (2000 through 2020).
YEARS = range(2000, 2021)

# Template of per-station statistics; it is deep-copied for each station.
#   * plain integer counters for system-predicted detections / tracks
#   * [set, count] pairs for every human screening label
INIT = {
    **{
        counter: 0
        for counter in (
            "high_conf_dets",
            "other_dets",
            "other_dets_screened_as_roosts",
            "high_conf_tracks",
            "other_tracks",
            "other_tracks_screened_as_roosts",
        )
    },
    **{
        label: [set(), 0]
        for label in (
            "swallow-roost",
            "weather-roost",
            "AP-roost",
            "unknown-noise-roost",
            "duplicate",
            "bad-track",
            "non-roost",
        )
    },
}

# Aggregate detection / track statistics for every station plus an "ALL"
# bucket, reading each roost_labels file in DIR once.
stations = {"ALL": copy.deepcopy(INIT)}
for file in sorted(os.listdir(DIR)):
    if not file.startswith("roost_labels"):
        continue

    # Station is the third "_"-separated part of the file name; the year is
    # the first four characters of the fourth part.
    name_parts = file.split("_")
    station = name_parts[2]
    year = name_parts[3][:4]
    assert int(year) in YEARS
    if station not in stations:
        stations[station] = copy.deepcopy(INIT)
    # Every tally below is recorded twice: for this station and for "ALL".
    buckets = (stations[station], stations["ALL"])

    # Read all rows after the header; the context manager closes the handle
    # instead of leaking one open file per label file.
    with open(os.path.join(DIR, file), "r") as label_file:
        annotations = [row.strip().split(",") for row in label_file.readlines()[1:]]

    # NOTE(review): `tracks` is reset per file while the per-label track-id sets
    # live per station / "ALL"; this assumes track ids are unique across files
    # — confirm.
    tracks = {}
    for annotation in annotations:
        month = int(annotation[10][4:6])  # check local month
        if month < 6 or month > 10:
            continue

        # system predicted
        # per-track state: sum score, num dets have score, num dets,
        #                  has det w/ 0.5 score, screened as roosts
        track_stats = tracks.setdefault(annotation[0], [0, 0, 0, False, False])
        annotation[3] = float(annotation[3])  # det_score
        if annotation[3] > 0 and annotation[3] < 1:
            track_stats[0] += annotation[3]  # sum score
            track_stats[1] += 1  # num dets have score
        track_stats[2] += 1  # num dets
        if annotation[3] >= 0.5:  # has det w/ 0.5 score
            track_stats[3] = True

        # human screened: a "weather" / "ap" day note (case-insensitive)
        # overrides the label and files the detection under "non-roost".
        if annotation[23].lower() in ["weather", "ap"]:
            label = "non-roost"
        else:
            label = annotation[20]
        for bucket in buckets:
            bucket[label][0].add(annotation[0])  # distinct track ids
            bucket[label][1] += 1  # number of detections
        if label in ["swallow-roost", "weather-roost", "AP-roost", "unknown-noise-roost"]:
            track_stats[4] = True

    # system predicted
    for track_id, track in tracks.items():
        # A track with no detection scored strictly between 0 and 1 has no
        # average score; treat it as 0.0 so it falls into the "other" group
        # rather than raising ZeroDivisionError.
        avg_score = track[0] / track[1] if track[1] else 0.0
        # High confidence: average score >= 0.15, at least 2 detections, and
        # at least one detection with score >= 0.5.
        if avg_score >= 0.15 and track[2] >= 2 and track[3]:
            for bucket in buckets:
                bucket["high_conf_dets"] += track[2]
                bucket["high_conf_tracks"] += 1
        else:
            for bucket in buckets:
                bucket["other_dets"] += track[2]
                bucket["other_tracks"] += 1
                if track[4]:
                    bucket["other_dets_screened_as_roosts"] += track[2]
                    bucket["other_tracks_screened_as_roosts"] += 1

# First table: system-predicted detection / track counters, one row per
# station, columns separated by " & ".
for station in stations:
    station_stats = stations[station]
    print(
        station,
        " & ".join(
            str(station_stats[counter])
            for counter in (
                "high_conf_dets",
                "other_dets",
                "other_dets_screened_as_roosts",
                "high_conf_tracks",
                "other_tracks",
                "other_tracks_screened_as_roosts",
            )
        ),
    )
print()
# Second table: for every screening label, "<detections>/<distinct tracks>".
for station in stations:
    station_stats = stations[station]
    print(
        station,
        " & ".join(
            f"{station_stats[label][1]}/{len(station_stats[label][0])}"
            for label in (
                "swallow-roost",
                "weather-roost",
                "AP-roost",
                "unknown-noise-roost",
                "duplicate",
                "bad-track",
                "non-roost",
            )
        ),
    )

ALL 140036 & 372594 & 8266 & 31313 & 230088 & 3603
KAPX 3124 & 16125 & 228 & 738 & 9502 & 113
KBUF 15433 & 42754 & 1309 & 3172 & 26092 & 534
KCLE 22459 & 42774 & 1751 & 4792 & 25735 & 721
KDLH 4817 & 18483 & 368 & 1129 & 11617 & 202
KDTX 19349 & 35309 & 1001 & 3745 & 19707 & 403
KGRB 14106 & 35806 & 581 & 3259 & 20599 & 240
KGRR 10463 & 29320 & 1066 & 2538 & 18624 & 449
KIWX 16447 & 45410 & 1042 & 4138 & 29832 & 493
KLOT 9113 & 41442 & 309 & 2462 & 28083 & 179
KMKX 14957 & 42049 & 289 & 3356 & 26047 & 123
KMQT 1737 & 9766 & 9 & 369 & 6042 & 7
KTYX 8031 & 13356 & 313 & 1615 & 8208 & 139

ALL 55362/13860 & 2534/477 & 480/100 & 6244/1191 & 301/112 & 4440/614 & 443269/245047
KAPX 1040/311 & 14/1 & 18/4 & 22/6 & 5/3 & 32/7 & 18118/9908
KBUF 6669/1628 & 384/79 & 72/11 & 1129/156 & 90/30 & 334/45 & 49509/27315
KCLE 12863/3017 & 800/151 & 53/12 & 903/155 & 46/16 & 585/82 & 49983/27094
KDLH 2249/679 & 52/15 & 54/13 & 136/30 & 13/6 & 49/10 & 20747/11993
KDTX 9568/1990 & 729/117 & 81/12 & 578/95 