## Finding UTIG data files

This notebook is a starting point for discovering what data files exist.

In [1]:
# Enable IPython's autoreload extension so edits to imported project modules
# (e.g. utig_radar_loading.file_util) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import glob
import os
from pathlib import Path

from utig_radar_loading import file_util

In [3]:
# Configuration for building / loading the index of UTIG data files.
use_cache = True  # forwarded below as read_cache=
cache_dir = "outputs/file_index.csv"  # NOTE: despite the name, this is the path of a CSV file, not a directory
base_path = "/kucresis/scratch/data/UTIG"  # root of the UTIG data tree (machine-specific absolute path)

# Load the file index; with read_cache=True the run shown here reads it back from the CSV cache.
# NOTE(review): presumably the index is rebuilt by scanning base_path when the cache is not read -- confirm in file_util.
df_files = file_util.load_file_index_df(base_path, cache_dir, read_cache=use_cache)

# Display the index: in the rendered output, one row per file with the path split into numbered component columns.
df_files

Reading from cache file outputs/file_index.csv


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,/,kucresis,scratch,data,UTIG,UTIG2,orig,xlob,AMY,JKB2u,X62c,RADnh5,xds.gz,
1,/,kucresis,scratch,data,UTIG,UTIG2,orig,xlob,AMY,JKB2u,Y164b,RADnh5,xds.gz,
2,/,kucresis,scratch,data,UTIG,UTIG2,orig,xlob,AMY,JKB2u,Y174b,RADnh5,xds.gz,
3,/,kucresis,scratch,data,UTIG,UTIG2,orig,xlob,AMY,JKB2u,Y210b,RADnh5,xds.gz,
4,/,kucresis,scratch,data,UTIG,UTIG2,orig,xlob,AMY,JKB2u,Y226b,RADnh5,xds.gz,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21998,/,kucresis,scratch,data,UTIG,UTIG1,targ,pcor,WSB,MKB2l,R24a,GPSnc1,xds.gz,
21999,/,kucresis,scratch,data,UTIG,UTIG1,targ,pcor,WSB,MKB2l,R24a,LASrz1,xds.gz,
22000,/,kucresis,scratch,data,UTIG,UTIG1,targ,pcor,WSB,MKB2l,R24a,MAGgm2,xds.gz,
22001,/,kucresis,scratch,data,UTIG,UTIG1,targ,pcor,WSB,MKB2l,R24a,MAGim1,xds.gz,


In [4]:
# Turn the raw path-component index into a labelled table. Per the rendered output the
# columns are dataset, processing_level, processing_type, prj, set, trn, stream, full_path,
# plus an `artifact` column holding (processing_level, processing_type, stream).
df_artifacts = file_util.create_artifacts_df(df_files)
df_artifacts

Unnamed: 0,dataset,processing_level,processing_type,prj,set,trn,stream,full_path,artifact
0,UTIG2,orig,xlob,AMY,JKB2u,X62c,RADnh5,/kucresis/scratch/data/UTIG/UTIG2/orig/xlob/AM...,"(orig, xlob, RADnh5)"
1,UTIG2,orig,xlob,AMY,JKB2u,Y164b,RADnh5,/kucresis/scratch/data/UTIG/UTIG2/orig/xlob/AM...,"(orig, xlob, RADnh5)"
2,UTIG2,orig,xlob,AMY,JKB2u,Y174b,RADnh5,/kucresis/scratch/data/UTIG/UTIG2/orig/xlob/AM...,"(orig, xlob, RADnh5)"
3,UTIG2,orig,xlob,AMY,JKB2u,Y210b,RADnh5,/kucresis/scratch/data/UTIG/UTIG2/orig/xlob/AM...,"(orig, xlob, RADnh5)"
4,UTIG2,orig,xlob,AMY,JKB2u,Y226b,RADnh5,/kucresis/scratch/data/UTIG/UTIG2/orig/xlob/AM...,"(orig, xlob, RADnh5)"
...,...,...,...,...,...,...,...,...,...
21998,UTIG1,targ,pcor,WSB,MKB2l,R24a,GPSnc1,/kucresis/scratch/data/UTIG/UTIG1/targ/pcor/WS...,"(targ, pcor, GPSnc1)"
21999,UTIG1,targ,pcor,WSB,MKB2l,R24a,LASrz1,/kucresis/scratch/data/UTIG/UTIG1/targ/pcor/WS...,"(targ, pcor, LASrz1)"
22000,UTIG1,targ,pcor,WSB,MKB2l,R24a,MAGgm2,/kucresis/scratch/data/UTIG/UTIG1/targ/pcor/WS...,"(targ, pcor, MAGgm2)"
22001,UTIG1,targ,pcor,WSB,MKB2l,R24a,MAGim1,/kucresis/scratch/data/UTIG/UTIG1/targ/pcor/WS...,"(targ, pcor, MAGim1)"


In [5]:
# Collapse the per-file artifact table to one row per (dataset, prj, set, trn);
# every remaining column becomes a list of that group's values.
df_grouped = df_artifacts.groupby(['dataset', 'prj', 'set', 'trn']).agg(list).reset_index()

# Flag each group from its stream names directly. The previous version searched
# str(list_of_artifact_tuples) for a substring, which would also match 'RAD' or
# 'GPSnc1' appearing inside any other tuple field or inside a longer stream name.
# Radar streams are identified by the 'RAD' prefix (e.g. RADnh3, RADnh5).
df_grouped['has_radar'] = df_grouped['stream'].apply(
    lambda streams: any(str(s).startswith('RAD') for s in streams)
)
# Only GPSnc1 / GPStp2 count as GPS here, exactly as before; other GPS* streams
# (e.g. GPSkc1) are deliberately not counted.
df_grouped['has_gps'] = df_grouped['stream'].apply(
    lambda streams: any(s in ('GPSnc1', 'GPStp2') for s in streams)
)

df_grouped

Unnamed: 0,dataset,prj,set,trn,processing_level,processing_type,stream,full_path,artifact,has_radar,has_gps
0,UTIG1,AHL,JKB2n,R50a,"[orig, targ, targ, targ, targ, targ, targ, tar...","[xlob, pcor, pcor, pcor, pcor, pcor, pcor, pco...","[RADnh3, AQNnr1, AVNcp1, AVNcp2, AVNnt2, GPSap...",[/kucresis/scratch/data/UTIG/UTIG1/orig/xlob/A...,"[(orig, xlob, RADnh3), (targ, pcor, AQNnr1), (...",True,True
1,UTIG1,ALG,JKB0a,X01a,"[targ, targ, targ, targ, targ, targ, targ, tar...","[pcor, pcor, pcor, pcor, pcor, pcor, pcor, pco...","[AVNcp1, AVNcp2, AVNiz1, GPSap3, GPSkc1, GPStp...",[/kucresis/scratch/data/UTIG/UTIG1/targ/pcor/A...,"[(targ, pcor, AVNcp1), (targ, pcor, AVNcp2), (...",False,True
2,UTIG1,ALG,JKB0a,X02a,"[targ, targ, targ, targ, targ, targ, targ, tar...","[pcor, pcor, pcor, pcor, pcor, pcor, pcor, pco...","[AVNcp1, AVNcp2, AVNiz1, GPSap3, GPSkc1, GPStp...",[/kucresis/scratch/data/UTIG/UTIG1/targ/pcor/A...,"[(targ, pcor, AVNcp1), (targ, pcor, AVNcp2), (...",False,True
3,UTIG1,ALG,JKB0a,X03a,"[targ, targ, targ, targ, targ, targ, targ, tar...","[pcor, pcor, pcor, pcor, pcor, pcor, pcor, pco...","[AVNcp1, AVNcp2, AVNiz1, GPSap3, GPSkc1, GPStp...",[/kucresis/scratch/data/UTIG/UTIG1/targ/pcor/A...,"[(targ, pcor, AVNcp1), (targ, pcor, AVNcp2), (...",False,True
4,UTIG1,ALG,JKB0a,X04a,"[targ, targ, targ, targ, targ, targ, targ, tar...","[pcor, pcor, pcor, pcor, pcor, pcor, pcor, pco...","[AVNcp1, AVNcp2, AVNiz1, GPSap3, GPSkc1, GPStp...",[/kucresis/scratch/data/UTIG/UTIG1/targ/pcor/A...,"[(targ, pcor, AVNcp1), (targ, pcor, AVNcp2), (...",False,True
...,...,...,...,...,...,...,...,...,...,...,...
2627,UTIG2,TOT3,JKB2s,X13a,"[orig, targ, targ, targ, targ, targ, targ, tar...","[xlob, pcor, pcor, pcor, pcor, pcor, pcor, pco...","[RADnh5, AQNnr1, AVNcp1, AVNcp2, AVNnt2, GPSkc...",[/kucresis/scratch/data/UTIG/UTIG2/orig/xlob/T...,"[(orig, xlob, RADnh5), (targ, pcor, AQNnr1), (...",True,True
2628,UTIG2,TOT3,JKB2s,X15a,"[orig, targ, targ, targ, targ, targ, targ, tar...","[xlob, pcor, pcor, pcor, pcor, pcor, pcor, pco...","[RADnh5, AQNnr1, AVNcp1, AVNcp2, AVNnt2, GPSkc...",[/kucresis/scratch/data/UTIG/UTIG2/orig/xlob/T...,"[(orig, xlob, RADnh5), (targ, pcor, AQNnr1), (...",True,True
2629,UTIG2,TOT3,JKB2s,Y07a,"[orig, targ, targ, targ, targ, targ, targ, tar...","[xlob, pcor, pcor, pcor, pcor, pcor, pcor, pco...","[RADnh5, AQNnr1, AVNcp1, AVNcp2, AVNnt2, GPSkc...",[/kucresis/scratch/data/UTIG/UTIG2/orig/xlob/T...,"[(orig, xlob, RADnh5), (targ, pcor, AQNnr1), (...",True,True
2630,UTIG2,TOT3,JKB2s,Y10a,"[orig, targ, targ, targ, targ, targ, targ, tar...","[xlob, pcor, pcor, pcor, pcor, pcor, pcor, pco...","[RADnh5, AQNnr1, AVNcp1, AVNcp2, AVNnt2, GPSkc...",[/kucresis/scratch/data/UTIG/UTIG2/orig/xlob/T...,"[(orig, xlob, RADnh5), (targ, pcor, AQNnr1), (...",True,True


In [6]:
# Per-dataset tally of grouped rows that carry GPS, radar, or both.
for dataset in ('UTIG1', 'UTIG2'):
    df_ds = df_grouped.loc[df_grouped['dataset'] == dataset]
    gps_mask = df_ds['has_gps']
    radar_mask = df_ds['has_radar']
    report = [
        f"== {dataset} ==",
        f"Total segments: {len(df_ds)}",
        f"Segments with GPS: {gps_mask.sum()}",
        f"Segments with radar: {radar_mask.sum()}",
        f"Segments with GPS and radar: {len(df_ds[gps_mask & radar_mask])}",
        "",  # blank separator line between datasets
    ]
    for line in report:
        print(line)

== UTIG1 ==
Total segments: 2443
Segments with GPS: 857
Segments with radar: 315
Segments with GPS and radar: 315

== UTIG2 ==
Total segments: 189
Segments with GPS: 189
Segments with radar: 161
Segments with GPS and radar: 161



In [7]:
# Spot-check one transect: show every artifact row recorded for ASB / JKB1a / GL0092a.
is_target_transect = (
    (df_artifacts['prj'] == 'ASB')
    & (df_artifacts['set'] == 'JKB1a')
    & (df_artifacts['trn'] == 'GL0092a')
)
df_artifacts[is_target_transect]

Unnamed: 0,dataset,processing_level,processing_type,prj,set,trn,stream,full_path,artifact
2382,UTIG1,orig,xlob,ASB,JKB1a,GL0092a,GPSkc1,/kucresis/scratch/data/UTIG/UTIG1/orig/xlob/AS...,"(orig, xlob, GPSkc1)"


In [8]:
# Restrict to the UTIG2 dataset and list the distinct stream types found there.
df_utig2 = df_artifacts.query("dataset == 'UTIG2'")
df_utig2['stream'].unique()

array(['RADnh5', 'AQNnr1', 'AQNnr3', 'GPSkc1', 'GPSnc1', 'GPSnc2',
       'LASrz1', 'MAGgm2', 'MAGim1', 'AVNcp1', 'AVNcp2', 'AVNnt2'],
      dtype=object)

In [9]:
# All of the data that's missing a parsable GPS stream packet type has only GPSkc1 (which supplies timing but not position)

# def any_gps_stream_except_kc1(l):
#     for stream_type in l:
#         if stream_type == 'GPSkc1':
#             continue
#         elif 'GPS' in stream_type:
#             return True
#     return False

# ((df_grouped[(~df_grouped['has_gps'])])['stream'].apply(lambda s: any_gps_stream_except_kc1(s))).sum()

In [10]:
# glob.glob(f"{base_path}/UTIG2/**/Y174b/RAD*/*", recursive=True)