In [1]:
import os
from pathlib import Path
import numpy as np
from tqdm.auto import tqdm
import pickle
from collections import defaultdict
import importlib
from matplotlib import pyplot as plt
from tqdm.auto import tqdm

from themachinethatgoesping.echosounders import kongsbergall, simradraw, index_functions
from themachinethatgoesping import pingprocessing as pp
from themachinethatgoesping import tools as ptools

In [2]:
# Show the installed themachinethatgoesping version (the call's output is
# displayed below the cell), so the generated test data can be traced back
# to the library build that produced it.
import themachinethatgoesping as tmtgp
tmtgp.version()

themachinethatgoesping
- version:       0.14.1

modules:
- tools_cppy     0.21.5
- tools          @PROJECT_VERSION@
- scripts        @PROJECT_VERSION@
- algorithms     0.5.4
- navigation     0.14.2
- echosounders_cppy 0.32.0
- echosounders   0.32.0
- pingprocessing_cppy 0.4.0
- pingprocessing @PROJECT_VERSION@
- gridding       @PROJECT_VERSION@


In [3]:
# Directory layout: the raw .all/.wcd files are searched below path_in,
# the generated unit-test files are written below path_out.
path_in = "./data_in/"
path_out = "../unittest_data"

# Make sure both directories exist before any cell reads from or writes to them.
for directory in (path_out, path_in):
    os.makedirs(directory, exist_ok=True)

## Explore and sort input data

In [4]:
# Number of pings kept from each selected file when the test data is written.
N = 20

# Folders below path_in that contain .all or .wcd files, in sorted order.
folders = sorted(
    index_functions.find_folders_with_files(path_in, [".all", ".wcd"], followlinks=True)
)

# List the folders that will be processed.
for folder in folders:
    print(folder)

Found 2447 files
./data_in/em2040/Meteor-M143/MBES/M143_EM122_Ukraine_raw
./data_in/em2040/Meteor-M143/MBES/M143_EM122_Ukraine_raw/Ukraine02_initial
./data_in/em2040/Meteor-M143/MBES/M143_EM122_raw
./data_in/em2040/Meteor-M143/MBES/M143_EM710_Ukraine_raw
./data_in/em2040/Meteor-M143/MBES/M143_EM710_raw
./data_in/em2040/alex
./data_in/em2040/em2040_Turbeams_1/TURBEAMS_20220318_W08
./data_in/em2040/em2040_Turbeams_1/TURBEAMS_220317_W05
./data_in/em2040/em2040_Turbeams_1/TURBEAMS_220319_MOW1
./data_in/em2040/em2040_Turbeams_3
./data_in/em2040/koen campaign 2/em2024/B2503_interferenceTest
./data_in/em2040/koen campaign 2/em2024/Campaign2305-turbeams-calibration
./data_in/em2040/koen campaign 2/em2024/em2040/C2305-ITEST-SEA
./data_in/em2040/koen campaign 2/em2024/em2040/C2305_DPTEST
./data_in/em2040/koen campaign 2/em2024/em2040/C2305_FCABRAL_PLUME1/00_RAW_TIDE_DRAUGHT_AC_DATA
./data_in/em2040/koen campaign 2/em2024/em2040/C2305_FCABRAL_PLUME2/00_RAW_TIDE_DRAUGHT_AC_DATA
./data_in/em2040/ko

In [5]:
# Create one small unit-test file per input folder and file type (.all / .wcd):
# for every folder, the file in the middle of the (time-sorted) file list is
# truncated after its first N pings and written to <path_out>/<prefix>/.
file_types = [
    # endings, prefix (output sub-folder), postfix (output file extension)
    (".all", "all", ".all"),
    (".wcd", "wcd", ".wcd"),
]

# The context manager closes the progress bar even if processing a file raises.
with tqdm(total=len(folders) * len(file_types), desc="Creating test data", unit="files") as prg:
    for input_path in folders:
        for endings, prefix, postfix in file_types:
            prg.set_postfix_str(f"{input_path} / {prefix}")

            prg.set_description("finding files")
            input_files = index_functions.find_files(input_path, endings, followlinks=True, verbose=False)

            # open and index files (re-using and afterwards updating the cached file index)
            prg.set_description("opening files")
            index = index_functions.load_index_files(input_files)
            fm = kongsbergall.KongsbergAllFileHandler_mapped(input_files, cached_index=index, show_progress=False)
            index_functions.update_index_files(fm.get_cached_file_index())

            # group the primary files per folder and sort each group by first time stamp
            prg.set_description("preparing files")

            interfaces = defaultdict(list)

            for interface in tqdm(fm.datagramdata_interface.per_primary_file(), delay=1):
                path = Path(interface.get_file_path())
                interfaces[path.parent].append(interface)

            for interfaces_ in tqdm(interfaces.values(), delay=1):
                interfaces_.sort(key=lambda x: x.get_timestamp_first())

            # split the pings per (primary) file path
            pings_per_file_path = pp.split_pings.by_file_path(fm.pings(), progress=False)

            path_out_ = f"{path_out}/{prefix}"
            os.makedirs(path_out_, exist_ok=True)

            prg.set_description("writing pings")
            # for each final folder in path_in
            for path, interfaces_ in tqdm(interfaces.items(), delay=1):
                # take the first N pings of the file in the middle of the folder
                middle_file_interface = interfaces_[len(interfaces_) // 2]
                file_path = middle_file_interface.get_file_path()

                # A file without pings has no entry (or an empty one) here; skip it
                # instead of aborting the whole run with a KeyError / IndexError.
                try:
                    pings = pings_per_file_path[file_path][:N]
                except KeyError:
                    pings = []
                if len(pings) == 0:
                    print(f" -skipping {file_path} (no pings found)")
                    continue

                # get the last timestamp of the Nth ping
                last_timestamp = pings[-1].file_data.get_timestamp_last()
                channel_ids = list(pp.split_pings.by_channel_id(pings).keys())

                # build the file name from the installation parameters of the selected file
                install_params = middle_file_interface.datagrams("InstallationParametersStart")[0]

                time_string = ptools.timeconv.unixtime_to_datestring(
                    install_params.get_timestamp(), format="%Y%m%d_%H%M%S"
                )
                is_dual_rx = install_params.is_dual_rx()
                model = f"EM{install_params.get_model_number()}"
                # NOTE(review): "|" is not a valid file name character on Windows;
                # kept unchanged so the generated file names stay the same.
                channel_id_string = "|".join(channel_ids)
                # `path` is the parent folder of the selected file (see grouping above)
                parent = path.name

                key = f"{parent}-{time_string}-{model}-[{channel_id_string}]-dual_rx({is_dual_rx})"
                print(f" -{key}")

                with open(f"{path_out_}/{key}{postfix}", "wb") as ofi:
                    # loop through all datagrams in the file
                    # (assumes they are iterated in chronological order — TODO confirm)
                    for datagram in middle_file_interface.datagrams():
                        # if the datagram is after the last timestamp of the Nth ping, stop
                        if datagram.get_timestamp() > last_timestamp:
                            break

                        # write the datagram to the file
                        ofi.write(datagram.to_binary())

            # count the folder / file-type combination only once it is fully processed
            prg.update(1)
        

Creating test data:   0%|          | 0/90 [00:00<?, ?files/s]

removing duplicates:  22%|##2       | 31/139 [00:03<00:11,  9.50it/s]

 -M143_EM122_Ukraine_raw-20171216_123028-EM122-[TRX-111]-dual_rx(False)


removing duplicates:  16%|#5        | 22/138 [00:03<00:16,  7.03it/s]

 -M143_EM122_Ukraine_raw-20171216_123028-EM122-[TRX-111]-dual_rx(False)
 -Ukraine02_initial-20171215_191021-EM122-[TRX-111]-dual_rx(False)
 -Ukraine02_initial-20171215_191021-EM122-[TRX-111]-dual_rx(False)
 -M143_EM122_raw-20171213_164425-EM122-[TRX-111]-dual_rx(False)
 -M143_EM122_raw-20171213_161424-EM122-[TRX-111]-dual_rx(False)
 -M143_EM710_Ukraine_raw-20171216_123020-EM710-[TRX-221]-dual_rx(False)
 -M143_EM710_Ukraine_raw-20171216_120020-EM710-[TRX-221]-dual_rx(False)
 -M143_EM710_raw-20171214_024740-EM710-[TRX-221]-dual_rx(False)
 -M143_EM710_raw-20171214_033859-EM710-[TRX-221]-dual_rx(False)
 -alex-20140213_061745-EM2045-[TRX-102]-dual_rx(False)
 -alex-20140213_061745-EM2045-[TRX-102]-dual_rx(False)
 -TURBEAMS_20220318_W08-20220318_144808-EM2040-[TRX-2031|TRX-2004]-dual_rx(True)
 -TURBEAMS_20220318_W08-20220318_144808-EM2040-[TRX-2031|TRX-2004]-dual_rx(True)
 -TURBEAMS_220317_W05-20220317_182909-EM2040-[TRX-2004|TRX-2031]-dual_rx(True)
 -TURBEAMS_220317_W05-20220317_182909-EM204