# Process the raw data from pypam and bpnsdata
This notebook contains the code to process the output obtained after processing the data with pypam and bpnsdata.
For more information about this process, please contact clea.parcerisas@vliz.be or check the documentation of both packages:
https://lifewatch-pypam.readthedocs.io/en/latest/
https://github.com/lifewatch/bpnsdata

In [1]:
# Install the required packages (only tqdm is installed by this cell). Geopandas can cause problems on Windows machines, so it is better to install it from wheels when using Windows
import sys
!{sys.executable} -m pip install tqdm



In [3]:
import datetime
import pathlib

import geopandas
import numpy as np
import pandas as pd
import xarray
from tqdm import tqdm



In [4]:
# Lengths of the chunks to analyze. Both values are expressed in seconds.
CHUNK_LENGTH = 5  # also used as the margin removed from the end of a labelled period
TIME_CHUNK_LENGTH = 1

In [5]:
# ETN ids of the deployments that did not pass the quality check and must be skipped
DEPLOYMENTS_TO_SKIP_ETN = [
    9815, 9816, 9819, 9826,
    9808, 9821, 9849, 9853, 9854,
    13241, 13244,
]

In [6]:
# Environmental variables read from every deployment dataset (names after renaming)
ENV_LABELS = ['shipping', 'season', 'moon_phase', 'day_moment',
              'benthic_habitat', 'substrate', 'seabed_habitat',
              'tide', 'salinity', 'temperature', 'current',
              'bathymetry', 'shipwreck_distance', 'coast_dist']

# Per-deployment fields copied from the metadata csv onto every row
SAMPLE_LABELS = ['instrument_name', 'instrument_depth', 'etn_id', 'campaign']

# Columns that locate a chunk in the recordings, plus its quality label
ARTIFACTS_LABELS = [
    'datetime',
    'filename',
    'grouped_start_sample',
    'grouped_end_sample',
    'label'
]

In [7]:
# Variables holding discrete classes
CATEGORICAL_VARS = [
    'day_moment',
    'benthic_habitat',
    'substrate',
    'seabed_habitat',
    'instrument_name',
    'etn_id',
    'campaign',
    'label',
]
# Variables whose values wrap around
CYCLIC_VARS = ['season', 'moon_phase']

# Target dtype of each column; applied column by column in the clean-up step
vars_dtypes = {
    # integer-valued variables
    'shipping': int,
    'season': int,
    'moon_phase': np.float16,
    # class variables
    'day_moment': 'category',
    'benthic_habitat': 'category',
    'substrate': 'category',
    'seabed_habitat': 'category',
    # continuous variables, stored in half precision
    'tide': np.float16,
    'salinity': np.float16,
    'temperature': np.float16,
    'current': np.float16,
    'bathymetry': np.float16,
    'shipwreck_distance': np.float16,
    'coast_dist': int,
    'instrument_depth': np.float16
}

# Mapping from the variable names found in the datasets to shorter names for easier use
env_labels_rename = dict(
    sea_surface_height_above_sea_level='tide',
    sea_surface_salinity='salinity',
    sea_surface_temperature='temperature',
    surface_baroclinic_sea_water_velocity='current',
    route_density='shipping',
)

In [8]:
# Folders used by the notebook, relative to the working directory:
#   data_path            -> holds the metadata and labels csv files
#   processed_data_path  -> holds the per-deployment *_env.nc files and receives the outputs
#   raw_data_path        -> folder of the raw deployments
data_path = pathlib.Path('./data/raw_data/')
raw_data_path = pathlib.Path('./data/raw_data/deployments/')
processed_data_path = pathlib.Path('./data/processed/')

In [9]:
# Load the deployments summary and drop the deployments that failed the quality check
metadata = pd.read_csv(data_path / 'data_summary_mda.csv')
skipped_rows = metadata['etn_id'].isin(DEPLOYMENTS_TO_SKIP_ETN)
metadata = metadata.loc[~skipped_rows]

# Load the manually labelled periods, parsing their time columns as datetimes
label_date_columns = ['start_datetime', 'end_datetime', 'start_file']
labels_bad_data = pd.read_csv(data_path / 'labels.csv', parse_dates=label_date_columns)

# Number of consecutive bins that are joined together
n_join_bins = 5

# Empty containers for the outputs
df_features, df_sample, df_env, df_labels = (pd.DataFrame() for _ in range(4))
df_geo = geopandas.GeoDataFrame()

# Name of the dataset variable used as acoustic features, and placeholder for its frequency ticks
features_var = 'oct3'
freqticks = None

In [10]:
# Join all the deployments in one DataFrame.
# For every row of `metadata` the matching '<idx>_<deployment_name>_env.nc' file is opened,
# restricted to the analysed frequency band and flattened to one row per `grouped_id`.
# Each row is then completed with the environmental values, the deployment metadata,
# a point geometry and the quality label read from `labels_bad_data`.

# Limits of the analysed frequency band, in Hz (both limits are excluded)
MIN_FREQUENCY_HZ = 60
MAX_FREQUENCY_HZ = 45000

# The per-deployment frames are collected here and concatenated once after the loop:
# calling pd.concat inside the loop would copy all the accumulated rows at every iteration
deployment_frames = []
# Sum over deployments of the span between their first and last datetime
total_acoustic_time = np.timedelta64(0, 'ns')
for idx in tqdm(metadata.index, total=len(metadata)):
    deployment_row = metadata.loc[idx]
    env_name = '%s_%s_env.nc' % (idx, deployment_row.deployment_name)
    env_path = processed_data_path.joinpath(env_name)
    # Name compared against the `filepath` column of the labels csv
    deployment_file_name = '%s_%s.nc' % (idx, deployment_row.deployment_name)

    # The context manager closes the file once all the needed values have been read
    with xarray.open_dataset(env_path) as raw_deployment:
        deployment = raw_deployment.rename(env_labels_rename)

        # Eliminate the frequencies below 60 Hz and above 45 kHz
        in_band = (deployment.frequency > MIN_FREQUENCY_HZ) & (deployment.frequency < MAX_FREQUENCY_HZ)
        deployment = deployment.sel(frequency=deployment.frequency[in_band])

        # The duration is computed before the rows with missing values are dropped
        deployment_duration = deployment.datetime.max() - deployment.datetime.min()
        total_acoustic_time = total_acoustic_time + deployment_duration
        deployment = deployment[ENV_LABELS + [features_var]].dropna('grouped_id', how='any')
        # Kept after the loop: the save step stores the ticks of the last deployment read
        clean_freqticks = deployment.frequency.values

        # Nothing to add when no row is left or when the dataset has 2 or fewer dimensions
        if len(deployment.id) == 0 or len(deployment.dims) <= 2:
            continue

        # One feature vector per grouped_id, in Fortran order
        # (assumes grouped_id is the first axis of the features array - TODO confirm)
        n_features = deployment.sizes['time_window'] * deployment.sizes['frequency']
        values_arr = deployment[features_var].values.reshape(
            (deployment.sizes['grouped_id'], n_features), order='F')

        df_deployment = pd.DataFrame(values_arr).astype(np.float16)
        for env in ENV_LABELS:
            df_deployment[env] = deployment[env].values

        # The metadata values are constant for the whole deployment
        for sam in SAMPLE_LABELS:
            df_deployment[sam] = deployment_row.loc[sam]

        # Add the geometry
        geo_series = geopandas.GeoSeries(data=geopandas.points_from_xy(x=deployment['lon'],
                                                                       y=deployment['lat']),
                                         crs='EPSG:4326')
        df_deployment = geopandas.GeoDataFrame(df_deployment, geometry=geo_series)

        # Add the columns that locate every chunk in the recordings
        df_deployment['datetime'] = deployment.grouped_datetime.values
        df_deployment['filename'] = deployment.sel(time_window=0).file_path.values
        df_deployment['grouped_start_sample'] = deployment.grouped_start_sample.values
        df_deployment['grouped_end_sample'] = deployment.grouped_end_sample.values

        # Add the corresponding label by reading the csv with labels.
        # A chunk gets a label when its datetime is strictly after the start of the labelled
        # period and more than CHUNK_LENGTH seconds before its end.
        df_deployment['label'] = 'unknown'
        deployment_labels = labels_bad_data.loc[labels_bad_data.filepath == deployment_file_name]
        for _, label_row in deployment_labels.iterrows():
            label_end = label_row.end_datetime - datetime.timedelta(seconds=CHUNK_LENGTH)
            mask_label = (df_deployment.datetime > label_row.start_datetime) & \
                         (df_deployment.datetime < label_end)
            if mask_label.any():
                df_deployment.loc[mask_label, 'label'] = label_row.label

        deployment_frames.append(df_deployment)

# pd.concat raises on an empty list, so fall back to an empty frame when nothing was read
df = pd.concat(deployment_frames, ignore_index=True) if deployment_frames else pd.DataFrame()

# print the total acoustic time
total_hours = float(np.asarray(total_acoustic_time) / np.timedelta64(1, 'h'))
print('Total amount of time recorded %s h' % total_hours)

  0%|          | 0/56 [00:00<?, ?it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


  4%|▎         | 2/56 [00:00<00:23,  2.31it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


  7%|▋         | 4/56 [00:01<00:14,  3.57it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


  9%|▉         | 5/56 [00:01<00:12,  4.22it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 11%|█         | 6/56 [00:01<00:12,  4.13it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 16%|█▌        | 9/56 [00:02<00:09,  5.10it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 18%|█▊        | 10/56 [00:02<00:08,  5.40it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 20%|█▉        | 11/56 [00:02<00:07,  5.97it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 25%|██▌       | 14/56 [00:03<00:08,  5.18it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 27%|██▋       | 15/56 [00:03<00:07,  5.22it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495 

 34%|███▍      | 19/56 [00:04<00:06,  5.89it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 36%|███▌      | 20/56 [00:04<00:05,  6.38it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 43%|████▎     | 24/56 [00:04<00:03,  8.68it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495 

 46%|████▋     | 26/56 [00:04<00:03,  9.45it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495 

 48%|████▊     | 27/56 [00:05<00:04,  6.31it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 50%|█████     | 28/56 [00:05<00:05,  5.29it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 54%|█████▎    | 30/56 [00:05<00:05,  4.60it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 57%|█████▋    | 32/56 [00:06<00:04,  5.98it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495 

 62%|██████▎   | 35/56 [00:06<00:03,  5.52it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 66%|██████▌   | 37/56 [00:06<00:02,  6.46it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 70%|██████▉   | 39/56 [00:07<00:03,  5.11it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 73%|███████▎  | 41/56 [00:07<00:02,  6.51it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 77%|███████▋  | 43/56 [00:08<00:02,  5.63it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 82%|████████▏ | 46/56 [00:08<00:01,  6.19it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 84%|████████▍ | 47/56 [00:08<00:01,  5.40it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 86%|████████▌ | 48/56 [00:09<00:01,  5.16it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 88%|████████▊ | 49/56 [00:09<00:01,  3.79it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 91%|█████████ | 51/56 [00:10<00:01,  3.80it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 95%|█████████▍| 53/56 [00:10<00:00,  3.86it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


 98%|█████████▊| 55/56 [00:11<00:00,  3.34it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]


100%|██████████| 56/56 [00:11<00:00,  4.85it/s]

[   62.5           78.74506562    99.21256575   125.
   157.49013124   198.4251315    250.           314.98026247
   396.85026299   500.           629.96052495   793.70052598
  1000.          1259.92104989  1587.40105197  2000.
  2519.84209979  3174.80210394  4000.          5039.68419958
  6349.60420787  8000.         10079.36839916 12699.20841575
 16000.         20158.73679832 25398.41683149 32000.
 40317.47359664]
Total amount of time recorded 40.7910209025 h





## Some data clean up

In [None]:
# Clean up the joined data and change the data types to save computational power and memory.
# NOTE: this cell is not idempotent. Running it twice applies the logarithm and the sign flip
# again, so re-run the cell that builds `df` before re-running this one.

# Merge the three twilight phases into a single 'Twilight' value
twilight_phases = ['Civil twilight', 'Astronomical twilight', 'Nautical twilight']
df = df.replace(twilight_phases, ['Twilight'] * len(twilight_phases))

# Flip the sign of the bathymetry
# (presumably turns negative elevations into positive depths - TODO confirm)
flipped_bathymetry = -1 * df['bathymetry']

# Natural logarithm of the distance to the closest shipwreck
df['shipwreck_distance'] = np.log(df['shipwreck_distance'])
df['bathymetry'] = flipped_bathymetry
df['distance_to_bottom'] = flipped_bathymetry - df['instrument_depth']

# Cast every column listed in vars_dtypes (categories and compact numeric types)
# for efficient storage and processing
for column_name in vars_dtypes:
    df[column_name] = df[column_name].astype(vars_dtypes[column_name])

## Save the outputs to work on with the next script

In [None]:
# Drop any row that belongs to a deployment marked to be skipped
bad_deployments = df['etn_id'].astype(int).isin(DEPLOYMENTS_TO_SKIP_ETN)
df_good_deployments = df.loc[~bad_deployments]

# Store the frequency ticks and the complete DataFrame for the next script
freqticks_file = processed_data_path / 'used_freqticks.npy'
complete_df_file = processed_data_path / 'df_complete.pkl'
np.save(freqticks_file, clean_freqticks)
df_good_deployments.to_pickle(complete_df_file)