The contents of this script are independently licensed under the Creative Commons Attribution 4.0 International License (CC BY 4.0). This licensing applies solely to this script and does not affect the licensing terms of the source repository, should they differ. © 2024 Luca Kunz.

# Read the TRAPS GPGP dataset

Load the yearly TRAPS GPGP pickle files and concatenate them into a pandas DataFrame.

In [1]:
import os
import sys
import numpy as np
import pandas as pd
import time
import datetime
import pickle

from IPython.display import display, Audio
# import jupyter notebook files like regular python modules
import import_ipynb
from aa_define_classes import TRAPSdata

importing Jupyter notebook from aa_define_classes.ipynb


In [2]:
# measure the computation time for the entire script:
# take the start time here, the matching stop time is taken at the end of the script
start_script_timer = time.perf_counter()

# Preliminary

## Pickle object saving function

In [3]:
def save_object(obj, filename):
    """Pickle `obj` into the file `filename`.

    The file is opened in binary write mode, so an existing file with the
    same name is overwritten. The highest pickle protocol available to the
    running interpreter is used.
    """
    with open(filename, mode='wb') as sink:
        pickler = pickle.Pickler(sink, protocol=pickle.HIGHEST_PROTOCOL)
        pickler.dump(obj)

## Files and paths

In [4]:
# Select the velocity product and the epsilon value of the dataset to load.
# In a notebook run the IDs are set by hand below, in a script run they are
# read from the command line:  <script> <vel_product_ID> <epsilon_ID>
# Both IDs are positions in the lists further down.

# if script is running in jupyter lab
if sys.argv[0].endswith("ipykernel_launcher.py"):
    # set the velocity product
    vel_product_ID = 1
    # set the epsilon value
    epsilon_ID = 1
    notebook_run = True
    # set to True to switch on figure saving in a notebook run
    save_fig = False


# if script is running as python script
else:
    # stop with a usage hint instead of a bare IndexError if arguments are missing
    if len(sys.argv) < 3:
        raise SystemExit('usage: ' + sys.argv[0] + ' <vel_product_ID> <epsilon_ID>')
    # read in product from bash
    vel_product_ID = int(sys.argv[1])
    # read in epsilon from bash
    epsilon_ID = int(sys.argv[2])
    notebook_run = False
    save_fig = True


# short name of the velocity product, used in directory and file names
vel_product_short = ['ENSRYS_24HM', 'MULTIOBS_24HI', 'MULTIOBS_24HM', 'SEALEVEL_24HI'][vel_product_ID]

# full name of the velocity product, same order as the short names
vel_product_long = ['CMEMS GLOBAL_REANALYSIS_PHY_001_031 ENSEMBLE MEAN (1/4°, 24HM)', 
                    'CMEMS MULTIOBS_GLO_PHY_REP_015_004 (1/4°, 24HI)', 
                    'CMEMS MULTIOBS_GLO_PHY_REP_015_004 (1/4°, 24HM)', 
                    'CMEMS SEALEVEL_GLO_PHY_L4_MY_008_047 (1/4°, 24HI)'][vel_product_ID]


# one pickle file exists per year, the years are used as strings in the file names
years = ['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', 
         '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019']

# the two epsilon lists are parallel: entry i of the second list is the
# file name tag that belongs to entry i of the first list
epsilon_value = [0.1, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0][epsilon_ID]
epsilon_string = ['_e010', '_e025', '_e050', '_e075', '_e100', '_e125', '_e150', '_e175', '_e200'][epsilon_ID]

In [5]:
# directory that holds the yearly TRAPS pickle files of the selected velocity product
pkl_TRAPS_importpath = f'export_pkl/{vel_product_short}/20XX/'

# Import DataFrames

## Load TRAPS data

We load the yearly dataframes and concatenate them into one 20-year dataframe, since this performs better than loading one complete 20-year dataframe. Each row in the dataframe corresponds to one TRAP detection. Multiple detections can be associated with one TRAP trajectory. If all attributes of the dataframes are loaded, the data will take up approx. 30GB of memory. For most applications, however, it is sufficient to load a subset of attributes. Selecting specific attributes significantly reduces the memory usage and speeds up the loading. The following list describes all TRAP `attributes`. Attributes in <code><mark style="background-color:orange">orange</mark></code> take up a lot of memory; omitting them reduces the memory usage to approx. 3GB:


`TRAP_ID`: the count number of a TRAP detection in a given snapshot 

`time`: the time and date of detection

`core_lon`: the longitude of the TRAP core

`core_lat`: the latitude of the TRAP core

`core_attraction`: the attraction rate $s_1$ of the TRAP core

<code><mark style="background-color:orange">curve_lons</mark></code>: the longitudes of all points along the TRAP curve

<code><mark style="background-color:orange">curve_lats</mark></code>: the latitudes of all points along the TRAP curve

<code><mark style="background-color:orange">curve_attractions</mark></code>: the attraction rates $s_1$ of all points along the TRAP curve  

`origin_ID`: the label/identifier of the associated TRAP trajectory

`lifetime`: the number of days for which the associated TRAP trajectory exists

`age`: the age of the current TRAP trajectory at the current instance

`track_s1_peak`: the minimum core attraction rate $s_1$ along the TRAP trajectory

`core_U`: the zonal velocity of the current TRAP instance, positive Eastwards

`core_V`: the meridional velocity of the current TRAP instance, positive Northwards

`vircle`: indicator if the current instance has a vorticity circle defined around it

`phase_shift`: the angle between the longer branch of the TRAP and the zonal axis, with 0° pointing Eastwards

<code><mark style="background-color:orange">vircle_lons</mark></code>: the longitudes of the vorticity circle parameterisation

<code><mark style="background-color:orange">vircle_lats</mark></code>: the latitudes of the vorticity circle parameterisation

<code><mark style="background-color:orange">vircle_VORs</mark></code>: the relative vorticity at the points along the vorticity circle

<code><mark style="background-color:orange">vircle_VORs_NBV</mark></code>: the relative vorticity at the points along the vorticity circle minus the average vorticity of all respective points (no background vorticity)

`pattern_flag`: the identifier of the detected vorticity pattern

`pattern_agreement`: the ratio of points along the vorticity circle that agree with the detected vorticity pattern

`pattern_flag_NBV`: the identifier of the detected vorticity pattern (no background vorticity)

`pattern_agreement_NBV`: the ratio of points along the vorticity circle that agree with the detected vorticity pattern (no background vorticity)

`configu_flag`: the identifier of the detected vorticity configuration

`configu_flag_NBV`: the identifier of the detected vorticity configuration (no background vorticity)

`qorder_flag`: the quadrupole order of the detected vorticity configuration

`qorder_flag_NBV`: the quadrupole order of the detected vorticity configuration (no background vorticity)



In [6]:
# Select the attributes to load: every name in `_all_TRAP_attributes` is loaded
# unless it is also listed in `_attributes_to_skip`.
# Add names to or remove names from `_attributes_to_skip` to change the selection,
# or simply define `columns_to_load` by yourself.
_all_TRAP_attributes = (
    'TRAP_ID',
    'time',
    'core_lon',
    'core_lat',
    'core_attraction',
    'curve_lons',
    'curve_lats',
    'curve_attractions',
    'origin_ID',
    'lifetime',
    'age',
    'track_s1_peak',
    'core_U',
    'core_V',
    'vircle',
    'phase_shift',
    'vircle_lons',
    'vircle_lats',
    'vircle_VORs',
    'vircle_VORs_NBV',
    'pattern_flag',
    'pattern_agreement',
    'pattern_flag_NBV',
    'pattern_agreement_NBV',
    'configu_flag',
    'configu_flag_NBV',
    'qorder_flag',
    'qorder_flag_NBV',
)

# the curve and vorticity circle attributes are the ones left out here
_attributes_to_skip = {
    'curve_lons',
    'curve_lats',
    'curve_attractions',
    'vircle_lons',
    'vircle_lats',
    'vircle_VORs',
    'vircle_VORs_NBV',
}

# keep the order of `_all_TRAP_attributes`
columns_to_load = [
    attribute for attribute in _all_TRAP_attributes
    if attribute not in _attributes_to_skip
]

In [7]:
# Load the yearly pickle files one after the other, keep the selected columns
# of every yearly DataFrame and stack them to one DataFrame afterwards.
# Every file is opened and unpickled exactly once: the dataset description is
# taken from the object that is loaded anyway instead of reading a file again.

# the list to store the subdataframes to feed into pd.concat() after the loop
pd_TRAPS_sdfs = []

# the description of the dataset, it's the same for all years
trapsgpgp_description = None

for year in years:
    
    start_timer = time.perf_counter()
    
    # define the pkl file to load
    pkl_TRAPS_importname = vel_product_short + epsilon_string + '_TRAPS_GPGP_' + year + '.pkl'
    
    # load the pickle object
    # NOTE: unpickling can execute arbitrary code, only load files from a trusted source
    with open(pkl_TRAPS_importpath + pkl_TRAPS_importname, 'rb') as inp:
        yearly_TRAPS = pickle.load(inp)

    # to load the full dataset, use
    # yearly_TRAPS.pd_TRAPS_df.copy()
    # here, or select attributes to save memory and speed up the loading
    pd_TRAPS_sdfs.append(yearly_TRAPS.pd_TRAPS_df[columns_to_load].copy())

    # overwritten in every iteration, so the value of the last year is kept
    trapsgpgp_description = yearly_TRAPS.product_long

    # drop the reference to the full yearly object, only the column subset is kept
    del yearly_TRAPS
        
    stop_timer = time.perf_counter()
    print('loaded ' + pkl_TRAPS_importname + f' in: {stop_timer - start_timer:0.1f} seconds')


# stack the yearly dataframes, ignore_index=True gives the result a fresh 0..n-1 index
pd_TRAPS_df = pd.concat(pd_TRAPS_sdfs, ignore_index=True)

# save memory
del pd_TRAPS_sdfs

loaded MULTIOBS_24HI_e025_TRAPS_GPGP_2000.pkl in: 4.8 seconds
loaded MULTIOBS_24HI_e025_TRAPS_GPGP_2001.pkl in: 4.4 seconds
loaded MULTIOBS_24HI_e025_TRAPS_GPGP_2002.pkl in: 4.2 seconds
loaded MULTIOBS_24HI_e025_TRAPS_GPGP_2003.pkl in: 4.1 seconds
loaded MULTIOBS_24HI_e025_TRAPS_GPGP_2004.pkl in: 4.1 seconds
loaded MULTIOBS_24HI_e025_TRAPS_GPGP_2005.pkl in: 4.2 seconds
loaded MULTIOBS_24HI_e025_TRAPS_GPGP_2006.pkl in: 4.2 seconds
loaded MULTIOBS_24HI_e025_TRAPS_GPGP_2007.pkl in: 4.3 seconds
loaded MULTIOBS_24HI_e025_TRAPS_GPGP_2008.pkl in: 4.3 seconds
loaded MULTIOBS_24HI_e025_TRAPS_GPGP_2009.pkl in: 4.1 seconds
loaded MULTIOBS_24HI_e025_TRAPS_GPGP_2010.pkl in: 5.0 seconds
loaded MULTIOBS_24HI_e025_TRAPS_GPGP_2011.pkl in: 4.1 seconds
loaded MULTIOBS_24HI_e025_TRAPS_GPGP_2012.pkl in: 4.5 seconds
loaded MULTIOBS_24HI_e025_TRAPS_GPGP_2013.pkl in: 4.7 seconds
loaded MULTIOBS_24HI_e025_TRAPS_GPGP_2014.pkl in: 4.3 seconds
loaded MULTIOBS_24HI_e025_TRAPS_GPGP_2015.pkl in: 4.2 seconds
loaded M

# Show DataFrame

In [8]:
# last expression of the cell: the notebook shows the DataFrame as cell output
pd_TRAPS_df

Unnamed: 0,TRAP_ID,time,core_lon,core_lat,core_attraction,origin_ID,lifetime,age,track_s1_peak,core_U,...,vircle,phase_shift,pattern_flag,pattern_agreement,pattern_flag_NBV,pattern_agreement_NBV,configu_flag,configu_flag_NBV,qorder_flag,qorder_flag_NBV
0,0,2000-01-01,-159.045688,23.400981,-0.625117,200001010000 000,16,1,-0.650914,,...,False,3.609645,99,99.000000,99,99.000000,Z,Z,99,99
1,1,2000-01-01,-159.503564,24.127516,-0.298961,200001010000 001,1,1,-0.298961,,...,False,5.546844,99,99.000000,99,99.000000,Z,Z,99,99
2,2,2000-01-01,-157.098753,23.351263,-0.484728,200001010000 002,20,1,-0.592701,,...,False,5.048653,99,99.000000,99,99.000000,Z,Z,99,99
3,3,2000-01-01,-157.279485,25.245133,-0.357612,200001010000 003,17,1,-0.391277,,...,True,3.524186,5,0.819444,6,0.833333,E,B,1,0
4,4,2000-01-01,-157.825133,24.521705,-0.270798,200001010000 004,26,1,-0.359810,,...,True,3.142709,1,0.847222,10,0.694444,A,F,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4076060,604,2019-12-31,-143.433040,37.663826,-0.185681,201912100000 161,22,22,-0.315532,,...,True,6.135567,6,0.791667,6,0.847222,B,B,0,0
4076061,605,2019-12-31,-142.775877,37.750317,-0.119429,201912300000 423,2,2,-0.129556,,...,True,4.501250,2,0.819444,2,0.833333,E,E,1,1
4076062,606,2019-12-31,-141.216740,31.007622,-0.192688,201912290000 544,3,3,-0.206966,,...,True,0.561322,6,0.763889,5,0.777778,B,E,0,1
4076063,607,2019-12-31,-141.283859,28.264153,-0.098530,201912290000 161,3,3,-0.134088,,...,True,3.951574,6,0.833333,6,0.819444,B,B,0,0


# Show metadata

In [9]:
# last expression of the cell: the notebook shows the description string as cell output
trapsgpgp_description

'TRansient Attracting Profiles (TRAPs, Serra et al. (2020) published at https://doi.org/10.1038/s41467-020-16281-x) have been computed with code by Mattia Serra (https://github.com/MattiaSerra/TRAPs) and post-processed with code by Luca Kunz (https://github.com/kunzluca/trapsgpgp) using daily snapshots of near-surface geostrophic + Ekman currents from the product Global Total Surface and 15m Current (COPERNICUS-GLOBCURRENT) from Altimetric Geostrophic Current and Modeled Ekman Current Reprocessing that is provided by the E.U. Copernicus Marine Service (https://doi.org/10.48670/moi-00050). This work is a contribution to the project L3 (Meso- to submesoscale turbulence in the ocean) of the Collaborative Research Centre TRR 181 Energy Transfer in Atmosphere and Ocean funded by the German Research Foundation (DFG) and has been conducted in collaboration with The Ocean Cleanup. This dataset is licensed under the Creative Commons Attribution 4.0 International License (CC BY 4.0). © 2024 Luca

# End sound

In [10]:
# take the stop time and report the time elapsed since start_script_timer was set
stop_script_timer = time.perf_counter()
script_runtime = stop_script_timer - start_script_timer
print(f'overall computation time: {script_runtime:0.3f} seconds')

overall computation time: 91.829 seconds


In [11]:
# Play a one-second chord to signal that the script has finished.
# see https://gist.github.com/tamsanh/a658c1b29b8cba7d782a8b3aed685a24

# samples per second and duration of the signal
framerate = 4410
play_time_seconds = 1

# sample times in seconds; endpoint=False excludes the stop value so that
# consecutive samples are exactly 1/framerate apart
t = np.linspace(0, play_time_seconds, framerate*play_time_seconds, endpoint=False)
# G major
#audio_data = np.sin(2*np.pi*391*t) + np.sin(2*np.pi*493*t) + np.sin(2*np.pi*587*t)
# D major
audio_data = np.sin(2*np.pi*293*t) + np.sin(2*np.pi*369*t) + np.sin(2*np.pi*440*t)
# last expression of the cell, so the notebook shows the audio player as cell output
Audio(audio_data, rate=framerate, autoplay=True)