© 2024 Luca Kunz. Commercial use is subject to the terms of the source repository's license. All other commercial rights are reserved.

Building the TRAPS data object
==

Build the overall TRAPS dataframe for every individual year, objectify and save it.  
Also save a reduced version of the TRAPS dataframe which only contains cores and raw/interpolated curves.  
These reduced TRAPS dataframes will be easier to load in later analysis.

In [1]:
import os
import sys
import numpy as np
from scipy.interpolate import interp1d
import pandas as pd
import time
import datetime
import os
import pickle

from IPython.display import display, Audio
# import jupyter notebook files like regular python modules
import import_ipynb
from aa_define_classes import interpol_along_curve, TRAPSdata

importing Jupyter notebook from aa_define_classes.ipynb


In [2]:
# measure the computation time for the entire script
start_script_timer = time.perf_counter()

# Preliminary

## Pickle object saving function

In [3]:
def save_object(obj, filename):
    """Pickle ``obj`` into the file ``filename``.

    The file is opened in binary write mode, so an existing file of the same
    name is overwritten. The highest available pickle protocol is used.
    """
    with open(filename, mode='wb') as pkl_file:
        pickle.dump(obj, pkl_file, protocol=pickle.HIGHEST_PROTOCOL)

## Files and paths

In [4]:
# Detect how the code is executed: inside jupyter lab sys.argv[0] is the
# ipykernel launcher, otherwise the code runs as a python script called from bash.
notebook_run = sys.argv[0].endswith("ipykernel_launcher.py")

if notebook_run:
    # interactive run: set the velocity product and the year by hand
    vel_product_ID, year_ID = 1, 0
else:
    # script run: read in product (1st argument) and year (2nd argument) from bash
    vel_product_ID, year_ID = int(sys.argv[1]), int(sys.argv[2])

# figures are only saved for script runs
save_fig = not notebook_run


# (short name, long name) of every available velocity product
vel_products = (
    ('ENSRYS_24HM', 'CMEMS GLOBAL_REANALYSIS_PHY_001_031 ENSEMBLE MEAN (1/4°, 24HM)'),
    ('MULTIOBS_24HI', 'CMEMS MULTIOBS_GLO_PHY_REP_015_004 (1/4°, 24HI)'),
    ('MULTIOBS_24HM', 'CMEMS MULTIOBS_GLO_PHY_REP_015_004 (1/4°, 24HM)'),
    ('SEALEVEL_24HI', 'SEALEVEL_GLO_PHY_L4_NRT_OBSERVATIONS_008_046'),
)

vel_product_short, vel_product_long = vel_products[vel_product_ID]

# the years 2000 to 2019 as strings, year_ID selects one of them
years = [str(yyyy) for yyyy in range(2000, 2020)]

year = years[year_ID]


In [5]:
# define paths to files
# all csv exports of one velocity product and year share the same directory
# and the naming scheme pd_<quantity>_<year>_df.csv
csv_dir = 'export_csv_TRAPS/' + vel_product_short + '/' + year + '/'
csv_suffix = '_' + year + '_df.csv'

xTC_csvpath = csv_dir + 'pd_xTC' + csv_suffix
yTC_csvpath = csv_dir + 'pd_yTC' + csv_suffix
pxt_csvpath = csv_dir + 'pd_pxt' + csv_suffix
pyt_csvpath = csv_dir + 'pd_pyt' + csv_suffix
s1TC_csvpath = csv_dir + 'pd_s1TC' + csv_suffix

In [6]:
# directory the pickle files are written to, the '20XX' path component is used literally
pkl_exportpath = f'export_pkl/{vel_product_short}/20XX/'

# Import DataFrames

Read the data from the respective csv files and put it into a pandas DataFrame.

In [7]:
start_timer = time.perf_counter()

In [8]:
# xTC: x-component trap cores, yTC: y-component trap cores - vector
pd_xTC_df, pd_yTC_df = (pd.read_csv(csvpath) for csvpath in (xTC_csvpath, yTC_csvpath))

# pxt: x-coordinates of TRAPs, pyt: y-coordinates of TRAPs - size: [#TRAPs, #points along a TRAP]
# coordinates with NaN indicate regions of the tensor lines that do not satisfy the desired attraction properties
pd_pxt_df, pd_pyt_df = (pd.read_csv(csvpath) for csvpath in (pxt_csvpath, pyt_csvpath))

# s1TC: attraction rate at trap cores - vector
pd_s1TC_df = pd.read_csv(s1TC_csvpath)

In [9]:
stop_timer = time.perf_counter()
print(f'task time: {stop_timer - start_timer:0.4f} seconds')

task time: 2.5254 seconds


## Assertions

Check for integrity of the different files.

In [10]:
# the shape of the TRAP core x-coordinates serves as reference for the other files
core_shape = pd_xTC_df.shape

# assert that coordinate arrays are of same shape
assert core_shape == pd_yTC_df.shape, 'TRAP cores: different number of x- and y-coordinates'
assert pd_pxt_df.shape == pd_pyt_df.shape, 'TRAP curves: different number of x- and y-coordinates'

# assert that number of TRAP cores equals number of TRAP curves and number of attraction rates
assert core_shape[0] == pd_pxt_df.shape[0], 'mismatch number of TRAP cores and curves'
assert core_shape == pd_s1TC_df.shape, 'mismatch number of TRAP cores and number of attraction rates'

# assert that data is given for exactly the same timestamps,
# i.e. every file has to carry the same time column as the TRAP core x-coordinates
reference_times = pd_xTC_df.time.to_list()
assert all(
    other_df.time.to_list() == reference_times
    for other_df in (pd_yTC_df, pd_pxt_df, pd_pyt_df, pd_s1TC_df)
), 'mismatching timestamps'

# define one array for all unique timestamps, call them timestrings since these are no datetime objects yet
#timestrings = np.unique(pd_xTC_df.time)

In [11]:
# print check
pd_xTC_df
#pd_yTC_df
#pd_pxt_df
#pd_pyt_df.shape
#pd_s1TC_df
#timestrings

Unnamed: 0,TRAP_ID,time,0
0,0,200001010000,-159.045688
1,1,200001010000,-159.503564
2,2,200001010000,-157.098753
3,3,200001010000,-157.279485
4,4,200001010000,-157.825133
...,...,...,...
211409,602,200012310000,-141.635983
211410,603,200012310000,-141.498361
211411,604,200012310000,-142.074919
211412,605,200012310000,-141.846596


# TRAPS DataFrame
Put cores and tensorlines into one DataFrame.  
It's faster to loop through arrays than through an entire DataFrame, thus get the arrays first, assign them to a DataFrame column and keep them for later loops.

In [12]:
start_timer = time.perf_counter()

In [13]:
# start from the core x-coordinates (rename returns a copy) and attach
# the core y-coordinates and the attraction rates as further columns
pd_TRAPS_df = pd_xTC_df.rename(columns={'0': 'core_lon'}).assign(
    core_lat=pd_yTC_df['0'].copy(),
    core_attraction=pd_s1TC_df['0'].copy(),
)

In [14]:
# Aggregate the lons and lats of every TRAP curve into one array per TRAP, i.e. per row (axis=1).
# TRAP_ID and time must not show up in the aggregated arrays, thus these columns are dropped first.
# The raw_ prefix marks the original TRAP curves as read from the csv files, which may contain NaNs;
# truncated and interpolated versions are derived further below.
raw_curve_lons = pd_pxt_df.drop(columns=['TRAP_ID', 'time']).agg(func=np.array, axis=1).to_numpy() # this is an array of arrays
raw_curve_lats = pd_pyt_df.drop(columns=['TRAP_ID', 'time']).agg(func=np.array, axis=1).to_numpy() # this is an array of arrays

# store the per-TRAP coordinate arrays as columns of the TRAPS DataFrame
pd_TRAPS_df['raw_curve_lons'] = raw_curve_lons
pd_TRAPS_df['raw_curve_lats'] = raw_curve_lats

## Timestrings to timestamps

Convert the timestrings to datetime objects.

In [15]:
# the timestrings follow the pattern YYYYmmddHHMM, e.g. 200001010000
pd_TRAPS_df['time'] = pd.to_datetime(pd_TRAPS_df['time'], format='%Y%m%d%H%M')

In [16]:
stop_timer = time.perf_counter()
print(f'task time: {stop_timer - start_timer:0.4f} seconds')

task time: 2.8116 seconds


In [17]:
# print check
pd_TRAPS_df#.time[0]
#pd_TRAPS_df.raw_curve_lons[927]
#pd_TRAPS_df.raw_curve_lats[927]


Unnamed: 0,TRAP_ID,time,core_lon,core_lat,core_attraction,raw_curve_lons,raw_curve_lats
0,0,2000-01-01,-159.045688,23.400981,-0.625117,"[nan, -159.465202832297, -159.44635348818, -15...","[nan, 23.1889083623293, 23.2053297116176, 23.2..."
1,1,2000-01-01,-159.503564,24.127516,-0.298961,"[nan, -159.159668021201, -159.18377668114, -15...","[nan, 23.8158077645795, 23.8224124464569, 23.8..."
2,2,2000-01-01,-157.098753,23.351263,-0.484728,"[nan, nan, nan, -156.959155383537, -156.961825...","[nan, nan, nan, 22.9518861202522, 22.976740373..."
3,3,2000-01-01,-157.279485,25.245133,-0.357612,"[-157.737693931696, -157.719258182722, -157.70...","[25.0607392199964, 25.077622457608, 25.0936600..."
4,4,2000-01-01,-157.825133,24.521705,-0.270798,"[nan, -158.294683608633, -158.270308767516, -1...","[nan, 24.521180619821, 24.5156263187706, 24.51..."
...,...,...,...,...,...,...,...
211409,602,2000-12-31,-141.635983,39.745526,-0.172387,"[nan, nan, nan, nan, nan, -141.985759360558, -...","[nan, nan, nan, nan, nan, 39.6437173355492, 39..."
211410,603,2000-12-31,-141.498361,33.983257,-0.253132,"[nan, nan, nan, nan, nan, nan, nan, -141.58688...","[nan, nan, nan, nan, nan, nan, nan, 33.6745512..."
211411,604,2000-12-31,-142.074919,28.771750,-0.184957,"[nan, -141.668381906843, -141.684968642171, -1...","[nan, 28.5406194513576, 28.5593230291279, 28.5..."
211412,605,2000-12-31,-141.846596,39.002991,-0.277369,"[nan, nan, -142.288278546339, -142.26349450435...","[nan, nan, 38.9324068245497, 38.9291642915645,..."


In [18]:
# cleanup and save memory
del pd_xTC_df, pd_yTC_df, pd_pxt_df, pd_pyt_df, pd_s1TC_df
del raw_curve_lons, raw_curve_lats

# Remove non-attracting TRAPs

For some reason, there are a few TRAP cores that are not attracting.  
This error must stem from the MATLAB computation and needs to be corrected here.  
Otherwise, the repelling/neutral TRAP cores will interfere with the following analysis.  
Separate these repelling/neutral TRAPs from the DataFrame and save them for review if needed.  
Move on with the clean DataFrame.

In [19]:
# A TRAP is only kept if its core is strictly attracting, i.e. core_attraction < 0.
# Build this mask once and use its complement for the rejected TRAPs:
# a NaN attraction rate compares False against both `< 0` and `>= 0`, so two
# independent comparisons would silently drop such rows from both DataFrames
# instead of keeping them in pd_ERRORTRAPS_df for review.
is_attracting = pd_TRAPS_df.core_attraction < 0

pd_ERRORTRAPS_df = pd_TRAPS_df[~is_attracting].copy()
pd_TRAPS_df = pd_TRAPS_df[is_attracting].copy()

# reset the index otherwise it would have jumps and throw errors later
pd_TRAPS_df.reset_index(drop=True, inplace=True)

In [20]:
# print check
pd_ERRORTRAPS_df
#pd_TRAPS_df

Unnamed: 0,TRAP_ID,time,core_lon,core_lat,core_attraction,raw_curve_lons,raw_curve_lats


In [21]:
# The removal of non-attracting TRAPs has reset the DataFrame indices, therefore
# pull the arrays from the cleaned DataFrame again.
# These arrays are used in the loops and calculations below.
core_lon = pd_TRAPS_df['core_lon'].to_numpy()
core_lat = pd_TRAPS_df['core_lat'].to_numpy()

raw_curve_lons = pd_TRAPS_df['raw_curve_lons'].to_numpy()
raw_curve_lats = pd_TRAPS_df['raw_curve_lats'].to_numpy()

# Truncate TRAP curves to continuous ones

Arrays for TRAP curves contain NaN values due to insufficient attraction rate at respective points. This may occur anywhere along the TRAP and thus cause discontinuous TRAPs. We want continuous TRAPs with no gaps. Truncating the TRAP curves to where the first discontinuities occur both on the left and right side of the core position will give us the inner continuous version of a TRAP curve.

As a next step one could interpolate to equal distances between curve points and by this remove further discontinuities there, this is done at the end of the script.

To truncate, we need to start at the array position/curve point closest to the TRAP core and then move both to left and right, checking the array elements for NaN values and slice the array there.

The array index of the curve point closest to the TRAP core can be calculated for arbitrary TRAP core positions along the array using the commented-out code in the next cell. First applications revealed that this is actually always the same index for any TRAP and can be inferred from the way TRAPs were constructed with lower and upper branches in the MATLAB code:

Upper and lower branch each contain NumPointsOnCurve/2 points. They are connected by overlapping in one point, which is the position of the TRAP core. Thus the curve consists of NumPointsOnCurve-1 points. And the TRAP core is at point number NumPointsOnCurve/2. In terms of a numpy array, this relates to the core at index NumPointsOnCurve/2-1:

+ NumPointsOnCurve-1 = raw_curve_lons.size
+ core_index = (raw_curve_lons.size+1)/2 - 1




In [22]:
# gives the array index of the point closest to the TRAP core
# for the unlikely case of two points being equally close to the TRAP core, 
# it will return the index of the point that occurs first in the array
# which is okay, since we only need some point to start
# for a given TRAP, e.g. 
#ix = 540
#core_index = np.where(pd_TRAPS_df.loc[ix].curve_core_distances==np.nanmin(pd_TRAPS_df.loc[ix].curve_core_distances))[0][0]

In [23]:
# The number of points stored per curve is read off the first TRAP, call it actual_points_on_curve.
# Due to the overlap of the two branches in the core, this is one point less than
# the original MATLAB parameter NumPointsOnCurve, call the latter num_points_on_curve.
actual_points_on_curve = pd_TRAPS_df.loc[0, 'raw_curve_lons'].size
num_points_on_curve = actual_points_on_curve + 1

# array index of the TRAP core within every curve array
core_index = int(0.5 * num_points_on_curve - 1)
core_index

20

In [24]:
# distance of every curve point to the core of its TRAP
# the exponent 0.5 replaces np.sqrt since the latter throws the error
# loop of ufunc does not support argument 0 of type numpy.ndarray which has no callable sqrt method
raw_curve_core_distances = (
    (raw_curve_lons - core_lon)**2
    + (raw_curve_lats - core_lat)**2
)**0.5

# for every curve, the distance at the core_index has to be exactly zero
# a nan value at the core position fails this check as well
for distances in raw_curve_core_distances:
    assert distances[core_index] == 0, 'non-zero curve-core distance at core index'

# assign to DataFrame
pd_TRAPS_df['raw_curve_core_distances'] = raw_curve_core_distances

In [25]:
# the maximum index of the original, raw arrays
max_index = actual_points_on_curve - 1

# the truncated curves are collected in these lists, one entry per TRAP
trunc_curve_lons = []
trunc_curve_lats = []

# The curve-core distances are nan as soon as one of both coordinates is nan,
# so they serve as a combined nan filter for lons and lats.
for lons, lats, distances in zip(raw_curve_lons, raw_curve_lats, raw_curve_core_distances):

    # walk from the core to the left until index 0 is reached or the preceding element is nan,
    # everything before left_index will be sliced off
    left_index = core_index
    while left_index > 0 and not np.isnan(distances[left_index - 1]):
        left_index -= 1

    # walk from the core to the right until max_index+1 is reached or the current element is nan,
    # right_index itself is excluded by the slice below
    right_index = core_index
    while right_index <= max_index and not np.isnan(distances[right_index]):
        right_index += 1

    # all coordinate arrays of a curve have the same shape, so the indices apply to both
    trunc_curve_lons.append(lons[left_index:right_index])
    trunc_curve_lats.append(lats[left_index:right_index])


# assert that the truncated lons/lats arrays contain no NaNs
assert not np.isnan(np.concatenate(trunc_curve_lons)).any(), 'NaN values in truncated lons arrays'
assert not np.isnan(np.concatenate(trunc_curve_lats)).any(), 'NaN values in truncated lats arrays'

# assign the truncated curves to the DataFrame
pd_TRAPS_df['trunc_curve_lons'] = trunc_curve_lons
pd_TRAPS_df['trunc_curve_lats'] = trunc_curve_lats


In [26]:
# print check
#type(pd_TRAPS_df.loc[0].trunc_curve_lats)
#pd_TRAPS_df#.trunc_curve_lons
#pd_TRAPS_df.trunc_curve_lats
pd_TRAPS_df

Unnamed: 0,TRAP_ID,time,core_lon,core_lat,core_attraction,raw_curve_lons,raw_curve_lats,raw_curve_core_distances,trunc_curve_lons,trunc_curve_lats
0,0,2000-01-01,-159.045688,23.400981,-0.625117,"[nan, -159.465202832297, -159.44635348818, -15...","[nan, 23.1889083623293, 23.2053297116176, 23.2...","[nan, 0.47007121571104077, 0.4458831010378982,...","[-159.465202832297, -159.44635348818, -159.427...","[23.1889083623293, 23.2053297116176, 23.221189..."
1,1,2000-01-01,-159.503564,24.127516,-0.298961,"[nan, -159.159668021201, -159.18377668114, -15...","[nan, 23.8158077645795, 23.8224124464569, 23.8...","[nan, 0.4641403346667206, 0.4419863151420326, ...","[-159.159668021201, -159.18377668114, -159.207...","[23.8158077645795, 23.8224124464569, 23.830408..."
2,2,2000-01-01,-157.098753,23.351263,-0.484728,"[nan, nan, nan, -156.959155383537, -156.961825...","[nan, nan, nan, 22.9518861202522, 22.976740373...","[nan, nan, nan, 0.423071398745495, 0.398768453...","[-156.959155383537, -156.961825801318, -156.96...","[22.9518861202522, 22.9767403734308, 23.001426..."
3,3,2000-01-01,-157.279485,25.245133,-0.357612,"[-157.737693931696, -157.719258182722, -157.70...","[25.0607392199964, 25.077622457608, 25.0936600...","[0.4939197260788712, 0.4705958281073851, 0.447...","[-157.737693931696, -157.719258182722, -157.70...","[25.0607392199964, 25.077622457608, 25.0936600..."
4,4,2000-01-01,-157.825133,24.521705,-0.270798,"[nan, -158.294683608633, -158.270308767516, -1...","[nan, 24.521180619821, 24.5156263187706, 24.51...","[nan, 0.469551090053364, 0.4452174541020112, 0...","[-158.294683608633, -158.270308767516, -158.24...","[24.521180619821, 24.5156263187706, 24.5105287..."
...,...,...,...,...,...,...,...,...,...,...
211409,602,2000-12-31,-141.635983,39.745526,-0.172387,"[nan, nan, nan, nan, nan, -141.985759360558, -...","[nan, nan, nan, nan, nan, 39.6437173355492, 39...","[nan, nan, nan, nan, nan, 0.3642918433758386, ...","[-141.985759360558, -141.962170072696, -141.93...","[39.6437173355492, 39.6356071007287, 39.632333..."
211410,603,2000-12-31,-141.498361,33.983257,-0.253132,"[nan, nan, nan, nan, nan, nan, nan, -141.58688...","[nan, nan, nan, nan, nan, nan, nan, 33.6745512...","[nan, nan, nan, nan, nan, nan, nan, 0.32114838...","[-141.58688784844, -141.57336288308, -141.5612...","[33.6745512798819, 33.6955711165605, 33.717421..."
211411,604,2000-12-31,-142.074919,28.771750,-0.184957,"[nan, -141.668381906843, -141.684968642171, -1...","[nan, 28.5406194513576, 28.5593230291279, 28.5...","[nan, 0.4676466170902086, 0.44405646332697735,...","[-141.668381906843, -141.684968642171, -141.70...","[28.5406194513576, 28.5593230291279, 28.577459..."
211412,605,2000-12-31,-141.846596,39.002991,-0.277369,"[nan, nan, -142.288278546339, -142.26349450435...","[nan, nan, 38.9324068245497, 38.9291642915645,...","[nan, nan, 0.44728670090562045, 0.423384636862...","[-142.288278546339, -142.263494504359, -142.23...","[38.9324068245497, 38.9291642915645, 38.927636..."


# Estimate truncation

Find out, how many of the dataset's raw TRAP points were removed since they belonged to discontinuous parts of the TRAP curve, i.e. behind a nan-gap in the TRAP.  Nan points/points of insufficient attraction rate are not counted in.  

By how much is the number of TRAP points reduced when looking only at truncated TRAPs?

In [27]:
# all raw curve points of the dataset, NaN points are removed before counting
all_lons_raw = np.concatenate(raw_curve_lons)
is_valid_point = ~np.isnan(all_lons_raw)
all_lons_raw = all_lons_raw[is_valid_point]

# all curve points that remain after truncation, these contain no NaNs
all_lons_trunc = np.concatenate(trunc_curve_lons)

num_points_raw = all_lons_raw.size
num_points_trunc = all_lons_trunc.size

# share of the valid raw points that was cut off by the truncation, in percent
reduction_percent = (1 - num_points_trunc/num_points_raw)*100

print(num_points_raw, ' TRAP points before truncation')
print(num_points_trunc, ' TRAP points after truncation')
print()
print('number of TRAP points reduced by ', reduction_percent, ' %')

7849782  TRAP points before truncation
7845001  TRAP points after truncation

number of TRAP points reduced by  0.06090615000517996  %


# Interpolate to equal distances

After truncation, TRAP curves bear no more NaN values but points on the truncated curve can still be unequally distributed, leading to TRAP points with variable distance between one another.  

We want to interpolate these points to equidistant points along a TRAP curve such that TRAP curves show no more gaps, i.e. are finally continuous in both senses. This will allow for a correct counting of TRAP curve occurrences in the histograms later.

The interpolation can be performed using np.linspace() or np.arange() for the creation of the interpolation points. Both methods lead to different distributions of segment sizes, we choose to use the np.arange() results.

In [28]:
start_timer = time.perf_counter()

## Interpolate using np.linspace() and np.arange()

In [29]:
# the interpolated curves are collected in these lists, one entry per TRAP
interpol_linspace_curve_lons = []
interpol_linspace_curve_lats = []
interpol_arange_curve_lons = []
interpol_arange_curve_lats = []


# iterate through the truncated versions of all TRAP curves
for xs, ys in zip(trunc_curve_lons, trunc_curve_lats):

    # interpolate using np.linspace()
    linspace_lons, linspace_lats = interpol_along_curve(xs=xs, ys=ys, interpolation_mode='LINSPACE')
    interpol_linspace_curve_lons.append(linspace_lons)
    interpol_linspace_curve_lats.append(linspace_lats)

    # interpolate using np.arange()
    arange_lons, arange_lats = interpol_along_curve(xs=xs, ys=ys, interpolation_mode='ARANGE')
    interpol_arange_curve_lons.append(arange_lons)
    interpol_arange_curve_lats.append(arange_lats)

# assign the interpolated curves to the DataFrame
pd_TRAPS_df['interpol_linspace_curve_lons'] = interpol_linspace_curve_lons
pd_TRAPS_df['interpol_linspace_curve_lats'] = interpol_linspace_curve_lats
pd_TRAPS_df['interpol_arange_curve_lons'] = interpol_arange_curve_lons
pd_TRAPS_df['interpol_arange_curve_lats'] = interpol_arange_curve_lats

In [30]:
stop_timer = time.perf_counter()
print(f'task time: {stop_timer - start_timer:0.4f} seconds')

task time: 91.9184 seconds


In [31]:
# print check
pd_TRAPS_df

Unnamed: 0,TRAP_ID,time,core_lon,core_lat,core_attraction,raw_curve_lons,raw_curve_lats,raw_curve_core_distances,trunc_curve_lons,trunc_curve_lats,interpol_linspace_curve_lons,interpol_linspace_curve_lats,interpol_arange_curve_lons,interpol_arange_curve_lats
0,0,2000-01-01,-159.045688,23.400981,-0.625117,"[nan, -159.465202832297, -159.44635348818, -15...","[nan, 23.1889083623293, 23.2053297116176, 23.2...","[nan, 0.47007121571104077, 0.4458831010378982,...","[-159.465202832297, -159.44635348818, -159.427...","[23.1889083623293, 23.2053297116176, 23.221189...","[-159.465202832297, -159.39798312399114, -159....","[23.1889083623293, 23.24305812999977, 23.28997...","[-159.465202832297, -159.40044405039814, -159....","[23.1889083623293, 23.241294361583048, 23.2868..."
1,1,2000-01-01,-159.503564,24.127516,-0.298961,"[nan, -159.159668021201, -159.18377668114, -15...","[nan, 23.8158077645795, 23.8224124464569, 23.8...","[nan, 0.4641403346667206, 0.4419863151420326, ...","[-159.159668021201, -159.18377668114, -159.207...","[23.8158077645795, 23.8224124464569, 23.830408...","[-159.159668021201, -159.24065012637956, -159....","[23.8158077645795, 23.845207033249558, 23.8953...","[-159.159668021201, -159.23795868926015, -159....","[23.8158077645795, 23.84382718973737, 23.89130..."
2,2,2000-01-01,-157.098753,23.351263,-0.484728,"[nan, nan, nan, -156.959155383537, -156.961825...","[nan, nan, nan, 22.9518861202522, 22.976740373...","[nan, nan, nan, 0.423071398745495, 0.398768453...","[-156.959155383537, -156.961825801318, -156.96...","[22.9518861202522, 22.9767403734308, 23.001426...","[-156.959155383537, -156.9726206489647, -156.9...","[22.9518861202522, 23.032493637119288, 23.1104...","[-156.959155383537, -156.97300069417307, -156....","[22.9518861202522, 23.033962489599663, 23.1132..."
3,3,2000-01-01,-157.279485,25.245133,-0.357612,"[-157.737693931696, -157.719258182722, -157.70...","[25.0607392199964, 25.077622457608, 25.0936600...","[0.4939197260788712, 0.4705958281073851, 0.447...","[-157.737693931696, -157.719258182722, -157.70...","[25.0607392199964, 25.077622457608, 25.0936600...","[-157.737693931696, -157.67579981512534, -157....","[25.0607392199964, 25.1112610323883, 25.147804...","[-157.737693931696, -157.672992477558, -157.59...","[25.0607392199964, 25.11306391107641, 25.15033..."
4,4,2000-01-01,-157.825133,24.521705,-0.270798,"[nan, -158.294683608633, -158.270308767516, -1...","[nan, 24.521180619821, 24.5156263187706, 24.51...","[nan, 0.469551090053364, 0.4452174541020112, 0...","[-158.294683608633, -158.270308767516, -158.24...","[24.521180619821, 24.5156263187706, 24.5105287...","[-158.294683608633, -158.21005275272654, -158....","[24.521180619821, 24.50406925898681, 24.493251...","[-158.294683608633, -158.21303988239845, -158....","[24.521180619821, 24.504563408481385, 24.49381..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211409,602,2000-12-31,-141.635983,39.745526,-0.172387,"[nan, nan, nan, nan, nan, -141.985759360558, -...","[nan, nan, nan, nan, nan, 39.6437173355492, 39...","[nan, nan, nan, nan, nan, 0.3642918433758386, ...","[-141.985759360558, -141.962170072696, -141.93...","[39.6437173355492, 39.6356071007287, 39.632333...","[-141.985759360558, -141.9024989815805, -141.8...","[39.6437173355492, 39.634740198073104, 39.6583...","[-141.985759360558, -141.9041207915777, -141.8...","[39.6437173355492, 39.634473490992114, 39.6572..."
211410,603,2000-12-31,-141.498361,33.983257,-0.253132,"[nan, nan, nan, nan, nan, nan, nan, -141.58688...","[nan, nan, nan, nan, nan, nan, nan, 33.6745512...","[nan, nan, nan, nan, nan, nan, nan, 0.32114838...","[-141.58688784844, -141.57336288308, -141.5612...","[33.6745512798819, 33.6955711165605, 33.717421...","[-141.58688784844, -141.54923111934403, -141.5...","[33.6745512798819, 33.74286870570219, 33.81669...","[-141.58688784844, -141.54724872181143, -141.5...","[33.6745512798819, 33.747682851431435, 33.8267..."
211411,604,2000-12-31,-142.074919,28.771750,-0.184957,"[nan, -141.668381906843, -141.684968642171, -1...","[nan, 28.5406194513576, 28.5593230291279, 28.5...","[nan, 0.4676466170902086, 0.44405646332697735,...","[-141.668381906843, -141.684968642171, -141.70...","[28.5406194513576, 28.5593230291279, 28.577459...","[-141.668381906843, -141.72838034723273, -141....","[28.5406194513576, 28.602657535654284, 28.6557...","[-141.668381906843, -141.72614862092027, -141....","[28.5406194513576, 28.6006153253202, 28.652420..."
211412,605,2000-12-31,-141.846596,39.002991,-0.277369,"[nan, nan, -142.288278546339, -142.26349450435...","[nan, nan, 38.9324068245497, 38.9291642915645,...","[nan, nan, 0.44728670090562045, 0.423384636862...","[-142.288278546339, -142.263494504359, -142.23...","[38.9324068245497, 38.9291642915645, 38.927636...","[-142.288278546339, -142.20448715119863, -142....","[38.9324068245497, 38.92857201547881, 38.94329...","[-142.288278546339, -142.20522979965793, -142....","[38.9324068245497, 38.92851369811983, 38.94290..."


# Export sub dataframes

Extract sub dataframes from the overall pd_TRAPS_df and export these as pickle files.  
We want a reduced version of the TRAPS dataframe which only contains cores and certain kind of curves since this will be easier to save and load in later analysis.

## Export raw TRAPS



In [32]:
#pd_TRAPSRAW_df = pd_TRAPS_df[['TRAP_ID', 'time', 
#                              'core_lon', 'core_lat', 'core_attraction', 
#                              'raw_curve_lons', 'raw_curve_lats']].copy()

In [33]:
# print check
#pd_TRAPSRAW_df

In [34]:
# create the object
#TRAPS_data = TRAPSdata(vel_product_short, vel_product_long, pd_TRAPSRAW_df)

In [35]:
# save the object as .pkl file
#start_timer = time.perf_counter()

#pkl_filename = vel_product_short + '_TRAPSRAW_' + year + '.pkl'

#save_object(TRAPS_data, pkl_exportpath + pkl_filename)

#stop_timer = time.perf_counter()
#print()
#print(f'task time: {stop_timer - start_timer:0.4f} seconds')

In [36]:
# cleanup and save memory
#del TRAPS_data, pd_TRAPSRAW_df

## Export trunc TRAPS



In [37]:
#pd_TRAPSTRUNC_df = pd_TRAPS_df[['TRAP_ID', 'time', 
#                                'core_lon', 'core_lat', 'core_attraction', 
#                                'trunc_curve_lons', 'trunc_curve_lats']].copy()

In [38]:
# print check
#pd_TRAPSTRUNC_df

In [39]:
# create the object
#TRAPS_data = TRAPSdata(vel_product_short, vel_product_long, pd_TRAPSTRUNC_df)

In [40]:
# save the object as .pkl file
#start_timer = time.perf_counter()

#pkl_filename = vel_product_short + '_TRAPSTRUNC_' + year + '.pkl'

#save_object(TRAPS_data, pkl_exportpath + pkl_filename)

#stop_timer = time.perf_counter()
#print()
#print(f'task time: {stop_timer - start_timer:0.4f} seconds')

In [41]:
# cleanup and save memory
#del TRAPS_data, pd_TRAPSTRUNC_df

## Export TRAPS interpolated with np.arange()

In [42]:
# the reduced DataFrame only keeps the core properties and the curves interpolated with np.arange()
interpol_columns = [
    'TRAP_ID', 'time',
    'core_lon', 'core_lat', 'core_attraction',
    'interpol_arange_curve_lons', 'interpol_arange_curve_lats',
]

pd_TRAPSINTERPOL_df = pd_TRAPS_df[interpol_columns].copy()

In [43]:
# print check
pd_TRAPSINTERPOL_df

Unnamed: 0,TRAP_ID,time,core_lon,core_lat,core_attraction,interpol_arange_curve_lons,interpol_arange_curve_lats
0,0,2000-01-01,-159.045688,23.400981,-0.625117,"[-159.465202832297, -159.40044405039814, -159....","[23.1889083623293, 23.241294361583048, 23.2868..."
1,1,2000-01-01,-159.503564,24.127516,-0.298961,"[-159.159668021201, -159.23795868926015, -159....","[23.8158077645795, 23.84382718973737, 23.89130..."
2,2,2000-01-01,-157.098753,23.351263,-0.484728,"[-156.959155383537, -156.97300069417307, -156....","[22.9518861202522, 23.033962489599663, 23.1132..."
3,3,2000-01-01,-157.279485,25.245133,-0.357612,"[-157.737693931696, -157.672992477558, -157.59...","[25.0607392199964, 25.11306391107641, 25.15033..."
4,4,2000-01-01,-157.825133,24.521705,-0.270798,"[-158.294683608633, -158.21303988239845, -158....","[24.521180619821, 24.504563408481385, 24.49381..."
...,...,...,...,...,...,...,...
211409,602,2000-12-31,-141.635983,39.745526,-0.172387,"[-141.985759360558, -141.9041207915777, -141.8...","[39.6437173355492, 39.634473490992114, 39.6572..."
211410,603,2000-12-31,-141.498361,33.983257,-0.253132,"[-141.58688784844, -141.54724872181143, -141.5...","[33.6745512798819, 33.747682851431435, 33.8267..."
211411,604,2000-12-31,-142.074919,28.771750,-0.184957,"[-141.668381906843, -141.72614862092027, -141....","[28.5406194513576, 28.6006153253202, 28.652420..."
211412,605,2000-12-31,-141.846596,39.002991,-0.277369,"[-142.288278546339, -142.20522979965793, -142....","[38.9324068245497, 38.92851369811983, 38.94290..."


In [44]:
# create the object
TRAPS_data = TRAPSdata(vel_product_short, vel_product_long, pd_TRAPSINTERPOL_df)

In [45]:
# save the object as .pkl file

# Make sure the export directory exists before writing: save_object() opens the
# target file directly and would raise FileNotFoundError for a missing directory,
# discarding everything that has been computed up to this point.
os.makedirs(pkl_exportpath, exist_ok=True)

start_timer = time.perf_counter()

# the file name marks the export as the interpolated TRAPS of the given product and year
pkl_exportname = vel_product_short + '_TRAPS_INTERPOL_' + year + '.pkl'

save_object(TRAPS_data, pkl_exportpath + pkl_exportname)

stop_timer = time.perf_counter()
print('saved ' + pkl_exportname + f' in: {stop_timer - start_timer:0.4f} seconds')

saved MULTIOBS_24HI_TRAPS_INTERPOL_2000.pkl in: 2.8502 seconds


In [46]:
# cleanup and save memory
del TRAPS_data, pd_TRAPSINTERPOL_df

# End sound

In [47]:
# measure the computation time for the entire script
stop_script_timer = time.perf_counter()
print(f'overall computation time: {stop_script_timer - start_script_timer:0.3f} seconds')

overall computation time: 114.641 seconds


In [48]:
# notification sound at the end of the run, adapted from
#https://gist.github.com/tamsanh/a658c1b29b8cba7d782a8b3aed685a24

framerate = 4410
play_time_seconds = 1

# sample times of the signal
t = np.linspace(0, play_time_seconds, framerate*play_time_seconds)

# the signal is the sum of three sine tones
# G major (unused alternative)
#audio_data = np.sin(2*np.pi*391*t) + np.sin(2*np.pi*493*t) + np.sin(2*np.pi*587*t)
# D major
tone_low, tone_mid, tone_high = (np.sin(2*np.pi*frequency*t) for frequency in (293, 369, 440))
audio_data = tone_low + tone_mid + tone_high
Audio(audio_data, rate=framerate, autoplay=True)