© 2024 Luca Kunz. Commercial use is subject to the terms of the source repository's license. All other commercial rights are reserved.

# Building the GDP dataframes

Build the overall Global Drifter Program (GDP) dataframes for 6-hourly drifter positions in the period 2000-2019, wrap them in data objects and save them as pickle files.  
Also implement a data column to indicate a drifter's drogue state. Save the overall as well as the drogued and undrogued drifters datasets.  
Use the naming conventions from the TRAPS datasets.

In [1]:
import os
import sys
import numpy as np
import pandas as pd
import time
import datetime
import pickle

from IPython.display import display, Audio
# import jupyter notebook files like regular python modules
import import_ipynb
from aa_define_classes import TRAPSdata

importing Jupyter notebook from aa_define_classes.ipynb


In [2]:
# measure the computation time for the entire script:
# take the start reading here, the stop reading is taken in the final timing cell
start_script_timer = time.perf_counter()

# Preliminary

## Pickle object saving function

In [3]:
def save_object(obj, filename):
    """Pickle ``obj`` to ``filename`` using the highest pickle protocol.

    An existing file at ``filename`` is overwritten. Missing parent
    directories are created first, so exporting into a folder that does not
    exist yet no longer fails with ``FileNotFoundError``.

    Parameters
    ----------
    obj : object
        Any picklable Python object.
    filename : str, bytes or os.PathLike
        Path of the pickle file to write.
    """
    # only path-like targets have a directory part that may need creating
    if isinstance(filename, (str, bytes, os.PathLike)):
        parent_dir = os.path.dirname(filename)
        # a bare file name has an empty directory part: nothing to create
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    with open(filename, 'wb') as outp:  # overwrites any existing file.
        pickle.dump(obj, outp, pickle.HIGHEST_PROTOCOL)

## Files and paths

In [4]:
# define the paths to the source files (relative to the working directory):
# the 6-hourly drifter records ...
gdp6hi_path = 'source_misc/GDP/GDP_6HI_VELOCITIES.csv'
# ... and the per-drifter metadata
meta6hi_path = 'source_misc/GDP/GDP_6HI_METADATA.csv'

# Import DataFrames

Read the data from the respective csv files and put it into a pandas DataFrame.

In [5]:
# start the timer for the csv import
start_timer = time.perf_counter()

In [6]:
# load the drifter records and the drifter metadata, one dataframe per csv file
pd_GDP_6HI_df, pd_META_6HI_df = (
    pd.read_csv(csv_path) for csv_path in (gdp6hi_path, meta6hi_path)
)

In [7]:
# stop the timer and report the elapsed time since start_timer was set
stop_timer = time.perf_counter()
print(f'task time: {stop_timer - start_timer:0.4f} seconds')

task time: 0.7900 seconds


In [8]:
# print check: show the imported drifter dataframe as the cell output
pd_GDP_6HI_df
# alternative check: the imported metadata dataframe
#pd_META_6HI_df

Unnamed: 0,id,date,time,lat,lon,t,ve,vn,speed,varlat,varlon,vart
0,9936,2000-08-25,18:00:00,22.506,-155.303,999.999,-23.632,25.999,35.134,0.000009,0.000010,1000.000000
1,9936,2000-08-26,00:00:00,22.555,-155.354,999.999,-23.282,25.778,34.735,0.000169,0.000224,1000.000000
2,9936,2000-08-26,06:00:00,22.607,-155.401,999.999,-20.157,25.800,32.741,0.000068,0.000088,1000.000000
3,9936,2000-08-26,12:00:00,22.655,-155.439,999.999,-17.255,24.874,30.273,0.000804,0.001324,1000.000000
4,9936,2000-08-26,18:00:00,22.704,-155.473,999.999,-15.801,24.654,29.283,0.001676,0.003018,1000.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
887754,300234066815360,2019-12-19,00:00:00,22.719,-148.885,24.623,8.513,-35.683,36.684,0.000003,0.000005,0.001684
887755,300234066815360,2019-12-19,06:00:00,22.639,-148.873,24.607,5.368,-31.234,31.692,0.000003,0.000005,0.001684
887756,300234066815360,2019-12-19,12:00:00,22.597,-148.863,24.607,11.555,-19.254,22.455,0.000003,0.000005,0.001684
887757,300234066815360,2019-12-19,18:00:00,22.564,-148.824,24.793,21.620,-23.482,31.919,0.000003,0.000005,0.001684


# Remove columns

Remove columns from the drifter dataframes which won't be needed in the following.

In [9]:
# drop the columns that are not used further on, modifying the dataframe in place
pd_GDP_6HI_df.drop(columns=['t', 'vart', 'varlat', 'varlon', 'speed'], inplace=True)

In [10]:
# print check: show the metadata dataframe as the cell output
# alternative check: the drifter dataframe after the column removal
#pd_GDP_6HI_df
pd_META_6HI_df

Unnamed: 0,id,wmo,expno,typebuoy,ddate,dtime,dlat,dlon,edate,etime,elat,elon,ldate,ltime,typedeath
0,9936,51513,1325,SVP,1999-09-06,12:33:00,12.01,-144.83,2000-09-19,04:04:00,25.45,-156.99,2000-09-19,04:04:00,3
1,15864,46988,1348,SVP,2000-04-12,14:33:00,44.65,-124.65,2001-03-07,12:01:00,41.23,-130.62,2001-03-07,12:01:00,2
2,15889,0,1348,SVP,1999-09-23,16:11:00,44.65,-124.70,2000-03-26,01:56:00,46.30,-124.08,2000-03-26,01:56:00,4
3,15896,64640,1348,SVP,1999-09-23,14:12:00,44.66,-125.14,2000-10-08,00:42:00,33.47,-127.99,2000-10-08,00:42:00,3
4,15900,46986,1348,SVP,2000-04-12,12:37:00,44.64,-124.42,2001-01-22,03:38:00,35.52,-123.87,1979-01-01,00:00:00,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
700,300234066814330,3201757,21312,SVP,2019-08-15,16:59:00,27.95,-152.00,2022-04-15,00:00:00,31.36,-162.50,2019-11-29,12:00:00,0
701,300234066814340,3201758,21312,SVP,2019-08-15,03:00:00,25.62,-154.00,2022-04-15,00:00:00,42.04,-148.57,2020-07-01,00:00:00,0
702,300234066815200,3201761,21312,SVP,2019-08-19,13:00:00,42.38,-135.99,2021-09-17,01:00:00,21.67,-157.94,2020-05-04,12:00:00,1
703,300234066815220,3201763,21312,SVP,2019-08-20,03:00:00,44.94,-131.99,2022-04-15,00:00:00,36.74,-153.93,2020-03-03,12:00:00,0


# Rename columns

Change column names to more readable ones and to a naming convention that allows easy integration into the TRAPS scripts.

In [11]:
# drifter records: prefix the ID, position and velocity columns with 'drifter_'
pd_GDP_6HI_df.rename(
    columns={
        'id': 'drifter_ID',
        'lat': 'drifter_lat',
        'lon': 'drifter_lon',
        've': 'drifter_U',
        'vn': 'drifter_V',
    },
    inplace=True,
)

# metadata: spell out the abbreviated deploy (d*), end (e*) and drogue loss (l*) columns
pd_META_6HI_df.rename(
    columns={
        'id': 'drifter_ID',
        # deployment
        'ddate': 'deploy_date',
        'dtime': 'deploy_time',
        'dlat': 'deploy_lat',
        'dlon': 'deploy_lon',
        # end of record
        'edate': 'end_date',
        'etime': 'end_time',
        'elat': 'end_lat',
        'elon': 'end_lon',
        # drogue loss
        'ldate': 'drogue_lost_date',
        'ltime': 'drogue_lost_time',
    },
    inplace=True,
)

# Convert velocities to SI units

In the 6HI dataset, velocities are originally given in cm/s. Convert this to m/s.

In [12]:
# cm/s -> m/s for both velocity components
for velocity_column in ('drifter_U', 'drifter_V'):
    pd_GDP_6HI_df[velocity_column] = pd_GDP_6HI_df[velocity_column] / 100

In [13]:
# print check: show the drifter dataframe with renamed columns and converted velocities
pd_GDP_6HI_df
# alternative check: the metadata dataframe
#pd_META_6HI_df

Unnamed: 0,drifter_ID,date,time,drifter_lat,drifter_lon,drifter_U,drifter_V
0,9936,2000-08-25,18:00:00,22.506,-155.303,-0.23632,0.25999
1,9936,2000-08-26,00:00:00,22.555,-155.354,-0.23282,0.25778
2,9936,2000-08-26,06:00:00,22.607,-155.401,-0.20157,0.25800
3,9936,2000-08-26,12:00:00,22.655,-155.439,-0.17255,0.24874
4,9936,2000-08-26,18:00:00,22.704,-155.473,-0.15801,0.24654
...,...,...,...,...,...,...,...
887754,300234066815360,2019-12-19,00:00:00,22.719,-148.885,0.08513,-0.35683
887755,300234066815360,2019-12-19,06:00:00,22.639,-148.873,0.05368,-0.31234
887756,300234066815360,2019-12-19,12:00:00,22.597,-148.863,0.11555,-0.19254
887757,300234066815360,2019-12-19,18:00:00,22.564,-148.824,0.21620,-0.23482


# Timestrings to timestamps

Convert the timestrings to **naive** datetime objects since the TRAPS dataframes contain naive timestamps only and timestamps will be compared later.

In [14]:
# drifter records: merge the date and time strings into one timestamp column,
# then drop the date column which is no longer needed
pd_GDP_6HI_df['time'] = pd.to_datetime(pd_GDP_6HI_df['date'] + 'T' + pd_GDP_6HI_df['time'])
del pd_GDP_6HI_df['date']

# metadata: same procedure for the deploy, end and drogue loss events
for event in ('deploy', 'end', 'drogue_lost'):
    date_column = f'{event}_date'
    time_column = f'{event}_time'
    pd_META_6HI_df[time_column] = pd.to_datetime(
        pd_META_6HI_df[date_column] + 'T' + pd_META_6HI_df[time_column]
    )
    del pd_META_6HI_df[date_column]

In [15]:
# every (drifter ID, timestamp) pair may occur only once
assert not pd_GDP_6HI_df.duplicated(subset=['drifter_ID', 'time']).any(), 'found drifter id with duplicate timestamp'

In [16]:
# assert meta dataframe consistency
# assert np.all(pd_META_6HI_df.deploy_time <= pd_META_6HI_df.end_time), 'found drifter ID with deprecated start/end time'
# assert np.all(pd_META_6HI_df.end_time >= pd_META_6HI_df.drogue_lost_time), 'found drifter ID with deprecated drogueloss time'

In [17]:
# print check: show the metadata dataframe with the merged timestamp columns
# alternative check: the timestamp column of the drifter dataframe
#pd_GDP_6HI_df.time
pd_META_6HI_df

Unnamed: 0,drifter_ID,wmo,expno,typebuoy,deploy_time,deploy_lat,deploy_lon,end_time,end_lat,end_lon,drogue_lost_time,typedeath
0,9936,51513,1325,SVP,1999-09-06 12:33:00,12.01,-144.83,2000-09-19 04:04:00,25.45,-156.99,2000-09-19 04:04:00,3
1,15864,46988,1348,SVP,2000-04-12 14:33:00,44.65,-124.65,2001-03-07 12:01:00,41.23,-130.62,2001-03-07 12:01:00,2
2,15889,0,1348,SVP,1999-09-23 16:11:00,44.65,-124.70,2000-03-26 01:56:00,46.30,-124.08,2000-03-26 01:56:00,4
3,15896,64640,1348,SVP,1999-09-23 14:12:00,44.66,-125.14,2000-10-08 00:42:00,33.47,-127.99,2000-10-08 00:42:00,3
4,15900,46986,1348,SVP,2000-04-12 12:37:00,44.64,-124.42,2001-01-22 03:38:00,35.52,-123.87,1979-01-01 00:00:00,3
...,...,...,...,...,...,...,...,...,...,...,...,...
700,300234066814330,3201757,21312,SVP,2019-08-15 16:59:00,27.95,-152.00,2022-04-15 00:00:00,31.36,-162.50,2019-11-29 12:00:00,0
701,300234066814340,3201758,21312,SVP,2019-08-15 03:00:00,25.62,-154.00,2022-04-15 00:00:00,42.04,-148.57,2020-07-01 00:00:00,0
702,300234066815200,3201761,21312,SVP,2019-08-19 13:00:00,42.38,-135.99,2021-09-17 01:00:00,21.67,-157.94,2020-05-04 12:00:00,1
703,300234066815220,3201763,21312,SVP,2019-08-20 03:00:00,44.94,-131.99,2022-04-15 00:00:00,36.74,-153.93,2020-03-03 12:00:00,0


# Map metadata to drifters dataframe

For the 6HI dataset, metadata is given in an extra dataframe. We want to map this metadata to each row of the drifters dataset to align its layout with the 1HI dataset.
Thus, assign to every drifter ID in the drifters dataframe the corresponding data from the meta dataframe by using `Series.map()` and mapping dictionaries created from the meta dataframe. This approach is far faster than assigning through arrays.

In [18]:
# build the mapping dataframe: after transposing, every column belongs to one
# drifter ID and every row to one metadata attribute
pd_MAPPING_df = (
    pd_META_6HI_df
    .set_index('drifter_ID')
    .loc[:, ['deploy_time', 'deploy_lat', 'deploy_lon',
             'end_time', 'end_lat', 'end_lon',
             'drogue_lost_time', 'typedeath']]
    .T
    .copy()
)

# one dictionary per attribute row, each mapping drifter IDs to that attribute's values
MAPPING_DICT = pd_MAPPING_df.to_dict(orient='records')

In [19]:
# print check: show the transposed mapping dataframe (attributes as rows, drifter IDs as columns)
pd_MAPPING_df
# alternative check: the first mapping dictionary
#MAPPING_DICT[0]

drifter_ID,9936,15864,15889,15896,15900,15902,15903,15905,15906,15907,...,300234066612970,300234066614860,300234066811330,300234066812190,300234066812240,300234066814330,300234066814340,300234066815200,300234066815220,300234066815360
deploy_time,1999-09-06 12:33:00,2000-04-12 14:33:00,1999-09-23 16:11:00,1999-09-23 14:12:00,2000-04-12 12:37:00,2000-07-09 12:24:00,2000-07-08 10:55:00,2000-07-08 17:03:00,2000-07-08 17:05:00,2000-09-09 12:02:00,...,2018-07-01 21:00:00,2018-06-30 22:00:00,2019-08-07 10:00:00,2019-08-14 19:00:00,2019-08-04 21:00:00,2019-08-15 16:59:00,2019-08-15 03:00:00,2019-08-19 13:00:00,2019-08-20 03:00:00,2019-08-15 10:00:00
deploy_lat,12.01,44.65,44.65,44.66,44.64,44.48,44.65,44.63,44.65,44.62,...,32.66,34.25,21.01,24.4,19.56,27.95,25.62,42.38,44.94,26.81
deploy_lon,-144.83,-124.65,-124.7,-125.14,-124.42,-124.52,-124.49,-125.12,-125.37,-125.37,...,143.99,141.07,-145.0,-154.99,-131.03,-152.0,-154.0,-135.99,-131.99,-153.0
end_time,2000-09-19 04:04:00,2001-03-07 12:01:00,2000-03-26 01:56:00,2000-10-08 00:42:00,2001-01-22 03:38:00,2002-02-23 16:32:00,2002-08-12 15:46:00,2001-04-19 02:29:00,2002-05-02 11:27:00,2002-10-18 16:20:00,...,2021-08-29 10:59:00,2022-04-15 00:00:00,2021-06-26 18:00:00,2019-11-20 15:00:00,2022-04-15 00:00:00,2022-04-15 00:00:00,2022-04-15 00:00:00,2021-09-17 01:00:00,2022-04-15 00:00:00,2022-01-01 12:00:00
end_lat,25.45,41.23,46.3,33.47,35.52,30.66,26.39,39.94,30.78,29.74,...,21.63,37.14,36.49,21.75,35.28,31.36,42.04,21.67,36.74,37.32
end_lon,-156.99,-130.62,-124.08,-127.99,-123.87,-137.99,-130.13,-132.61,-133.53,-132.21,...,-157.91,-147.33,-148.07,-153.95,-147.69,-162.5,-148.57,-157.94,-153.93,-169.03
drogue_lost_time,2000-09-19 04:04:00,2001-03-07 12:01:00,2000-03-26 01:56:00,2000-10-08 00:42:00,1979-01-01 00:00:00,2002-02-23 16:32:00,2002-08-12 15:46:00,2001-04-19 02:29:00,2002-05-02 11:27:00,2002-10-18 16:20:00,...,2019-01-20 16:00:00,2019-01-09 12:00:00,2020-06-28 12:00:00,2019-11-20 15:00:00,2020-01-15 12:00:00,2019-11-29 12:00:00,2020-07-01 00:00:00,2020-05-04 12:00:00,2020-03-03 12:00:00,2020-02-06 12:00:00
typedeath,3,2,4,3,3,3,3,2,3,3,...,1,0,3,3,0,0,0,1,0,3


In [20]:
# map() substitutes each drifter ID in the series with the value stored for it
# in the given dictionary and returns the result as a new series.
# MAPPING_DICT holds one {drifter_ID: value} dictionary per row of pd_MAPPING_df,
# in row order. Pairing each dictionary with its row label instead of using
# hard-coded positions keeps column names and values aligned even if the
# attribute list of the mapping dataframe is changed or reordered.
for attribute, id_to_value in zip(pd_MAPPING_df.index, MAPPING_DICT):
    pd_GDP_6HI_df[attribute] = pd_GDP_6HI_df.drifter_ID.map(id_to_value)

In [21]:
# print check: show the drifter dataframe including the mapped metadata columns
pd_GDP_6HI_df

Unnamed: 0,drifter_ID,time,drifter_lat,drifter_lon,drifter_U,drifter_V,deploy_time,deploy_lat,deploy_lon,end_time,end_lat,end_lon,drogue_lost_time,typedeath
0,9936,2000-08-25 18:00:00,22.506,-155.303,-0.23632,0.25999,1999-09-06 12:33:00,12.01,-144.83,2000-09-19 04:04:00,25.45,-156.99,2000-09-19 04:04:00,3
1,9936,2000-08-26 00:00:00,22.555,-155.354,-0.23282,0.25778,1999-09-06 12:33:00,12.01,-144.83,2000-09-19 04:04:00,25.45,-156.99,2000-09-19 04:04:00,3
2,9936,2000-08-26 06:00:00,22.607,-155.401,-0.20157,0.25800,1999-09-06 12:33:00,12.01,-144.83,2000-09-19 04:04:00,25.45,-156.99,2000-09-19 04:04:00,3
3,9936,2000-08-26 12:00:00,22.655,-155.439,-0.17255,0.24874,1999-09-06 12:33:00,12.01,-144.83,2000-09-19 04:04:00,25.45,-156.99,2000-09-19 04:04:00,3
4,9936,2000-08-26 18:00:00,22.704,-155.473,-0.15801,0.24654,1999-09-06 12:33:00,12.01,-144.83,2000-09-19 04:04:00,25.45,-156.99,2000-09-19 04:04:00,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
887754,300234066815360,2019-12-19 00:00:00,22.719,-148.885,0.08513,-0.35683,2019-08-15 10:00:00,26.81,-153.00,2022-01-01 12:00:00,37.32,-169.03,2020-02-06 12:00:00,3
887755,300234066815360,2019-12-19 06:00:00,22.639,-148.873,0.05368,-0.31234,2019-08-15 10:00:00,26.81,-153.00,2022-01-01 12:00:00,37.32,-169.03,2020-02-06 12:00:00,3
887756,300234066815360,2019-12-19 12:00:00,22.597,-148.863,0.11555,-0.19254,2019-08-15 10:00:00,26.81,-153.00,2022-01-01 12:00:00,37.32,-169.03,2020-02-06 12:00:00,3
887757,300234066815360,2019-12-19 18:00:00,22.564,-148.824,0.21620,-0.23482,2019-08-15 10:00:00,26.81,-153.00,2022-01-01 12:00:00,37.32,-169.03,2020-02-06 12:00:00,3


# Determine drifter domain types

Similar to TRAPS, flag all drifter positions that lie inside the histogram domain with **H**, all that lie outside the histogram domain within the so called margin domain with **M** (MD=VD\HD). Domain types can then be easily selected for the different kinds of histograms.

The velocity domain actually considers all grid points of a given velocity product and thus slightly varies from product to product around the boundaries -160°E, -125°E, 22.5°N, 42.5°N which were chosen for the netCDF velocity data download. Call them the approximated velocity domain boundaries (AVD).  
In the following, the HD boundaries will relate to these AVD boundaries. 

In [22]:
# approximated velocity domain (AVD): the bounds set during the netCDF velocity field download
wbound_AVD, ebound_AVD = -160, -125
sbound_AVD, nbound_AVD = 22.5, 42.5

# histogram domain (HD): the AVD shrunk by one degree on every side
wbound_HD, ebound_HD = wbound_AVD + 1, ebound_AVD - 1
sbound_HD, nbound_HD = sbound_AVD + 1, nbound_AVD - 1

In [23]:
# get the drifter coordinates
drifter_lons = pd_GDP_6HI_df.drifter_lon.to_numpy()
drifter_lats = pd_GDP_6HI_df.drifter_lat.to_numpy()

# ============ FLAGS FOR HISTOGRAM DOMAIN ============
# use the bitwise operator &
# The histogram bin intervals will be left-open, right-closed and the first interval should not be left-inclusive 
# since this would skew the histogram results. As a consequence, we obtain different comparisons at the boundaries.
hd_drifters_filter = ((drifter_lons>wbound_HD) & (drifter_lons<=ebound_HD) & 
                      (drifter_lats>sbound_HD) & (drifter_lats<=nbound_HD))

# assign the respective domain tags, a drifter can only be either in the histogram or in the margin domain.
# np.where builds the flag array in one pass, so no placeholder array has to be
# allocated and overwritten and no position can be left without a flag.
drifter_domains = np.where(hd_drifters_filter, 'H', 'M')

# assign flags to dataframe
pd_GDP_6HI_df['drifter_domain'] = drifter_domains

# save memory
del drifter_domains, drifter_lons, drifter_lats, hd_drifters_filter

In [24]:
# print check: show the drifter dataframe including the drifter_domain column
pd_GDP_6HI_df

Unnamed: 0,drifter_ID,time,drifter_lat,drifter_lon,drifter_U,drifter_V,deploy_time,deploy_lat,deploy_lon,end_time,end_lat,end_lon,drogue_lost_time,typedeath,drifter_domain
0,9936,2000-08-25 18:00:00,22.506,-155.303,-0.23632,0.25999,1999-09-06 12:33:00,12.01,-144.83,2000-09-19 04:04:00,25.45,-156.99,2000-09-19 04:04:00,3,M
1,9936,2000-08-26 00:00:00,22.555,-155.354,-0.23282,0.25778,1999-09-06 12:33:00,12.01,-144.83,2000-09-19 04:04:00,25.45,-156.99,2000-09-19 04:04:00,3,M
2,9936,2000-08-26 06:00:00,22.607,-155.401,-0.20157,0.25800,1999-09-06 12:33:00,12.01,-144.83,2000-09-19 04:04:00,25.45,-156.99,2000-09-19 04:04:00,3,M
3,9936,2000-08-26 12:00:00,22.655,-155.439,-0.17255,0.24874,1999-09-06 12:33:00,12.01,-144.83,2000-09-19 04:04:00,25.45,-156.99,2000-09-19 04:04:00,3,M
4,9936,2000-08-26 18:00:00,22.704,-155.473,-0.15801,0.24654,1999-09-06 12:33:00,12.01,-144.83,2000-09-19 04:04:00,25.45,-156.99,2000-09-19 04:04:00,3,M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
887754,300234066815360,2019-12-19 00:00:00,22.719,-148.885,0.08513,-0.35683,2019-08-15 10:00:00,26.81,-153.00,2022-01-01 12:00:00,37.32,-169.03,2020-02-06 12:00:00,3,M
887755,300234066815360,2019-12-19 06:00:00,22.639,-148.873,0.05368,-0.31234,2019-08-15 10:00:00,26.81,-153.00,2022-01-01 12:00:00,37.32,-169.03,2020-02-06 12:00:00,3,M
887756,300234066815360,2019-12-19 12:00:00,22.597,-148.863,0.11555,-0.19254,2019-08-15 10:00:00,26.81,-153.00,2022-01-01 12:00:00,37.32,-169.03,2020-02-06 12:00:00,3,M
887757,300234066815360,2019-12-19 18:00:00,22.564,-148.824,0.21620,-0.23482,2019-08-15 10:00:00,26.81,-153.00,2022-01-01 12:00:00,37.32,-169.03,2020-02-06 12:00:00,3,M


# Determine drogue state

Buoys can lose their drogue over time and by this change their behaviour. This drogue-off moment is -if given- registered in the metadata.  
Determine if a drifter at a current timestamp is past or before its drogueoff time and assign the boolean value to a new column to label a drifter's drogue state.  
The drifter is still in drogued state if the drogueloss time hasn't been passed.  
For some drifters, the drogueloss time is not available in the meta dataset or just doesn't exist and is thus set to 0000/00/00T00:00 or rather 1979-01-01T00:00:00.  
These drifters also remain in drogued state.

In [25]:
# True where the drifter position was recorded strictly before the drogue loss
before_drogueloss_filter = pd_GDP_6HI_df['time'].lt(pd_GDP_6HI_df['drogue_lost_time'])
# True where the drogue loss time carries the 1979-01-01 placeholder
undefined_drogueloss_filter = pd_GDP_6HI_df['drogue_lost_time'].eq(pd.to_datetime('1979-01-01T00:00:00'))

# a position counts as drogued if either condition holds
pd_GDP_6HI_df['drogued'] = undefined_drogueloss_filter | before_drogueloss_filter

In [26]:
# create two separate dataframes from the drogue flag;
# reset_index returns a new dataframe with a fresh 0..n-1 index after the cropping
pd_DROGUED_6HI_df = pd_GDP_6HI_df.loc[pd_GDP_6HI_df['drogued']].reset_index(drop=True)
pd_UNDROGUED_6HI_df = pd_GDP_6HI_df.loc[~pd_GDP_6HI_df['drogued']].reset_index(drop=True)

# assert that dataframes only contain what they display
assert pd_DROGUED_6HI_df['drogued'].all(), 'drogued dataframe contains undrogued drifters'
assert not pd_UNDROGUED_6HI_df['drogued'].any(), 'undrogued dataframe contains drogued drifters'

In [27]:
# print check: show the overall drifter dataframe including the drogued column
# alternative checks: the two boolean filters and the two split dataframes
#before_drogueloss_filter
#undefined_drogueloss_filter
pd_GDP_6HI_df
# pd_DROGUED_6HI_df
# pd_UNDROGUED_6HI_df

Unnamed: 0,drifter_ID,time,drifter_lat,drifter_lon,drifter_U,drifter_V,deploy_time,deploy_lat,deploy_lon,end_time,end_lat,end_lon,drogue_lost_time,typedeath,drifter_domain,drogued
0,9936,2000-08-25 18:00:00,22.506,-155.303,-0.23632,0.25999,1999-09-06 12:33:00,12.01,-144.83,2000-09-19 04:04:00,25.45,-156.99,2000-09-19 04:04:00,3,M,True
1,9936,2000-08-26 00:00:00,22.555,-155.354,-0.23282,0.25778,1999-09-06 12:33:00,12.01,-144.83,2000-09-19 04:04:00,25.45,-156.99,2000-09-19 04:04:00,3,M,True
2,9936,2000-08-26 06:00:00,22.607,-155.401,-0.20157,0.25800,1999-09-06 12:33:00,12.01,-144.83,2000-09-19 04:04:00,25.45,-156.99,2000-09-19 04:04:00,3,M,True
3,9936,2000-08-26 12:00:00,22.655,-155.439,-0.17255,0.24874,1999-09-06 12:33:00,12.01,-144.83,2000-09-19 04:04:00,25.45,-156.99,2000-09-19 04:04:00,3,M,True
4,9936,2000-08-26 18:00:00,22.704,-155.473,-0.15801,0.24654,1999-09-06 12:33:00,12.01,-144.83,2000-09-19 04:04:00,25.45,-156.99,2000-09-19 04:04:00,3,M,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
887754,300234066815360,2019-12-19 00:00:00,22.719,-148.885,0.08513,-0.35683,2019-08-15 10:00:00,26.81,-153.00,2022-01-01 12:00:00,37.32,-169.03,2020-02-06 12:00:00,3,M,True
887755,300234066815360,2019-12-19 06:00:00,22.639,-148.873,0.05368,-0.31234,2019-08-15 10:00:00,26.81,-153.00,2022-01-01 12:00:00,37.32,-169.03,2020-02-06 12:00:00,3,M,True
887756,300234066815360,2019-12-19 12:00:00,22.597,-148.863,0.11555,-0.19254,2019-08-15 10:00:00,26.81,-153.00,2022-01-01 12:00:00,37.32,-169.03,2020-02-06 12:00:00,3,M,True
887757,300234066815360,2019-12-19 18:00:00,22.564,-148.824,0.21620,-0.23482,2019-08-15 10:00:00,26.81,-153.00,2022-01-01 12:00:00,37.32,-169.03,2020-02-06 12:00:00,3,M,True


# Export pickle objects

In [28]:
# create the objects, re-using the TRAPSdata class imported from aa_define_classes:
# each dataframe is passed as the third argument, the first two arguments are left as None
GDP_6HI_data = TRAPSdata(None, None, pd_GDP_6HI_df)
DROGUED_6HI_data = TRAPSdata(None, None, pd_DROGUED_6HI_df)
UNDROGUED_6HI_data = TRAPSdata(None, None, pd_UNDROGUED_6HI_df)

In [29]:
# time the export of the three data objects
start_timer = time.perf_counter()

# target files for the overall, drogued and undrogued datasets
pkl_GDP_6HI_exportname = 'export_pkl/GDP_6HI/GDP_6HI_ALL_0019.pkl'
pkl_DROGUED_6HI_exportname = 'export_pkl/GDP_6HI/GDP_6HI_DROGUED_0019.pkl'
pkl_UNDROGUED_6HI_exportname = 'export_pkl/GDP_6HI/GDP_6HI_UNDROGUED_0019.pkl'

# save each object as .pkl file
for data_object, export_name in (
    (GDP_6HI_data, pkl_GDP_6HI_exportname),
    (DROGUED_6HI_data, pkl_DROGUED_6HI_exportname),
    (UNDROGUED_6HI_data, pkl_UNDROGUED_6HI_exportname),
):
    save_object(data_object, export_name)

# # also save a .csv version of the datasets to the same pkl folder
# csv_GDP_6HI_exportname = 'export_pkl/GDP_6HI/GDP_6HI_ALL_0019.csv'
# csv_DROGUED_6HI_exportname = 'export_pkl/GDP_6HI/GDP_6HI_DROGUED_0019.csv'
# csv_UNDROGUED_6HI_exportname = 'export_pkl/GDP_6HI/GDP_6HI_UNDROGUED_0019.csv'

# pd_GDP_6HI_df.to_csv(csv_GDP_6HI_exportname, header=True, index=True)
# pd_DROGUED_6HI_df.to_csv(csv_DROGUED_6HI_exportname, header=True, index=True)
# pd_UNDROGUED_6HI_df.to_csv(csv_UNDROGUED_6HI_exportname, header=True, index=True)

stop_timer = time.perf_counter()

print(f'finished export in: {stop_timer - start_timer:0.1f} seconds')

finished export in: 0.6 seconds


# End sound

In [30]:
# measure the computation time for the entire script:
# take the stop reading and report the time elapsed since start_script_timer was set
stop_script_timer = time.perf_counter()
print(f'overall computation time: {stop_script_timer - start_script_timer:0.3f} seconds')

overall computation time: 2.579 seconds


In [31]:
#https://gist.github.com/tamsanh/a658c1b29b8cba7d782a8b3aed685a24

framerate = 4410
play_time_seconds = 1

# sample times of the sound
t = np.linspace(0, play_time_seconds, framerate*play_time_seconds)


def _tone(frequency):
    """Return a sine wave of the given frequency evaluated at the sample times t."""
    return np.sin(2*np.pi*frequency*t)


# G major
#audio_data = np.sin(2*np.pi*391*t) + np.sin(2*np.pi*493*t) + np.sin(2*np.pi*587*t)
# D major
audio_data = _tone(293) + _tone(369) + _tone(440)
Audio(audio_data, rate=framerate, autoplay=True)