© 2024 Luca Kunz. Commercial use is subject to the terms of the source repository's license. All other commercial rights are reserved.

Merge snapshots of TRAPs into yearly DataFrames
==

For every year of output data, load and concatenate the snapshot TRAP coordinate dataframes exported by the TRAPs MATLAB algorithm.  
Create and export one pandas DataFrame for all TRAP occurrences within one year.

This simple file is provided only for completeness, since the merged output files are already available and TRAPs can be recomputed directly starting from the next script.

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import time
import datetime

from IPython.display import Audio

In [None]:
# Measure the computation time for the entire script: take the start reading
# here; the stop reading is taken in the last timing cell and the difference
# between both is printed there.
start_script_timer = time.perf_counter()

# Preliminary

## Files and paths

In [None]:
# Detect the execution context from the launcher name: inside jupyter lab the
# code is started through "ipykernel_launcher.py", otherwise it runs as a
# plain python script that receives its settings from bash.
notebook_run = sys.argv[0].endswith("ipykernel_launcher.py")

if notebook_run:
    # interactive run: velocity product and year are set by hand
    vel_product_ID, year_ID = 1, 0
    save_fig = False
else:
    # script run: first argument is the product index, second the year index
    vel_product_ID = int(sys.argv[1])
    year_ID = int(sys.argv[2])
    save_fig = True


# short name of the selected velocity product, used to build directory paths
vel_product_short = [
    'ENSRYS_24HM',
    'MULTIOBS_24HI',
    'MULTIOBS_24HM',
    'SEALEVEL_24HI',
][vel_product_ID]

# long, descriptive name of the selected velocity product
vel_product_long = [
    'CMEMS GLOBAL_REANALYSIS_PHY_001_031 ENSEMBLE MEAN (1/4°, 24HM)',
    'CMEMS MULTIOBS_GLO_PHY_REP_015_004 (1/4°, 24HI)',
    'CMEMS MULTIOBS_GLO_PHY_REP_015_004 (1/4°, 24HM)',
    'SEALEVEL_GLO_PHY_L4_NRT_OBSERVATIONS_008_046',
][vel_product_ID]

# all selectable years as strings, '2000' up to and including '2019'
years = [str(yr) for yr in range(2000, 2020)]

# the year processed in this run
year = years[year_ID]


In [None]:
# Directories holding the MATLAB output csv files, one directory per component
# for the selected velocity product and year.
xTC_csvpath = f'export_matlab/xTC/{vel_product_short}/{year}/'
yTC_csvpath = f'export_matlab/yTC/{vel_product_short}/{year}/'
pxt_csvpath = f'export_matlab/pxt/{vel_product_short}/{year}/'
pyt_csvpath = f'export_matlab/pyt/{vel_product_short}/{year}/'
s1TC_csvpath = f'export_matlab/s1TC/{vel_product_short}/{year}/'

# Collect all csv files from the given directories (fn for filename) and sort
# them right away, since os.listdir returns the entries in arbitrary order.
xTC_filenames = sorted(fn for fn in os.listdir(xTC_csvpath) if fn.endswith('.csv'))
yTC_filenames = sorted(fn for fn in os.listdir(yTC_csvpath) if fn.endswith('.csv'))
pxt_filenames = sorted(fn for fn in os.listdir(pxt_csvpath) if fn.endswith('.csv'))
pyt_filenames = sorted(fn for fn in os.listdir(pyt_csvpath) if fn.endswith('.csv'))
s1TC_filenames = sorted(fn for fn in os.listdir(s1TC_csvpath) if fn.endswith('.csv'))

In [None]:
# print check
#xTC_filenames
#yTC_filenames
#pxt_filenames
#pyt_filenames
#s1TC_filenames

In [None]:
# Retrieve the timestamps from the filenames. They are called timestrings
# since they remain plain strings and are not converted to datetime objects.
# The timestamp is the fourth '_'-separated field of a filename, cut at the
# first '.'.
xTC_timestrings = [(fn.split('_')[3]).split('.')[0] for fn in xTC_filenames]
yTC_timestrings = [(fn.split('_')[3]).split('.')[0] for fn in yTC_filenames]
pxt_timestrings = [(fn.split('_')[3]).split('.')[0] for fn in pxt_filenames]
pyt_timestrings = [(fn.split('_')[3]).split('.')[0] for fn in pyt_filenames]
s1TC_timestrings = [(fn.split('_')[3]).split('.')[0] for fn in s1TC_filenames]

# assert that for all components data is given for the same timestamps
assert xTC_timestrings==yTC_timestrings==pxt_timestrings==pyt_timestrings==s1TC_timestrings, 'mismatching timestamps'

# define one list for all timestamps
timestrings = xTC_timestrings

# fail early with a clear message instead of an IndexError on filenames[0] below
assert timestrings, 'no csv files found for the selected product and year'

# Assert that timestrings are unique and in order.
# Compare against a plain sorted list of the unique values: comparing the list
# with the output of np.unique() is a shape mismatch as soon as duplicates
# exist, which raises an error in recent NumPy versions instead of triggering
# this assertion.
assert timestrings == sorted(set(timestrings)), 'unordered or duplicate timestamps'

# Get the prefix of the csv files excluding the timestamp, i.e. everything in
# front of '<timestring>.csv'. The cut-off is derived from the actual length
# of the timestring instead of a fixed number of characters.
suffix_length = len(timestrings[0]) + len('.csv')

xTC_prefix = xTC_filenames[0][:-suffix_length]
yTC_prefix = yTC_filenames[0][:-suffix_length]
pxt_prefix = pxt_filenames[0][:-suffix_length]
pyt_prefix = pyt_filenames[0][:-suffix_length]
s1TC_prefix = s1TC_filenames[0][:-suffix_length]

In [None]:
# Drop the per-component helper lists: from here on only `timestrings` and the
# *_prefix variables are used to address the csv files.
del xTC_filenames, xTC_timestrings
del yTC_filenames, yTC_timestrings
del pxt_filenames, pxt_timestrings
del pyt_filenames, pyt_timestrings
del s1TC_filenames, s1TC_timestrings

# Import & concatenate DataFrames

Read the data from the respective csv files and put it into a pandas DataFrame.

In [None]:
start_timer = time.perf_counter()

# Collect the DataFrames of all snapshots in lists and concatenate them once
# after the loop. Concatenating onto a growing DataFrame inside the loop would
# copy all previously gathered rows in every iteration, and seeding it with an
# empty DataFrame triggers a pandas FutureWarning about empty entries.
xTC_frames = []
yTC_frames = []
pxt_frames = []
pyt_frames = []
s1TC_frames = []

for timestring in timestrings:

    # cdf for current DataFrame
    # xTC: x-component trap cores, yTC: y-component trap cores - vector
    pd_xTC_cdf = pd.read_csv(xTC_csvpath + xTC_prefix + timestring + '.csv', header=None)
    pd_yTC_cdf = pd.read_csv(yTC_csvpath + yTC_prefix + timestring + '.csv', header=None)

    # pxt: x-coordinates of TRAPs, pyt: y-coordinates of TRAPs - size: [#points along a TRAP, #TRAPs]
    # Transpose directly to attain the TRAP number along the index and point numbers as columns.
    # Coordinates with NaN indicate regions of the tensor lines that do not satisfy the desired attraction properties.
    pd_pxt_cdf = pd.read_csv(pxt_csvpath + pxt_prefix + timestring + '.csv', header=None).T
    pd_pyt_cdf = pd.read_csv(pyt_csvpath + pyt_prefix + timestring + '.csv', header=None).T

    # s1TC: attraction rate at trap cores - vector
    pd_s1TC_cdf = pd.read_csv(s1TC_csvpath + s1TC_prefix + timestring + '.csv', header=None)

    # assert that coordinate arrays are of same shape
    assert pd_xTC_cdf.shape==pd_yTC_cdf.shape, 'TRAP cores: different number of x- and y-coordinates'
    assert pd_pxt_cdf.shape==pd_pyt_cdf.shape, 'TRAP curves: different number of x- and y-coordinates'

    # assert that number of TRAP cores equals number of TRAP curves/tensor lines and number of attraction rates
    assert pd_xTC_cdf.shape[0]==pd_pxt_cdf.shape[0], 'mismatch number of TRAP cores and curves'
    assert pd_xTC_cdf.shape==pd_s1TC_cdf.shape, 'mismatch number of TRAP cores and number of attraction rates'

    # insert time column to make later retracing of individual TRAPs possible
    pd_xTC_cdf.insert(0, 'time', timestring)
    pd_yTC_cdf.insert(0, 'time', timestring)
    pd_pxt_cdf.insert(0, 'time', timestring)
    pd_pyt_cdf.insert(0, 'time', timestring)
    pd_s1TC_cdf.insert(0, 'time', timestring)

    # remember the current dataframes for the concatenation after the loop
    xTC_frames.append(pd_xTC_cdf)
    yTC_frames.append(pd_yTC_cdf)
    pxt_frames.append(pd_pxt_cdf)
    pyt_frames.append(pd_pyt_cdf)
    s1TC_frames.append(pd_s1TC_cdf)


# Build the main DataFrames with one concatenation per component. The index of
# every snapshot is kept as is. pd.concat refuses an empty list, so fall back
# to an empty DataFrame if there was no snapshot at all.
pd_xTC_df = pd.concat(xTC_frames) if xTC_frames else pd.DataFrame()
pd_yTC_df = pd.concat(yTC_frames) if yTC_frames else pd.DataFrame()
pd_pxt_df = pd.concat(pxt_frames) if pxt_frames else pd.DataFrame()
pd_pyt_df = pd.concat(pyt_frames) if pyt_frames else pd.DataFrame()
pd_s1TC_df = pd.concat(s1TC_frames) if s1TC_frames else pd.DataFrame()

# index is the number of a TRAP at the given timestep
# together with the time column this allows for a unique identification of a TRAP and for later retracing
pd_xTC_df.index.name = 'TRAP_ID'
pd_yTC_df.index.name = 'TRAP_ID'
pd_pxt_df.index.name = 'TRAP_ID'
pd_pyt_df.index.name = 'TRAP_ID'
pd_s1TC_df.index.name = 'TRAP_ID'

stop_timer = time.perf_counter()
print(f'task time: {stop_timer - start_timer:0.4f} seconds')

# Export to .csv

In [None]:
# Export one csv file per component to
# export_csv_TRAPS/<velocity product>/<year>/pd_<component>_<year>_df.csv
export_dir = 'export_csv_TRAPS/' + vel_product_short + '/' + year + '/'

# Create the target directory if it does not exist yet, so that the export
# cannot fail on a missing folder after the import step has already run.
os.makedirs(export_dir, exist_ok=True)

# header=True writes the column labels, index=True writes the TRAP_ID index
for component, component_df in (('xTC', pd_xTC_df),
                                ('yTC', pd_yTC_df),
                                ('pxt', pd_pxt_df),
                                ('pyt', pd_pyt_df),
                                ('s1TC', pd_s1TC_df)):
    component_df.to_csv(export_dir + 'pd_' + component + '_' + year + '_df.csv', header=True, index=True)

# End sound

In [None]:
# Take the stop reading of the script timer and report the time elapsed since
# the start reading, together with the processed year.
stop_script_timer = time.perf_counter()
print(f'overall computation time: {stop_script_timer - start_script_timer:0.3f} seconds ({year})')

In [None]:
# Play a short chord once the script has finished, adapted from
# https://gist.github.com/tamsanh/a658c1b29b8cba7d782a8b3aed685a24

framerate = 4410
play_time_seconds = 1

# sample positions of the audio signal
t = np.linspace(0, play_time_seconds, framerate*play_time_seconds)

# Superpose three sine tones to a D major chord (293, 369, 440).
# Use (391, 493, 587) instead for a G major chord.
audio_data = sum(np.sin(2*np.pi*tone*t) for tone in (293, 369, 440))

# keep this call as the last expression of the cell so that the notebook
# displays the audio widget
Audio(audio_data, rate=framerate, autoplay=True)