# z-score normalization & csv files for plotting

In [28]:
# import packages
import numpy as np
import pandas as pd
import glob

In [29]:
# define functions
def zsc(s):
    '''z-score normaliztion in log space (Ardid et al., 2022)'''
    # log transform data
    log_s = np.log10(s).replace([np.inf, -np.inf], np.nan).dropna()
    
    # compute mean/std/min
    mn = np.mean(log_s)
    std = np.std(log_s)
    minzsc = np.min(log_s)                                                    

    # Calculate percentile
    s=(np.log10(s)-mn)/std
    s=s.fillna(minzsc)
    s=10**s
    return s

def zsc2(s):
    '''apply z-score normalization with rolling'''
    s=zsc(s)
    s=s.rolling(window=2).min()
    s[0]=s[1]
    return s

## Add z-score normalization

In [30]:
# year = 2009
#============================================================================================

for year in range(2000,2022+1):
    # calculate z-score normalization and save one file per station
#     all_files = sorted(glob.glob('/data/wsd03/data_manuela/MtStHelens/RSAM_DSAR/tmp_{}/*/*.csv'.format(year)))
#     sta_list = np.unique([file.split('/')[-2] for file in all_files]) # station list based on the files
    sta_list = ['CDF','SEP','VALT','HOA','LOO','USFR','REM','SWFL','SFW2']
    for sta in sta_list:
        print(sta)
        # all days (one year) of one station
        sta_files = sorted(glob.glob('/data/wsd03/data_manuela/MtStHelens/RSAM_DSAR/tmp_{}/{}/*.csv'.format(year,sta)))
        if sta_files 
        li = [] # empty list
        for filename in sta_files:
            frame = pd.read_csv(filename) # read one day
            li.append(frame) # all days (one year) of one station

        df = pd.concat(li, axis=0, ignore_index=True) # list to df
        df.set_index('time',inplace=True) # index to time
        df.index = pd.to_datetime(df.index).tz_localize(None) # time to datetime
        df.sort_index() # sort by date (index)

        # compute transform for each column (time series)
        df['zsc2_rsam'] = zsc2(df['rsam'])
        df['zsc2_mf'] = zsc2(df['mf'])
        df['zsc2_hf'] = zsc2(df['hf'])
        df['zsc2_dsar'] = zsc2(df['dsar'])
        df['zsc2_ldsar'] = zsc2(df['ldsar'])
        df['zsc2_vsar'] = zsc2(df['vsar'])
        df['zsc2_rms'] = zsc2(df['rms'])
        df['zsc2_rmes'] = zsc2(df['rmes'])
        df['zsc2_pgv'] = zsc2(df['pgv'])
        df['zsc2_pga'] = zsc2(df['pga'])

        df.to_csv('/home/koepflma/project1/Mt-St-Helens/RSAM_DSAR/data/{}_{}_extended2.csv'.format(year,sta)) # save csv file with z-score normalization
    #     df.to_csv('../{}_{}_extended2.csv'.format(year,sta)) # save csv file with z-score normalization
    print('***{} done***'.format(year))

CDF
SEP
VALT


ValueError: No objects to concatenate

In [32]:
if sta_files:
    print('not empty')
else:
    print('empty')

empty


## Create multi-year long files

In [19]:
sta = 'SEP'
#============================================================================================
# notice, you already need the extended2.csv file for the station
all_files = sorted(glob.glob('/home/koepflma/project1/Mt-St-Helens/RSAM_DSAR/data/*_{}_extended2.csv'.format(sta)))
print([file.split('/')[-1] for file in all_files]) # check how many years you have
li = [] # empty list
for filename in all_files:
    frame = pd.read_csv(filename) # read one year
    li.append(frame) # all years of one station

df = pd.concat(li, axis=0, ignore_index=True) # list to df
df.set_index('time',inplace=True) # index to time
df.index = pd.to_datetime(df.index).tz_localize(None) # time to datetime
df.sort_index() # sort by date (index)
df.to_csv('/home/koepflma/project1/Mt-St-Helens/RSAM_DSAR/data/{}_extended2_long2.csv'.format(sta)) # save csv file with multiple years of one station
print('***done***')

['2000_SEP_extended2.csv', '2001_SEP_extended2.csv', '2002_SEP_extended2.csv', '2003_SEP_extended2.csv', '2004_SEP_extended2.csv', '2005_SEP_extended2.csv', '2006_SEP_extended2.csv', '2007_SEP_extended2.csv', '2008_SEP_extended2.csv', '2009_SEP_extended2.csv', '2010_SEP_extended2.csv', '2011_SEP_extended2.csv', '2012_SEP_extended2.csv', '2013_SEP_extended2.csv', '2014_SEP_extended2.csv', '2015_SEP_extended2.csv', '2016_SEP_extended2.csv', '2017_SEP_extended2.csv', '2018_SEP_extended2.csv', '2019_SEP_extended2.csv', '2020_SEP_extended2.csv', '2021_SEP_extended2.csv', '2022_SEP_extended2.csv']
***done***


In [27]:
n1 = np.array([np.array([1,2,3,4]),np.array([1,2,3,4])])
n1

array([[1, 2, 3, 4],
       [1, 2, 3, 4]])