# z-score normalization & csv files for plotting

In [2]:
# import packages
import numpy as np
import pandas as pd
import glob

In [3]:
# define functions
def zsc(s):
    '''z-score normalization in log space (Ardid et al., 2022)

    s is expected to be a pandas object (the body uses .replace, .dropna
    and .fillna). The result has the same index as s.

    Steps:
      1. take log10 of the data;
      2. compute mean / std / min of the log values, ignoring +-inf and NaN;
      3. standardize the log values with that mean and std;
      4. fill entries that are NaN after standardization;
      5. transform back with 10**z.

    NOTE(review): the fill value in step 4 is the minimum of the log10
    values, not the minimum z-score -- confirm this is what the reference
    method intends before changing it.
    '''
    # log transform once; reused for the statistics and the standardization
    log_values = np.log10(s)

    # statistics are taken over finite, non-missing log values only
    finite_logs = log_values.replace([np.inf, -np.inf], np.nan).dropna()
    center = np.mean(finite_logs)
    spread = np.std(finite_logs)
    fill_value = np.min(finite_logs)

    # standardize in log space, fill the gaps, then go back to linear space
    standardized = ((log_values - center) / spread).fillna(fill_value)
    return 10**standardized

def zsc2(s):
    '''apply z-score normalization followed by a 2-sample rolling minimum

    s is passed to zsc() and the result is smoothed with
    rolling(window=2).min(). The first sample has no predecessor, so the
    rolling minimum leaves it NaN; it is overwritten with the second sample.

    Returns a new object with the same index as s. Inputs with fewer than two
    samples are returned without the back-fill (the first value stays NaN).
    '''
    s=zsc(s)
    s=s.rolling(window=2).min()
    # positional access: works for any index type (datetime, non-zero-based
    # integers, ...) instead of relying on integer keys being looked up by
    # position, which pandas has deprecated for non-integer indexes
    if len(s) > 1:
        s.iloc[0]=s.iloc[1]
    return s

## Add z-score normalization

In [6]:
year = 2004
#============================================================================================
# calculate z-score normalization and save one file per station
# NOTE(review): the data root below is an absolute, machine-specific path; consider moving it
# to a configuration cell.
all_files = sorted(glob.glob('/data/wsd03/data_manuela/MtStHelens/RSAM_DSAR/tmp_{}/*/*.csv'.format(year)))
sta_list = np.unique([file.split('/')[-2] for file in all_files]) # station list based on the files
# raw time series; each one gets a companion column named 'zsc2_<name>'
zsc_columns = ['rsam', 'mf', 'hf', 'dsar', 'ldsar', 'vsar', 'rms', 'rmes', 'pgv', 'pga']
for sta in sta_list:
    print(sta)
    # all days (one year) of one station
    sta_files = sorted(glob.glob('/data/wsd03/data_manuela/MtStHelens/RSAM_DSAR/tmp_{}/{}/*.csv'.format(year,sta)))
    li = [pd.read_csv(filename) for filename in sta_files] # one frame per day

    df = pd.concat(li, axis=0, ignore_index=True) # list to df
    df.set_index('time',inplace=True) # index to time
    df.index = pd.to_datetime(df.index).tz_localize(None) # time to datetime
    # sort_index() returns a new frame; it must be assigned, otherwise the rolling
    # minimum inside zsc2 would run on whatever order the files were concatenated in
    df = df.sort_index() # sort by date (index)

    # compute transform for each column (time series)
    for col in zsc_columns:
        df['zsc2_{}'.format(col)] = zsc2(df[col])

    # NOTE(review): the multi-year cell reads '../data/*_<sta>_extended2.csv' while the line
    # below writes to '../'; confirm the output directory before enabling it.
#     df.to_csv('../{}_{}_extended2.csv'.format(year,sta)) # save csv file with z-score normalization
print('Done')

BLIS
CDF
EDM
ELK
FL2
HSR
JRO
JUN
SEP
SHW
SOS
STD
TDL
YEL


## Create multi-year long files

In [5]:
sta = 'SEP'
#============================================================================================
# notice, you already need the extended2.csv file for the station
all_files = sorted(glob.glob('../data/*_{}_extended2.csv'.format(sta)))
print(all_files) # check how many years you have
if not all_files:
    # fail with a clear message instead of pd.concat's "No objects to concatenate"
    raise FileNotFoundError('no ../data/*_{}_extended2.csv files found'.format(sta))
li = [pd.read_csv(filename) for filename in all_files] # one frame per year

df = pd.concat(li, axis=0, ignore_index=True) # list to df
df.set_index('time',inplace=True) # index to time
df.index = pd.to_datetime(df.index).tz_localize(None) # time to datetime
# sort_index() returns a new frame; assign it so that df (and the csv written
# from it) is really in chronological order, not only the displayed copy
df = df.sort_index() # sort by date (index)
# df.to_csv('../{}_extended2_long.csv'.format(sta)) # save csv file with multiple years of one station
df # display the sorted multi-year frame

['../data/2001_SEP_extended2.csv', '../data/2002_SEP_extended2.csv', '../data/2003_SEP_extended2.csv', '../data/2004_SEP_extended2.csv', '../data/2005_SEP_extended2.csv']


Unnamed: 0_level_0,rsam,mf,hf,dsar,ldsar,vsar,rms,rmes,pgv,pga,zsc2_rsam,zsc2_mf,zsc2_hf,zsc2_dsar,zsc2_ldsar,zsc2_vsar,zsc2_rms,zsc2_rmes,zsc2_pgv,zsc2_pga
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2001-01-16 00:00:00,7.643616,10.148158,22.800724,0.951912,1.497074,0.445080,0.000002,9.028962e-07,0.000017,0.003239,0.049730,0.037636,0.041057,0.348076,1.690316,0.180075,0.317108,0.066492,0.895926,1.566736
2001-01-16 00:10:00,11.746288,15.047115,34.797393,0.924881,1.472006,0.432421,0.000002,1.573785e-06,0.000015,0.003009,0.049730,0.037636,0.041057,0.348076,1.690316,0.180075,0.317108,0.066492,0.895926,1.566736
2001-01-16 00:20:00,11.894070,15.834418,34.756411,0.951302,1.441206,0.455583,0.000002,1.554787e-06,0.000010,0.002017,0.186347,0.136652,0.211670,0.348076,1.403288,0.180075,0.736729,1.024592,0.322046,0.449747
2001-01-16 00:30:00,11.919201,15.761260,34.893236,0.958039,1.461086,0.451700,0.000002,1.553760e-06,0.000010,0.001810,0.193649,0.159053,0.211670,0.440817,1.403288,0.257086,0.736729,1.021190,0.322046,0.321061
2001-01-16 00:40:00,11.704302,15.752302,35.792670,0.932227,1.429495,0.440099,0.000002,1.615558e-06,0.000010,0.001859,0.184306,0.158757,0.214930,0.371954,1.306057,0.207895,0.755762,1.021190,0.326749,0.321061
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2005-12-31 23:00:00,7899.104107,3289.391730,2192.766643,2.713981,5.567687,1.500110,0.000682,4.170915e-04,0.002448,0.077652,4.286045,1.925664,1.162529,7.326932,55.923640,6.227264,15.258844,20.831208,6.716878,2.288967
2005-12-31 23:10:00,8031.415830,3115.736426,1869.483608,2.869433,5.890154,1.666629,0.000759,5.161932e-04,0.002610,0.034503,4.286045,1.787688,1.013998,7.326932,64.251868,6.227264,15.258844,20.831208,6.716878,0.882727
2005-12-31 23:20:00,7732.606309,2882.688483,2040.556146,2.394955,6.293501,1.412697,0.000728,4.768560e-04,0.002395,0.046167,4.163459,1.606977,1.013998,3.243699,88.116419,4.367806,16.808116,25.329965,6.480586,0.882727
2005-12-31 23:30:00,8238.184220,3312.856048,2120.743760,2.645054,5.904942,1.562120,0.000619,4.095723e-04,0.002265,0.062203,4.163459,1.606977,1.162062,3.243699,89.364664,4.367806,13.204951,20.285120,5.922979,1.577085
