### Tyendinaga Data
##### Notes:
- data exported from 'Student' computer
- using data with filename Tyendinaga1_001.asc as main data source to make plots with
- Tyendinaga1_001.asc data was recorded on Oct. 26, 2025 from 18:07:12 to 18:27:12

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import signal
import scipy.stats as ss
from sklearn.preprocessing import MinMaxScaler
import scipy.fft
import pywt
from scipy.signal.windows import hann
from scipy import interpolate
from scipy.ndimage import uniform_filter
from scipy.signal import ShortTimeFFT
from scipy.signal import savgol_filter
from scipy.signal import hilbert

##### DataFrame --> File Name Legend:
- df --> Tyendinaga1_001.asc
- df2 --> Tyendinaga2_001.asc
- df3 --> Tyendinaga3_001.asc
- df4 --> Tyendinaga4_001.asc
- df5 --> Tyendinaga5_001.asc

In [None]:
# Filename: Tyendinaga1
# Whitespace-delimited export: the first 32 lines are skipped and the nine
# columns are named explicitly (NS, EW and Z are the columns of interest).
column_names = [
    'NS', 'EW', 'Z',
    'nsL', 'ewL', 'zL',
    'aY', 'aX', 'aZ',
]
df = pd.read_table(
    'Tyendinaga1_001.asc',
    names=column_names,
    skiprows=32,
    delimiter=r'\s+',
    encoding='latin-1',
)
#df.head().style

In [None]:
# Filename: Tyendinaga2
# The remaining Tyendinaga files are loaded so that comparative plots between
# datasets can be made. Same layout as the first file: 32 skipped lines,
# whitespace-delimited, nine named columns.
column_names2 = [
    'NS', 'EW', 'Z',
    'nsL', 'ewL', 'zL',
    'aY', 'aX', 'aZ',
]
df2 = pd.read_table(
    'Tyendinaga2_001.asc',
    names=column_names2,
    skiprows=32,
    delimiter=r'\s+',
    encoding='latin-1',
)
#df2.head().style

In [None]:
# Filename: Tyendinaga3
# Whitespace-delimited export: 32 skipped lines, nine named columns.
column_names3 = [
    'NS', 'EW', 'Z',
    'nsL', 'ewL', 'zL',
    'aY', 'aX', 'aZ',
]
df3 = pd.read_table(
    'Tyendinaga3_001.asc',
    names=column_names3,
    skiprows=32,
    delimiter=r'\s+',
    encoding='latin-1',
)
#df3.head().style

In [None]:
# Filename: Tyendinaga4
# Whitespace-delimited export: 32 skipped lines, nine named columns.
column_names4 = [
    'NS', 'EW', 'Z',
    'nsL', 'ewL', 'zL',
    'aY', 'aX', 'aZ',
]
df4 = pd.read_table(
    'Tyendinaga4_001.asc',
    names=column_names4,
    skiprows=32,
    delimiter=r'\s+',
    encoding='latin-1',
)
#df4.head().style

In [None]:
# Filename: Tyendinaga5
# Whitespace-delimited export: 32 skipped lines, nine named columns.
column_names5 = [
    'NS', 'EW', 'Z',
    'nsL', 'ewL', 'zL',
    'aY', 'aX', 'aZ',
]
df5 = pd.read_table(
    'Tyendinaga5_001.asc',
    names=column_names5,
    skiprows=32,
    delimiter=r'\s+',
    encoding='latin-1',
)
#df5.head().style

In [None]:
# Missing-value check: one line is printed per dataframe (df, df2, ..., df5),
# and each line reads False when that dataframe contains no null values.
any_null = df.isnull().any().any()
print(any_null)
#df.info()
print(*(frame.isnull().any().any() for frame in (df2, df3, df4, df5)), sep='\n')

##### Columns of Interest:
- NS: North-South component of motion
- EW: East-West component of motion
- Z: Vertical (Z-axis) component of motion

In [None]:
#print(df.columns)
# Add an elapsed-time column (in seconds) to every dataframe.
# The time of each row is its index label divided by the sampling rate.
# NOTE(review): this assumes each dataframe still has the default 0..N-1 index
# produced by read_table - confirm if any frame is re-indexed or filtered.
sampling_rate = 1024  # Hz (same for all data files)
# Shared list of all loaded dataframes; it is iterated here to add the column.
dataframes = [df, df2, df3, df4, df5]
for i, dataframe in enumerate(dataframes):
    time = dataframe.index/sampling_rate
    # Column assignment mutates the dataframe in place, so df ... df5 all gain 'time (s)'.
    dataframe['time (s)'] = time
    # Quick visual check of the first rows, including the new column.
    print(dataframe.head())

***
### Plotting the Time Series
##### Notes:
- Plot the NS, EW, and Z data separately

In [None]:
# reminder: dataframes = [df, df2, df3, df4, df5]
def _draw_raw_component(ax, minutes, values, label, color, ylabel, title):
    """Draw one raw component trace on `ax` with the styling shared by all panels."""
    ax.plot(minutes, values, label=label, color=color)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xlabel('Time (min)')
    ax.grid(True)


# One figure per dataframe, with the NS, EW and Z traces stacked vertically.
for i, dframe in enumerate(dataframes):
    # The first dataframe is named "df"; the others are "df2" ... "df5".
    print('dataframe: df' if i == 0 else f'dataframe: df{i+1}')

    fig, axes = plt.subplots(3, 1, figsize=(10, 10))
    time_minutes = dframe['time (s)']/60

    _draw_raw_component(axes[0], time_minutes, dframe['NS'],
                        'North-South', 'forestgreen',
                        'North-South Component', 'North-South Data')
    _draw_raw_component(axes[1], time_minutes, dframe['EW'],
                        'East-West', 'rebeccapurple',
                        'East-West Component', 'East-West Data')
    _draw_raw_component(axes[2], time_minutes, dframe['Z'],
                        'Z/Vertical Component', 'royalblue',
                        'Z Component', 'Z-Component Data')

    plt.tight_layout()
    plt.show()

***
### Normalizing, Detrending and Shifting Data
##### Notes:
- data range of time series was normalized using MinMaxScaler from sklearn
- data was linearly detrended using signal.detrend from scipy
- the mean of the data was shifted to approximately zero

In [None]:
# Linearly detrend data, normalize and shift to zero mean:
# reminder: dataframes = [df, df2, df3, df4, df5]
# For every dataframe this adds the columns <C>_Detrended, <C>_Normalized and
# <C>_Normalized_Shifted (C = NS, EW, Z), plots the final "DNS" series and
# prints summary statistics for it.
for i, dframe in enumerate(dataframes):
    if i==0:
        print('dataframe: df')
    else:
        print(f'dataframe: df{i+1}')

    # Detrend: remove the least-squares straight line from each component
    dframe['NS_Detrended'] = signal.detrend(dframe['NS'], type='linear') # North-South data
    dframe['EW_Detrended'] = signal.detrend(dframe['EW'], type='linear') # East-West data
    dframe['Z_Detrended'] = signal.detrend(dframe['Z'], type='linear') # Z-component data

    # Normalize: the scaler is re-fitted for every column, so each component is
    # rescaled independently (the printed ranges below confirm the result)
    scaler = MinMaxScaler()
    dframe['NS_Normalized'] = scaler.fit_transform(dframe['NS_Detrended'].values.reshape(-1, 1)).flatten() # North-South
    dframe['EW_Normalized'] = scaler.fit_transform(dframe['EW_Detrended'].values.reshape(-1, 1)).flatten() # East-West
    dframe['Z_Normalized'] = scaler.fit_transform(dframe['Z_Detrended'].values.reshape(-1, 1)).flatten() # Z-component
    print(f"NS Min-Max normalized range: [{dframe['NS_Normalized'].min():.3f}, {dframe['NS_Normalized'].max():.3f}]")
    print(f"EW Min-Max normalized range: [{dframe['EW_Normalized'].min():.3f}, {dframe['EW_Normalized'].max():.3f}]")
    print(f"Z Min-Max normalized range: [{dframe['Z_Normalized'].min():.3f}, {dframe['Z_Normalized'].max():.3f}]")

    # Shift Mean:
    # Subtracting 0.5 centres the normalized range on zero; the mean itself is
    # only approximately zero, which is why it is printed for inspection.
    dframe['NS_Normalized_Shifted'] = (dframe['NS_Normalized'] - 0.5)
    dframe['EW_Normalized_Shifted'] = (dframe['EW_Normalized'] - 0.5)
    dframe['Z_Normalized_Shifted'] = (dframe['Z_Normalized'] - 0.5)
    print(f"New range for NS: [{dframe['NS_Normalized_Shifted'].min():.3f}, {dframe['NS_Normalized_Shifted'].max():.3f}]")
    print(f"New mean for NS: {dframe['NS_Normalized_Shifted'].mean():.6f}")
    print(f"New range for EW: [{dframe['EW_Normalized_Shifted'].min():.3f}, {dframe['EW_Normalized_Shifted'].max():.3f}]")
    print(f"New mean for EW: {dframe['EW_Normalized_Shifted'].mean():.6f}")
    print(f"New range for Z: [{dframe['Z_Normalized_Shifted'].min():.3f}, {dframe['Z_Normalized_Shifted'].max():.3f}]")
    print(f"New mean for Z: {dframe['Z_Normalized_Shifted'].mean():.6f}")

    # Plot:
    fig, axes = plt.subplots(3, 1, figsize=(10, 10))
    time_minutes = dframe['time (s)']/60
    # North-South data:
    axes[0].plot(time_minutes, dframe['NS_Normalized_Shifted'], label='North-South', color='forestgreen')
    axes[0].set_ylabel('North-South Component')
    axes[0].set_title('DNS North-South Data')
    axes[0].set_xlabel('Time (min)')
    axes[0].grid(True)

    # East-West data:
    axes[1].plot(time_minutes, dframe['EW_Normalized_Shifted'], label='East-West', color='rebeccapurple')
    axes[1].set_ylabel('East-West Component')
    axes[1].set_title('DNS East-West Data')
    axes[1].set_xlabel('Time (min)')
    axes[1].grid(True)

    # Z data:
    axes[2].plot(time_minutes, dframe['Z_Normalized_Shifted'], label='Z/Vertical Component', color='royalblue')
    axes[2].set_ylabel('Z Component')
    axes[2].set_title('DNS Z-Component Data')
    axes[2].set_xlabel('Time (min)')
    axes[2].grid(True)

    plt.tight_layout()
    plt.show()
    print("DNS = Detrended, Normalized, Shifted")
    print("")
    # Stats for plots
    print("North-South Stats:")
    print(ss.describe(dframe['NS_Normalized_Shifted']))
    print("median:", dframe["NS_Normalized_Shifted"].median())
    # Use the loop's dataframe here: reading from `df` would report the first
    # file's North-South mode for every dataframe.
    print("mode:", ss.mode(dframe["NS_Normalized_Shifted"]))
    print("standard deviation:", dframe["NS_Normalized_Shifted"].std())
    print("")
    print("East-West Stats:")
    print(ss.describe(dframe['EW_Normalized_Shifted']))
    print("median:", dframe["EW_Normalized_Shifted"].median())
    print("mode:", ss.mode(dframe["EW_Normalized_Shifted"]))
    print("standard deviation:", dframe["EW_Normalized_Shifted"].std())
    print("")
    print("Z-component Stats:")
    print(ss.describe(dframe['Z_Normalized_Shifted']))
    print("median:", dframe["Z_Normalized_Shifted"].median())
    print("mode:", ss.mode(dframe["Z_Normalized_Shifted"]))
    print("standard deviation:", dframe["Z_Normalized_Shifted"].std())
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

***
### Subsampling the Time Series
##### Notes:
- the original sampling rate for the data is 1024 Hz
- different step sizes were applied to the normalized, detrended and mean-shifted (DNS) data for subsampling
- comment out certain series before plotting to focus on a specific step value
- adjust x-axis limits to zoom in on certain times

#### "percent_check" Function for Choosing Subsampling Step Size:
##### Notes:
- this method uses interpolation to compare subsampled data to the full DNS data
- the absolute error between the full DNS data and the reconstructed subsampled data is calculated and divided by the full data range
- this result is then used to determine if the error is within the selected threshold (i.e. 5%)

In [None]:
# First use percent_check function to choose step-size (i.e. decimation_factor) to use for subsample function
def percent_check(data, column, step, tolerance=0.05, min_pct_within=95):
    """Check how well an index-subsampled series reproduces the full series.

    Every `step`-th sample of `data[column]` is kept, the kept samples are
    linearly interpolated (and linearly extrapolated past the last kept sample)
    back onto the original sample positions, and the absolute reconstruction
    error is expressed as a fraction of the full data range.

    data: dataframe to use
    column: column of interest for y values
    step: data step to use for subsampling (keep every `step`-th sample)
    tolerance: error tolerance expressed as a decimal fraction of the data range
    min_pct_within: minimum percentage of points that must be within
        `tolerance` for the check to pass (default 95)

    Returns a dict with the step, the tolerance, the pass/fail flag, the
    percentage of points within tolerance, the point counts, the data range and
    the maximum normalized error (percentages and the range are formatted strings).
    """
    # Original series, positioned by sample number
    x_original = np.arange(len(data))
    y_original = data[column].values

    # Subsampled series: every `step`-th sample, at its original position
    x_subsampled = np.arange(0, len(data), step)
    y_subsampled = data[column].iloc[::step].values

    # Interpolate subsampled data back to the original sample positions
    f_interp = interpolate.interp1d(x_subsampled, y_subsampled, kind='linear', fill_value='extrapolate')
    y_reconstructed = f_interp(x_original)

    # Absolute reconstruction error at every original sample
    abs_errors = np.abs(y_original - y_reconstructed)

    # Normalize errors by the data range (the scale of the data) instead of
    # by the individual values
    data_range = np.max(y_original) - np.min(y_original)
    if data_range == 0:
        # A constant series is reproduced exactly by linear interpolation, so
        # the error is zero; dividing by the zero range would give NaN instead.
        normalized_errors = np.zeros(abs_errors.shape, dtype=float)
    else:
        normalized_errors = abs_errors / data_range

    # Percentage of points whose normalized error is within the tolerance
    within_tolerance = normalized_errors <= tolerance
    pct_within = (np.sum(within_tolerance) / len(within_tolerance)) * 100

    # Overall pass/fail
    passes = pct_within >= min_pct_within

    results = {'step': step,
        'tolerance': f'{tolerance*100}%',
        'passes': passes,
        'points_within_tolerance': f'{pct_within: .4f}%',
        'num_original_points': len(y_original),
        'num_subsampled_points': len(y_subsampled),
        'data_range': f'{data_range: .4f}',
        'max_normalized_error': f'{(np.max(normalized_errors)*100): .4f}%' }

    return results
# step_1 = 12
def _report_step_check(frame, frame_tag):
    """Run percent_check (step 7, 5% tolerance) on the NS, EW and Z DNS columns.

    Each result is printed right after it is computed, followed by a blank
    line; the three result dicts are returned in NS, EW, Z order.
    """
    outcomes = []
    for prefix in ('NS', 'EW', 'Z'):
        outcome = percent_check(frame, f'{prefix}_Normalized_Shifted', 7, tolerance=0.05)
        print(f"{prefix} results 1 ({frame_tag}):", outcome)
        outcomes.append(outcome)
    print("")
    return outcomes


# Results for df:
NS_results_step_1, EW_results_step_1, Z_results_step_1 = _report_step_check(df, 'df')
# Results for df2:
NS_results_step_1_df2, EW_results_step_1_df2, Z_results_step_1_df2 = _report_step_check(df2, 'df2')
# Results for df3:
NS_results_step_1_df3, EW_results_step_1_df3, Z_results_step_1_df3 = _report_step_check(df3, 'df3')
# Results for df4:
#   (earlier trials: EW passes with a step size of 10 but not 12;
#    Z passes the check with a step size of 12 but not 10)
NS_results_step_1_df4, EW_results_step_1_df4, Z_results_step_1_df4 = _report_step_check(df4, 'df4')
# Results for df5:
NS_results_step_1_df5, EW_results_step_1_df5, Z_results_step_1_df5 = _report_step_check(df5, 'df5')
# Note: 7 is the largest stepsize that passes the tolerance check everywhere (i.e. for all dataframes), simultaneously

#### percent_check results:
- The Z data consistently had the lowest amount of points within the error tolerance compared to NS and EW data
- A step size of 7 was the largest that passed the percent_check test for NS, EW, and Z data simultaneously for all dataframes (plotted below)

In [None]:
# plotting time series using percent_check step sizes to compare to full DNS plots
def _overlay_step7(ax, minutes, series, full_label, sub_label, sub_color, ylabel, title):
    """Plot the full series in red and its every-7th-sample subset on top of it."""
    ax.plot(minutes, series, label=full_label, color='red')
    ax.plot(minutes[::7], series[::7], label=sub_label, color=sub_color)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xlabel('Time (min)')
    ax.legend()
    ax.grid(True)


# One figure per dataframe, with NS, EW and Z panels stacked vertically.
for i, dframe in enumerate(dataframes):
    # The first dataframe is named "df"; the others are "df2" ... "df5".
    print('dataframe: df' if i == 0 else f'dataframe: df{i+1}')

    fig, axes = plt.subplots(3, 1, figsize=(10, 10))
    time_minutes = dframe['time (s)']/60

    _overlay_step7(axes[0], time_minutes, dframe['NS_Normalized_Shifted'],
                   'North-South (Full)', 'North-South (Step=7)', 'forestgreen',
                   'North-South Component', 'Subsampled and Full DNS North-South Data')
    _overlay_step7(axes[1], time_minutes, dframe['EW_Normalized_Shifted'],
                   'East-West (Full)', 'East-West (Step=7)', 'rebeccapurple',
                   'East-West Component', 'Subsampled and Full DNS East-West Data')
    _overlay_step7(axes[2], time_minutes, dframe['Z_Normalized_Shifted'],
                   'Z (Full)', 'Z (Step=7)', 'royalblue',
                   'Z Component', 'Subsampled and Full DNS Z-Component Data')

    plt.tight_layout()
    plt.show()

In [None]:
# try using scipy.signal.decimate and compare to indexing results
original_sampling_rate = 1024 # Hz
def subsample(data, decimation_factor, sr, ftype='iir'):
    """Subsample data using scipy.signal.decimate with anti-aliasing filter.

    data: input data (array)
    decimation_factor: factor by which to reduce sampling rate
    sr: Original sampling rate
    ftype: anti-aliasing filter type passed to scipy.signal.decimate,
        'iir' (default) or 'fir'

    Returns (decimated_data, new_sr) where new_sr = sr / decimation_factor.
    """
    # scipy.signal.decimate applies the anti-aliasing filter automatically.
    # No filter order is passed, so scipy's default order for the chosen
    # filter type is used; zero_phase=True avoids shifting the signal in time.
    decimated_data = scipy.signal.decimate(data, decimation_factor, ftype=ftype, zero_phase=True)
    # get new sampling rate:
    new_sr = sr / decimation_factor
    #print(f"New sampling rate: {new_sr} Hz")
    return decimated_data, new_sr

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Compute results for NS, EW, and Z data:
# (Using stepsize of 7 for decimation_factor)
# df:
NS_sub_results_df, NS_new_sr_df = subsample(df['NS_Normalized_Shifted'].values, 7, original_sampling_rate)
EW_sub_results_df, EW_new_sr_df = subsample(df['EW_Normalized_Shifted'].values, 7, original_sampling_rate)
Z_sub_results_df, Z_new_sr_df = subsample(df['Z_Normalized_Shifted'].values, 7, original_sampling_rate)
#df2:
NS_sub_results_df2, NS_new_sr_df2 = subsample(df2['NS_Normalized_Shifted'].values, 7, original_sampling_rate)
EW_sub_results_df2, EW_new_sr_df2 = subsample(df2['EW_Normalized_Shifted'].values, 7, original_sampling_rate)
Z_sub_results_df2, Z_new_sr_df2 = subsample(df2['Z_Normalized_Shifted'].values, 7, original_sampling_rate)
#df3:
NS_sub_results_df3, NS_new_sr_df3 = subsample(df3['NS_Normalized_Shifted'].values, 7, original_sampling_rate)
EW_sub_results_df3, EW_new_sr_df3 = subsample(df3['EW_Normalized_Shifted'].values, 7, original_sampling_rate)
Z_sub_results_df3, Z_new_sr_df3 = subsample(df3['Z_Normalized_Shifted'].values, 7, original_sampling_rate)
#df4:
NS_sub_results_df4, NS_new_sr_df4 = subsample(df4['NS_Normalized_Shifted'].values, 7, original_sampling_rate)
EW_sub_results_df4, EW_new_sr_df4 = subsample(df4['EW_Normalized_Shifted'].values, 7, original_sampling_rate)
Z_sub_results_df4, Z_new_sr_df4 = subsample(df4['Z_Normalized_Shifted'].values, 7, original_sampling_rate)
#df5:
NS_sub_results_df5, NS_new_sr_df5 = subsample(df5['NS_Normalized_Shifted'].values, 7, original_sampling_rate)
EW_sub_results_df5, EW_new_sr_df5 = subsample(df5['EW_Normalized_Shifted'].values, 7, original_sampling_rate)
Z_sub_results_df5, Z_new_sr_df5 = subsample(df5['Z_Normalized_Shifted'].values, 7, original_sampling_rate)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Decimated series collected per component, in the same order as `dataframes`
NS_subsampled_data = [NS_sub_results_df, NS_sub_results_df2, NS_sub_results_df3, NS_sub_results_df4, NS_sub_results_df5]
EW_subsampled_data = [EW_sub_results_df, EW_sub_results_df2, EW_sub_results_df3, EW_sub_results_df4, EW_sub_results_df5]
Z_subsampled_data = [Z_sub_results_df, Z_sub_results_df2, Z_sub_results_df3, Z_sub_results_df4, Z_sub_results_df5]


def _plot_full_vs_decimated(minutes, full, decimated, full_label, sub_label, sub_color, ylabel, title):
    """Open a new figure comparing the full series (red) with its decimated version."""
    plt.figure(figsize=(12,4))
    plt.plot(minutes, full, label=full_label, color='red')
    # The decimated series is plotted against every 7th time stamp.
    plt.plot(minutes[::7], decimated, label=sub_label, color=sub_color)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.xlabel('Time (min)')
    plt.legend(loc='upper left')
    plt.grid(True)
    # tight_layout only affects the current figure, so it is applied to every
    # figure as it is built rather than once after the loop.
    plt.tight_layout()


# Three figures (NS, EW, Z) per dataframe
for i, dframe in enumerate(dataframes):
    time_minutes = dframe['time (s)']/60
    # North-South data:
    _plot_full_vs_decimated(time_minutes, dframe['NS_Normalized_Shifted'], NS_subsampled_data[i],
                            'North-South (Full)', 'North-South (Step=7)', 'forestgreen',
                            'North-South Component', 'Subsampled and Full DNS North-South Data')
    # East-West data:
    _plot_full_vs_decimated(time_minutes, dframe['EW_Normalized_Shifted'], EW_subsampled_data[i],
                            'East-West (Full)', 'East-West (Step=7)', 'rebeccapurple',
                            'East-West Component', 'Subsampled and Full DNS East-West Data')
    # Z data:
    _plot_full_vs_decimated(time_minutes, dframe['Z_Normalized_Shifted'], Z_subsampled_data[i],
                            'Z (Full)', 'Z (Step=7)', 'royalblue',
                            'Z Component', 'Subsampled and Full DNS Z-Component Data')

plt.show()
# Note: New sampling rate is 146.285714... Hz using decimation_factor of 7

#### percent_check function using scipy.signal.decimate for y_subsampled instead of indexing:

In [None]:
# Try to use percent_check for scipy decimate version of subsampling
def percent_check_scipy_decimate(data, column, step, tolerance=0.05, min_pct_within=95, ftype='iir'):
    """Check how well a scipy-decimated series reproduces the full series.

    Same procedure as percent_check, except that the subsampled values come
    from scipy.signal.decimate (anti-alias filtered, zero phase) instead of
    plain indexing. The decimated values are placed at every `step`-th sample
    position, linearly interpolated/extrapolated back onto the original sample
    positions, and the absolute error is normalized by the full data range.

    data: dataframe to use
    column: column of interest for y values
    step: decimation factor
    tolerance: error tolerance expressed as a decimal fraction of the data range
    min_pct_within: minimum percentage of points that must be within
        `tolerance` for the check to pass (default 95)
    ftype: filter type passed to scipy.signal.decimate, 'iir' (default) or 'fir'

    Returns a dict with the same keys as percent_check.
    """
    x_original = np.arange(len(data))
    y_original = data[column].values

    # Get subsampled series (filtered and decimated by scipy)
    x_subsampled = np.arange(0, len(data), step)
    y_subsampled = scipy.signal.decimate(data[column].values, step, ftype=ftype, zero_phase=True)

    # Interpolate subsampled data back to original timestamps
    f_interp = interpolate.interp1d(x_subsampled, y_subsampled, kind='linear', fill_value='extrapolate')
    y_reconstructed = f_interp(x_original)

    # Calculate absolute errors
    abs_errors = np.abs(y_original - y_reconstructed)

    # Calculate data range (the scale of the data)
    data_range = np.max(y_original) - np.min(y_original)
    #print("data_range:", data_range)
    # Normalize errors by data range instead of individual values
    normalized_errors = abs_errors / data_range

    # Test if within tolerance
    within_tolerance = normalized_errors <= tolerance
    pct_within = (np.sum(within_tolerance) / len(within_tolerance)) * 100

    # Overall pass/fail
    passes = pct_within >= min_pct_within

    results = {'step': step,
        'tolerance': f'{tolerance*100}%',
        'passes': passes,
        'points_within_tolerance': f'{pct_within: .4f}%',
        'num_original_points': len(y_original),
        'num_subsampled_points': len(y_subsampled),
        'data_range': f'{data_range: .4f}',
        'max_normalized_error': f'{(np.max(normalized_errors)*100): .4f}%' }
    return results

def _report_decimate_check(frame, frame_tag):
    """Run percent_check_scipy_decimate (step 7, 5% tolerance) on the NS, EW and Z DNS columns.

    Each result is printed right after it is computed, followed by a blank
    line; the three result dicts are returned in NS, EW, Z order.
    """
    outcomes = []
    for prefix in ('NS', 'EW', 'Z'):
        outcome = percent_check_scipy_decimate(frame, f'{prefix}_Normalized_Shifted', 7, tolerance=0.05)
        print(f"{prefix}_results_test_1 ({frame_tag}):", outcome)
        outcomes.append(outcome)
    print("")
    return outcomes


# results for df:
NS_results_test_1_df, EW_results_test_1_df, Z_results_test_1_df = _report_decimate_check(df, 'df')
# results for df2:
NS_results_test_1_df2, EW_results_test_1_df2, Z_results_test_1_df2 = _report_decimate_check(df2, 'df2')
# results for df3:
NS_results_test_1_df3, EW_results_test_1_df3, Z_results_test_1_df3 = _report_decimate_check(df3, 'df3')
# results for df4:
NS_results_test_1_df4, EW_results_test_1_df4, Z_results_test_1_df4 = _report_decimate_check(df4, 'df4')
# results for df5:
NS_results_test_1_df5, EW_results_test_1_df5, Z_results_test_1_df5 = _report_decimate_check(df5, 'df5')

#### Comparing percent_check results from scipy.signal.decimate and indexing for y_subsampled
- Using ftype='fir' instead of 'iir' for the scipy method yields more points within the error tolerance and overall lower max_normalized_error values (tested with df)
    - The 'fir' filter type also applies a Hamming window
- There was a similar percentage of points within the error tolerance when using the scipy.signal.decimate method compared to the indexing method for subsampling
- For later cells, the scipy.signal.decimate method will be used

***
### Tapering the Time Series
##### Notes:
- a Hann window/raised cosine was used to taper the DNS time series

In [None]:
# reminder (defined in previous cell): dataframes = [df, df2, df3, df4, df5]
def apply_hann_window(data, NS_column, EW_column, Z_column):
    """Taper three columns of a dataframe with a Hann window.

    A single Hann window spanning the full length of `data` is multiplied,
    sample by sample, into each of the three named columns.

    data: dataframe holding the series to taper
    NS_column, EW_column, Z_column: names of the columns to taper

    Returns (NS_windowed_data, EW_windowed_data, Z_windowed_data).

    Note: the coherent gain of a Hann window is 0.5. No gain correction is
    applied here, so amplitudes derived from the tapered data need to be
    multiplied by 2 later to make the correction.
    """
    # One window, as long as the data, shared by all three components
    window = np.hanning(len(data))
    NS_windowed_data = data[NS_column] * window
    EW_windowed_data = data[EW_column] * window
    Z_windowed_data = data[Z_column] * window
    return NS_windowed_data, EW_windowed_data, Z_windowed_data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Hann-tapered NS / EW / Z series for every dataframe (DNS columns as input)
_dns_columns = ('NS_Normalized_Shifted', 'EW_Normalized_Shifted', 'Z_Normalized_Shifted')
# df:
NS_hann_result_df, EW_hann_result_df, Z_hann_result_df = apply_hann_window(df, *_dns_columns)
# df2:
NS_hann_result_df2, EW_hann_result_df2, Z_hann_result_df2 = apply_hann_window(df2, *_dns_columns)
# df3:
NS_hann_result_df3, EW_hann_result_df3, Z_hann_result_df3 = apply_hann_window(df3, *_dns_columns)
# df4:
NS_hann_result_df4, EW_hann_result_df4, Z_hann_result_df4 = apply_hann_window(df4, *_dns_columns)
# df5:
NS_hann_result_df5, EW_hann_result_df5, Z_hann_result_df5 = apply_hann_window(df5, *_dns_columns)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Tapered series grouped per component, in the same order as `dataframes`
NS_hann_results = [
    NS_hann_result_df, NS_hann_result_df2, NS_hann_result_df3,
    NS_hann_result_df4, NS_hann_result_df5,
]
EW_hann_results = [
    EW_hann_result_df, EW_hann_result_df2, EW_hann_result_df3,
    EW_hann_result_df4, EW_hann_result_df5,
]
Z_hann_results = [
    Z_hann_result_df, Z_hann_result_df2, Z_hann_result_df3,
    Z_hann_result_df4, Z_hann_result_df5,
]


def _overlay_tapered(ax, minutes, full, tapered, full_label, taper_label, taper_color, ylabel, title):
    """Plot the full series in red with its tapered version drawn on top."""
    ax.plot(minutes, full, label=full_label, color='red')
    ax.plot(minutes, tapered, label=taper_label, color=taper_color)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xlabel('Time (min)')
    ax.legend()
    ax.grid(True)


# Plot tapered data with full data for all dataframes:
for i, dframe in enumerate(dataframes):
    # The first dataframe is named "df"; the others are "df2" ... "df5".
    print("dataframe: df" if i == 0 else f"dataframe: df{i+1}")

    fig, axes = plt.subplots(3, 1, figsize=(10, 10))
    time_minutes = dframe['time (s)']/60

    _overlay_tapered(axes[0], time_minutes, dframe['NS_Normalized_Shifted'], NS_hann_results[i],
                     'North-South (Full)', 'North-South (Tapered)', 'forestgreen',
                     'North-South Component', 'Tapered and Full DNS North-South Data')
    _overlay_tapered(axes[1], time_minutes, dframe['EW_Normalized_Shifted'], EW_hann_results[i],
                     'East-West (Full)', 'East-West (Tapered)', 'rebeccapurple',
                     'East-West Component', 'Tapered and Full DNS East-West Data')
    _overlay_tapered(axes[2], time_minutes, dframe['Z_Normalized_Shifted'], Z_hann_results[i],
                     'Z (Full)', 'Z (Tapered)', 'royalblue',
                     'Z Component', 'Tapered and Full DNS Z-Component Data')
    # Optional zoom on the start of the Z panel:
    #axes[2].set_xlim(-0.001,0.01)

    plt.tight_layout()
    plt.show()

#### 'percent_check_tapered' for tapered time series:
- a tapering version of the 'percent_check' function that was used for subsampling

In [None]:
def percent_check_tapered(original_data, tapered_data_full, padding, tolerance=0.05):
    """Compare a tapered time series with the original, point by point.

    original_data : Original unpadded time series (array-like)
    tapered_data_full : Tapered time series, includes padding (array-like)
    padding : Amount of padding added to each end (int)
    tolerance : Acceptable error as fraction of data range (float)

    Both inputs are compared by position: pandas Series are converted to plain
    arrays first, so their index labels play no part in the comparison.

    Returns a dict with the padding, the tolerance, the pass/fail flag (at
    least 95% of points within tolerance), the percentage of points within
    tolerance, the maximum normalized error, the mean absolute error (also as
    a percentage of the original standard deviation), the number of points and
    the data range.

    Raises ValueError if the tapered data, once the padding is removed, does
    not have the same length as the original data.
    """
    # Work on plain arrays so that a slice of a pandas Series is not re-aligned
    # on its index labels when it is subtracted below (that would produce NaNs
    # whenever padding > 0)
    original = np.asarray(original_data, dtype=float)
    tapered_full = np.asarray(tapered_data_full, dtype=float)

    # Extract original from tapered data (remove padding)
    if padding > 0:
        tapered_original_region = tapered_full[padding:-padding]
    else:
        tapered_original_region = tapered_full

    if tapered_original_region.shape != original.shape:
        raise ValueError(
            "tapered data (after removing padding) has shape "
            f"{tapered_original_region.shape}, expected {original.shape}")

    # Direct comparison: tapered vs original
    abs_errors = np.abs(original - tapered_original_region)
    # normalization by the range of the original data
    data_range = np.max(original) - np.min(original)
    if data_range == 0:
        # Constant original: an exact match counts as zero error, while any
        # deviation is infinitely large relative to a zero range
        normalized_errors = np.where(abs_errors == 0, 0.0, np.inf)
    else:
        normalized_errors = abs_errors / data_range

    # Calculate metrics
    within_tolerance = normalized_errors <= tolerance
    pct_within = (np.sum(within_tolerance) / len(within_tolerance))*100

    # Mean Absolute Error (MAE) metrics
    mae = np.mean(abs_errors)
    std_original = np.std(original)
    if std_original == 0:
        mae_pct_std = 0.0 if mae == 0 else float('inf')
    else:
        mae_pct_std = (mae / std_original)*100

    passes = pct_within >= 95

    results = {
        'padding': padding,
        'tolerance': f'{tolerance*100}%',
        'passes': passes,
        'points_within_tolerance': f'{pct_within: .4f}%',
        'max_normalized_error': f'{(np.max(normalized_errors)*100): .4f}%',
        'mean_abs_error': f'{mae: .4f}',
        'mae_as_%_of_std': f'{mae_pct_std: .4f}%',
        'num_points': len(original),
        'data_range': f'{data_range: .4f}'}

    return results
def _report_taper_check(frame, frame_tag, tapered_components):
    """Run percent_check_tapered (no padding, 5% tolerance) for NS, EW and Z.

    `tapered_components` holds the tapered NS, EW and Z series, in that order.
    Each result is printed right after it is computed; the three result dicts
    are returned in NS, EW, Z order.
    """
    outcomes = []
    for prefix, tapered in zip(('NS', 'EW', 'Z'), tapered_components):
        outcome = percent_check_tapered(frame[f'{prefix}_Normalized_Shifted'], tapered, padding=0, tolerance=0.05)
        print(f"{prefix}_taper_results_1 ({frame_tag}):", outcome)
        outcomes.append(outcome)
    return outcomes


# df:
NS_taper_results_1_df, EW_taper_results_1_df, Z_taper_results_1_df = _report_taper_check(
    df, 'df', (NS_hann_result_df, EW_hann_result_df, Z_hann_result_df))
print("")
# df2:
NS_taper_results_1_df2, EW_taper_results_1_df2, Z_taper_results_1_df2 = _report_taper_check(
    df2, 'df2', (NS_hann_result_df2, EW_hann_result_df2, Z_hann_result_df2))
print("")
# df3:
NS_taper_results_1_df3, EW_taper_results_1_df3, Z_taper_results_1_df3 = _report_taper_check(
    df3, 'df3', (NS_hann_result_df3, EW_hann_result_df3, Z_hann_result_df3))
print("")
# df4:
NS_taper_results_1_df4, EW_taper_results_1_df4, Z_taper_results_1_df4 = _report_taper_check(
    df4, 'df4', (NS_hann_result_df4, EW_hann_result_df4, Z_hann_result_df4))
print("")
# df5:
NS_taper_results_1_df5, EW_taper_results_1_df5, Z_taper_results_1_df5 = _report_taper_check(
    df5, 'df5', (NS_hann_result_df5, EW_hann_result_df5, Z_hann_result_df5))

#### 'percent_check_tapered' results:
- The North-South and East-West data passed the percent_check_tapered test with roughly 99% of points within the 5% error tolerance (for df)
    - However, the Up-Down/Z component data has significantly fewer points within the error tolerance, with approx. 78% of points within the 5% error tolerance (df)
    - df3 also had significantly fewer points within the error tolerance for the Z direction, with only around 58% of points within the 5% tolerance

***
### Combining Subsampling and Tapering
##### Notes:
- The full series was tapered before subsampling
- A Hann window was used for tapering
- A step size of 7 was used for subsampling to be consistent across the North-South, East-West, and Up-Down (Z) data
    - step=7 was the largest step size that passed the percent_check test for all three data components in all dataframes simultaneously

In [None]:
# Put tapered results into subsample function from a previous cell:
def subsample_windowed(NS_windowed, EW_windowed, Z_windowed, sr=1024, decimation_factor=7):
    """Decimate the three (already tapered) components by the same factor.
    NS_windowed, EW_windowed, Z_windowed: tapered component arrays
    sr: sampling rate of the input series
    decimation_factor: integer downsampling factor handed to scipy.signal.decimate
    Returns (NS, EW, Z decimated arrays, new sampling rate = sr/decimation_factor)"""
    def _downsample(series):
        # IIR filter run forwards and backwards (zero_phase=True) before keeping every q-th sample
        return scipy.signal.decimate(series, decimation_factor, ftype='iir', zero_phase=True)

    reduced = [_downsample(component) for component in (NS_windowed, EW_windowed, Z_windowed)]
    return (*reduced, sr/decimation_factor)
# Results for all dataframes:  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Each call decimates the three Hann-tapered components of one recording with the function
# defaults (sr=1024, decimation_factor=7) and binds the per-dataframe result names used below.
# df:
NS_decimated_data_df, EW_decimated_data_df, Z_decimated_data_df, new_sr_df = subsample_windowed(NS_hann_result_df, EW_hann_result_df, Z_hann_result_df)
# df2:
NS_decimated_data_df2, EW_decimated_data_df2, Z_decimated_data_df2, new_sr_df2 = subsample_windowed(NS_hann_result_df2, EW_hann_result_df2, Z_hann_result_df2)
# df3:
NS_decimated_data_df3, EW_decimated_data_df3, Z_decimated_data_df3, new_sr_df3 = subsample_windowed(NS_hann_result_df3, EW_hann_result_df3, Z_hann_result_df3)
# df4:
NS_decimated_data_df4, EW_decimated_data_df4, Z_decimated_data_df4, new_sr_df4 = subsample_windowed(NS_hann_result_df4, EW_hann_result_df4, Z_hann_result_df4)
# df5:
NS_decimated_data_df5, EW_decimated_data_df5, Z_decimated_data_df5, new_sr_df5 = subsample_windowed(NS_hann_result_df5, EW_hann_result_df5, Z_hann_result_df5)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Per-component lists, ordered df, df2, ..., df5, so they can be indexed by position
# while iterating over the dataframes.
NS_taper_sub = [NS_decimated_data_df, NS_decimated_data_df2, NS_decimated_data_df3, NS_decimated_data_df4, NS_decimated_data_df5]
EW_taper_sub = [EW_decimated_data_df, EW_decimated_data_df2, EW_decimated_data_df3, EW_decimated_data_df4, EW_decimated_data_df5]
Z_taper_sub = [Z_decimated_data_df, Z_decimated_data_df2, Z_decimated_data_df3, Z_decimated_data_df4, Z_decimated_data_df5]

# One 3-panel figure per dataframe: full series in red, tapered/subsampled series overlaid.
# Each row: (full-data column, tapered list, full label, tapered label, tapered color, y-label, title)
component_panels = (
    ('NS_Normalized_Shifted', NS_taper_sub, 'North-South (Full)', 'Tapered North-South (Step=7)',
     'forestgreen', 'North-South Component', 'Tapered/Subsampled and Full DNS North-South Data'),
    ('EW_Normalized_Shifted', EW_taper_sub, 'East-West (Full)', 'Tapered East-West (Step=7)',
     'rebeccapurple', 'East-West Component', 'Tapered/Subsampled and Full DNS East-West Data'),
    ('Z_Normalized_Shifted', Z_taper_sub, 'Z (Full)', 'Tapered Z (Step=7)',
     'royalblue', 'Z Component', 'Tapered/Subsampled and Full DNS Z-Component Data'),
)

for i, dframe in enumerate(dataframes):
    print("dataframe: df" if i == 0 else f"dataframe: df{i+1}")

    fig, axes = plt.subplots(3, 1, figsize=(12, 12))
    time_minutes = dframe['time (s)']/60

    for ax, (column, tapered, full_label, taper_label, taper_color, y_label, title) in zip(axes, component_panels):
        ax.plot(time_minutes, dframe[column], label=full_label, color='red')
        # the time-axis step must equal the decimation factor used for the tapered data
        ax.plot(time_minutes[::7], tapered[i], label=taper_label, color=taper_color)
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.set_xlabel('Time (min)')
        ax.legend()
        ax.grid(True)

    plt.tight_layout()
    plt.show()

***
### Fast Fourier Transform
##### Notes:
- Applying the fast fourier transform to subsampled and tapered DNS data and plotting the resulting magnitude and phase spectra
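    - magnitudes are plotted as a one-sided spectrum (all non-DC bins doubled); note that this factor of 2 is applied with or without windowing, so it does not by itself compensate for the Hann window's coherent gain of 0.5 <br>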
<br> Effects of subsampling/tapering:
    - removes higher frequency content
    - reduces spectral leakage caused by discontinuities

#### Approach 1: Overlay FFT results for tapered/subsampled data onto single figure with full DNS results

In [None]:
# Plotting FFT spectra showing tapered/subsampled data overlayed onto full DNS data for a given dataframe
def plot_fft(data, sr, fig1=None, fig2=None, fig3=None, name='', apply_window=True):
    """Plot the one-sided FFT magnitude (linear and log-log) and phase spectra of one series.
    data: array
    sr: Sampling rate (sets the frequency axis)
    fig1, fig2, fig3: existing linear-magnitude, log-magnitude and phase figures to draw on;
        any that is None is created and styled here
    name: legend label for this curve (no legend is drawn when it is empty)
    apply_window : bool, says whether to apply Hann window (default True)
    Returns (fig1, fig2, fig3) so that a later call can overlay another curve.

    Magnitudes are |rfft|/n with every bin except DC (and Nyquist, for even n) doubled.
    No coherent-gain correction is applied for the window."""
    n = len(data)

    # Apply window if requested
    windowed_data = data * np.hanning(n) if apply_window else data

    # Compute FFT
    fft_values = scipy.fft.rfft(windowed_data)
    freqs = scipy.fft.rfftfreq(n, d=1/sr)

    # magnitudes WITHOUT coherent gain correction
    magnitudes = np.abs(fft_values) / n
    # Double the AC components to fold in the negative-frequency half ...
    magnitudes[1:] *= 2.0
    # ... except the Nyquist bin, which has no mirror partner when n is even
    if n % 2 == 0:
        magnitudes[-1] /= 2.0

    phases = np.angle(fft_values)

    def _select(fig):
        """Make fig current, creating it if needed; return (figure, was_created)."""
        if fig is None:
            return plt.figure(figsize=(10, 6)), True
        plt.figure(fig.number)
        return fig, False

    def _label_axes(title, ylabel):
        """Title and axis labels shared by all three figures."""
        plt.title(title, fontsize=16)
        plt.xlabel('Frequency (Hz)', fontsize=14)
        plt.ylabel(ylabel, fontsize=14)

    # Linear magnitude spectrum (includes the DC bin)
    fig1, created = _select(fig1)
    plt.plot(freqs, magnitudes, linewidth=1, label=name)
    if created:
        _label_axes('FFT Magnitude Spectrum (Linear)', 'Magnitude')
        plt.grid(True, which='major', alpha=0.5)
        plt.grid(True, which='minor', alpha=0.5)
        plt.minorticks_on()
        plt.xticks(fontsize=14)
        plt.yticks(fontsize=14)
        plt.tight_layout()
    if name:
        plt.legend(loc='upper right')

    # Log-log magnitude spectrum (DC bin dropped: 0 Hz cannot be shown on a log axis)
    fig2, created = _select(fig2)
    plt.loglog(freqs[1:], magnitudes[1:], linewidth=1, label=name)
    if created:
        _label_axes('FFT Magnitude Spectrum (log)', 'Magnitude')
        plt.xticks(fontsize=14)
        plt.yticks(fontsize=14)
        plt.grid(True, which='major', alpha=0.5)
        plt.grid(True, which='minor', alpha=0.5)
        plt.tight_layout()
    if name:
        plt.legend(loc='upper right')

    # Phase spectrum (DC bin dropped)
    fig3, created = _select(fig3)
    plt.plot(freqs[1:], phases[1:], linewidth=1, label=name)
    if created:
        _label_axes('Phase Spectrum', 'Phase (radians)')
        plt.grid(True, alpha=0.5)
        plt.xticks(fontsize=14)
        plt.yticks(fontsize=14)
        plt.tight_layout()
    if name:
        # legend frame is more opaque once a second curve has been overlaid
        plt.legend(loc='lower right', framealpha=0.5 if created else 0.8)

    return fig1, fig2, fig3 
#####################################################################################################################
# Creates the figures for one dataframe (df) at a time, one component per pass:
# the full-rate series creates the three figures, then the subsampled/tapered series
# is overlaid on them before they are shown.
for component, subsampled_series in (('NS', NS_sub_results_df),    # North-South
                                     ('EW', EW_sub_results_df),    # East-West
                                     ('Z', Z_sub_results_df)):     # Up-Down
    # Full data (no window)
    fig1, fig2, fig3 = plot_fft(
        df[f'{component}_Normalized_Shifted'].values,
        sr=original_sampling_rate,
        name=f'Full {component} Data',
        apply_window=False,
    )
    # Tapered/subsampled data, drawn onto the same three figures
    plot_fft(
        subsampled_series,
        new_sr_df,
        fig1=fig1,
        fig2=fig2,
        fig3=fig3,
        name=f'Subsampled/Tapered {component} Data',
        apply_window=True,
    )
    plt.show()

#### Approach 2: Overlay FFT spectra from all DataFrames onto 1 figure for each of the NS, EW, and Z directions
- Creates 3 figures per direction (NS, EW, Z): linear magnitude spectrum, log magnitude spectrum, and phase spectrum
    - 9 figures showing full DNS FFT results for all dataframes
    - 9 figures showing tapered/subsampled FFT results for all dataframes
- x-limits need to be adjusted to see details

In [None]:
# Updated FFT code, creates 9 figures total (3 for each of linear, log, phase)
# overlays FFT spectra from all Tyendinaga dataframes onto 1 figure (plots full DNS data separate from tapered/subsampled data)
def plot_fft(NS_data, EW_data, Z_data, sr, NS_fig1=None, NS_fig2=None, NS_fig3=None, EW_fig1=None, EW_fig2=None, EW_fig3=None, 
             Z_fig1=None, Z_fig2=None, Z_fig3=None, name='', apply_window=True, color=None):
    """Plot linear-magnitude, log-magnitude and phase spectra for the NS, EW and Z components.
    NS_data, EW_data, Z_data: numpy arrays (the length of NS_data sets n for all three)
    sr: Sampling rate
    NS_fig1..Z_fig3: existing figures to overlay onto (1=linear, 2=log, 3=phase);
        any that is None is created and styled here
    name: legend label; when empty the component tag is used as the line label and no legend is drawn
    apply_window: bool, says whether to apply Hann window (default True)
    color: line color passed to matplotlib
    Returns the nine figures in the order NS(1,2,3), EW(1,2,3), Z(1,2,3)."""
    n = len(NS_data)

    # Optional Hann taper, the same window for all three components
    if apply_window:
        taper = np.hanning(n)
        prepared = (NS_data * taper, EW_data * taper, Z_data * taper)
    else:
        prepared = (NS_data, EW_data, Z_data)

    coefficients = [scipy.fft.rfft(samples) for samples in prepared]
    freqs = scipy.fft.rfftfreq(n, d=1/sr)

    # One-sided amplitude: scale by n, double every bin except DC (and Nyquist for even n)
    amplitudes = []
    for spectrum in coefficients:
        amp = np.abs(spectrum) / n
        amp[1:] *= 2.0
        if n % 2 == 0:
            amp[-1] /= 2.0
        amplitudes.append(amp)
    phase_angles = [np.angle(spectrum) for spectrum in coefficients]

    def _select(fig):
        """Make fig current, creating it if needed; return (figure, was_created)."""
        if fig is None:
            return plt.figure(figsize=(10, 6)), True
        plt.figure(fig.number)
        return fig, False

    def _label_axes(title, ylabel):
        """Title and axis labels for a freshly created figure."""
        plt.title(title, fontsize=16)
        plt.xlabel('Frequency (Hz)', fontsize=14)
        plt.ylabel(ylabel, fontsize=14)

    incoming = (
        ('NS', NS_fig1, NS_fig2, NS_fig3),
        ('EW', EW_fig1, EW_fig2, EW_fig3),
        ('Z', Z_fig1, Z_fig2, Z_fig3),
    )
    outgoing = []
    for (tag, linear_fig, log_fig, phase_fig), amp, phase in zip(incoming, amplitudes, phase_angles):
        curve_label = name if name else tag

        # Linear magnitude
        linear_fig, created = _select(linear_fig)
        plt.plot(freqs, amp, linewidth=1, label=curve_label, color=color)
        if created:
            _label_axes(f'FFT Magnitude Spectrum (Linear) - {tag} Component', 'Magnitude')
            plt.grid(True, which='major', alpha=0.5)
            plt.grid(True, which='minor', alpha=0.5)
            plt.minorticks_on()
            plt.xticks(fontsize=14)
            plt.yticks(fontsize=14)
        if name:
            plt.legend(loc='upper right')

        # Log-log magnitude (DC bin skipped)
        log_fig, created = _select(log_fig)
        plt.loglog(freqs[1:], amp[1:], linewidth=1, label=curve_label, color=color)
        if created:
            _label_axes(f'FFT Magnitude Spectrum (log) - {tag} Component', 'Magnitude')
            plt.xticks(fontsize=14)
            plt.yticks(fontsize=14)
            plt.grid(True, which='major', alpha=0.5)
            plt.grid(True, which='minor', alpha=0.5)
        if name:
            plt.legend(loc='upper right')

        # Phase (DC bin skipped)
        phase_fig, created = _select(phase_fig)
        plt.plot(freqs[1:], phase[1:], linewidth=1, label=curve_label, color=color)
        if created:
            _label_axes(f'Phase Spectrum - {tag} Component', 'Phase (radians)')
            plt.grid(True, alpha=0.5)
            plt.xticks(fontsize=14)
            plt.yticks(fontsize=14)
        if name:
            plt.legend(loc='lower right', framealpha=0.5 if created else 0.8)

        outgoing.extend((linear_fig, log_fig, phase_fig))

    return tuple(outgoing)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# One line color per dataframe (df, df2, ..., df5)
colors = ['red', 'blue', 'green', 'orange', 'purple']
original_sampling_rate = 1024

# Keyword names of the nine figure arguments of plot_fft, in its return order
fig_slots = ('NS_fig1', 'NS_fig2', 'NS_fig3', 'EW_fig1', 'EW_fig2', 'EW_fig3', 'Z_fig1', 'Z_fig2', 'Z_fig3')

# Full data: the first pass creates the nine figures, later passes overlay onto them
full_figs = dict.fromkeys(fig_slots)
for i, dframe in enumerate(dataframes):
    # Extract the three components as numpy arrays
    ns_data = dframe['NS_Normalized_Shifted'].values
    ew_data = dframe['EW_Normalized_Shifted'].values
    z_data = dframe['Z_Normalized_Shifted'].values

    returned = plot_fft(ns_data, ew_data, z_data, original_sampling_rate,
                        name=f'df{i+1} (full)', apply_window=False, color=colors[i],
                        **full_figs)
    full_figs = dict(zip(fig_slots, returned))

(NS_fig1_full, NS_fig2_full, NS_fig3_full,
 EW_fig1_full, EW_fig2_full, EW_fig3_full,
 Z_fig1_full, Z_fig2_full, Z_fig3_full) = (full_figs[slot] for slot in fig_slots)

# Tapered/subsampled data: a separate set of nine figures built the same way
sub_figs = dict.fromkeys(fig_slots)
for i, (ns_data, ew_data, z_data) in enumerate(zip(NS_subsampled_data, EW_subsampled_data, Z_subsampled_data)):
    new_sr = original_sampling_rate / 7

    returned = plot_fft(ns_data, ew_data, z_data, new_sr,
                        name=f'df{i+1} (subsampled)', apply_window=True, color=colors[i],
                        **sub_figs)
    sub_figs = dict(zip(fig_slots, returned))

(NS_fig1_sub, NS_fig2_sub, NS_fig3_sub,
 EW_fig1_sub, EW_fig2_sub, EW_fig3_sub,
 Z_fig1_sub, Z_fig2_sub, Z_fig3_sub) = (sub_figs[slot] for slot in fig_slots)

plt.show()

### Horizontal and Vertical FFTs
- Goal: Add NS and EW FFTs together to get one set of plots (Horizontal)
- Leave Z FFT plots as is (Vertical), plotted from a previous cell

#### Horizontal FFT Plots:

In [None]:
# overlays tapered/subsampled result onto full DNS result for given dataframe
def plot_fft_horizontal(data_ns, data_ew, sr, fig1=None, fig2=None, name='', apply_window=True):
    """Plot horizontal FFT spectrum from North-South and East-West components.
    data_ns: North-South array
    data_ew: East-West array (multiplied by the same window as data_ns, so it must match its length)
    sr: Sampling rate
    fig1, fig2: existing linear / log-log figures to overlay onto; created and styled when None
    name: legend label (no legend is drawn when empty)
    apply_window: bool, says whether to apply a Hann window before the FFT (default True)
    Returns (freqs, magnitudes, fig1, fig2). Phases are left out for now.

    The horizontal magnitude is sqrt(mag_ns**2 + mag_ew**2), where each component magnitude
    is the one-sided |rfft|/n without coherent gain correction."""
    n = len(data_ns)

    # Apply window if requested
    if apply_window:
        window = np.hanning(n)
        windowed_ns = data_ns * window
        windowed_ew = data_ew * window
    else:
        windowed_ns = data_ns
        windowed_ew = data_ew

    def _one_sided_magnitude(samples):
        """|rfft|/n with all bins except DC (and Nyquist, for even n) doubled."""
        mag = np.abs(scipy.fft.rfft(samples)) / n
        mag[1:] *= 2.0
        if n % 2 == 0:
            mag[-1] /= 2.0
        return mag

    mag_ns = _one_sided_magnitude(windowed_ns)
    mag_ew = _one_sided_magnitude(windowed_ew)
    freqs = scipy.fft.rfftfreq(n, d=1/sr)

    # Combine horizontal magnitudes with vector magnitude formula
    # (an RMS combination, sqrt((ns**2 + ew**2) / 2), was considered as an alternative)
    magnitudes = np.sqrt((mag_ns**2) + (mag_ew**2))

    def _select(fig):
        """Make fig current, creating it if needed; return (figure, was_created)."""
        if fig is None:
            return plt.figure(figsize=(10, 6)), True
        plt.figure(fig.number)
        return fig, False

    # Linear magnitude spectrum
    fig1, created = _select(fig1)
    plt.plot(freqs, magnitudes, linewidth=1, label=name)
    if created:
        plt.title('FFT Magnitude Spectrum (Linear)', fontsize=16)
        plt.xlabel('Frequency (Hz)', fontsize=14)
        plt.ylabel('Magnitude', fontsize=14)
        plt.grid(True, which='major', alpha=0.5)
        plt.grid(True, which='minor', alpha=0.5)
        plt.minorticks_on()
        plt.xticks(fontsize=14)
        plt.yticks(fontsize=14)
        plt.tight_layout()
    if name:
        plt.legend(loc='upper right')

    # Log-log magnitude spectrum (DC bin skipped)
    fig2, created = _select(fig2)
    plt.loglog(freqs[1:], magnitudes[1:], linewidth=1, label=name)
    if created:
        plt.title('FFT Magnitude Spectrum (log)', fontsize=16)
        plt.xlabel('Frequency (Hz)', fontsize=14)
        plt.ylabel('Magnitude', fontsize=14)
        plt.xticks(fontsize=14)
        plt.yticks(fontsize=14)
        plt.grid(True, which='major', alpha=0.5)
        plt.grid(True, which='minor', alpha=0.5)
        plt.tight_layout()
    if name:
        plt.legend(loc='upper right')

    return freqs, magnitudes, fig1, fig2

print("horizontal FFT for dataframe: df")

# Full-rate data, no window: this call creates the linear and log figures
freqs_full, mags_full, fig1, fig2 = plot_fft_horizontal(
    data_ns=df['NS_Normalized_Shifted'].values,
    data_ew=df['EW_Normalized_Shifted'].values,
    sr=original_sampling_rate,
    name='Full Horizontal Data',
    apply_window=False,
)

# Subsampled/tapered data: overlaid on the two figures returned above
freqs_sub, mags_sub, fig1, fig2 = plot_fft_horizontal(
    data_ns=NS_sub_results_df,
    data_ew=EW_sub_results_df,
    sr=new_sr_df,
    fig1=fig1,
    fig2=fig2,
    name='Subsampled/Tapered Horizontal Data',
    apply_window=True,
)

plt.show()

In [None]:
# alternative version: Plots horizontal FFT spectra for all dataframes on same figure (keeps full and tapered/subsampled data separate)
def plot_fft_horizontal(data_ns, data_ew, sr, fig1=None, fig2=None, name='', apply_window=True, color=None):
    """Plot horizontal FFT spectrum from North-South and East-West components.
    data_ns: North-South array
    data_ew: East-West array (multiplied by the same window as data_ns, so it must match its length)
    sr: Sampling rate
    fig1, fig2: existing linear / log-log figures to overlay onto; created and styled when None
    name: legend label (no legend is drawn when empty)
    apply_window: bool, says whether to apply a Hann window before the FFT (default True)
    color: line color passed to matplotlib
    Returns (freqs, magnitudes, fig1, fig2).

    The horizontal magnitude is sqrt(mag_ns**2 + mag_ew**2), where each component magnitude
    is the one-sided |rfft|/n without coherent gain correction."""
    n = len(data_ns)

    # Apply window if requested
    if apply_window:
        window = np.hanning(n)
        windowed_ns = data_ns * window
        windowed_ew = data_ew * window
    else:
        windowed_ns = data_ns
        windowed_ew = data_ew

    def _one_sided_magnitude(samples):
        """|rfft|/n with all bins except DC (and Nyquist, for even n) doubled."""
        mag = np.abs(scipy.fft.rfft(samples)) / n
        mag[1:] *= 2.0
        if n % 2 == 0:
            mag[-1] /= 2.0
        return mag

    mag_ns = _one_sided_magnitude(windowed_ns)
    mag_ew = _one_sided_magnitude(windowed_ew)
    freqs = scipy.fft.rfftfreq(n, d=1/sr)

    # Combine horizontal magnitudes with vector magnitude formula
    magnitudes = np.sqrt((mag_ns**2) + (mag_ew**2))

    def _select(fig):
        """Make fig current, creating it if needed; return (figure, was_created)."""
        if fig is None:
            return plt.figure(figsize=(10, 6)), True
        plt.figure(fig.number)
        return fig, False

    # Linear magnitude spectrum
    fig1, created = _select(fig1)
    plt.plot(freqs, magnitudes, linewidth=1, label=name, color=color)
    if created:
        plt.title('FFT Magnitude Spectrum (Linear)', fontsize=16)
        plt.xlabel('Frequency (Hz)', fontsize=14)
        plt.ylabel('Magnitude', fontsize=14)
        plt.grid(True, which='major', alpha=0.5)
        plt.grid(True, which='minor', alpha=0.5)
        plt.minorticks_on()
        plt.xticks(fontsize=14)
        plt.yticks(fontsize=14)
    if name:
        plt.legend(loc='upper right')

    # Log-log magnitude spectrum (DC bin skipped)
    fig2, created = _select(fig2)
    plt.loglog(freqs[1:], magnitudes[1:], linewidth=1, label=name, color=color)
    if created:
        plt.title('FFT Magnitude Spectrum (log)', fontsize=16)
        plt.xlabel('Frequency (Hz)', fontsize=14)
        plt.ylabel('Magnitude', fontsize=14)
        plt.xticks(fontsize=14)
        plt.yticks(fontsize=14)
        plt.grid(True, which='major', alpha=0.5)
        plt.grid(True, which='minor', alpha=0.5)
    if name:
        plt.legend(loc='upper right')

    return freqs, magnitudes, fig1, fig2
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# One line color per dataframe (df, df2, ..., df5)
colors = ['red', 'blue', 'green', 'orange', 'purple']

# Full data: the first dataframe creates the two figures, the rest are overlaid on them
fig1_full = fig2_full = None
for i, dframe in enumerate(dataframes):
    freqs_full, mags_full, fig1_full, fig2_full = plot_fft_horizontal(
        dframe['NS_Normalized_Shifted'].values,
        dframe['EW_Normalized_Shifted'].values,
        sr=original_sampling_rate,
        fig1=fig1_full,
        fig2=fig2_full,
        name=f'df{i+1} (full)',
        apply_window=False,
        color=colors[i],
    )

# Subsampled/tapered data: kept on its own pair of figures
fig1_sub = fig2_sub = None
for i, (ns_data, ew_data) in enumerate(zip(NS_subsampled_data, EW_subsampled_data)):
    new_sr = original_sampling_rate / 7
    freqs_sub, mags_sub, fig1_sub, fig2_sub = plot_fft_horizontal(
        ns_data,
        ew_data,
        sr=new_sr,
        fig1=fig1_sub,
        fig2=fig2_sub,
        name=f'df{i+1} (subsampled)',
        apply_window=True,
        color=colors[i],
    )

print("horizontal FFT plots:")
plt.show()

***
### FFT With Moving Window
##### Notes:
- Goal: apply moving window (20 second intervals)
- Using short time fourier transform from scipy.signal
- Issue with previous version of code: time was not the same dimensions as the STFT result
    - Possible solutions: Plot spectrogram to show frequency content over time, or plot average magnitude spectrum
- Tried log scale for spectrogram frequency axis, did not display well for most full data plots

#### Spectrograms

In [None]:
# ShortTimeFFT(win, hop, fs, *, fft_mode='onesided', mfft=None, dual_win=None, scale_to=None, phase_shift=0)
# win = window function
# hop = time increment in signal samples for sliding window
# fs = sampling frequency of output signal and window
# fft_mode = Mode of FFT to be used (default ‘onesided’)
# mfft = Length of the FFT used, if a zero padded FFT is desired. If None (default), the length of the window win is used

# Problem with previous code (deleted): time not same dimensions as STFT result
# Possible solutions:
# Method 1: spectrogram
def windowed_fft_spectrogram(data, sr, plot_title, vmax, vmin, window='hann', window_duration=20):
    """Compute FFT with scipy short time fourier transform and plot it as a spectrogram
    data: Time series to perform STFT on (array)
    sr: sampling rate
    plot_title: title for spectrogram plot (string)
    vmax, vmin: color scale limits passed to pcolormesh
    window: window function to taper the data with; any specification accepted by
        scipy.signal.get_window (default 'hann')
    window_duration: window duration in seconds
    Returns the matplotlib figure holding the spectrogram.
    Raises ValueError if sr*window_duration is less than one sample."""
    # hop = number of samples in window_duration; the window has the same length,
    # so consecutive windows are adjacent and do not overlap
    hop = int(sr*window_duration)
    if hop < 1:
        raise ValueError("window_duration*sr must cover at least one sample")

    # Build the requested window. fftbins=False selects the symmetric form, which for
    # 'hann' matches np.hanning(hop) (up to floating-point rounding).
    window_array = signal.get_window(window, hop, fftbins=False)

    STF = ShortTimeFFT(win=window_array, hop=hop, fs=sr, fft_mode='onesided', mfft=None, scale_to='magnitude', phase_shift=0)
    STF_result = STF.stft(data)

    # Get time and frequency arrays
    time_axis = STF.t(len(data))  # Time values for each window (sec)
    time_minute = time_axis/60
    freq_axis = STF.f  # Frequency values

    # Plot spectrogram
    stft_plot = plt.figure(figsize=(12,4))
    plt.pcolormesh(time_minute, freq_axis, np.abs(STF_result), cmap='binary', shading='gouraud', vmax=vmax, vmin=vmin)
    plt.title(plot_title)
    plt.xlabel("Time (min)")
    plt.ylabel("Frequency (Hz)")
    cbar=plt.colorbar(label='Magnitude')
    cbar.formatter.set_powerlimits((0, 0))  # always use scientific notation on the colorbar
    plt.grid(which='major')
    plt.grid(which='minor')

    return stft_plot

# (column prefix, name used in the printed heading, list of tapered/subsampled series)
spectrogram_inputs = (
    ('NS', 'North-South', NS_taper_sub),
    ('EW', 'East-West', EW_taper_sub),
    ('Z', 'Z-component', Z_taper_sub),
)
stft_title = 'Short Time Fourier Transform With 20s Hann Window'

for i, dframe in enumerate(dataframes):
    print("dataframe: df" if i == 0 else f"dataframe: df{i+1}")

    for code, heading, tapered_series in spectrogram_inputs:
        print(f"Spectrograms for {heading} data")

        # Full data at the original sampling rate
        windowed_fft_spectrogram(
            dframe[f'{code}_Normalized_Shifted'].values,
            sr=original_sampling_rate,
            plot_title=f'{stft_title} (Full {code} Data)',
            vmax=0.001,
            vmin=0,
            window='hann',
            window_duration=20,
        )
        plt.ylim(top=500)
        plt.tight_layout()
        plt.show()

        # Tapered/subsampled data; new_sr_df is used for every dataframe because
        # all of them were subsampled the same way
        windowed_fft_spectrogram(
            tapered_series[i],
            sr=new_sr_df,
            plot_title=f'{stft_title} (Tapered/Subsampled {code} Data)',
            vmax=0.001,
            vmin=0,
            window='hann',
            window_duration=20,
        )
        plt.ylim(top=70)
        plt.tight_layout()
        plt.show()

    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

***
### H/V Ratio
#### Notes:
- Divide horizontal magnitude by vertical magnitude to get H/V ratio
- Used vector-magnitude formula to combine horizontal magnitudes for North-South and East-West components

#### Filter for Smoothing
- applied Savitzky-Golay filter for smoothing data
    - also tried an envelope function for smoothing using the Hilbert transform
- limited displayed frequency range

In [None]:
## Adding envelope for smoothing using Hilbert transform:
def envelope(input_data):
    """Return the amplitude envelope of input_data.

    The envelope is the magnitude of the analytic signal that
    scipy.signal.hilbert builds from input_data.
    """
    analytic_signal = hilbert(input_data)
    amplitude = np.abs(analytic_signal)
    return amplitude

In [None]:
def HV_ratio(data_ns, data_ew, data_z, sr, apply_window=True, freq_min=0.1, freq_max=None, clip_max=None):
    """Compute Horizontal-to-Vertical Spectral Ratio (HVSR)

    Each component is turned into a one-sided FFT amplitude spectrum, the two
    horizontal spectra are combined with the vector-magnitude formula
    sqrt(NS**2 + EW**2), and the result is divided by the vertical spectrum.

    data_ns: North-South component (array-like)
    data_ew: East-West component (array-like, same length as data_ns)
    data_z: Vertical (Z) component (array-like, same length as data_ns)
    sr: Sampling rate
    apply_window: If True, multiply every component by a Hann window before the FFT
    freq_min: Minimum frequency to include (Hz)
    freq_max: Maximum frequency to include (Hz), default is 80% of Nyquist
    clip_max: Optional maximum HVSR value to allow; None (default) means no clipping

    Returns: (freqs, hvsr) restricted to freq_min <= freqs <= freq_max
    Raises: ValueError if the components are empty or differ in length"""
    data_ns = np.asarray(data_ns, dtype=float)
    data_ew = np.asarray(data_ew, dtype=float)
    data_z = np.asarray(data_z, dtype=float)

    n = len(data_ns)
    if n == 0:
        raise ValueError("HV_ratio needs at least one sample per component")
    # Lengths n and n+1 (n even) give rfft outputs of equal size, so a
    # mismatch would otherwise slip through unnoticed when no window is applied.
    if len(data_ew) != n or len(data_z) != n:
        raise ValueError("data_ns, data_ew and data_z must have the same length")

    if freq_max is None:
        freq_max = 0.8*(sr/2)  # 80% of Nyquist to avoid edge effects

    # Apply window if requested
    if apply_window:
        window = np.hanning(n)
        data_ns = data_ns*window
        data_ew = data_ew*window
        data_z = data_z*window

    freqs = np.fft.rfftfreq(n, d=1/sr)

    # One-sided amplitude spectrum of each component
    magnitudes = []
    for component in (data_ns, data_ew, data_z):
        mag = np.abs(np.fft.rfft(component)) / n
        # Double AC components for one-sided spectrum correction
        mag[1:] *= 2.0
        if n % 2 == 0:
            # For even n the last bin is the Nyquist bin, which has no
            # mirrored partner and therefore must not stay doubled.
            mag[-1] /= 2.0
        magnitudes.append(mag)
    mag_ns, mag_ew, mag_z = magnitudes

    # Use vector-magnitude formula to combine horizontal magnitudes:
    mag_horizontal = np.sqrt((mag_ns**2) + (mag_ew**2))

    # Compute HVSR with threshold to avoid division by tiny numbers;
    # bins at or below the threshold are left at 0.
    z_threshold = np.max(mag_z) * 1e-8
    hvsr = np.zeros_like(mag_horizontal)
    mask = mag_z > z_threshold
    hvsr[mask] = mag_horizontal[mask] / mag_z[mask]

    # Clip extreme values only when a limit was requested
    if clip_max is not None:
        hvsr = np.clip(hvsr, 0, clip_max)

    # Apply frequency range mask
    freq_mask = (freqs >= freq_min) & (freqs <= freq_max)

    return freqs[freq_mask], hvsr[freq_mask]
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
print("dataframe: df")
# Compute HVSR for full data (df); no Hann window is applied to the full record here
freqs1, hvsr1 = HV_ratio(df['NS_Normalized_Shifted'].values, df['EW_Normalized_Shifted'].values, df['Z_Normalized_Shifted'].values,
                sr=original_sampling_rate, apply_window=False, freq_min=0.1, freq_max=300)  # Or use None for auto freq_max

# Compute HVSR for tapered/subsampled data (df)
freqs2, hvsr2 = HV_ratio(NS_sub_results_df, EW_sub_results_df, Z_sub_results_df, sr=new_sr_df, apply_window=True, freq_min=0.1,
    freq_max=70)  # Should be less than decimated Nyquist (~73 Hz for subsampling step=7)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Apply smoothing with repeated Savitzky-Golay passes (polyorder=2).
# The first window of each sequence was chosen as 20 s * sampling rate
# (20481 for the full data, approx. 20*new_sr_df = 2925 for the subsampled data).
# Window lengths are kept odd so the filter window is centered symmetrically on the current data point.
# NOTE(review): the filter runs along the frequency axis of the H/V curve, so these
# window lengths count frequency bins rather than seconds of signal - confirm the intended width.
# A Hilbert-transform envelope (envelope() from the previous cell) was also tested on the
# smoothed curves, but it doesn't work well: it adds spikes at the edges of the plot.
# Extra Savitzky-Golay passes are used instead.
hvsr1_smooth = hvsr1
for wl in (20481, 20001, 10001):
    hvsr1_smooth = savgol_filter(hvsr1_smooth, window_length=wl, polyorder=2)

hvsr2_smooth = hvsr2
for wl in (2925, 10001, 10001, 10001, 10001):  # tapered/subsampled data gets five passes in total
    hvsr2_smooth = savgol_filter(hvsr2_smooth, window_length=wl, polyorder=2)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Find main peak between 10 and 50 Hz (full data)
band1 = (freqs1 > 10) & (freqs1 < 50)
peak_idx = np.argmax(hvsr1_smooth[band1])
peak_freq = freqs1[band1][peak_idx]
peak_value = hvsr1_smooth[band1][peak_idx]
print(f"Full data peak frequency: {peak_freq} Hz")
print(f"Full data peak H/V: {peak_value}")
# (tapered/subsampled data):
band2 = (freqs2 > 10) & (freqs2 < 50)
peak_idx2 = np.argmax(hvsr2_smooth[band2])
peak_freq2 = freqs2[band2][peak_idx2]
peak_value2 = hvsr2_smooth[band2][peak_idx2]
print(f"Tapered/Subsampled data peak frequency: {peak_freq2} Hz")
# H/V is a dimensionless ratio, so no unit is printed after the value
print(f"Tapered/Subsampled data peak H/V: {peak_value2}")

# Plot
plt.figure(figsize=(12, 6))
# Smoothed data
step=1 # can use data step > 1 if preferred to plot every nth point
plt.semilogx(freqs1[::step], hvsr1_smooth[::step], label="H/V From Full Data")
plt.semilogx(freqs2[::step], hvsr2_smooth[::step], label="H/V From Tapered/Subsampled Data")
plt.axhline(y=1, color='grey', linestyle='--', alpha=0.5, label='H/V = 1')
plt.xlabel('Frequency (Hz)')
plt.ylabel('H/V Ratio')
plt.title('Smoothed Horizontal-to-Vertical Spectral Ratio (HVSR)')
plt.ylim([0, 5])  # Adjust based on your data
plt.grid(True, which="both", alpha=0.5)
# Reference line at the Grilla peak frequency quoted in the note printed below
plt.axvline(x=30.63, linestyle="--", color='green', label='30.63 Hz')
plt.legend(loc='upper left')
plt.show()
print("Note: 30.63 +/- 3.81 Hz is the max. H/V from screenshot of Grilla average H/V plot")

In [None]:
# updated version - in progress:
def HV_ratio(data_ns, data_ew, data_z, sr, apply_window=True, freq_min=0.1, freq_max=None, clip_max=10):
    """Compute Horizontal-to-Vertical Spectral Ratio (HVSR)

    The horizontal spectrum is the vector magnitude sqrt(NS**2 + EW**2) of the
    two one-sided amplitude spectra; it is divided by the vertical spectrum.

    data_ns: North-South component (array-like)
    data_ew: East-West component (array-like, same length as data_ns)
    data_z: Vertical (Z) component (array-like, same length as data_ns)
    sr: Sampling rate
    apply_window: If True, multiply every component by a Hann window before the FFT
    freq_min: Minimum frequency to include (Hz)
    freq_max: Maximum frequency to include (Hz), default is 80% of Nyquist
    clip_max: Maximum HVSR value to allow (values are clipped to [0, clip_max])

    Returns: (freqs, hvsr) restricted to freq_min <= freqs <= freq_max
    Raises: ValueError if the components are empty or differ in length"""
    data_ns = np.asarray(data_ns, dtype=float)
    data_ew = np.asarray(data_ew, dtype=float)
    data_z = np.asarray(data_z, dtype=float)

    n = len(data_ns)
    if n == 0:
        raise ValueError("HV_ratio needs at least one sample per component")
    # Lengths n and n+1 (n even) produce rfft outputs of the same size, so
    # without this check a mismatch can pass silently when apply_window=False.
    if len(data_ew) != n or len(data_z) != n:
        raise ValueError("data_ns, data_ew and data_z must have the same length")

    if freq_max is None:
        freq_max = 0.8*(sr/2)  # 80% of Nyquist

    # Apply window if requested
    if apply_window:
        window = np.hanning(n)
        data_ns = data_ns * window
        data_ew = data_ew * window
        data_z = data_z * window

    freqs = np.fft.rfftfreq(n, d=1/sr)

    # One-sided amplitude spectra of the three components
    mag_ns = _one_sided_amplitude(data_ns)
    mag_ew = _one_sided_amplitude(data_ew)
    mag_z = _one_sided_amplitude(data_z)

    # Use vector-magnitude formula to combine horizontal magnitudes:
    mag_horizontal = np.sqrt((mag_ns**2) + (mag_ew**2))

    # Compute HVSR with threshold to avoid division by tiny numbers;
    # bins at or below the threshold keep the value 0.
    z_threshold = np.max(mag_z) * 1e-8
    hvsr = np.zeros_like(mag_horizontal)
    mask = mag_z > z_threshold
    hvsr[mask] = mag_horizontal[mask] / mag_z[mask]

    # Clip extreme values
    hvsr = np.clip(hvsr, 0, clip_max)

    # Apply frequency range mask
    freq_mask = (freqs >= freq_min) & (freqs <= freq_max)

    return freqs[freq_mask], hvsr[freq_mask]


def _one_sided_amplitude(samples):
    """Return the one-sided FFT amplitude spectrum of a real 1-D array."""
    n = len(samples)
    mag = np.abs(np.fft.rfft(samples)) / n
    # Double AC components for one-sided spectrum correction
    mag[1:] *= 2.0
    if n % 2 == 0:
        # For even n the last bin is the Nyquist bin; undo its doubling.
        mag[-1] /= 2.0
    return mag
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Define colors (cycled if there are more datasets than colors)
colors = ['red', 'blue', 'green', 'orange', 'purple']


def _clamp_savgol_window(requested, n_points, polyorder):
    """Return an odd Savitzky-Golay window length derived from `requested`.

    The length is capped at n_points and made odd by subtracting one if
    needed. If the result is smaller than polyorder + 2, the smallest odd
    value >= polyorder + 2 is used instead (this fallback is not re-checked
    against n_points)."""
    wl = min(int(requested), n_points)
    if wl % 2 == 0:
        wl -= 1  # Make it odd
    if wl < polyorder + 2:
        wl = polyorder + 2
        if wl % 2 == 0:
            wl += 1
    return wl


def _smooth_repeatedly(values, window_length, polyorder, passes):
    """Run savgol_filter over `values` `passes` times with the same settings."""
    smoothed = values
    for _ in range(passes):
        smoothed = savgol_filter(smoothed, window_length=window_length, polyorder=polyorder)
    return smoothed


# Compute HVSR for all dataframes
hvsr_full_list = []
freqs_full_list = []
for dframe in dataframes:
    freqs_full, hvsr_full = HV_ratio(dframe['NS_Normalized_Shifted'].values, dframe['EW_Normalized_Shifted'].values, dframe['Z_Normalized_Shifted'].values,
        sr=original_sampling_rate, apply_window=False, freq_min=0.1, freq_max=300,  # Or use None for auto
        clip_max=10)
    freqs_full_list.append(freqs_full)
    hvsr_full_list.append(hvsr_full)

# Compute HVSR for tapered/subsampled data
hvsr_sub_list = []
freqs_sub_list = []
new_sr = original_sampling_rate / 7
nyquist_sub = new_sr / 2  # About 73 Hz
print("Nyquist for subsampled data:", nyquist_sub)
for ns_data, ew_data, z_data in zip(NS_subsampled_data, EW_subsampled_data, Z_subsampled_data):
    freqs_sub, hvsr_sub = HV_ratio(ns_data, ew_data, z_data, sr=new_sr, apply_window=True, freq_min=0.1,
        freq_max=70,  # Should be less than decimated Nyquist
        clip_max=10)
    freqs_sub_list.append(freqs_sub)
    hvsr_sub_list.append(hvsr_sub)

# Savitzky-Golay filter for smoothing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Requested window lengths are 20 * sampling rate (full) and 40 * new_sr (subsampled).
# NOTE(review): the filter runs along the frequency axis of the H/V curve, so these
# numbers count frequency bins rather than seconds of signal - confirm the intended width.
window_length_full = 20 * original_sampling_rate
window_length_sub = 40 * new_sr
polyorder=2

# Full data: two smoothing passes
hvsr_full_smooth_list = []
for hvsr in hvsr_full_list:
    # Make sure window_length is not larger than data length and is odd
    wl = _clamp_savgol_window(window_length_full, len(hvsr), polyorder)
    hvsr_smooth = _smooth_repeatedly(hvsr, wl, polyorder, passes=2)
    hvsr_full_smooth_list.append(hvsr_smooth)

# Tapered/subsampled data: four smoothing passes
hvsr_sub_smooth_list = []
for hvsr in hvsr_sub_list:
    wl = _clamp_savgol_window(window_length_sub, len(hvsr), polyorder)
    hvsr_smooth = _smooth_repeatedly(hvsr, wl, polyorder, passes=4)
    hvsr_sub_smooth_list.append(hvsr_smooth)

# Find main peak (10-50 Hz) for each dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
peak_reports = (
    ("Full Data Peak Frequencies:", freqs_full_list, hvsr_full_smooth_list),
    ("\nTapered/Subsampled Data Peak Frequencies:", freqs_sub_list, hvsr_sub_smooth_list),
)
for heading, freqs_group, smooth_group in peak_reports:
    print(heading)
    for i, (freqs, hvsr_smooth) in enumerate(zip(freqs_group, smooth_group)):
        mask = (freqs > 10) & (freqs < 50)
        if not np.any(mask):
            continue  # nothing to report when the band holds no frequency bins
        peak_idx = np.argmax(hvsr_smooth[mask])
        # Adjust index for the mask
        mask_indices = np.where(mask)[0]
        peak_idx_full = mask_indices[peak_idx]
        peak_freq = freqs[peak_idx_full]
        peak_value = hvsr_smooth[peak_idx_full]
        print(f"df{i+1}: {peak_freq:.2f} Hz (value: {peak_value:.2f})")

# Plot full data together, then tapered/subsampled data together ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
step = 1  # Plot every nth point to make plot cleaner
plot_groups = (
    (freqs_full_list, hvsr_full_smooth_list, "full",
     'Smoothed Horizontal-to-Vertical Spectral Ratio (HVSR) - Full Data'),
    (freqs_sub_list, hvsr_sub_smooth_list, "subsampled",
     'Smoothed Horizontal-to-Vertical Spectral Ratio (HVSR) - Tapered/Subsampled Data'),
)
for freqs_group, smooth_group, tag, plot_title in plot_groups:
    plt.figure(figsize=(12, 6))
    for i, (freqs, hvsr_smooth) in enumerate(zip(freqs_group, smooth_group)):
        # Modulo keeps the color lookup valid when there are more datasets than colors
        plt.semilogx(freqs[::step], hvsr_smooth[::step], label=f"df{i+1} ({tag})", color=colors[i % len(colors)], linewidth=1.5)

    plt.axhline(y=1, color='grey', linestyle='--', alpha=0.5, label='H/V = 1')
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('H/V Ratio')
    plt.title(plot_title)
    #plt.ylim(0, 5)  # Adjust based on data
    plt.grid(True, which="both", alpha=0.5)
    plt.legend(loc='upper left')
    plt.tight_layout()

plt.show()

# Print max H/V values
max_reports = (
    ("\nMax H/V for Full Data:", hvsr_full_smooth_list),
    ("\nMax H/V for Tapered/Subsampled Data:", hvsr_sub_smooth_list),
)
for heading, smooth_group in max_reports:
    print(heading)
    for i, hvsr_smooth in enumerate(smooth_group):
        print(f"df{i+1}: {hvsr_smooth[::step].max():.2f}")

#### Interpreting Horizontal-to-Vertical Spectral Ratios
- Peaks should occur at fundamental and higher order resonance frequencies
    - (fundamental frequency = lowest resonant frequency in a system)
- Typically, the fundamental resonance frequency is the lowest-frequency peak on the H/V frequency spectrum, and it is usually also the peak with the highest amplitude
- H/V = 1 indicates that the horizontal and vertical motions are equal
- H/V < 1 indicates that horizontal motion is weaker than vertical motion
- H/V > 1 indicates that horizontal motion is stronger than vertical motion

### Plotting H/V Ratio From Grilla .txt File:

In [None]:
# Load the H/V curve exported from Grilla into a two-column DataFrame.
# The first 31 lines of the file are skipped (presumably header text - verify against the file).
column_names = ['freq.', 'H/V']
dtf = pd.read_csv(
    'Tyendinaga_HV_ratio.asc',
    sep=r'\s+',
    encoding='latin-1',
    skiprows=31,
    names=column_names,
    usecols=column_names,
)
dtf.head().style

In [None]:
# Plot the Grilla H/V curve on a logarithmic frequency axis,
# with a dashed marker line at 30.63 Hz.
plt.figure(figsize=(12, 6))
plt.semilogx(dtf['freq.'], dtf['H/V'], label="H/V Ratio from Grilla Data File")
plt.axvline(x=30.63, color='red', linestyle='--', label="30.63 Hz")
plt.title("H/V Ratio from Grilla Data File")
plt.xlabel("Frequency (Hz)")
plt.ylabel("H/V Ratio")
plt.grid(True, alpha=0.5)
plt.legend()
plt.show()

In [None]:
# Preview the first rows of df to see which columns it currently holds
df.head()

In [None]:
# Build a slimmed-down copy of df without the raw, detrended and normalized columns.
# Per the original note, what remains is time (s) plus the Normalized_Shifted data
# for the NS, EW, and Z components.
columns_to_drop = [
    'NS', 'EW', 'Z',
    'nsL', 'ewL', 'zL',
    'aY', 'aX', 'aZ',
    'NS_Detrended', 'EW_Detrended', 'Z_Detrended',
    'NS_Normalized', 'EW_Normalized', 'Z_Normalized',
]
df_updated = df.drop(columns=columns_to_drop)
df_updated.head()