### Filtering trench motions output by the vtk scripts

The output from the vtk script is jagged, possibly because of the interpolation used in that approach.
We therefore need to filter out the noise before reaching publishable results.

#### Summary
* Resampling to a 5e5- or 1e6-year interval gives a reasonable result.
* Applying a Fourier filter helps to remove some of the noise in between, but it doesn't keep the boundary points in place.

In [None]:
import numpy as np
import os
from matplotlib import pyplot as plt
from matplotlib import gridspec
from scipy.interpolate import CubicSpline

### Load the dataset

This dataset contains the columns "pvtu_step", "step", "time", "trench (rad)", "slab depth(m)" and "100km dip (rad)".

In [None]:
# Radius of the Earth, used to turn angular trench motion into a distance.
Ro = 6371e3
data_file = "slab_morph.txt"
assert os.path.isfile(data_file)
data = np.loadtxt(data_file)
# column 2 is the "time" entry, column 3 the "trench (rad)" entry
ts, trenches = data[:, 2], data[:, 3]

### Plot the position of the trench

In [None]:
# Trench velocity: time derivative of the trench position (rad) scaled by
# the radius Ro.
trench_velocities = Ro * np.gradient(trenches, ts)

# Two stacked panels: trench position on top, trench velocity below.
fig = plt.figure(tight_layout=True)
gs = gridspec.GridSpec(2, 1)
ax, ax1 = (fig.add_subplot(gs[row, 0]) for row in range(2))
ax.plot(ts, trenches)
ax.set(xlabel='Time (yr)', ylabel='Trench (rad)')
ax1.plot(ts, trench_velocities)
ax1.set(xlabel='Time (yr)', ylabel='Trench motion (m/yr)')
fig.show()


### Resample to a uniform grid

In the following example, I resample this time series to a 5e5-year interval.
Then I fit it with a cubic spline to smooth the curve.
The resampling itself does a good job of removing the noise, but the effect of the spline fit is not apparent.

In [None]:
# Resample the trench position onto a coarse, uniform time grid.
tmax = np.max(ts)
t_inter = 0.5e6  # resample interval
ts_re = np.arange(0.0, tmax, t_inter)
print("The number of entries in array: ", ts_re.size)  # print the size of resampling
trenches_re = np.interp(ts_re, ts, trenches)
# velocity derived from the resampled positions
trench_velocities_re = Ro * np.gradient(trenches_re, ts_re)

# Fit cubic splines through the resampled series; they are evaluated on a
# finer uniform grid for plotting.
t_inter_fit = 1e5  # interval of the grid the splines are evaluated on
ts_fit = np.arange(0.0, tmax, t_inter_fit)
cs_trench = CubicSpline(ts_re, trenches_re)
cs_velocity = CubicSpline(ts_re, trench_velocities_re)

# One column per data set: position in the top row, velocity in the bottom row.
fig = plt.figure(tight_layout=True)
gs = gridspec.GridSpec(2, 3)
panel_series = (
    (ts, trenches, trench_velocities),  # original data
    (ts_re, trenches_re, trench_velocities_re),  # resampled data
    (ts_fit, cs_trench(ts_fit), cs_velocity(ts_fit)),  # fitted data
)
for col, (panel_ts, panel_trenches, panel_velocities) in enumerate(panel_series):
    ax = fig.add_subplot(gs[0, col])
    ax.plot(panel_ts, panel_trenches)
    ax.set(xlabel='Time (yr)', ylabel='Trench (rad)')
    ax1 = fig.add_subplot(gs[1, col])
    ax1.plot(panel_ts, panel_velocities)
    ax1.set(xlabel='Time (yr)', ylabel='Trench motion (m/yr)')
fig.show()

### Define the filter

a. FFT transform of a time series

b. Remove high-frequency entries in the spectrum

c. Inverse FFT to get the clean data

#### Schemes of filtering

Here, I have also included a few different schemes in filtering.

"magnitude" sets a threshold on the power spectrum; entries with a smaller power are filtered out.

"n_components" keeps only the n_components lowest-frequency entries of the spectrum.

"spectrums" filters out the selected index ranges (bands) of the spectrum.

In [None]:
def fft_denoiser(x, to_real=True, **kwargs):
    """Fast fourier transform denoiser.

    Transforms the data with the FFT, zeroes a subset of the spectral
    coefficients according to the selected scheme and transforms the
    result back.

    For the ``n_components`` and ``spectrums`` schemes, the conjugate
    (mirrored) bin ``n - k`` is always treated together with bin ``k``.
    The spectrum of real data is conjugate-symmetric, so masking only one
    side would leave half of the amplitude of a "removed" component in the
    real part of the result, and would halve the amplitude of every
    retained non-constant component.

    Parameters
    ----------
    x : array_like
        The 1-D data to denoise.
    to_real : bool, optional, default: True
        Whether to remove the complex part (True) or not (False)
    kwargs : dict
        Selects the filtering scheme. At least one of the following has to
        be given; if several are given, the first one in this order wins:

        magnitude : float
            Keep only the coefficients whose power spectral density is
            larger than this value.
        n_components : int
            Keep only the ``n_components`` lowest frequencies (bins
            ``0 .. n_components - 1`` and their mirrored bins).
        spectrums : list of [int, int]
            Each pair ``[start, stop]`` removes the bins
            ``start .. stop - 1`` (and their mirrored bins).

    Returns
    -------
    clean_data : numpy.ndarray
        The denoised data.
    PSD : numpy.ndarray
        Real-valued power spectral density ``|fft|**2 / n`` of the
        unfiltered data.

    Raises
    ------
    ValueError
        If no filtering scheme is given, or a spectrum does not have
        exactly two entries.
    TypeError
        If the value given for the selected scheme has the wrong type.

    References
    ----------
    .. [1] Steve Brunton - Denoising Data with FFT[Python]
       https://www.youtube.com/watch?v=s2K1JfNR7Sc&ab_channel=SteveBrunton

    """
    def _is_int(value):
        # bool is a subclass of int but is never a meaningful bin index
        return (isinstance(value, (int, np.integer))
                and not isinstance(value, bool))

    def _is_real(value):
        return _is_int(value) or isinstance(value, (float, np.floating))

    n = len(x)
    magnitude = kwargs.get('magnitude')
    n_components = kwargs.get('n_components')
    spectrums = kwargs.get('spectrums')
    if magnitude is None and n_components is None and spectrums is None:
        raise ValueError(
            "one of 'magnitude', 'n_components' or 'spectrums' must be given")

    # compute the fft
    fft = np.fft.fft(x, n)

    # power spectral density: squared magnitude of each fft coefficient.
    # fft * conj(fft) has a zero imaginary part, so only the real part is kept.
    PSD = (fft * np.conj(fft)).real / n

    # build the mask of coefficients to keep
    bins = np.arange(n)
    if magnitude is not None:
        if not _is_real(magnitude):
            raise TypeError("'magnitude' must be a real number")
        keep = PSD > magnitude  # mask by magnitude
    elif n_components is not None:
        if not _is_int(n_components):
            raise TypeError("'n_components' must be an integer")
        # min(k, n - k) is the frequency index of bin k irrespective of sign,
        # so the mask is symmetric in positive and negative frequencies
        keep = np.minimum(bins, n - bins) < n_components
    else:
        if not isinstance(spectrums, (list, tuple)):
            raise TypeError("'spectrums' must be a list of [start, stop] pairs")
        keep = np.ones(n, dtype=bool)
        for spectrum in spectrums:
            if len(spectrum) != 2:
                raise ValueError("each spectrum must be a [start, stop] pair")
            if not (_is_int(spectrum[0]) and _is_int(spectrum[1])):
                raise TypeError("spectrum bounds must be integers")
            band = np.arange(spectrum[0], spectrum[1])
            keep[band] = False
            keep[(-band) % n] = False  # mirrored (conjugate) bins

    # inverse fourier transform of the masked spectrum
    clean_data = np.fft.ifft(fft * keep)

    if to_real:
        clean_data = clean_data.real

    return clean_data, PSD

### Filter the data

First, I resample to a 1e5 interval, then apply the FFT filter.

In [None]:
# Filter the trench motion.
# The FFT filter works on a uniformly sampled series, so the trench position
# is first interpolated onto a uniform time grid.
t_inter = 1e5  # resample interval
ts_re = np.arange(0.0, tmax, t_inter)
trenches_uni_time = np.interp(ts_re, ts, trenches)
print(ts_re.size)  # debug
# Use the n_components scheme. fft_denoiser also accepts the other schemes,
# e.g. magnitude=1e-5 or spectrums=[[50, 100]].
trenches_filtered, PSD = fft_denoiser(trenches_uni_time, n_components=100)

# debug, plot the PSD
fig, ax = plt.subplots()
# the PSD may come back with a complex dtype (zero imaginary part); plot its
# real part so that no complex values are handed to matplotlib
ax.semilogy(range(len(PSD)), np.real(PSD), 'b.')
ax.set_xlabel("n")
ax.set_ylabel("PSD")

# derive velocity from the filtered trench position
trench_velocities_filtered = np.gradient(trenches_filtered, ts_re) * Ro

# plot: original data in the left column, filtered data in the right column
fig = plt.figure(tight_layout=True)
gs = gridspec.GridSpec(2, 2)
ax = fig.add_subplot(gs[0, 0])
ax.plot(ts, trenches)
ax.set_xlabel('Time (yr)')
ax.set_ylabel('Trench (rad)')
ax1 = fig.add_subplot(gs[1, 0])
ax1.plot(ts, trench_velocities)
ax1.set_xlabel('Time (yr)')
ax1.set_ylabel('Trench motion (m/yr)')
ax = fig.add_subplot(gs[0, 1])
ax.plot(ts_re, trenches_filtered)
ax.set_xlabel('Time (yr)')
ax.set_ylabel('Trench (rad)')
ax1 = fig.add_subplot(gs[1, 1])
ax1.plot(ts_re, trench_velocities_filtered)
ax1.set_xlabel('Time (yr)')
ax1.set_ylabel('Trench motion (m/yr)')
print("The number of entries in array: ", ts_re.size)
fig.show()