# Signal processing with Scipy
Below, we will explore different methods for processing and cleaning signals with Scipy. I haven't worked extensively with signal processing, so if there are any experts out there who have insights or suggestions for improving this work, please let me know!

## About Scipy
> SciPy (pronounced “Sigh Pie”) is open-source software for mathematics, science, and engineering.

![scipy](https://docs.scipy.org/doc/scipy/reference/_static/img/scipy_org_logo.gif)

Scipy is an extremely useful library for scientific and numerical computing in Python. It contains very useful submodules for Optimization, Fast Fourier Transform, Linear Algebra, Matrix Encoding, and Image Processing. In fact, Scikit-learn uses it extensively for manipulating large sparse matrices, and for algorithms such as Ordinary Least Squares!

We will focus on the `signal` processing submodule for this exploration notebook.

This is a work in progress, so please tune in for more updates!

### References:
* Loading data (signal and target): https://www.kaggle.com/theoviel/fast-fourier-transform-denoising
* Read data: https://www.kaggle.com/sohier/reading-the-data-with-python

### Recommendations:
FFT Kernel (we won't cover this, since the following notebook covers it extensively):
* https://www.kaggle.com/theoviel/fast-fourier-transform-denoising

Official Scipy Tutorial on Signal Processing:
* https://docs.scipy.org/doc/scipy/reference/tutorial/signal.html


In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import pyarrow.parquet as pq
import matplotlib.pyplot as plt
from scipy import signal

# Use seaborn's "whitegrid" style for the figures drawn in this notebook.
sns.set_style("whitegrid")

In [None]:
import warnings
# Ignore FutureWarning messages so they do not clutter the cell outputs.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Importing and Preprocessing

In [None]:
# Read the parquet columns named "0" .. "998" (999 signals) and regroup them
# into a 3-D array: 999 // 3 groups, 3 signals per group, 800000 samples each.
n_signals = 999
n_samples = 800000
column_names = list(map(str, range(n_signals)))
signals = pq.read_table('../input/train.parquet', columns=column_names).to_pandas()
signals = signals.values.T.reshape((n_signals // 3, 3, n_samples))

In [None]:
# Load the training metadata table and preview its first rows.
metadata_path = '../input/metadata_train.csv'
train_df = pd.read_csv(metadata_path)
train_df.head()

In [None]:
# Keep every third metadata row's label (one per group of three rows) and
# count how often each label occurs.
# NOTE(review): this spans every row of the metadata, while only 999 signals
# are loaded above; if the metadata has more rows, `target` is longer than
# `signals` - confirm the counts are meant to describe the whole table.
target = train_df['target'].iloc[::3]
target.value_counts()

## 1. Applying Convolutions

In [None]:
def apply_convolution(sig, window):
    """Smooth ``sig`` with a moving average of ``window`` samples.

    Args:
        sig: 1-D array-like signal.
        window: Number of samples averaged per output value (positive int).

    Returns:
        Array with the same length as ``sig``. Each value is the mean of the
        ``window`` samples around the same position (for an even ``window``
        the span reaches one sample further on the lower-index side).
        Samples beyond either end of ``sig`` count as zero, so values close
        to the edges are pulled toward zero.
    """
    # A plain box of ones is the whole kernel. Padding it with zeros on both
    # sides would leave the 'same'-mode output unchanged and only make the
    # convolution more expensive.
    kernel = np.ones(window)
    return signal.convolve(sig, kernel, mode='same') / window

Here, we take an example with negative target.

### Negative Target

In [None]:
# Negative-target example (index 0): draw each raw phase together with its
# window-averaged version.
window = 10
plt.figure(figsize=(15, 10))

for phase, sig in enumerate(signals[0]):
    convolved = apply_convolution(sig, window)
    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(convolved, label=f'Phase {phase} Convolved')

plt.title(f"Applying convolutions - Window Size {window}", size=15)
plt.legend()
plt.show()

In [None]:
# Same comparison on the negative-target example (index 0), now averaging
# over 100 samples.
window = 100
plt.figure(figsize=(15, 10))

for phase, sig in enumerate(signals[0]):
    convolved = apply_convolution(sig, window)
    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(convolved, label=f'Phase {phase} Convolved')

plt.title(f"Applying convolutions - Window Size {window}", size=15)
plt.legend()
plt.show()

In [None]:
# Negative-target example (index 0) with the widest window tried here
# (1000 samples).
window = 1000
plt.figure(figsize=(15, 10))

for phase, sig in enumerate(signals[0]):
    convolved = apply_convolution(sig, window)
    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(convolved, label=f'Phase {phase} Convolved')

plt.title(f"Applying convolutions - Window Size {window}", size=15)
plt.legend()
plt.show()

Now, let's see a positive target.

### Positive Target

In [None]:
# Positive-target example (index 1): draw each raw phase together with its
# window-averaged version.
window = 10
plt.figure(figsize=(15, 10))

for phase, sig in enumerate(signals[1]):
    convolved = apply_convolution(sig, window)
    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(convolved, label=f'Phase {phase} Convolved')

plt.title(f"Applying convolutions - Window Size {window}", size=15)
plt.legend()
plt.show()

In [None]:
# Same comparison on the positive-target example (index 1), now averaging
# over 100 samples.
window = 100
plt.figure(figsize=(15, 10))

for phase, sig in enumerate(signals[1]):
    convolved = apply_convolution(sig, window)
    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(convolved, label=f'Phase {phase} Convolved')

plt.title(f"Applying convolutions - Window Size {window}", size=15)
plt.legend()
plt.show()

In [None]:
# Positive-target example (index 1) with the widest window tried here
# (1000 samples).
window = 1000
plt.figure(figsize=(15, 10))

for phase, sig in enumerate(signals[1]):
    convolved = apply_convolution(sig, window)
    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(convolved, label=f'Phase {phase} Convolved')

plt.title(f"Applying convolutions - Window Size {window}", size=15)
plt.legend()
plt.show()

## 2. C-Spline

### Negative Target

In [None]:
%%time
# Negative-target example (index 0): run cspline1d on every phase without
# smoothing and plot its output over the raw trace.
smoothing = 0
example = signals[0]
plt.figure(figsize=(15, 10))

for phase in range(3):
    sig = example[phase]
    filtered = signal.cspline1d(sig, lamb=smoothing)

    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.title(f"Applying Cubic Spline, Smoothing: {smoothing}", size=15)
plt.legend()
plt.show()

In [None]:
# Negative-target example (index 0): cspline1d with a smoothing coefficient
# of 1, plotted over the raw trace.
smoothing = 1
example = signals[0]
plt.figure(figsize=(15, 10))

for phase in range(3):
    sig = example[phase]
    filtered = signal.cspline1d(sig, lamb=smoothing)

    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.title(f"Applying Cubic Spline, Smoothing: {smoothing}", size=15)
plt.legend()
plt.show()

In [None]:
# Negative-target example (index 0): cspline1d with a smoothing coefficient
# of 10, plotted over the raw trace.
smoothing = 10
example = signals[0]
plt.figure(figsize=(15, 10))

for phase in range(3):
    sig = example[phase]
    filtered = signal.cspline1d(sig, lamb=smoothing)

    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.title(f"Applying Cubic Spline, Smoothing: {smoothing}", size=15)
plt.legend()
plt.show()

### Positive Target

In [None]:
# Positive-target example (index 1): run cspline1d on every phase without
# smoothing and plot its output over the raw trace.
smoothing = 0
example = signals[1]
plt.figure(figsize=(15, 10))

for phase in range(3):
    sig = example[phase]
    filtered = signal.cspline1d(sig, lamb=smoothing)

    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.title(f"Applying Cubic Spline, Smoothing: {smoothing}", size=15)
plt.legend()
plt.show()

In [None]:
# Positive-target example (index 1): cspline1d with a smoothing coefficient
# of 1, plotted over the raw trace.
smoothing = 1
example = signals[1]
plt.figure(figsize=(15, 10))

for phase in range(3):
    sig = example[phase]
    filtered = signal.cspline1d(sig, lamb=smoothing)

    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.title(f"Applying Cubic Spline, Smoothing: {smoothing}", size=15)
plt.legend()
plt.show()

In [None]:
# Positive-target example (index 1): cspline1d with a smoothing coefficient
# of 10, plotted over the raw trace.
smoothing = 10
example = signals[1]
plt.figure(figsize=(15, 10))

for phase in range(3):
    sig = example[phase]
    filtered = signal.cspline1d(sig, lamb=smoothing)

    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.title(f"Applying Cubic Spline, Smoothing: {smoothing}", size=15)
plt.legend()
plt.show()

## 3. Q-Spline

Scipy does not support smoothing for Q-Splines yet, so we set the smoothing coefficient to 0.

### Negative Target

In [None]:
%%time
# Start with negative target: the negative example is index 0 of `signals`,
# the same index the other negative-target cells in this notebook read.
smoothing = 0
plt.figure(figsize=(15, 10))

for phase in range(3):
    sig = signals[0, phase, :]
    filtered = signal.qspline1d(sig, smoothing)

    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.legend()
plt.title(f"Applying Quadratic Spline, Smoothing: {smoothing}", size=15)
plt.show()

### Positive Target

In [None]:
# Positive-target example (index 1): run qspline1d on every phase (smoothing
# stays at 0) and plot its output over the raw trace.
smoothing = 0
example = signals[1]
plt.figure(figsize=(15, 10))

for phase in range(3):
    sig = example[phase]
    filtered = signal.qspline1d(sig, smoothing)

    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.title(f"Applying Quadratic Spline, Smoothing: {smoothing}", size=15)
plt.legend()
plt.show()

## 4. Median Filtering

### Negative Example

In [None]:
%%time
# Negative example (index 0) with a 1-sample median window. A median over a
# single sample leaves the values unchanged, so this is the baseline plot.
kernel_size = 1
plt.figure(figsize=(15, 10))

for phase, sig in enumerate(signals[0]):
    filtered = signal.medfilt(sig, kernel_size=kernel_size)
    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.title(f"Applying Median Filters, Kernel Size: {kernel_size}", size=15)
plt.legend()
plt.show()

In [None]:
# Negative example (index 0): median filter over an 11-sample window,
# plotted over the raw trace.
kernel_size = 11
plt.figure(figsize=(15, 10))

for phase, sig in enumerate(signals[0]):
    filtered = signal.medfilt(sig, kernel_size=kernel_size)
    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.title(f"Applying Median Filters, Kernel Size: {kernel_size}", size=15)
plt.legend()
plt.show()

In [None]:
# Negative example (index 0): median filter over a 51-sample window,
# plotted over the raw trace.
kernel_size = 51
plt.figure(figsize=(15, 10))

for phase, sig in enumerate(signals[0]):
    filtered = signal.medfilt(sig, kernel_size=kernel_size)
    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.title(f"Applying Median Filters, Kernel Size: {kernel_size}", size=15)
plt.legend()
plt.show()

In [None]:
# Negative example (index 0): median filter over a 101-sample window,
# plotted over the raw trace.
kernel_size = 101
plt.figure(figsize=(15, 10))

for phase, sig in enumerate(signals[0]):
    filtered = signal.medfilt(sig, kernel_size=kernel_size)
    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.title(f"Applying Median Filters, Kernel Size: {kernel_size}", size=15)
plt.legend()
plt.show()

### Positive Example

In [None]:
# Positive example (index 1) with a 1-sample median window. A median over a
# single sample leaves the values unchanged, so this is the baseline plot.
kernel_size = 1
plt.figure(figsize=(15, 10))

for phase, sig in enumerate(signals[1]):
    filtered = signal.medfilt(sig, kernel_size=kernel_size)
    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.title(f"Applying Median Filters, Kernel Size: {kernel_size}", size=15)
plt.legend()
plt.show()

In [None]:
# Positive example (index 1): median filter over an 11-sample window,
# plotted over the raw trace.
kernel_size = 11
plt.figure(figsize=(15, 10))

for phase, sig in enumerate(signals[1]):
    filtered = signal.medfilt(sig, kernel_size=kernel_size)
    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.title(f"Applying Median Filters, Kernel Size: {kernel_size}", size=15)
plt.legend()
plt.show()

In [None]:
# Positive example (index 1): median filter over a 51-sample window,
# plotted over the raw trace.
kernel_size = 51
plt.figure(figsize=(15, 10))

for phase, sig in enumerate(signals[1]):
    filtered = signal.medfilt(sig, kernel_size=kernel_size)
    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.title(f"Applying Median Filters, Kernel Size: {kernel_size}", size=15)
plt.legend()
plt.show()

In [None]:
# Positive example (index 1): median filter over a 101-sample window,
# plotted over the raw trace.
kernel_size = 101
plt.figure(figsize=(15, 10))

for phase, sig in enumerate(signals[1]):
    filtered = signal.medfilt(sig, kernel_size=kernel_size)
    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.title(f"Applying Median Filters, Kernel Size: {kernel_size}", size=15)
plt.legend()
plt.show()

## 5. Digital Filters (IIR)

#### Butter Filter Design

This is what we will be using:

> Butterworth digital and analog filter design.
> Design an Nth-order digital or analog Butterworth filter and return the filter coefficients.

In this case, the numerator (b) and denominator (a) polynomials of the IIR filter are returned. The following describes the Wn parameter:

> A scalar or length-2 sequence giving the critical frequencies. For a Butterworth filter, this is the point at which the gain drops to 1/sqrt(2) that of the passband (the “-3 dB point”).
> For digital filters, Wn are in the same units as fs. By default, fs is 2 half-cycles/sample, so these are normalized from 0 to 1, where 1 is the Nyquist frequency. (Wn is thus in half-cycles / sample.)
> For analog filters, Wn is an angular frequency (e.g. rad/s).

Is Butterworth good for Time Series? Check out [this paper](https://amstat.tandfonline.com/doi/abs/10.1198/073500101681019909). Here's the abstract:

> Long-term trends and business cycles are usually estimated by applying the Hodrick and Prescott (HP) filter to X-11 seasonally adjusted data. A two-stage procedure is proposed in this article to improve this methodology. The improvement is based on (a) using Butterworth or band-pass filters specifically designed for the problem at hand as an alternative to the HP filter, (b) applying the selected filter to estimated trend cycles instead of to seasonally adjusted series, and (c) using autoregressive integrated moving average models to extend the input series with forecasts and backcasts.

### Negative Example

In [None]:
%%time
# Negative example (index 0): filter each phase with a 3rd-order Butterworth
# filter (low-pass, butter's default) whose critical frequency is Wn.
Wn = 0.50
# The coefficients depend only on the order and Wn, so the filter is designed
# once instead of once per phase.
b, a = signal.butter(3, Wn)
plt.figure(figsize=(15, 10))

for phase in range(3):
    sig = signals[0, phase, :]
    # filtfilt applies the filter forward and then backward over the signal.
    filtered = signal.filtfilt(b, a, sig)

    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.legend()
plt.title(f"Applying IIR Filtering with Butterworth, Wn: {Wn}", size=15)
plt.show()

In [None]:
# Negative example (index 0): 3rd-order Butterworth filter (low-pass,
# butter's default) with a lower critical frequency.
Wn = 0.05
# The coefficients depend only on the order and Wn, so the filter is designed
# once instead of once per phase.
b, a = signal.butter(3, Wn)
plt.figure(figsize=(15, 10))

for phase in range(3):
    sig = signals[0, phase, :]
    # filtfilt applies the filter forward and then backward over the signal.
    filtered = signal.filtfilt(b, a, sig)

    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.legend()
plt.title(f"Applying IIR Filtering with Butterworth, Wn: {Wn}", size=15)
plt.show()

In [None]:
# Negative example (index 0): 3rd-order Butterworth filter (low-pass,
# butter's default) with the lowest critical frequency tried here.
Wn = 0.01
# The coefficients depend only on the order and Wn, so the filter is designed
# once instead of once per phase.
b, a = signal.butter(3, Wn)
plt.figure(figsize=(15, 10))

for phase in range(3):
    sig = signals[0, phase, :]
    # filtfilt applies the filter forward and then backward over the signal.
    filtered = signal.filtfilt(b, a, sig)

    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.legend()
plt.title(f"Applying IIR Filtering with Butterworth, Wn: {Wn}", size=15)
plt.show()

### Positive Example

In [None]:
%%time
# Positive example (index 1): filter each phase with a 3rd-order Butterworth
# filter (low-pass, butter's default) whose critical frequency is Wn.
Wn = 0.50
# The coefficients depend only on the order and Wn, so the filter is designed
# once instead of once per phase.
b, a = signal.butter(3, Wn)
plt.figure(figsize=(15, 10))

for phase in range(3):
    sig = signals[1, phase, :]
    # filtfilt applies the filter forward and then backward over the signal.
    filtered = signal.filtfilt(b, a, sig)

    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.legend()
plt.title(f"Applying IIR Filtering with Butterworth, Wn: {Wn}", size=15)
plt.show()

In [None]:
# Positive example (index 1): 3rd-order Butterworth filter (low-pass,
# butter's default) with a lower critical frequency.
Wn = 0.05
# The coefficients depend only on the order and Wn, so the filter is designed
# once instead of once per phase.
b, a = signal.butter(3, Wn)
plt.figure(figsize=(15, 10))

for phase in range(3):
    sig = signals[1, phase, :]
    # filtfilt applies the filter forward and then backward over the signal.
    filtered = signal.filtfilt(b, a, sig)

    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.legend()
plt.title(f"Applying IIR Filtering with Butterworth, Wn: {Wn}", size=15)
plt.show()

In [None]:
# Positive example (index 1): 3rd-order Butterworth filter (low-pass,
# butter's default) with the lowest critical frequency tried here.
Wn = 0.01
# The coefficients depend only on the order and Wn, so the filter is designed
# once instead of once per phase.
b, a = signal.butter(3, Wn)
plt.figure(figsize=(15, 10))

for phase in range(3):
    sig = signals[1, phase, :]
    # filtfilt applies the filter forward and then backward over the signal.
    filtered = signal.filtfilt(b, a, sig)

    plt.plot(sig, label=f'Phase {phase} Raw')
    plt.plot(filtered, label=f'Phase {phase} Filtered')

plt.legend()
plt.title(f"Applying IIR Filtering with Butterworth, Wn: {Wn}", size=15)
plt.show()