In [12]:
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
import pywt # Python wavelet transform implementation
from sklearn.base import BaseEstimator, TransformerMixin # Interfaces and base classes for pipeline components
class WaveletTransformer(TransformerMixin, BaseEstimator):
    """Compute single-level DWT approximation coefficients for each row.

    Every row of ``X`` is treated as one fixed-length signal and passed to
    :func:`pywt.dwt`. Only the approximation coefficients are returned; the
    detail coefficients are discarded.

    Parameters
    ----------
    wavelet_name : str, default='db1'
        Wavelet to use in the transformation.
        Must be a wavelet name defined in the PyWavelets library.
        See http://wavelets.pybytes.com/
    mode : str, default='symmetric'
        Signal extension (extrapolation) mode for the transform.
        See https://pywavelets.readthedocs.io/en/latest/ref/signal-extension-modes.html#ref-modes

    Attributes
    ----------
    n_features_ : int
        The number of features (signal length) of the data passed to
        :meth:`fit`.
    """
    def __init__(self,
                 wavelet_name: str = 'db1',
                 mode: str = 'symmetric'):
        # scikit-learn convention: store constructor arguments unmodified so
        # get_params / set_params / clone work.
        self.wavelet_name = wavelet_name
        self.mode = mode

    def fit(self, X, y=None):
        """Record the signal length seen during fitting.

        The transformer is stateless apart from remembering the number of
        columns of ``X`` so that :meth:`transform` can reject inputs of a
        different length.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training input samples, one signal per row. Sparse matrices
            are rejected because the wavelet transform needs dense data.
        y : None
            Ignored. Present only because the pipeline API requires it.

        Returns
        -------
        self : object
            Returns self.
        """
        # Dense input only: pywt.dwt cannot consume scipy sparse matrices, so
        # let check_array reject them here with a clear error.
        X = check_array(X)

        # Each row of X must have the same length. In other words, signals
        # need to be truncated or padded to a fixed length before being
        # passed to this transformer.
        self.n_features_ = X.shape[1]

        return self

    def transform(self, X):
        """Compute the wavelet approximation coefficients of each row of X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input samples, one signal per row. ``n_features`` must match
            the value seen in :meth:`fit`.

        Returns
        -------
        X_transformed : ndarray, shape (n_samples, n_coeffs)
            Approximation coefficients of the single-level discrete wavelet
            transform of each row of ``X``. ``n_coeffs`` depends on the
            wavelet and mode and is roughly ``n_features / 2`` (for example
            1000 -> 500 with the default 'db1' / 'symmetric' settings).

        Raises
        ------
        ValueError
            If the number of columns of ``X`` differs from the number seen
            in :meth:`fit`.
        """
        # Check that fit has been called
        check_is_fitted(self, 'n_features_')

        # Input validation (dense, 2-D, numeric)
        X = check_array(X)

        # Check that the input has the same number of columns as the data
        # passed during fit.
        if X.shape[1] != self.n_features_:
            raise ValueError('Shape of input is different from what was seen '
                             'in `fit`')

        # Transform along the last axis so each row is handled as an
        # independent signal; the detail coefficients are not needed.
        cA, _ = pywt.dwt(X, self.wavelet_name, self.mode, axis=-1)
        return cA

In [18]:
from random import randrange
import numpy as np
def make_random_signal(length=1000):
    """Make a two-component wave and add Gaussian noise.

    The clean signal is ``a1 * sin((x + shift) / 100) + a2 * cos((x + shift) / 20)``
    for ``x = 0 .. length - 1``. The horizontal shift (0-100) and both
    amplitudes (0-1000) are drawn at random on every call, and zero-mean
    Gaussian noise with a standard deviation of 25 is added on top.

    Parameters
    ----------
    length : int, default=1000
        Number of samples in the generated signal.

    Returns
    -------
    noisy_signal : ndarray, shape (length,)
        The noisy signal.
    """
    shift = randrange(101)  # Random int between 0 and 100
    amplitude_f1 = randrange(0, 1001)  # Amplitude of frequency 1
    amplitude_f2 = randrange(0, 1001)  # Amplitude of frequency 2

    # Vectorised sample positions, already shifted horizontally.
    positions = np.arange(length) + shift
    raw_signal = (amplitude_f1 * np.sin(positions / 100)
                  + amplitude_f2 * np.cos(positions / 20))

    # The noise must have exactly `length` samples; a fixed size of 1000
    # fails to broadcast for any other signal length.
    noisy_signal = raw_signal + np.random.normal(0, 25, length)
    return noisy_signal

# Build a batch of ten independent noisy signals, each of the default length
signals = [make_random_signal() for _ in range(10)]


In [14]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
# Two-stage pipeline: min-max scaling first, then the wavelet transform.
# NOTE(review): MinMaxScaler scales per column, i.e. per sample index across
# signals rather than per signal - confirm this is the intended normalisation.
pipe = Pipeline(steps=[
    ('scaler', MinMaxScaler()),
    ('wavelet', WaveletTransformer()),
])

In [15]:
# Fit every pipeline step on the generated signals and transform them in one call
result = pipe.fit_transform(signals)

In [16]:
# One row per signal; the wavelet step keeps only the approximation
# coefficients, so the 1000-sample signals come out with 500 columns.
result.shape

(10, 500)