In [None]:
import pandas as pd
from matplotlib import pyplot as plt

import numpy as np
import statsmodels.tsa.stattools as ts

## read example train features

In [None]:
# Row stride used by my_load() when slicing each loaded frame (frame[::downsample_pts]);
# 1 keeps every row, i.e. no downsampling.
downsample_pts = 1

In [None]:
def my_load(subj_ids:list, series_ids:list, data_dir:str='data/raw/train', downsample:int=None):
    """
    Load the feature ("data") and target ("events") CSV files of every
    (subject, series) combination and stack them row-wise.

    Parameters
    ----------
    subj_ids : list of int
        subject numbers, used in the file name `subj<subj>_series<series>_*.csv`
    series_ids : list of int
        series numbers, combined with every subject id
    data_dir : str
        directory holding the CSV files (default: the original hard-coded location)
    downsample : int or None
        keep every `downsample`-th row of each file; when None the module-level
        `downsample_pts` is used, as before

    Returns
    -------
    (features_all, targets_all) : tuple of pandas.DataFrame
        both indexed by (`subj_id`, `series_id`, `id`), all columns cast to int16

    Raises
    ------
    ValueError
        if the row stride is smaller than 1, or if `subj_ids` / `series_ids`
        is empty (nothing to concatenate)
    """
    import os  # local import so the function does not depend on another cell

    step = downsample_pts if downsample is None else downsample
    if step < 1:
        raise ValueError('downsample must be a positive integer, got %r' % (step,))

    frames = {'features': [], 'targets': []}
    for subj_id in subj_ids:
        for series_id in series_ids:
            for kind, suffix in [('features', 'data'), ('targets', 'events')]:
                fn = os.path.join(data_dir, 'subj%i_series%i_%s.csv' % (subj_id, series_id, suffix))
                print('status:', subj_id, series_id, kind)
                frame = pd.read_csv(fn)
                frame['subj_id'] = subj_id
                frame['series_id'] = series_id
                # every remaining column must be castable to int16
                frame = frame.set_index(['subj_id', 'series_id', 'id']).astype('int16')
                frames[kind].append(frame[::step])  # downsample

    if not frames['features']:
        raise ValueError(
            'nothing to load: subj_ids=%r, series_ids=%r' % (subj_ids, series_ids)
        )

    features_all = pd.concat(frames['features'], axis=0)
    targets_all = pd.concat(frames['targets'], axis=0)
    return features_all, targets_all

In [None]:
# Subject 1, series 1 through 8.
train_features, train_targets = my_load(subj_ids=[1], series_ids=list(range(1, 9)))
train_features.shape, train_targets.shape

In [None]:
def adf2human(result):
    """
    Turn an ADF test result into a short verdict for humans.

    `result` is indexed like the tuple returned by `adfuller`: `result[1]` is
    read as the p-value and `result[0]` as the test statistic.

    - p-value <= 0.05                      -> 'stationary'
    - otherwise, test statistic > -2.5     -> 'could be non-stationary'
    - otherwise                            -> 'stationary'

    https://stats.stackexchange.com/questions/73921/how-to-interpret-the-results-of-adf-test-using-sas-arima#74508
    """
    if result[1] <= 0.05:
        return 'stationary'
    if result[0] > -2.5:
        return 'could be non-stationary'
    return 'stationary'

# sanity check on the four combinations of (statistic, p-value)
tuple(adf2human(case) for case in ([10, .01], [10, .08], [-10, .01], [-10, .08]))

## ADF test on first 1k pts

Result: `adf2human` reports 'could be non-stationary', i.e. the first 1k points are not stationary.

In [None]:
# First feature column, restricted to its first 1000 rows.
k = train_features.columns[0]
x = train_features[k].head(n=1000)
# Disabled experiment: subtract a linear ramp (0.6 per sample) from x.
# x -= x.reset_index().index*(600)/1000
# reset_index() moves the index levels into columns, so the plot's x-axis is the row position.
x.reset_index()[k].plot()
plt.show()

In [None]:
# Augmented Dickey-Fuller test on the slice; display column name, raw result and verdict.
result = ts.adfuller(x)
k, result, adf2human(result)

## pywt wavelet

In [None]:
import pywt

ecgsignal = x
# Level-8 multilevel DWT with the 'coif5' wavelet.
# PyWavelets' argument order is wavedec(data, wavelet, mode=..., level=...) and it
# returns one list [cA_n, cD_n, ..., cD_1]; the MATLAB form
# `[c, l] = wavedec(x, 8, 'coif5')` is not valid here.
coeffs = pywt.wavedec(ecgsignal, 'coif5', level=8)
# PyWavelets has no wrcoef(). The counterpart of MATLAB's wrcoef('a', c, l, 'coif5', 8)
# is an inverse transform with every detail band zeroed, which keeps only the
# level-8 approximation.
a9 = pywt.waverec([coeffs[0]] + [np.zeros_like(d) for d in coeffs[1:]], 'coif5')
# Full reconstruction from all coefficients.
renc = pywt.waverec(coeffs, 'coif5')

In [None]:
import pywt
# Single-level DWT of a toy signal with the 'db1' wavelet:
# cA = approximation coefficients, cD = detail coefficients.
cA, cD = pywt.dwt([1, 2, 3, 4], 'db1')
# Inverse transform from both coefficient sets (shown as the cell output).
pywt.idwt(cA, cD, 'db1')

In [None]:
# Reconstruct from only the first approximation/detail coefficient.
# waverec's second argument is the wavelet (not the signal), so pass the same
# 'db1' that was used for the decomposition.
pywt.waverec([cA[:1], cD[:1]], 'db1')

## scipy wavelet

To replace with https://github.com/aaren/wavelets

In [None]:
from scipy import signal
import matplotlib.pyplot as plt
# NOTE(review): signal.cwt / signal.ricker are deprecated in recent SciPy
# releases; confirm that the installed version still provides them.

# Test signal on 200 samples in [-1, 1): a cosine with 7 cycles per unit of t
# plus a Gaussian pulse shifted to t = 0.4.
t = np.linspace(-1, 1, 200, endpoint=False)
sig = np.cos(2 * np.pi * 7 * t) + signal.gausspulse(t - 0.4, fc=2)
widths = np.arange(1, 31)
cwtmatr = signal.cwt(sig, signal.ricker, widths)
# Colour limits are symmetric around zero.
plt.imshow(
    cwtmatr,
    extent=[-1, 1, 31, 1],
    cmap='PRGn',
    aspect='auto',
    vmax=np.abs(cwtmatr).max(),
    vmin=-np.abs(cwtmatr).max(),
)
plt.show()

In [None]:
# Compare the shape of the CWT matrix with the length of the input signal.
cwtmatr.shape, sig.shape

In [None]:
# Transpose so that each row of cwtmatr becomes one column, i.e. one plotted line.
pd.DataFrame(cwtmatr).transpose().plot()
plt.show()

In [None]:
# The input signal itself, for comparison with the transform.
pd.Series(sig).plot()
plt.show()

In [None]:
# After the transpose each row is one sample position; sum(axis=1) adds up
# the values of all cwtmatr rows at that position.
pd.DataFrame(cwtmatr).transpose().sum(axis=1).plot()
plt.show()

## https://github.com/aaren/wavelets

In [None]:
from wavelets import WaveletAnalysis

# given a signal x(t): 1000 standard-normal samples drawn from a seeded
# generator, so the cell yields the same data on every run and leaves the
# global NumPy random state untouched
x = np.random.RandomState(0).randn(1000)
# and a sample spacing
dt = 0.1

wa = WaveletAnalysis(x, dt=dt)

# wavelet power spectrum
power = wa.wavelet_power

# scales
scales = wa.scales

# associated time vector
t = wa.time

# reconstruction of the original data
rx = wa.reconstruction()

In [None]:
# Shape of the reconstructed signal.
rx.shape

In [None]:
# Shape of the original signal, to compare with the reconstruction's shape.
x.shape

In [None]:
# First five reconstructed values.
rx[:5]

In [None]:
# Overlay original and reconstruction for the first 100 samples.
# np.real() keeps only the real part, as the other reconstruction plots in
# this notebook do (wa.reconstruction() may come back with a complex dtype).
# The +5 presumably offsets 'rec' vertically so the two curves do not overlap.
pd.DataFrame({'ori': x, 'rec': np.real(rx)+5}).head(n=100).plot()
plt.show()

In [None]:
# Reconstruction error for the first 100 samples.
# np.real() keeps only the real part, as the other reconstruction cells in
# this notebook do (wa.reconstruction() may come back with a complex dtype).
pd.DataFrame({'Diff': x-np.real(rx)}).head(n=100).plot()
plt.show()

## apply on original feature

In [None]:
# Same analysis as the random-signal demo, now on real data:
# the first 10000 values of the first feature column, as a NumPy array.
k = train_features.columns[0]
x = train_features[k].head(n=10000).values

# and a sample spacing
# NOTE(review): 0.001 is hard-coded; confirm it matches the recording's sampling interval.
dt = 0.001

wa = WaveletAnalysis(x, dt=dt)

# wavelet power spectrum
power = wa.wavelet_power

# scales
scales = wa.scales

# associated time vector
t = wa.time

# reconstruction of the original data
rx = wa.reconstruction()

In [None]:
# Original vs. real part of the reconstruction.
# The +5 presumably offsets 'rec' vertically so the two curves do not overlap.
pd.DataFrame({'ori': x, 'rec': np.real(rx)+5}).plot(figsize=(20,3))
plt.show()

In [None]:
# Reconstruction error: original minus the real part of the reconstruction.
pd.DataFrame({'diff': x-np.real(rx)}).plot(figsize=(20,3))
plt.show()

## ADF on wavelet reconstruction

The wavelet reconstruction is still non-stationary, but closer to stationarity than the original signal.

In [None]:
# ADF test on the real part of the wavelet reconstruction.
result = ts.adfuller(np.real(rx))
k, result, adf2human(result)

In [None]:
# ADF test on the original signal, for comparison with the reconstruction's result.
result = ts.adfuller(x)
k, result, adf2human(result)

## make stationary piece-wise

In [None]:
k = train_features.columns[0]
n_px = 1000
# sample spacing, identical for every piece
dt = 0.001
rx = []
for i in range(10000//n_px):
    # i-th window: the last n_px rows of the first (i+1)*n_px rows
    x = train_features[k].head(n=(i+1)*n_px).tail(n=n_px).values
    wa = WaveletAnalysis(x, dt=dt)
    # keep the real part of this window's reconstruction
    rx.append(np.real(wa.reconstruction()))

# stitch the per-window reconstructions back into one array
rx = np.concatenate(rx, axis=0)
rx.shape

In [None]:
# Original first 10000 values next to the stitched piece-wise reconstruction.
x = train_features[k].head(n=10*1000).values
y = pd.DataFrame({'ori': x, 'rec': rx})

y.plot(figsize=(20,3))
plt.show()

# Residual between the original and the piece-wise reconstruction.
y['diff'] = y['ori'] - y['rec']

y['diff'].plot(figsize=(20,3))
plt.show()

In [None]:
# Zoom on rows 4000-4999 (the last 1000 of the first 5000 rows).
y.head(n=5000).tail(n=1000).plot(figsize=(20,3))
plt.show()


## subtract global mean (per-sample mean across all min-max-scaled channels)

In [None]:
k = train_features.columns[0]
x = train_features[k].head(n=10*1000)
# Cast to float before scaling: the loader stores int16, and int16 arithmetic
# (y - min, max - min) wraps around silently once a column's range exceeds 32767.
y = train_features.head(n=10*1000).astype('float64')
# Min-max scale each column to 0-1 (a constant column yields NaN: 0/0).
s = (y - y.min(axis=0)) / (y.max(axis=0) - y.min(axis=0)) # scale to 0-1
# Per-row mean across all scaled columns.
m = s.mean(axis=1)

# Scaled first column with the cross-column mean removed.
n = s[k] - m

# From here on y holds the summary frame, not the raw slice.
y = pd.DataFrame({'ori': x, 'mean': m, 'new': n})

y['ori'].plot(figsize=(20,3))
plt.show()

y['mean'].plot(figsize=(20,3))
plt.show()

y['new'].plot(figsize=(20,3))
plt.show()

## plot all

In [None]:
# For every column: plot its first 10000 raw values, then the 0-1 scaled version from `s`.
# Note: the loop rebinds the notebook-level names k and x.
for k in train_features.columns:
    x = train_features[k].head(n=10*1000)
    x.plot()
    plt.title(k)
    plt.show()
    
    s[k].plot()
    plt.title(k)
    plt.show()


## correlation matrix

In [None]:
# Pairwise Pearson correlation between the columns of the scaled frame.
corr_df = s.corr(method='pearson')
corr_df.shape

In [None]:
import seaborn as sns
# Heatmap of the correlation matrix.
sns.heatmap(corr_df)
plt.show()

In [None]:
# Columns whose Pearson correlation with 'Fp1' exceeds 0.4, first 10000 rows.
fp1_corr = corr_df.loc['Fp1']
fp1_like = s[fp1_corr[fp1_corr > 0.4].index].head(n=10000)

# Each selected column as a semi-transparent line.
fp1_like.plot(alpha=.5)
plt.show()

# Their per-row mean.
fp1_like.mean(axis=1).plot()
plt.show()

In [None]:
# Columns whose Pearson correlation with 'Fp1' exceeds 0.4, first 10000 rows.
fp1_row = corr_df.loc['Fp1']
df = s[fp1_row[fp1_row > 0.4].index].head(n=10000)
# Subtract each row's mean over the selected columns; the mean is shaped as a
# column vector so it broadcasts across the columns.
row_mean = df.mean(axis=1).values[:, np.newaxis]
df2 = pd.DataFrame(df.values - row_mean, index=df.index, columns=df.columns)
df.shape, df2.shape

In [None]:
# The row-mean-centred columns as semi-transparent lines.
df2.plot(alpha=.5)
plt.show()