In [9]:
%load_ext autoreload
%autoreload 2
import warnings
warnings.filterwarnings('ignore')


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
# Requires PyWavelets (imported below as `pywt`); if it is missing, run: %pip install PyWavelets

In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import glob

import h5py
import flammkuchen as fl
import seaborn as sns
from matplotlib.cm import ScalarMappable
import scipy
from scipy import stats
import pywt

In [12]:
# Candidate wavelet names to test; later cells pick one of them by position.
# NOTE(review): 'haar' and 'db1' appear to name the same filter in PyWavelets —
# confirm whether both entries are needed.
wavelets = [
    'db1',
    'db2',
    'haar',
    'coif1',
    'sym2',
]

def extract_wavelet_features(signal, wavelet='db1'):
    """Run a single-level discrete wavelet transform on ``signal``.

    Parameters
    ----------
    signal : array-like
        Data handed unchanged to ``pywt.dwt``.
    wavelet : str, optional
        Name of any wavelet supported by PyWavelets. Defaults to ``'db1'``
        (the Daubechies wavelet with one vanishing moment).

    Returns
    -------
    tuple
        ``(cA, cD)``: the approximation (low-pass) coefficients followed by
        the detail (high-pass) coefficients, usable as model features.
    """
    approximation, detail = pywt.dwt(signal, wavelet)
    return approximation, detail

## Load data

In [13]:
# Folder that is searched for the per-fish raw-data entries.
master_path = Path(r'\\portulab.synology.me\data\Kata\testdata\Raw_Data')

# Path.glob yields matches in arbitrary, filesystem-dependent order. Sorting keeps
# the positional index used to pick a fish pointing at the same entry on every run.
fish_paths = sorted(master_path.glob('*f[0-9]*'))
fish_paths

[WindowsPath('//portulab.synology.me/data/Kata/testdata/Raw_Data/240423_f0')]

In [16]:
# Position of the fish to process within `fish_paths`.
fish = 0
fish_id = fish_paths[fish].name
# Experiment label is set by hand. The value previously derived from the parent
# folder name (`parts[-2]`) was overwritten on the next line and never used, so
# that dead assignment has been removed.
exp_name = 'testfish'
fish_id, exp_name


('240423_f0', 'testfish')

In [17]:
# Processed data is read from and saved to the same folder, so both names are
# bound to one and the same Path object.
save_data_path = out_path = Path(r'\\portulab.synology.me\data\Kata\testdata\Processed_Data')

In [18]:
# Load this fish's tensor file from the processed-data folder, then show its shape.
traces = fl.load(out_path.joinpath('{}_tensor.h5'.format(fish_id)))
traces.shape

(500, 3, 50)

In [20]:
# Split the tensor along its second axis: index 0 -> tail, 1 -> left fin, 2 -> right fin.
tail, fin_left, fin_right = (traces[:, channel, :] for channel in range(3))


## How were the wavelets extracted?

In [22]:
# Position of the wavelet to use within the `wavelets` list.
n_w = 3

print(wavelets[n_w])
# Decompose each channel with the selected wavelet. Every input is the 2-D slice
# of `traces` for one channel (one row per trace), not a single (1, 50) series.
(l_cA, l_cD), (r_cA, r_cD), (t_cA, t_cD) = (
    extract_wavelet_features(channel_traces, wavelet=wavelets[n_w])
    for channel_traces in (fin_left, fin_right, tail)
)

l_cA.shape

coif1


(500, 27)

In [23]:
# Column labels for the tail coefficients, one per coefficient column:
# approximation (cA) labels first, then detail (cD) labels.
wavelet_feature_namesA = list(map('t_wf_cA_{}'.format, range(t_cA.shape[1])))
wavelet_feature_namesD = list(map('t_wf_cD_{}'.format, range(t_cD.shape[1])))
wavelet_feature_names = [*wavelet_feature_namesA, *wavelet_feature_namesD]

In [24]:
# Put the tail approximation and detail coefficients side by side (both are 2-D,
# one row per trace), then show the combined shape.
wf_tail_features = np.concatenate((t_cA, t_cD), axis=1)
wf_tail_features.shape

(500, 54)

In [25]:
# Build the tail feature table directly from the coefficient arrays and the
# tail-specific label lists. The shared name `wavelet_feature_names` is rebound to
# the fin labels in a later cell, and `wf_tail_features` changes here from array
# to DataFrame. Re-running the old version of this cell after the fin cells
# silently produced a frame of NaN columns; this version gives the same result
# whenever it is run.
wf_tail_features = pd.DataFrame(
    np.hstack([t_cA, t_cD]),
    columns=wavelet_feature_namesA + wavelet_feature_namesD,
)
wf_tail_features.head()

Unnamed: 0,t_wf_cA_0,t_wf_cA_1,t_wf_cA_2,t_wf_cA_3,t_wf_cA_4,t_wf_cA_5,t_wf_cA_6,t_wf_cA_7,t_wf_cA_8,t_wf_cA_9,...,t_wf_cD_17,t_wf_cD_18,t_wf_cD_19,t_wf_cD_20,t_wf_cD_21,t_wf_cD_22,t_wf_cD_23,t_wf_cD_24,t_wf_cD_25,t_wf_cD_26
0,-0.002818,-0.015071,-0.028747,-0.349729,-1.074231,-1.402951,-0.668198,0.233381,0.422797,-0.31276,...,-0.113901,-0.006884,0.089483,0.037861,-0.037192,-0.028998,-0.000994,0.003434,-0.00233,0.008642
1,-0.06436,-0.052751,-0.029729,0.255921,0.5026,0.118457,-0.596962,-0.604856,0.156607,0.668136,...,-0.028635,-0.07527,-0.011133,0.034808,0.020842,-0.005986,0.002198,0.010562,-0.023591,0.006647
2,-0.049273,-0.034648,-0.076598,0.088695,0.976949,1.064677,-0.248431,-0.543976,0.284758,0.479143,...,-0.086356,0.082167,0.046671,-0.045812,-0.031681,0.015004,0.00357,-1.1e-05,-3.3e-05,0.000886
3,0.000411,-0.051397,-0.052672,-0.66359,-1.421754,-1.49598,-0.611951,0.341956,0.345268,-0.641421,...,0.027572,0.046657,-0.010905,-0.006921,-0.000893,-0.01075,0.003278,0.000923,-0.001634,0.001948
4,-0.080979,-0.05106,-0.048506,0.389373,1.017737,0.844298,-0.26617,-0.652975,0.203645,0.742897,...,-0.112947,0.04388,0.06333,-0.003697,-0.022695,-0.013006,-0.007726,-0.000612,0.003726,-0.000128


In [26]:
# Column labels for the fin coefficients: left fin (cA, cD) followed by right fin
# (cA, cD), one label per coefficient column.
l_wavelet_feature_namesA = list(map('l_wf_cA_{}'.format, range(l_cA.shape[1])))
l_wavelet_feature_namesD = list(map('l_wf_cD_{}'.format, range(l_cD.shape[1])))
r_wavelet_feature_namesA = list(map('r_wf_cA_{}'.format, range(r_cA.shape[1])))
r_wavelet_feature_namesD = list(map('r_wf_cD_{}'.format, range(r_cD.shape[1])))
# NOTE(review): this rebinds `wavelet_feature_names`, which held the tail labels
# until now; cells that use the name depend on which label cell ran last.
wavelet_feature_names = [
    *l_wavelet_feature_namesA,
    *l_wavelet_feature_namesD,
    *r_wavelet_feature_namesA,
    *r_wavelet_feature_namesD,
]

In [27]:
# Put the left-fin and right-fin coefficients side by side in label order
# (left cA, left cD, right cA, right cD); all four arrays are 2-D.
wf_fin_features = np.concatenate((l_cA, l_cD, r_cA, r_cD), axis=1)
wf_fin_features.shape

(500, 108)

In [28]:
# Build the fin feature table directly from the coefficient arrays and the
# fin-specific label lists. This way the result does not depend on whichever cell
# last assigned the shared `wavelet_feature_names`, nor on `wf_fin_features` still
# being the raw array, so the cell gives the same frame whenever it is run.
wf_fin_features = pd.DataFrame(
    np.hstack([l_cA, l_cD, r_cA, r_cD]),
    columns=(
        l_wavelet_feature_namesA
        + l_wavelet_feature_namesD
        + r_wavelet_feature_namesA
        + r_wavelet_feature_namesD
    ),
)
wf_fin_features.head()

Unnamed: 0,l_wf_cA_0,l_wf_cA_1,l_wf_cA_2,l_wf_cA_3,l_wf_cA_4,l_wf_cA_5,l_wf_cA_6,l_wf_cA_7,l_wf_cA_8,l_wf_cA_9,...,r_wf_cD_17,r_wf_cD_18,r_wf_cD_19,r_wf_cD_20,r_wf_cD_21,r_wf_cD_22,r_wf_cD_23,r_wf_cD_24,r_wf_cD_25,r_wf_cD_26
0,-0.019675,-0.006439,-0.028336,-0.139065,-0.673647,-1.112129,-0.548813,-0.074127,-0.041731,-0.03737,...,-0.000138,0.000934,-0.004255,0.002175,-0.007284,0.021147,-0.005577,-0.004395,0.009767,-0.003306
1,-0.204661,-0.006477,-0.464672,-0.704969,-0.320982,-0.061285,-0.290439,-0.540259,-0.343523,-0.127601,...,0.000952,-0.014595,0.010728,0.003631,0.010556,0.003349,-0.001436,-0.004901,0.009774,-0.004256
2,0.005362,-0.019129,-0.004739,-0.456326,-1.038276,-0.504261,-0.053017,-0.256229,-0.295961,-0.112582,...,0.002999,-0.006674,0.000187,0.008188,0.003047,-0.004664,-0.012862,0.000395,0.000251,-0.001388
3,-0.007269,0.00415,-0.012815,0.042691,-0.073271,-0.316219,-0.072158,0.089198,0.085534,0.086732,...,-0.00443,-0.010846,0.013536,0.002796,-0.005157,0.010795,0.007577,0.00173,-0.006381,0.000449
4,-0.065307,0.015173,-0.259564,-0.819128,-0.758369,-0.316572,-0.195918,-0.430821,-0.519057,-0.228078,...,-0.010637,0.005617,0.013406,-0.002015,0.012208,-0.010743,-0.005937,0.002216,-0.002428,0.004298


In [32]:
# Write the fin feature table as CSV into the processed-data folder; the file is
# named after the fish. `out_path` is already a Path, so no extra wrapping is needed.
wf_fin_features.to_csv(out_path.joinpath('{}_wf_fin_features.csv'.format(fish_id)))