In [18]:
import pywt
import pandas as pd
import numpy as np

In [19]:
# function for feature extraction from raw data
# input: raw data from wave scan (one signal per row)
# output: dict of per-row features for the chosen mother wavelet
def generate_features(raw_data,mother_wavelet):
    """Extract statistical and wavelet-domain features from every row of ``raw_data``.

    Parameters
    ----------
    raw_data : pandas.DataFrame
        Numeric table in which each row is one signal; all reductions below
        run along ``axis=1``.
    mother_wavelet : str or pywt.Wavelet
        Wavelet passed straight through to ``pywt.wavedec``.

    Returns
    -------
    dict
        Maps feature name to a ``pandas.Series`` indexed like ``raw_data``:

        * ``raw_mean`` / ``raw_std`` -- mean and standard deviation of the
          absolute raw samples; ``raw_range`` -- max minus min of the raw samples.
        * ``mean1..3`` / ``std1..3`` -- mean and standard deviation of the
          absolute wavelet coefficients of each band.
        * ``energy1..3`` -- mean of the squared coefficients of each band.
        * ``entropy1..3`` -- ``-p * log2(p)`` where ``p`` is the band's share
          of the summed energy of the three bands.

        Suffix 1 is the level-3 approximation band, 2 the level-3 detail band
        and 3 the level-2 detail band. The finest (level-1) detail band is
        not used.
    """
    print('Generating features...\n')
    features = {}

    # Statistical features
    abs_raw = raw_data.abs()
    features['raw_mean'] = abs_raw.mean(axis=1)
    features['raw_std'] = abs_raw.std(axis=1)
    features['raw_range'] = raw_data.max(axis=1) - raw_data.min(axis=1)

    # Wavelet features
    # A 3-level decomposition returns [cA3, cD3, cD2, cD1]; only the first
    # three bands feed the features below.
    decomposition_level = 3
    coefficients = pywt.wavedec(raw_data, mother_wavelet, level=decomposition_level)
    # Re-use the input's row index so these features line up with the raw
    # statistics even when raw_data does not carry a default 0..n-1 index.
    bands = [pd.DataFrame(band, index=raw_data.index) for band in coefficients[:3]]

    # Calculate mean for each coefficient band
    for number, band in enumerate(bands, start=1):
        features['mean{}'.format(number)] = band.abs().mean(axis=1)

    # Calculate standard deviation for each coefficient band
    for number, band in enumerate(bands, start=1):
        features['std{}'.format(number)] = band.abs().std(axis=1)

    # Calculate energy for each coefficient band
    for number, band in enumerate(bands, start=1):
        features['energy{}'.format(number)] = band.pow(2).mean(axis=1)

    # Total energy is shared by all three probabilities, so compute it once
    total_energy = features['energy1'] + features['energy2'] + features['energy3']

    # Calculate entropy feature
    for number in (1, 2, 3):
        # Share of the total energy; defined as 0 for rows with no energy at
        # all (0/0 would otherwise be NaN). NaN inputs still propagate.
        share = (features['energy{}'.format(number)] / total_energy).where(total_energy != 0, 0.0)
        # Use the convention 0*log2(0) = 0: feed log2 a harmless 1 wherever
        # the share is not positive, instead of producing 0 * -inf = NaN.
        log_argument = share.where(share > 0, 1.0)
        features['entropy{}'.format(number)] = -share * np.log2(log_argument)

    print('Feature extraction completed')
    return features

In [20]:
# Load the raw scan table from CSV into a DataFrame
raw_data = pd.read_csv('raw data all.csv')

In [21]:
# Preview the first two rows to check the column layout
raw_data.head(n=2)

Unnamed: 0.1,Unnamed: 0,TP,Grade,Panel ID,Files,Set_1,Set_2,Set_3,Quality,0,...,950,951,952,953,954,955,956,957,958,959
0,0,BX0,C,BX_1-01,1-01_01.csv,1,1,1,High,430.960388,...,497.270714,465.837993,482.473171,557.146946,509.123724,393.085411,409.714919,530.855378,582.342847,537.530053
1,1,BX0,C,BX_1-01,1-01_02.csv,1,1,1,High,544.658394,...,564.696363,608.248301,638.900783,596.954494,535.648714,516.290616,506.61762,534.059142,484.054406,411.396488


In [22]:
# Mother wavelet used for the decomposition;
# 'rbio3.1' is chosen here as an example.
mother_wavelet = 'rbio3.1'

# Extract features from the signal samples only: the columns labelled
# '0' onwards (everything before them is metadata).
features_extract = generate_features(
    raw_data.loc[:, '0':],
    mother_wavelet,
)

Generating features...

Feature extraction completed


In [23]:
# feature extracted from rbio 3.1 mother wavelet
# Show the first five rows of the extracted features as a table
pd.DataFrame(features_extract).head(n=5)

Unnamed: 0,raw_mean,raw_std,raw_range,mean1,mean2,mean3,std1,std2,std3,energy1,energy2,energy3,entropy1,entropy2,entropy3
0,512.142021,62.380731,392.609349,1448.094137,64.364304,78.639752,58.549487,55.15948,57.540117,2100377.0,7160.39279,9481.394351,0.011296,0.027761,0.034946
1,512.324981,65.302357,391.979697,1448.14537,84.008165,86.058824,63.934224,61.94679,65.479062,2101179.0,10863.322551,11675.911756,0.01523,0.038932,0.041272
2,512.105324,68.187631,613.605033,1448.998552,85.962519,86.924827,63.437171,91.75148,65.360653,2103588.0,15738.886067,11810.287571,0.018529,0.052296,0.041538
3,513.213561,75.699622,789.475997,1451.584316,100.207443,90.035829,76.162066,108.942471,83.183132,2112850.0,21812.711164,14997.291211,0.024491,0.067202,0.049975
4,510.370327,78.578046,720.863984,1443.917813,92.608157,85.163836,99.450787,104.13865,88.644313,2094708.0,19332.236778,15078.222813,0.023127,0.06159,0.050576
