# **fext-peak: Peak Feature Extraction**

## **Code Setup**

Imports (packages/modules)

In [15]:
# General imports
# -----------------------------

import os
import sys
import random
import numpy as np
from scipy import stats
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings


# Specific imports
# -----------------------------

sys.path.append('./modules/')
import utils_v1 as utils

# preprocessing
from sklearn.preprocessing import StandardScaler

# peak processing
from scipy.signal import find_peaks, peak_prominences, peak_widths




Functions/Classes

In [16]:
# def foo(par):
#     ...

#     return result

Running configurations

In [17]:
# matplotlib
%matplotlib inline

# pandas: render floats with a thousands separator and three decimals
pd.set_option("display.float_format", "{:,.3f}".format)

# seed value for random number generators
# (only defined here; this cell does not apply it to any generator)
seed = 17

# suppress every warning from this point on
warnings.filterwarnings(action="ignore")



Running parameters

In [18]:
# Run parameters, read by position:
#   param[0]  nb_name   name of this notebook
#   param[1]  exp_name  name of the experiment
#   param[2]  base folder for the results; the output folder is
#             param[2] + '<nb_name>_<exp_name>/'
#   param[3]  path_in   path of the input CSV file

# default
param = [
    'fext-peak-v1',
    'PEI-raw',
    '../data/datasets/',
    '../data/datasets/raw-VOL-aggregate-v1_PEI-raw/raw-VOL-aggregate-v1_PEI-raw.csv',
]


In [19]:
# Turn the positional run parameters into named settings
nb_name, exp_name = param[0], param[1]

# The experiment id doubles as the output folder name and the output file stem
exp_id = f'{nb_name}_{exp_name}'
path_out = f'{param[2]}{exp_id}/'   # keeps the trailing slash: file names are appended directly
path_in = param[3]

# Echo the resolved settings so they are recorded in the notebook output
print('nb_name=', nb_name)
print('exp_name=', exp_name)
print('path_out=', path_out)
print('path_in=', path_in)


nb_name= fext-peak-v1
exp_name= PEI-raw
path_out= ../data/datasets/fext-peak-v1_PEI-raw/
path_in= ../data/datasets/raw-VOL-aggregate-v1_PEI-raw/raw-VOL-aggregate-v1_PEI-raw.csv


Create output folder

In [20]:
# Create the folder that receives this notebook's outputs.
# NOTE(review): create_folder comes from the project-local utils_v1 module;
# whether it tolerates an already existing folder is not visible here - confirm.
utils.create_folder(path_out)


## **Dataset Preparation**

Load dataset

In [21]:
# Read the comma-separated input file; its first row supplies the column names
dataset = pd.read_csv(path_in, sep=',', header=0)

# Show the loaded table
dataset



Unnamed: 0,-299.988,-285.034,-270.081,-255.127,-240.173,-225.22,-210.266,-195.313,-180.359,-165.405,...,686.951,701.904,716.858,731.812,746.765,761.719,776.672,791.626,matriz,class
0,0.805,0.539,0.413,0.332,0.272,0.216,0.166,0.109,0.051,0.004,...,4.155,4.486,4.81,5.146,5.482,5.819,6.149,6.473,Saliva,0
1,0.427,0.106,-0.012,-0.06,-0.08,-0.089,-0.094,-0.101,-0.103,-0.093,...,3.142,3.512,3.922,4.363,4.843,5.348,5.867,6.378,Saliva,0
2,0.231,-0.034,-0.132,-0.179,-0.203,-0.223,-0.236,-0.242,-0.235,-0.201,...,3.191,3.569,3.977,4.418,4.884,5.375,5.871,6.356,Saliva,0
3,1.236,0.871,0.7,0.59,0.511,0.439,0.369,0.288,0.202,0.122,...,4.889,5.277,5.671,6.071,6.474,6.873,7.27,7.652,Saliva,0
4,0.423,0.059,-0.069,-0.123,-0.144,-0.158,-0.168,-0.184,-0.194,-0.197,...,3.801,4.242,4.72,5.234,5.785,6.364,6.954,7.542,Saliva,0
5,0.345,-0.02,-0.138,-0.182,-0.19,-0.195,-0.194,-0.199,-0.203,-0.198,...,2.547,2.899,3.293,3.743,4.245,4.798,5.391,6.015,Saliva,0
6,0.42,0.086,-0.021,-0.055,-0.059,-0.056,-0.053,-0.06,-0.064,-0.061,...,2.093,2.383,2.719,3.106,3.546,4.049,4.605,5.203,Saliva,0
7,1.12,0.759,0.597,0.502,0.44,0.387,0.331,0.258,0.172,0.081,...,4.797,5.184,5.567,5.96,6.343,6.727,7.104,7.46,Saliva,0
8,0.54,0.178,0.059,0.015,-0.002,-0.014,-0.028,-0.049,-0.07,-0.069,...,3.858,4.294,4.764,5.267,5.801,6.353,6.917,7.469,Saliva,0
9,0.324,-0.014,-0.115,-0.142,-0.14,-0.135,-0.133,-0.146,-0.158,-0.155,...,2.863,3.216,3.609,4.057,4.553,5.096,5.678,6.281,Saliva,0


Data selection

In [22]:
# Selection parameters
# -----------------------
target = 'class'   # name of the label column
data_agg = 1       # 1: the last two columns are non-feature columns; otherwise only the last one

# Untouched copy of the loaded table (the trailing columns are reused when saving)
dataset_original = dataset.copy()

# Optional sub-sampling (when dataset is very big)
# -----------------------
# dataset = dataset.groupby(target).sample(frac=0.1, random_state=seed)

# Column selection
# -----------------------
# Number of trailing columns that are not features, expressed as a negative slice bound
exclude = -2 if data_agg == 1 else -1

# Every column before the trailing block is a feature; keep the features plus the label
features = list(dataset.columns[:exclude])
dataset = dataset.loc[:, [*features, target]]

# Row selection (enable at most one of the options below)
# -----------------------
# exclude one label
# dataset = dataset.loc[dataset[target] != 2]

# binary one-vs-one (OVO)
# dataset = pd.concat([dataset.loc[dataset[target] == 0], dataset.loc[dataset[target] == 10]], ignore_index=True)

# binary one-vs-rest (OVR)
# label_one = 0
# dataset.loc[dataset[target] != label_one, target] = 1

# multi
# dataset = dataset

dataset


Unnamed: 0,-299.988,-285.034,-270.081,-255.127,-240.173,-225.22,-210.266,-195.313,-180.359,-165.405,...,671.997,686.951,701.904,716.858,731.812,746.765,761.719,776.672,791.626,class
0,0.805,0.539,0.413,0.332,0.272,0.216,0.166,0.109,0.051,0.004,...,3.828,4.155,4.486,4.81,5.146,5.482,5.819,6.149,6.473,0
1,0.427,0.106,-0.012,-0.06,-0.08,-0.089,-0.094,-0.101,-0.103,-0.093,...,2.799,3.142,3.512,3.922,4.363,4.843,5.348,5.867,6.378,0
2,0.231,-0.034,-0.132,-0.179,-0.203,-0.223,-0.236,-0.242,-0.235,-0.201,...,2.844,3.191,3.569,3.977,4.418,4.884,5.375,5.871,6.356,0
3,1.236,0.871,0.7,0.59,0.511,0.439,0.369,0.288,0.202,0.122,...,4.501,4.889,5.277,5.671,6.071,6.474,6.873,7.27,7.652,0
4,0.423,0.059,-0.069,-0.123,-0.144,-0.158,-0.168,-0.184,-0.194,-0.197,...,3.39,3.801,4.242,4.72,5.234,5.785,6.364,6.954,7.542,0
5,0.345,-0.02,-0.138,-0.182,-0.19,-0.195,-0.194,-0.199,-0.203,-0.198,...,2.228,2.547,2.899,3.293,3.743,4.245,4.798,5.391,6.015,0
6,0.42,0.086,-0.021,-0.055,-0.059,-0.056,-0.053,-0.06,-0.064,-0.061,...,1.831,2.093,2.383,2.719,3.106,3.546,4.049,4.605,5.203,0
7,1.12,0.759,0.597,0.502,0.44,0.387,0.331,0.258,0.172,0.081,...,4.411,4.797,5.184,5.567,5.96,6.343,6.727,7.104,7.46,0
8,0.54,0.178,0.059,0.015,-0.002,-0.014,-0.028,-0.049,-0.07,-0.069,...,3.45,3.858,4.294,4.764,5.267,5.801,6.353,6.917,7.469,0
9,0.324,-0.014,-0.115,-0.142,-0.14,-0.135,-0.133,-0.146,-0.158,-0.155,...,2.547,2.863,3.216,3.609,4.057,4.553,5.096,5.678,6.281,0


Data preparation

In [23]:
# # parameters
# feat = feat_all
# target = target_multi

# # select and prepare the dataset
# dataset.replace({target[0]: {'no': 0, 'yes': 1}}, inplace=True)
# dataset.replace({target[0]: {'control': 0, 'cavity': 1, 'floor': 2}}, inplace=True)



## **Feature Extraction**

Split features and labels

In [24]:
# x = dataset.iloc[:, :-1].values
# y = np.ravel(dataset[[target]].values)
# labels = np.unique(y).tolist()

# print('x shape:', x.shape)
# print('y shape:', y.shape)
# print('labels:', labels)


Compute peak features

In [25]:
# Peak features of every spectrum that has a single dominant peak.
#
# For each row the most prominent peak is located; the row is kept only when no
# other peak has a prominence above one third of it. For the kept rows the cell
# stores the band of the peak, its prominence and its width at half and at full
# prominence. `peaks` is indexed by the row label of the source spectrum, so an
# index-aligned join with the label columns stays correct even when some
# spectra are rejected.

loc_list = []          # not filled in this cell (kept so the name stays defined)
band_list = []
prom_list = []
width_half_list = []
width_full_list = []
q_factor = []          # not filled in this cell (kept so the name stays defined)
row_list = []          # row labels of the spectra that passed the filter


def _peak_width(signal, peak_idx, bands, rel_height):
    """Return the width of the peak at `peak_idx`, in band units.

    `peak_widths` reports the left/right crossing points as interpolated
    (fractional) sample positions; they are truncated to whole sample indices
    and looked up in `bands`, whose labels are converted to float.
    """
    _, _, left_ips, right_ips = peak_widths(signal, [peak_idx], rel_height=rel_height)
    # Index the 1-element result arrays explicitly: calling int() on an ndarray
    # with ndim > 0 is deprecated in recent NumPy releases.
    return float(bands[int(right_ips[0])]) - float(bands[int(left_ips[0])])


# The label is the last column of `dataset`; it must not be part of the signal,
# otherwise its value is treated as one more sample at the end of the spectrum.
spectra = dataset.iloc[:, :-1]
bands = spectra.columns

# find peaks and prominences
for i, (row_label, signal) in enumerate(zip(spectra.index, spectra.to_numpy(dtype=float))):

    loc, _ = find_peaks(signal)
    if loc.size == 0:
        # No local maximum at all: nothing to describe for this spectrum
        continue

    prominences = peak_prominences(signal, loc)[0]
    main = np.argmax(prominences)
    max_prom = prominences[main]

    # Reject the spectrum when any secondary peak is comparable to the main one.
    # The boolean mask is tested directly: testing the indices returned by
    # np.where would miss a single hit located at position 0.
    others = np.delete(prominences, main)
    if np.any(others > max_prom / 3):
        continue

    main_loc = loc[main]
    band = bands[main_loc]
    width_half = _peak_width(signal, main_loc, bands, rel_height=0.5)
    width_full = _peak_width(signal, main_loc, bands, rel_height=1.0)

    print(i, band, max_prom, width_half, width_full)

    row_list.append(row_label)
    band_list.append(band)
    prom_list.append(max_prom)
    width_half_list.append(width_half)
    width_full_list.append(width_full)


print('\nNumber of valid peaks: ', len(band_list))

peaks = pd.DataFrame({'band': np.array(band_list, dtype=float),
                      'prominence': np.array(prom_list, dtype=float),
                      'width_half': np.array(width_half_list, dtype=float),
                      'width_full': np.array(width_full_list, dtype=float)},
                     index=row_list)
# Prominence divided by the half-prominence width
peaks['q_factor'] = peaks['prominence'] / peaks['width_half']
peaks


0 163.574 27.339000000000002 209.35000000000002 538.3299999999999
1 148.621 25.844 209.351 538.3299999999999
2 133.667 26.474 209.351 538.3299999999999
3 163.574 27.06 209.35000000000002 523.376
4 133.667 26.33 209.351 508.423
5 133.667 25.703 209.351 553.284
6 133.667 25.028 209.351 538.33
7 163.574 25.955 209.35000000000002 508.423
8 133.667 25.195 194.397 538.3299999999999
9 133.667 24.28 194.397 508.42199999999997
10 163.574 27.032 194.39700000000002 493.469
11 163.574 25.6 209.35000000000002 508.423
12 148.621 26.053 194.39700000000002 493.469
13 148.621 25.226 194.39700000000002 613.098
14 163.574 25.4 194.39700000000002 478.515
15 163.574 26.202 194.39700000000002 493.469
16 148.621 25.384 209.351 508.42199999999997
17 163.574 28.761000000000003 209.35000000000002 508.423
18 133.667 27.657 194.397 523.376
19 148.621 26.588 194.39700000000002 478.51599999999996
20 163.574 25.893 209.35000000000002 538.33
21 133.667 22.47 194.397 538.33
22 148.621 25.041 194.39700000000002 553.284

Unnamed: 0,band,prominence,width_half,width_full,q_factor
0,163.574,27.339,209.35,538.33,0.131
1,148.621,25.844,209.351,538.33,0.123
2,133.667,26.474,209.351,538.33,0.126
3,163.574,27.06,209.35,523.376,0.129
4,133.667,26.33,209.351,508.423,0.126
5,133.667,25.703,209.351,553.284,0.123
6,133.667,25.028,209.351,538.33,0.12
7,163.574,25.955,209.35,508.423,0.124
8,133.667,25.195,194.397,538.33,0.13
9,133.667,24.28,194.397,508.422,0.125


Save the extracted features

In [26]:
# Put the trailing non-feature columns of the original table next to the peak features.
# NOTE(review): pd.concat along the columns matches rows by index label, so
# `peaks` must carry the same row labels as `dataset_original` for features
# and labels to line up - confirm.
dataset_fext = pd.concat(
    objs=[peaks, dataset_original.iloc[:, exclude:]],
    axis='columns',
)

# Write '<path_out><exp_id>.csv' without the index column, floats with three decimals
dataset_fext.to_csv(f'{path_out}{exp_id}.csv', float_format='%.3f', index=False)
dataset_fext



Unnamed: 0,band,prominence,width_half,width_full,q_factor,matriz,class
0,163.574,27.339,209.35,538.33,0.131,Saliva,0
1,148.621,25.844,209.351,538.33,0.123,Saliva,0
2,133.667,26.474,209.351,538.33,0.126,Saliva,0
3,163.574,27.06,209.35,523.376,0.129,Saliva,0
4,133.667,26.33,209.351,508.423,0.126,Saliva,0
5,133.667,25.703,209.351,553.284,0.123,Saliva,0
6,133.667,25.028,209.351,538.33,0.12,Saliva,0
7,163.574,25.955,209.35,508.423,0.124,Saliva,0
8,133.667,25.195,194.397,538.33,0.13,Saliva,0
9,133.667,24.28,194.397,508.422,0.125,Saliva,0
