# Feature Engineering

From each window of the time series data, extract time-domain, frequency-domain and time-frequency-domain features.  

In [None]:
# Mount Google Drive at /content/drive so the data paths used later in this
# notebook resolve. `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


### Single feature extraction function

In [None]:
import numpy as np
import scipy.stats as sts
from pywt import WaveletPacket

# time-domain
def rms_fea(a):
    """Root-mean-square amplitude of the samples in ``a``.

    Args:
        a: array-like of numeric samples.

    Returns:
        The square root of the mean of the squared samples.
    """
    squared = np.square(a)
    mean_square = np.mean(squared)
    return np.sqrt(mean_square)
 
def pp_fea(a):
    """Peak-to-peak amplitude of ``a``: largest sample minus smallest sample.

    Args:
        a: array-like of numeric samples.

    Returns:
        ``max(a) - min(a)``.
    """
    highest = np.max(a)
    lowest = np.min(a)
    return highest - lowest
 
def skew_fea(a):
    """Sample skewness of ``a``, as computed by ``scipy.stats.skew`` with its defaults.

    Args:
        a: array-like of numeric samples.

    Returns:
        The skewness value returned by SciPy.
    """
    skewness = sts.skew(a)
    return skewness

# frequency-domain
def _half_spectrum(a):
    """Scaled one-sided FFT magnitude of ``a`` with the DC bin removed.

    Shared by the spectral_* features so all three are computed from exactly
    the same spectrum.

    Args:
        a: array-like signal; its first axis length N sets the scaling.

    Returns:
        ``|FFT(a)|[1 : N // 2] * 2 / N`` -- bins 1 .. N//2 - 1, i.e. the DC bin
        (index 0) and everything from index N//2 upwards are dropped.
    """
    a = np.asarray(a)  # lets plain lists through; a no-op for ndarrays
    n = a.shape[0]
    mag = np.abs(np.fft.fft(a))
    return mag[1:n // 2] * 2.00 / n


def spectral_kurt(a):
    """Kurtosis (``scipy.stats.kurtosis`` defaults) of the one-sided magnitude spectrum of ``a``."""
    return sts.kurtosis(_half_spectrum(a))


def spectral_skw(a):
    """Skewness (``scipy.stats.skew`` defaults) of the one-sided magnitude spectrum of ``a``."""
    return sts.skew(_half_spectrum(a))


def spectral_pow(a):
    """Mean of the cubed one-sided magnitude spectrum of ``a``.

    NOTE(review): the exponent is 3, not 2 -- confirm that a cubic rather
    than a squared (power) measure is intended.
    """
    return np.mean(np.power(_half_spectrum(a), 3))


#time-frequency domain
def _wavelet_packet_norm(a, wavelet, level=8):
    """Norm of the wavelet-packet coefficients of ``a`` at one decomposition level.

    Args:
        a: signal window handed to ``pywt.WaveletPacket`` (presumably 1-D).
        wavelet: pywt wavelet name, e.g. ``'db1'``.
        level: decomposition depth; used both as ``maxlevel`` and as the level
            whose nodes are collected.

    Returns:
        ``np.linalg.norm(coeffs, 2)`` where ``coeffs`` stacks the ``data`` of
        every node at ``level``, in frequency order.

    NOTE(review): when ``coeffs`` is a 2-D matrix (one row per node),
    ``ord=2`` is the matrix 2-norm (largest singular value), not the
    Euclidean norm of all coefficients -- confirm which one is intended.
    """
    wp = WaveletPacket(a, wavelet, maxlevel=level)
    nodes = wp.get_level(level, "freq")
    coeffs = np.array([node.data for node in nodes])
    return np.linalg.norm(coeffs, 2)


def wavedb1_fea(a):
    """Level-8 wavelet-packet norm of ``a`` using the 'db1' wavelet."""
    return _wavelet_packet_norm(a, 'db1')


def wavedb2_fea(a):
    """Level-8 wavelet-packet norm of ``a`` using the 'db2' wavelet."""
    return _wavelet_packet_norm(a, 'db2')


def wavedb3_fea(a):
    """Level-8 wavelet-packet norm of ``a`` using the 'db3' wavelet."""
    return _wavelet_packet_norm(a, 'db3')

### Wrap Up the above functions





In [None]:
def extract_fea(data_slice):
    """Compute every feature for one time window and pack them into a row.

    Args:
        data_slice: the samples of a single window.

    Returns:
        Array of shape ``(1, num_stat)`` holding, in order: RMS, peak-to-peak,
        skewness, spectral kurtosis, spectral skewness, spectral "power", and
        the db1 / db2 / db3 wavelet-packet norms.
    """
    extractors = (
        rms_fea,
        pp_fea,
        skew_fea,
        spectral_kurt,
        spectral_skw,
        spectral_pow,
        wavedb1_fea,
        wavedb2_fea,
        wavedb3_fea,
    )
    values = [extractor(data_slice) for extractor in extractors]
    return np.array(values).reshape((1, len(values)))

### Apply the feature extraction function on time windows of the time series data

In [None]:
import pandas as pd
import h5py
# Load the raw records from Google Drive. The windowing cell later in this
# notebook reads a column named `data` from this frame and treats every row
# as one space-separated string ("<label> <sample> <sample> ...").
df_data = pd.read_csv("/content/drive/My Drive/data/data_cwru/cwru_data.txt")

In [None]:
# Cut every record into consecutive windows of `sub_len` samples and compute
# one feature row per window.
#   labels   -> one label string per record (first token of the row)
#   features -> per record, a list with one extract_fea() result per window
labels = []
features = []
sub_len = 512  # window length in samples

# Index the column by name: attribute-style access (`row.data`) only works
# when no Series attribute of the same name exists, and older pandas versions
# define `Series.data`.
for record in df_data["data"]:
    # split() without arguments tolerates repeated whitespace, which
    # split(" ") would turn into empty tokens that float() rejects.
    tokens = record.strip().split()
    labels.append(tokens[0])
    signal = np.array([float(token) for token in tokens[1:]])
    # NOTE(review): if len(signal) is not a multiple of sub_len, the last
    # window is shorter than the others but is still processed -- confirm
    # this is wanted.
    features.append([
        extract_fea(signal[start:start + sub_len])
        for start in range(0, len(signal), sub_len)
    ])

### Save the data

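The final input features are saved as a single array indexed by record, time window and feature. Because each window's features are kept as a `(1, num_features)` row, the stored array has shape `(records, windows, 1, num_features)`; squeeze the singleton axis to obtain a three-dimensional tensor.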

In [None]:
# Stack the per-record, per-window feature rows into one array. Each
# extract_fea() result has shape (1, num_stat), so when every record has the
# same number of windows the array is (records, windows, 1, num_stat).
features = np.array(features)
labels   = np.array(labels)

# The context manager closes the file even if create_dataset raises.
with h5py.File("/content/drive/My Drive/data/data_cwru/datax_series.h5", "w") as h5f:
    h5f.create_dataset("data", data=features)

In [None]:
# The labels are Python strings, so the array has NumPy's fixed-width unicode
# ('U') dtype, which h5py refuses to store. Convert to fixed-width byte
# strings first; readers get the labels back as bytes (ASCII labels assumed).
label_array = np.asarray(labels)
if label_array.dtype.kind == "U":
    label_array = label_array.astype("S")

# The context manager closes the file even if create_dataset raises.
with h5py.File("/content/drive/My Drive/data/data_cwru/datay_series.h5", "w") as h5f:
    h5f.create_dataset("label", data=label_array)