## Import libraries

In [31]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from os import path
import sklearn
%matplotlib qt

## Variables to set

In [32]:
# File name of the processed capture table to analyse.
DATASET = "32FINAL.csv"
# Location of that file: <parent dir>/capture/processed/<DATASET>.
DATAPATH = path.join(path.pardir, "capture", "processed", DATASET)
# Captures chosen for the raw-signal preview figure
# (presumably row positions in the dataset - confirm against the plotting cell).
SAMPLES_TO_VIEW = [2, 240, 500, 780]

## Load dataset

In [33]:
# Read the processed capture table at DATAPATH into a DataFrame.
dataset = pd.read_csv(DATAPATH)
# Evaluate `dataset` on its own line to inspect the loaded table.

In [34]:
# Shared x-axis for the raw-signal plots: the positions 0..99.
X = list(range(100))
print(X)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]


## Generate a figure with plots of the initial samples

In [35]:
# Raw-signal preview: one subplot row per entry of SAMPLES_TO_VIEW and one
# subplot column per measure.
n_measures = 7       # AcX, AcY, AcZ, GyX, GyY, GyZ and Tmp
n_points = len(X)    # readings per measure within one capture

# squeeze=False keeps `axs` two-dimensional even when a single sample is listed.
fig, axs = plt.subplots(len(SAMPLES_TO_VIEW), n_measures, sharex='col', squeeze=False,
                        figsize=(21, len(SAMPLES_TO_VIEW)*3))
for plot_row, sample_idx in enumerate(SAMPLES_TO_VIEW):
    for measure_pos in range(n_measures):
        first_col = measure_pos * n_points
        # Use the row position listed in SAMPLES_TO_VIEW; indexing with the loop
        # counter would always show rows 0, 1, 2, ... instead of the chosen ones.
        y = dataset.iloc[sample_idx, first_col:first_col + n_points]
        axs[plot_row, measure_pos].plot(X, y)
        if plot_row == 0:
            # Column names look like "AcX0"; dropping the last character leaves the measure name.
            axs[plot_row, measure_pos].set_title(dataset.columns[first_col][:-1], fontsize=15)

fig.suptitle("Exemplo com "+str(len(SAMPLES_TO_VIEW))+" amostras", fontsize=25)
# Write the file before show(): with a blocking GUI backend the window is
# closed by the time show() returns.
fig.savefig(path.join("..","results","extraction","plot",DATASET[:-4]+".PNG"), dpi=300)
plt.show()

# Feature Extraction
## Import extraction libraries
Fourier and HOS extraction use code from Navar M. M. Nascimento: https://github.com/navarmn/feature_extraction_signal.
Extraction with the Structural Co-occurrence Matrix (SCM) method uses code from Geraldo Luís Bezerra Ramalho (from a private Google Colaboratory notebook).

In [36]:
from src.feature_extraction import Fourier, HOS
from src.SCM import SCM

In [37]:
# One Fourier extractor per fundamental frequency (25, 30 and 35); all three
# use fs=100.0 and the same harmonics tuple.
fe_fourier25, fe_fourier30, fe_fourier35 = (
    Fourier(fundamental=fundamental, fs=100.0, harmonics=(1, 10, 20, 30))
    for fundamental in (25.0, 30.0, 35.0)
)
# HOS and SCM extractors are built with their default arguments.
fe_HOS = HOS()
fe_SCM = SCM()

Our dataset is composed of 1100 sensor captures (dataset rows). Each capture has 100 samples for each measure (AcX, AcY, AcZ, GyX, GyY, GyZ, Tmp). The Tmp values are discarded in this work. So we need to extract each measure's vector and generate a new dataset of features.

In [38]:
# excluding Tmp columns
tmp_head = []
for i in range(100):
    tmp = "Tmp"+str(i)
    tmp_head.append(tmp)
if "Tmp0" in dataset.columns:
    dataset = dataset.drop(columns=tmp_head)
tmp_head.clear()
# dataset

## Fourier extraction
The Fourier extractor receives as parameters the fundamental frequency, the sampling frequency and the harmonics (multiples) of the input signal. Each class must be extracted with its own extractor configuration, since the classes differ from one another in fundamental frequency.

Each measure (AcX, AcY, AcZ, GyX, GyY and GyZ) is extracted separately, since they are different signals with 100 values each. After extraction there are 4 attributes per measure, resulting in a dataset with 1100 rows and 25 columns ((4 attributes × 6 measures) + Class).

In [39]:
def fourier_extraction(dataset, extractors, measures):
    """Build a Fourier feature table with one row per capture.

    Each row of ``dataset`` is split into one signal per measure, every
    signal is passed to the extractor selected by the row's class, and the
    returned features are laid out side by side, followed by the class.

    Parameters
    ----------
    dataset : pandas.DataFrame
        One capture per row. Besides the ``'Class'`` column, each row holds
        the signals of all measures back to back, in the order given by
        ``measures`` and all with the same length.
    extractors : sequence
        Extractor objects indexed by class number. Each must provide
        ``transform(signal)`` returning a mapping whose ``'features'`` entry
        is the sequence of features for that signal.
    measures : list of str
        Measure names; they label the signals and prefix the output columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``<measure><k>`` for every measure and feature position
        ``k``, plus an integer ``'Class'`` column.

    Raises
    ------
    ValueError
        If a row cannot be split evenly among the measures.
    IndexError
        If a class number has no entry in ``extractors``.
    """
    # Header width used only when there are no rows to infer it from.
    default_feature_count = 4
    n_measures = len(measures)
    rows = []

    for position in range(len(dataset.index)):
        record = dataset.iloc[position, :]
        # A row that mixes int and float columns is upcast to float, so the
        # class can come back as e.g. 1.0; a list index must be a real int.
        class_num = int(record['Class'])
        # Select the signal values by label rather than assuming 'Class' is last.
        signal_values = record.drop(labels='Class').to_numpy()
        # Column-major reshape: consecutive chunks of the row become the
        # columns, one per measure; the chunk length is inferred from the row.
        signals = pd.DataFrame(np.reshape(signal_values, (-1, n_measures), order='F'),
                               columns=measures)
        extractor = extractors[class_num]
        row = []
        for measure in measures:
            row.extend(extractor.transform(signals[measure])['features'])
        row.append(class_num)
        rows.append(row)

    # Derive the number of features per measure from the data instead of
    # hard-coding it, so the header always matches the extractor output.
    n_features = (len(rows[0]) - 1) // n_measures if rows else default_feature_count
    out_head = [measure + str(k) for measure in measures for k in range(n_features)]
    out_head.append("Class")

    # The explicit reshape keeps an empty result two-dimensional.
    table = np.asarray(rows).reshape(len(rows), len(out_head))
    out_dataframe = pd.DataFrame(table, columns=out_head, index=None)
    out_dataframe = out_dataframe.astype({"Class": int})

    return out_dataframe

## HOS extraction
The Higher-Order Statistics (HOS) extractor does not receive parameters, so all classes are extracted in the same way. The resulting feature vector holds four statistics of the input signal: RMS, variance, skewness and kurtosis.

Each measure (AcX, AcY, AcZ, GyX, GyY and GyZ) is extracted separately, since they are different signals with 100 values each. After extraction there are 4 attributes per measure, resulting in a dataset with 1100 rows and 25 columns ((4 attributes × 6 measures) + Class).

In [40]:
def HOS_extraction(dataset, extractor, measures):
    """Build a Higher-Order Statistics feature table with one row per capture.

    Each row of ``dataset`` is split into one signal per measure, every
    signal is passed to the same ``extractor``, and the returned features
    are laid out side by side, followed by the row's class.

    Parameters
    ----------
    dataset : pandas.DataFrame
        One capture per row. Besides the ``'Class'`` column, each row holds
        the signals of all measures back to back, in the order given by
        ``measures`` and all with the same length.
    extractor : object
        Must provide ``transform(signal)`` returning a mapping whose
        ``'features'`` entry is the sequence of features for that signal.
    measures : list of str
        Measure names; they label the signals and prefix the output columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``<measure><k>`` for every measure and feature position
        ``k``, plus an integer ``'Class'`` column.

    Raises
    ------
    ValueError
        If a row cannot be split evenly among the measures.
    """
    # Header width used only when there are no rows to infer it from.
    default_feature_count = 4
    n_measures = len(measures)
    rows = []

    for position in range(len(dataset.index)):
        record = dataset.iloc[position, :]
        class_num = record['Class']
        # Select the signal values by label rather than assuming 'Class' is last.
        signal_values = record.drop(labels='Class').to_numpy()
        # Column-major reshape: consecutive chunks of the row become the
        # columns, one per measure; the chunk length is inferred from the row.
        signals = pd.DataFrame(np.reshape(signal_values, (-1, n_measures), order='F'),
                               columns=measures)
        row = []
        for measure in measures:
            row.extend(extractor.transform(signals[measure])['features'])
        row.append(class_num)
        rows.append(row)

    # Derive the number of features per measure from the data instead of
    # hard-coding it, so the header always matches the extractor output.
    n_features = (len(rows[0]) - 1) // n_measures if rows else default_feature_count
    out_head = [measure + str(k) for measure in measures for k in range(n_features)]
    out_head.append("Class")

    # The explicit reshape keeps an empty result two-dimensional.
    table = np.asarray(rows).reshape(len(rows), len(out_head))
    out_dataframe = pd.DataFrame(table, columns=out_head, index=None)
    out_dataframe = out_dataframe.astype({"Class": int})

    return out_dataframe

## Extraction general

After calling all the extraction functions and generating a dataframe with the corresponding features, each dataframe is saved as a CSV file in the destination folder.

In [41]:
# Signal names; fourier_extraction / HOS_extraction use them as column labels
# when splitting each dataset row.
measures = ["AcX", "AcY", "AcZ", "GyX", "GyY", "GyZ"]
# Extractor for each class, 0 to 5: three at 25, two at 30 and one at 35.
fourier_extractors = [fe_fourier25] * 3 + [fe_fourier30] * 2 + [fe_fourier35]

fourier_dataframe = fourier_extraction(dataset, fourier_extractors, measures)
HOS_dataframe = HOS_extraction(dataset, fe_HOS, measures)
# SCM_dataframe = SCM_extraction(dataset, fe_SCM, measures)

# Save each feature table as <dataset name>_<extractor>.csv in the results folder.
for suffix, frame in (("_Fourier.csv", fourier_dataframe), ("_HOS.csv", HOS_dataframe)):
    frame.to_csv(path.join("..","results","extraction", DATASET[:-4]+suffix), sep=",", index=False)
# SCM_dataframe.to_csv(path.join("..","results","extraction", DATASET[:-4]+"_SCM.csv"), sep=",", index=False)

# Features plotting

Now the features are displayed and the plot is saved. The plot is a scatter matrix, which shows the dispersion of the samples by comparing pairs of features of a selected measure.

In [42]:
def discrete_cmap(N, base_cmap=None):
    """Return a colormap with ``N`` discrete colors sampled from ``base_cmap``.

    Parameters
    ----------
    N : int
        Number of colors in the resulting colormap.
    base_cmap : str or Colormap, optional
        Colormap (or registered colormap name) to sample from. ``None``
        selects Matplotlib's default colormap.

    Returns
    -------
    matplotlib.colors.LinearSegmentedColormap
        Colormap named ``<base name><N>`` built from ``N`` colors taken at
        evenly spaced positions of the base colormap.
    """
    # Imported locally so the function does not need an extra notebook-level import.
    from matplotlib.colors import LinearSegmentedColormap

    # pyplot.get_cmap replaces matplotlib.cm.get_cmap, which was deprecated in
    # Matplotlib 3.7 and removed in 3.9.
    base = plt.get_cmap(base_cmap)
    color_list = base(np.linspace(0, 1, N))
    cmap_name = base.name + str(N)
    # Call from_list on the class itself: only LinearSegmentedColormap defines
    # it, so going through `base` fails for ListedColormap bases such as the
    # default colormap.
    return LinearSegmentedColormap.from_list(cmap_name, color_list, N)

def plot_features(data_name, dataframe, classes, features, features_name, extractor_name):
    """Draw, save and show a scatter matrix of selected features, colored by class.

    Parameters
    ----------
    data_name : str
        Dataset name; used in the figure title and in the output file name.
    dataframe : pandas.DataFrame
        Feature table holding the ``features`` columns and a ``'Class'`` column.
    classes : list of str
        Legend label for each class, in class-number order.
    features : list of str
        Names of the columns to compare pairwise.
    features_name : str
        Label of the feature group; used in the title and the file name.
    extractor_name : str
        Name of the extraction method; used in the title and the file name.

    The figure is written to
    ``../results/extraction/plot/<data_name>_<features_name>_<extractor_name>.png``.
    """
    n_classes = len(classes)
    cmap = discrete_cmap(n_classes, 'rainbow')

    # Plot the frame passed in by the caller rather than a notebook global.
    axes = pd.plotting.scatter_matrix(dataframe[list(features)], figsize=(24,13),
                                      c=dataframe["Class"], label=classes, s=100, alpha=.8,
                                      cmap=cmap)
    fig = axes[0, 0].figure
    # Keep the right 20% of the figure free for the legend; adjusting this
    # figure only avoids changing rcParams for every later figure.
    fig.subplots_adjust(right=.8)

    # Empty line artists, one per class, used purely as legend entries.
    handles = [plt.plot([], [], color=cmap(i), ls="", marker=".",
                        markersize=np.sqrt(50))[0] for i in range(n_classes)]

    plt.legend(handles, classes, loc=(1.02,0))
    fig.suptitle(data_name + " - " + features_name + " - " + extractor_name, fontsize=24)
    # Save before show(): after a blocking show() returns the window is closed
    # and a pyplot-level savefig may write an empty figure.
    fig.savefig(path.join("..","results","extraction","plot",data_name+"_"+features_name+"_"+extractor_name+".png"),
                dpi=300)
    plt.show()

## Plotting general

In [43]:
# Legend text for each class, in class-number order.
labels = ["25 NORMAL", "25 REVERSO", "25 VAZIO", "30 NORMAL", "30 VAZIO", "35 NORMAL"]
# Measure to inspect and its four feature columns (AcX0 .. AcX3).
features_name = "AcX"
features = [features_name + str(k) for k in range(4)]

# One scatter matrix per extraction method.
for frame, extractor_name in ((fourier_dataframe, "Fourier"), (HOS_dataframe, "HOS")):
    plot_features(DATASET[:-4], frame, labels, features, features_name, extractor_name)
# plot_features(DATASET[:-4], SCM_dataframe, labels, features, features_name, "SCM")