## Import libraries

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from os import path
import sklearn
%matplotlib qt

## Variables to set

In [13]:
# File name of the processed capture analysed by this notebook.
DATASET = "32FINAL.csv"
# Location of that file, relative to the notebook's working directory.
DATAPATH = path.join(path.pardir, "capture", "processed", DATASET)
# Samples for the preview figure; the list length sets its number of subplot rows.
SAMPLES_TO_VIEW = [
    2,
    240,
    500,
    780,
]

## Load dataset

In [14]:
# Read the CSV file located at DATAPATH into a DataFrame.
dataset = pd.read_csv(filepath_or_buffer=DATAPATH)

In [15]:
# Shared x axis for the plots: the integers 0..99.
X = list(range(100))
print(X)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]


## Generate a figure with plots of the initial samples

In [16]:
# Preview figure: one subplot row per entry of SAMPLES_TO_VIEW and one subplot
# column per measure, each measure being a block of 100 consecutive columns.
n_measures = 7
points_per_measure = 100

# squeeze=False keeps `axs` two-dimensional even when only one sample is
# requested, so axs[i, j] is always valid.
fig, axs = plt.subplots(len(SAMPLES_TO_VIEW), n_measures, sharex='col', squeeze=False,
                        figsize=(21, len(SAMPLES_TO_VIEW)*3))
for i, sample in enumerate(SAMPLES_TO_VIEW):
    for j in range(n_measures):
        first = j * points_per_measure
        # Select the requested dataset row; indexing with the subplot row
        # number would always show rows 0, 1, 2, ... instead.
        y = dataset.iloc[sample, first:first + points_per_measure]
        axs[i, j].plot(X, y)
        if i == 0:
            # Title = name of the block's first column without its last
            # character (e.g. "Tmp0" -> "Tmp").
            axs[i, j].set_title(dataset.columns[first][:-1], fontsize=15)

fig.suptitle("Exemplo com "+str(len(SAMPLES_TO_VIEW))+" amostras", fontsize=25)
# Save before showing: once a shown window is closed some backends no longer
# hold the rendered figure.
fig.savefig(path.join("..","results","plot",path.splitext(DATASET)[0]+".PNG"), dpi=300)
plt.show()

## Import extraction libraries
Using code from Navar M. M. Nascimento -> https://github.com/navarmn/feature_extraction_signal for Fourier and HOS extraction.
Using code from Geraldo Luís Bezerra Ramalho (from a private Google Colaboratory notebook) for extraction with the Structural Co-occurrence Matrix (SCM) method.

In [17]:
from src.feature_extraction import Fourier, HOS
from src.SCM import SCM

In [18]:
# One Fourier extractor per fundamental value (25, 30 and 35); all three share
# the same `fs` and `harmonics` arguments.
fe_fourier25, fe_fourier30, fe_fourier35 = (
    Fourier(fundamental=fundamental, fs=100.0, harmonics=(1, 10, 20, 30))
    for fundamental in (25.0, 30.0, 35.0)
)
# HOS and SCM extractors, built with their default arguments.
fe_HOS, fe_SCM = HOS(), SCM()

Our dataset is composed of 1100 sensor captures (dataset rows). Each capture has 100 samples per measure (AcX, AcY, AcZ, GyX, GyY, GyZ, Tmp). The Tmp values are discarded in this work, so we need to extract each measure's vector and generate a new dataset of features.

In [19]:
# Exclude the Tmp columns (Tmp0 .. Tmp99) from the dataset.
tmp_head = ["Tmp" + str(i) for i in range(100)]
# errors="ignore" drops whichever of these labels exist and skips the rest, so
# the cell can be re-run safely and a partially present set of Tmp columns
# neither raises KeyError nor survives because "Tmp0" happens to be missing.
dataset = dataset.drop(columns=tmp_head, errors="ignore")
# The label list is scratch data only; leave it empty once the drop is done.
tmp_head.clear()

In [20]:
# Fourier feature extraction: builds one feature row per dataset row.
measures = ["AcX", "AcY", "AcZ", "GyX", "GyY", "GyZ"]
# Extractor applied to a capture, indexed by its class label (0 to 5).
extractors = [fe_fourier25, fe_fourier25, fe_fourier25, fe_fourier30, fe_fourier30, fe_fourier35]

out_data_list = []
out_row_list = []

for row in range(len(dataset.index)):
    dataset_row = dataset.iloc[row, :]
    # A row taken from a frame mixing int and float columns is upcast to
    # float, and a float cannot index a list: convert the label explicitly.
    class_num = int(dataset_row['Class'])
    # Reject out-of-range labels; a negative one would otherwise wrap around
    # and silently pick the wrong extractor.
    if not 0 <= class_num < len(extractors):
        raise ValueError("Row " + str(row) + ": class " + str(class_num) + " has no extractor")
    # Take the readings by label instead of assuming 'Class' is the last column.
    readings = dataset_row.drop(labels='Class').values
    # Column-major reshape: each consecutive run of 100 values becomes one
    # measure column.
    dataset_fourier = pd.DataFrame(np.reshape(readings, (100, len(measures)), order='F'), columns=measures)
    # Feature sequence of every measure, in `measures` order.
    out_row_list = [extractors[class_num].transform(dataset_fourier[measure])['features']
                    for measure in measures]
    # Flatten into a single row and append the class label as the last value.
    row_list = [item for sublist in out_row_list for item in sublist]
    row_list.append(class_num)
    out_data_list.append(row_list)

In [21]:
# Header: four feature columns per measure (AcX0..AcX3, ...), then the label.
out_head = [name + str(k) for name in measures for k in range(4)]
out_head += ["Class"]

# Assemble the collected rows into a frame and store the label as an integer.
out_data_list = np.asarray(out_data_list)
out_dataframe = pd.DataFrame(out_data_list, columns=out_head).astype({"Class": int})
out_dataframe

Unnamed: 0,AcX0,AcX1,AcX2,AcX3,AcY0,AcY1,AcY2,AcY3,AcZ0,AcZ1,...,GyX3,GyY0,GyY1,GyY2,GyY3,GyZ0,GyZ1,GyZ2,GyZ3,Class
0,66.273575,557.034890,533.750425,1662.409696,714.043940,1063.676328,1823.786496,621.922382,133.259057,1122.969197,...,166.354900,18.508690,201.393279,135.738626,245.769616,5.352344,12.921246,34.524896,88.642785,0
1,339.394684,478.621250,545.752159,750.709074,270.541393,905.678159,2390.176091,1687.699397,512.432118,917.917874,...,167.164584,106.586298,159.396636,145.448909,106.102630,20.514505,35.070350,51.744396,47.481862,0
2,135.052773,834.643463,739.816046,1455.890825,715.798803,722.614294,2553.379937,1998.917990,268.190858,1048.039716,...,263.078974,37.461495,221.008952,182.066542,154.752064,8.802216,23.708717,35.765085,78.353068,0
3,259.927507,597.140990,459.281958,1283.331723,220.998913,1167.929177,2098.060579,1007.051497,324.153484,811.332890,...,167.380387,92.965324,198.555933,179.522509,236.903301,10.977111,21.516055,32.078070,77.915281,0
4,113.963485,140.515919,555.721815,521.917157,405.522518,804.150522,2475.667062,1424.120620,129.861472,524.394269,...,163.436539,21.930254,60.552474,159.762900,125.274629,5.897963,12.400345,36.912948,35.266496,0
5,137.550020,461.064251,684.869653,690.825428,41.072841,715.670632,2986.059094,1067.831640,225.182370,705.444734,...,129.339030,57.430777,133.418738,155.950262,85.986517,2.239067,18.710819,36.841518,21.573003,0
6,268.884956,649.955143,577.559256,669.056494,387.624208,2312.511351,2051.777974,1696.895935,236.936422,1102.207887,...,186.007646,12.456057,76.732546,165.254147,145.239794,1.206064,20.088410,49.759118,43.734974,0
7,178.260671,430.260429,685.925963,972.696751,115.764547,1221.022352,1587.243607,1169.992997,284.988813,638.861212,...,195.598584,44.716433,130.622712,151.774716,145.281704,1.787368,20.513943,38.164836,47.609239,0
8,70.219228,778.400094,779.097304,603.505834,973.153998,1566.906125,2508.244496,1802.250485,418.206989,1138.887383,...,175.465467,17.816600,248.025470,158.591898,209.390871,9.254233,21.413502,40.564755,36.667271,0
9,249.433308,310.021331,594.763221,1058.188677,356.238778,677.688125,2809.287329,998.864952,430.606173,557.966344,...,222.104040,27.017643,110.069500,148.711699,143.180782,5.279753,27.546055,44.318381,54.797044,0


In [23]:
# Scatter matrix of the four AcX Fourier features, coloured by class.

# Global setting: new figures keep their right 20% free, which leaves room
# for the legend placed beside the grid.
plt.rcParams["figure.subplot.right"] = .8

labels = ["25 NORMAL", "25 REVERSO", "25 VAZIO", "30 NORMAL", "30 VAZIO", "35 NORMAL"]

# One colormap plus one normalisation shared by the points and the legend.
# Looking the legend colours up with raw integers (Set1(i)) while the points
# are normalised over the class range gives the two different colours for the
# same class.
class_cmap = plt.cm.Set1
class_norm = plt.Normalize(vmin=0, vmax=len(labels) - 1)

axes = pd.plotting.scatter_matrix(out_dataframe[["AcX0", "AcX1", "AcX2", "AcX3"]], figsize=(32,18),
                                  c=out_dataframe["Class"], s=50, alpha=.8,
                                  cmap=class_cmap, norm=class_norm)
scatter_fig = axes[0, 0].get_figure()

# Empty line artists used only as legend markers, one per class.
handles = [plt.plot([], [], color=class_cmap(float(class_norm(i))), ls="", marker=".",
                    markersize=np.sqrt(100))[0]
           for i in range(len(labels))]

plt.legend(handles, labels, loc=(1.02,0))
plt.suptitle("32 - AcX - Fourier", fontsize=24)
# Save through the figure object and before show(): calling plt.savefig after
# show() can write an empty image once the shown figure has been released.
scatter_fig.savefig(path.join("..","results","plot",path.splitext(DATASET)[0]+"_AcX_Fourier.PNG"), dpi=300)
plt.show()