In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's default theme with all font elements scaled by 1.5x.
# The scale is passed to `set_theme` because `sns.set_context(font_scale=...)`
# called without a named context leaves the font sizes unchanged.
sns.set_theme(font_scale=1.5)

df = pd.read_csv('../data/data.csv')

# Remove the columns that are not extracted from the vibration signals.
df = df.drop(columns=['rotacao_manual', 'severidade'])

# Show the list of features.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 880 entries, 0 to 879
Columns: 116 entries, condicao to tg2_freqstat_rvf
dtypes: float64(115), object(1)
memory usage: 797.6+ KB


In [2]:
# Integer class code assigned to each machine condition.
output_map = {
    'normal': 0,
    'imbalance': 1,
    'vertical-misalignment': 2,
    'horizontal-misalignment': 3,
}
# Inverse lookup: integer class code -> condition name.
output_remap = {code: name for name, code in output_map.items()}

# Encode the textual condition of every sample as its integer class code.
encoded_condition = df['condicao'].map(output_map)

# `Series.map` yields NaN for any value missing from the mapping; fail loudly
# here instead of letting NaN labels reach the classifier.
unmapped = df.loc[encoded_condition.isna(), 'condicao'].unique()
if len(unmapped) > 0:
    raise ValueError(f"Unmapped values in column 'condicao': {list(unmapped)}")

# Store the target in a NumPy array.
labels = np.array(encoded_condition)

# Remove the label column so that `df` only holds features.
df = df.drop(columns='condicao')

In [3]:
# Names of every column whose label contains the substring 'phase'.
phase_cols = [name for name in df if 'phase' in name]

# Phase features plus the 'rotacao_calc' column, in that order.
df_angles = df.loc[:, [*phase_cols, 'rotacao_calc']]
df_angles.head()

Unnamed: 0,ax1_phase,ax2_phase,rad1_phase,rad2_phase,tg1_phase,tg2_phase,rotacao_calc
0,1.395359,0.384207,1.194332,0.800194,1.412233,0.088716,12.0
1,1.425566,0.31049,1.588248,0.435814,1.538576,0.072926,13.2
2,1.401096,0.302284,1.199344,0.417754,1.151517,0.063044,14.2
3,1.378789,0.461306,1.013469,0.596626,1.360171,0.221532,15.0
4,1.395547,0.402701,1.146666,0.504448,-2.899924,0.213993,16.0


In [4]:
from sklearn.model_selection import KFold, GroupKFold, StratifiedKFold, cross_val_score, cross_validate, cross_val_predict
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, LinearSVC

# Stratified 16-fold split, shuffled with a fixed seed for reproducibility.
kfold = StratifiedKFold(n_splits=16, shuffle=True, random_state=42)

# Standardise the features, then classify with a linear-kernel SVM using
# balanced class weights.
clf = make_pipeline(
    StandardScaler(),
    SVC(kernel='linear', class_weight='balanced'),
)

# Cross-validated balanced accuracy on the raw phase angles (one score per fold).
evaluation = cross_val_score(
    clf,
    df_angles.to_numpy(),
    labels,
    scoring='balanced_accuracy',
    cv=kfold,
)

# Mean and standard deviation of the per-fold scores.
acc_mean, acc_std = evaluation.mean(), evaluation.std()

print(f'Acurácia: {acc_mean:.3f} ± {acc_std:.3f}')

Acurácia: 0.603 ± 0.075


In [5]:
# Replace each phase angle by its sine and cosine, and keep 'rotacao_calc'
# as the last column.
phase_sine = np.sin(df[phase_cols]).add_suffix('_sine')
phase_cosine = np.cos(df[phase_cols]).add_suffix('_cosine')

df_encoded_angles = pd.concat(
    [phase_sine, phase_cosine, df['rotacao_calc']],
    axis='columns',
)
df_encoded_angles

Unnamed: 0,ax1_phase_sine,ax2_phase_sine,rad1_phase_sine,rad2_phase_sine,tg1_phase_sine,tg2_phase_sine,ax1_phase_cosine,ax2_phase_cosine,rad1_phase_cosine,rad2_phase_cosine,tg1_phase_cosine,tg2_phase_cosine,rotacao_calc
0,0.984650,0.374824,0.929970,0.717491,0.987455,0.088600,0.174539,0.927096,0.367634,0.696567,0.157900,0.996067,12.0
1,0.989473,0.305525,0.999848,0.422149,0.999481,0.072861,0.144720,0.952184,-0.017451,0.906527,0.032215,0.997342,13.2
2,0.985635,0.297702,0.931801,0.405709,0.913383,0.063002,0.168887,0.954659,0.362969,0.914003,0.407102,0.998013,14.2
3,0.981623,0.445118,0.848672,0.561855,0.977900,0.219725,0.190830,0.895472,0.528920,0.827236,0.209072,0.975562,15.0
4,0.984683,0.391905,0.911397,0.483324,-0.239323,0.212363,0.174353,0.920006,0.411528,0.875441,-0.970940,0.977191,16.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,0.999433,-0.363731,0.194634,-0.115206,0.999853,0.304295,-0.033657,0.931504,0.980876,0.993342,0.017173,0.952578,56.6
876,0.998683,-0.178324,0.174913,-0.049070,0.999852,0.281608,-0.051309,0.983972,0.984584,0.998795,-0.017219,0.959530,57.4
877,0.999780,-0.102606,0.149904,0.028560,0.999668,0.276031,-0.020986,0.994722,0.988701,0.999592,-0.025766,0.961149,58.2
878,0.998854,-0.023315,0.135994,-0.024186,0.995891,0.257793,-0.047867,0.999728,0.990710,0.999707,-0.090563,0.966200,59.0


In [6]:
# Cross-validated balanced accuracy on the sine/cosine-encoded angles, using
# the `clf` pipeline and `kfold` splitter already defined in this notebook.
evaluation = cross_val_score(
    clf,
    df_encoded_angles.to_numpy(),
    labels,
    scoring='balanced_accuracy',
    cv=kfold,
)

# Mean and standard deviation of the per-fold scores.
acc_mean, acc_std = evaluation.mean(), evaluation.std()

print(f'Acurácia: {acc_mean:.3f} ± {acc_std:.3f}')

Acurácia: 0.786 ± 0.089
