In [37]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

In [38]:
features = pd.read_csv("../../out/features.csv")

In [39]:
features

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,category
0,-1.866155,-2.111297,-1.011725,-1.158407,1.244334,-1.545715,0.368160,-1.942656,0.702723,0.305284,...,0.061809,-0.036369,0.023494,-0.009220,-0.066321,0.038249,0.070535,0.024349,0.005778,6
1,-2.309306,-1.703304,0.142748,-1.708657,0.319805,-2.694813,0.486067,-1.604623,3.054992,1.419839,...,-0.029100,0.032033,0.037577,0.025453,0.023832,-0.003538,0.101292,0.039720,-0.022648,6
2,0.168463,0.244632,-0.496008,-1.427117,-1.639664,-0.345347,0.669508,-0.385264,-0.392475,2.676673,...,0.017109,-0.019128,0.043889,-0.006713,0.002625,0.026193,0.041263,-0.019899,-0.024759,2
3,-2.605365,-0.684324,-2.633072,-2.131571,3.337641,-2.503922,2.366286,-1.749033,5.492798,-0.464725,...,0.021697,-0.008594,0.034994,0.027209,0.023326,-0.025674,0.022643,0.015697,-0.038146,6
4,-2.690236,-0.326968,-1.631052,-1.509669,2.927241,-2.060199,2.118226,-1.327456,5.261209,-0.324815,...,0.009017,-0.012609,0.028447,0.025465,0.014082,-0.004661,0.015755,0.017003,-0.002421,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1552,-0.208434,1.117251,-1.967570,-1.185062,-0.864493,0.781926,-0.965572,-0.756572,0.453102,0.588441,...,0.038981,-0.051203,0.025235,-0.031357,0.002765,-0.032618,-0.017671,0.036682,0.027853,0
1553,-0.866219,-1.506001,-1.335598,1.062113,0.066953,0.632289,0.211077,-1.656070,1.159170,0.880240,...,0.038867,0.004851,-0.009503,0.016853,0.012388,-0.034577,-0.000382,-0.009720,-0.064584,5
1554,0.201909,0.870386,0.503976,-0.651510,-0.609128,0.468242,-0.851535,-0.833080,-0.517684,0.250927,...,-0.027906,0.016595,-0.081966,-0.000670,0.020941,0.006636,0.057863,-0.083909,0.024154,4
1555,0.862612,0.757643,-1.512518,0.483035,-0.653575,0.121529,-0.939392,-0.756565,-0.049691,-0.475161,...,0.004071,-0.013782,-0.006691,0.004129,0.040990,0.034266,-0.000057,0.002062,-0.006136,4


In [40]:
features["category"].unique()

array([6, 2, 0, 1, 4, 5, 3])

In [41]:
soma_colunas = features.sum()
colunas_soma_zero = soma_colunas[soma_colunas.eq(0)]

numero_colunas_soma_zero = len(colunas_soma_zero)

print(numero_colunas_soma_zero)

0


In [42]:
features[features < 0] = 0

In [43]:
kf = KFold(n_splits=5, shuffle=True)
split = kf.split(features.iloc[:,0:99], features.iloc[:,100])

In [44]:
kf = KFold(n_splits=5, shuffle=True)
split = kf.split(features.iloc[:, 0:99], features.iloc[:, 100])
fscores, precisions, recalls = [], [], []

acc = []
precision = []
recall = []
w_avg_f = []
m_avg_f = []

w_avg_p = []
m_avg_p = []

w_avg_r = []
m_avg_r = []

for train_index, test_index in split:
  
    X_train, Y_train = features.loc[train_index].iloc[:,
                                                        :99], features.loc[train_index].iloc[:, 100]
    x_test, y_test = features.loc[test_index].iloc[:,
                                                    :99], features.loc[test_index].iloc[:, 100]

    model = svm.SVC(
        kernel="linear", decision_function_shape='ovo',  class_weight="balanced")
    model.fit(X_train, Y_train)

    y_predicted = model.predict(x_test)
    
    precision, recall, fscore, support = score(y_test, y_predicted)
    acc.append(accuracy_score(y_test, y_predicted))

    fscores.append(fscore)
    precisions.append(precision)
    recalls.append(recall)

    w_avg_f.append(f1_score(y_test, y_predicted, average='weighted'))
    m_avg_f.append(f1_score(y_test, y_predicted, average='macro'))

    w_avg_p.append(precision_score(
        y_test, y_predicted, average='weighted'))
    m_avg_p.append(precision_score(y_test, y_predicted, average='macro'))

    w_avg_r.append(recall_score(y_test, y_predicted, average='weighted'))
    m_avg_r.append(recall_score(y_test, y_predicted, average='macro'))
    class_labels = sorted(set(y_test))


In [45]:
name_columns = [x for x in class_labels]
metrics_f = pd.DataFrame(fscores, columns = name_columns)
metrics_p = pd.DataFrame(precisions, columns = name_columns)
metrics_r = pd.DataFrame(recalls, columns = name_columns)

metrics_f["accuracy"] = acc
metrics_f["macro avg"] = m_avg_f
metrics_f["weighted avg"] = w_avg_f

metrics_p["weighted avg"] = w_avg_p
metrics_p["macro avg"] = m_avg_p

metrics_r["weighted avg"] = w_avg_r
metrics_r["macro avg"] = m_avg_r

print("Métricas precision:")
display(metrics_p)

print("Métricas recall:")
display(metrics_r)

print("Métricas fscore:")
display(metrics_f)

Métricas precision:


Unnamed: 0,0,1,2,3,4,5,6,weighted avg,macro avg
0,0.166667,0.066667,0.5,0.1,0.0,0.459016,0.0625,0.361635,0.19355
1,0.216667,0.044118,0.358491,0.147059,0.054054,0.516129,0.0,0.325657,0.190931
2,0.157895,0.040816,0.377778,0.052632,0.102564,0.410714,0.074074,0.29974,0.173782
3,0.146341,0.021277,0.569444,0.032258,0.0,0.45098,0.086957,0.397646,0.186751
4,0.090909,0.078431,0.470588,0.102564,0.172414,0.358974,0.263158,0.323768,0.219577


Métricas recall:


Unnamed: 0,0,1,2,3,4,5,6,weighted avg,macro avg
0,0.166667,0.142857,0.254386,0.307692,0.0,0.282828,0.181818,0.233974,0.190893
1,0.254902,0.157895,0.17757,0.555556,0.222222,0.166667,0.0,0.185897,0.219259
2,0.183673,0.142857,0.165049,0.181818,0.235294,0.221154,0.153846,0.189711,0.183384
3,0.146341,0.066667,0.308271,0.125,0.0,0.258427,0.285714,0.244373,0.17006
4,0.1875,0.190476,0.280702,0.333333,0.25,0.16092,0.2,0.22508,0.22899


Métricas fscore:


Unnamed: 0,0,1,2,3,4,5,6,accuracy,macro avg,weighted avg
0,0.166667,0.090909,0.337209,0.150943,0.0,0.35,0.093023,0.233974,0.169822,0.272393
1,0.234234,0.068966,0.2375,0.232558,0.086957,0.251969,0.0,0.185897,0.158883,0.210684
2,0.169811,0.063492,0.22973,0.081633,0.142857,0.2875,0.1,0.189711,0.153575,0.216715
3,0.146341,0.032258,0.4,0.051282,0.0,0.328571,0.133333,0.244373,0.155969,0.293259
4,0.122449,0.111111,0.351648,0.156863,0.204082,0.222222,0.227273,0.22508,0.199378,0.248613
