In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV, cross_validate, cross_val_score, train_test_split
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import warnings
warnings.filterwarnings("ignore")

### Data Cleaning

In [22]:
# Read both datasets from the relative 'Dados' directory (paths are resolved
# against the notebook's working directory).
iris = pd.read_csv(
    os.path.join('Dados', 'iris', 'iris.csv')
)
glass = pd.read_csv(
    os.path.join('Dados', 'glass', 'glass.csv')
)

In [23]:
# Preview the first rows of the raw iris frame (features plus the `species` label).
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [24]:
# Keep the target labels in their own Series before they are removed from the features.
iris_species = iris['species']

In [25]:
# Leave only the feature columns in `iris`.
# NOTE: this rebinds `iris`, so the cell is not re-runnable without reloading the CSV.
iris = iris.drop(columns=['species'])

In [69]:
# Hold out 33% of the iris rows for testing; the fixed seed keeps the split reproducible.
(iris_train,
 iris_test,
 iris_species_train,
 iris_species_test) = train_test_split(
    iris,
    iris_species,
    test_size=0.33,
    random_state=42,
)

In [59]:
# Preview the iris training features.
# NOTE(review): the recorded output lists an `index` column that no visible cell creates,
# so it looks stale (produced by an earlier kernel state) — re-run to refresh.
iris_train.head()

Unnamed: 0,index,sepal_length,sepal_width,petal_length,petal_width
0,96,5.7,2.9,4.2,1.3
1,105,7.6,3.0,6.6,2.1
2,66,5.6,3.0,4.5,1.5
3,0,5.1,3.5,1.4,0.2
4,122,7.7,2.8,6.7,2.0


In [28]:
# Preview the first rows of the raw glass frame (features plus the `Type` label).
glass.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [29]:
# Separate the target labels from the feature columns.
# NOTE: `glass` is rebound here, so the cell is not re-runnable without reloading the CSV.
glass_type = glass['Type']
glass = glass.drop(columns=['Type'])

In [68]:
# Hold out 33% of the glass rows for testing; the fixed seed keeps the split reproducible.
(glass_train,
 glass_test,
 glass_type_train,
 glass_type_test) = train_test_split(
    glass,
    glass_type,
    test_size=0.33,
    random_state=42,
)

In [31]:
# Preview the glass training features (the row labels are the shuffled original indices).
glass_train.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe
35,1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0.0,0.0
28,1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0.0,0.0
42,1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0.0,0.0
82,1.51646,13.41,3.55,1.25,72.81,0.68,8.1,0.0,0.0
100,1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22


### Perceptron

In [12]:
# Base estimator for the grid search: elastic-net regularised Perceptron with a fixed seed.
perceptron = Perceptron(
    random_state=42,
    penalty='elasticnet',
)

In [13]:
# Hyper-parameter grid explored for the Perceptron (6 x 2 x 6 combinations).
parameters = {
    'max_iter': [50, 100, 500, 1000, 2000, 3000],
    'class_weight': ['balanced', None],
    'alpha': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1],
}

In [14]:
# Exhaustive search over `parameters`, scored with 10-fold cross-validation.
clf = GridSearchCV(estimator=perceptron, param_grid=parameters, cv=10)

Training and results for the iris dataset

In [15]:
# Run the Perceptron grid search on the iris training split only.
results = clf.fit(X=iris_train, y=iris_species_train)

In [16]:
# Best Perceptron hyper-parameters found by the iris grid search.
results.best_params_

{'alpha': 1e-05, 'class_weight': None, 'max_iter': 50}

In [17]:
# Rebuild the Perceptron with the hyper-parameters reported by the iris grid search.
perceptron = Perceptron(
    penalty='elasticnet',
    alpha=0.00001,
    class_weight=None,
    max_iter=50,
    random_state=42,
)

In [18]:
# Train the tuned Perceptron on the full iris training split.
perceptron.fit(X=iris_train, y=iris_species_train)

Perceptron(alpha=1e-05, class_weight=None, early_stopping=False, eta0=1.0,
      fit_intercept=True, max_iter=50, n_iter=None, n_iter_no_change=5,
      n_jobs=None, penalty='elasticnet', random_state=42, shuffle=True,
      tol=None, validation_fraction=0.1, verbose=0, warm_start=False)

In [19]:
def get_scores(model, x, y):
    """Score a fitted classifier on a labelled dataset.

    Parameters
    ----------
    model : estimator
        Fitted object exposing ``predict``.
    x : array-like
        Feature matrix handed to ``model.predict``.
    y : array-like
        True labels aligned with the rows of ``x``.

    Returns
    -------
    tuple of float
        ``(accuracy, precision, recall, f1)``. Precision, recall and F1 are
        macro-averaged, i.e. the unweighted mean of the per-class scores.
    """
    y_pred = model.predict(x)
    # `pos_label` is only meaningful with average='binary'; with macro averaging
    # scikit-learn ignores it (and warns), so it is deliberately not passed.
    return (accuracy_score(y, y_pred),
            precision_score(y, y_pred, average='macro'),
            recall_score(y, y_pred, average='macro'),
            f1_score(y, y_pred, average='macro'))
            

Perceptron + Iris

In [20]:
# Evaluate the tuned Perceptron on the held-out iris test split and print each metric.
scores_dict = dict(zip(
    ['Accuracy', 'Precision', 'Recall', 'F1'],
    get_scores(perceptron, iris_test, iris_species_test),
))
for name in scores_dict:
    print(name + ' = %.2f' % scores_dict[name])

Accuracy = 0.70
Precision = 0.48
Recall = 0.67
F1 = 0.55


In [32]:
# Repeat the Perceptron grid search (same grid, 10-fold CV) on the glass training split.
# NOTE(review): the features are passed unscaled; MinMaxScaler is imported at the top of
# the notebook but is not applied in the cells shown — worth trying given the low scores.
perceptron = Perceptron(
    random_state=42,
    penalty='elasticnet',
)
parameters = {
    'max_iter': [50, 100, 500, 1000, 2000, 3000],
    'class_weight': ['balanced', None],
    'alpha': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1],
}
clf = GridSearchCV(estimator=perceptron, param_grid=parameters, cv=10)
results = clf.fit(X=glass_train, y=glass_type_train)

In [33]:
# Best Perceptron hyper-parameters found by the glass grid search.
results.best_params_

{'alpha': 1e-05, 'class_weight': None, 'max_iter': 100}

Perceptron + Glass

In [34]:
# Refit the Perceptron with the hyper-parameters reported by the glass grid search,
# then score it on the held-out glass test split.
perceptron = Perceptron(
    penalty='elasticnet',
    alpha=0.00001,
    class_weight=None,
    max_iter=100,
    random_state=42,
)
perceptron.fit(X=glass_train, y=glass_type_train)
scores_dict = dict(zip(
    ['Accuracy', 'Precision', 'Recall', 'F1'],
    get_scores(perceptron, glass_test, glass_type_test),
))
for name in scores_dict:
    print(name + ' = %.2f' % scores_dict[name])

Accuracy = 0.07
Precision = 0.18
Recall = 0.19
F1 = 0.07


### MLP

In [35]:
# Grid-search an MLP on the iris training split with 10-fold cross-validation.
# NOTE(review): per the scikit-learn docs, hidden_layer_sizes=(100, k) means two hidden
# layers (100 units, then k units), not k layers of 100 — confirm this is intended.
# NOTE(review): `learning_rate` is documented as only used with solver='sgd', so the
# 'adam' combinations repeat the same model three times.
mlp = MLPClassifier(random_state=42)
parameters = {
    'max_iter': [50, 100, 500, 1000, 2000],
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'hidden_layer_sizes': [(100, 1), (100, 2), (100, 3)],
    'learning_rate': ['constant', 'invscaling', 'adaptive'],
}
clf = GridSearchCV(estimator=mlp, param_grid=parameters, cv=10)
results = clf.fit(X=iris_train, y=iris_species_train)

In [36]:
# Best MLP hyper-parameters found by the iris grid search.
results.best_params_

{'activation': 'logistic',
 'hidden_layer_sizes': (100, 3),
 'learning_rate': 'constant',
 'max_iter': 500,
 'solver': 'adam'}

MLP + Iris

In [70]:
# Refit the MLP with the hyper-parameters reported by the iris grid search,
# then score it on the held-out iris test split.
mlp = MLPClassifier(
    hidden_layer_sizes=(100, 3),
    activation='logistic',
    solver='adam',
    learning_rate='constant',
    max_iter=500,
    random_state=42,
)
mlp.fit(iris_train, iris_species_train)
scores_dict = dict(zip(
    ['Accuracy', 'Precision', 'Recall', 'F1'],
    get_scores(mlp, iris_test, iris_species_test),
))
for name in scores_dict:
    print(name + ' = %.2f' % scores_dict[name])

Accuracy = 1.00
Precision = 1.00
Recall = 1.00
F1 = 1.00


In [72]:
# Grid-search an MLP on the glass training split with 10-fold cross-validation.
# NOTE(review): per the scikit-learn docs, hidden_layer_sizes=(100, k) means two hidden
# layers (100 units, then k units), not k layers of 100 — confirm this is intended.
# NOTE(review): `learning_rate` is documented as only used with solver='sgd', so the
# 'adam' combinations repeat the same model three times.
mlp = MLPClassifier(random_state=42)
parameters = {
    'max_iter': [50, 100, 500, 1000, 2000],
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'hidden_layer_sizes': [(100, 1), (100, 2), (100, 3)],
    'learning_rate': ['constant', 'invscaling', 'adaptive'],
}
clf = GridSearchCV(estimator=mlp, param_grid=parameters, cv=10)
results = clf.fit(X=glass_train, y=glass_type_train)

In [73]:
# Best MLP hyper-parameters found by the glass grid search.
results.best_params_

{'activation': 'tanh',
 'hidden_layer_sizes': (100, 3),
 'learning_rate': 'constant',
 'max_iter': 1000,
 'solver': 'adam'}

MLP + Glass

In [75]:
# Refit the MLP with the hyper-parameters reported by the glass grid search,
# then score it on the held-out glass test split.
mlp = MLPClassifier(
    hidden_layer_sizes=(100, 3),
    activation='tanh',
    solver='adam',
    learning_rate='constant',
    max_iter=1000,
    random_state=42,
)
mlp.fit(glass_train, glass_type_train)
scores_dict = dict(zip(
    ['Accuracy', 'Precision', 'Recall', 'F1'],
    get_scores(mlp, glass_test, glass_type_test),
))
for name in scores_dict:
    print(name + ' = %.2f' % scores_dict[name])

Accuracy = 0.62
Precision = 0.55
Recall = 0.53
F1 = 0.54


### SVM

### Iris

Kernel linear One-Versus-One

In [76]:
# Tune C for a linear-kernel SVC (one-vs-one decision function) on the iris
# training split using 10-fold cross-validation.
svc = SVC(
    kernel='linear',
    decision_function_shape='ovo',
    random_state=42,
)
parameters = {
    'C': [0.0001, 0.001, 0.1, 1, 2, 4],
}
clf = GridSearchCV(estimator=svc, param_grid=parameters, cv=10)
results = clf.fit(iris_train, iris_species_train)
print(results.best_params_)

{'C': 1}


In [77]:
# Refit the linear one-vs-one SVC with the C reported by the grid search (C=1)
# and score it on the held-out iris test split.
svc = SVC(
    kernel='linear',
    C=1,
    decision_function_shape='ovo',
    random_state=42,
)
svc.fit(iris_train, iris_species_train)
scores_dict = dict(zip(
    ['Accuracy', 'Precision', 'Recall', 'F1'],
    get_scores(svc, iris_test, iris_species_test),
))
for name in scores_dict:
    print(name + ' = %.2f' % scores_dict[name])

Accuracy = 1.00
Precision = 1.00
Recall = 1.00
F1 = 1.00


Kernel linear One-Versus-Rest

In [78]:
# Tune C for a linear-kernel SVC (one-vs-rest decision function) on the iris
# training split using 10-fold cross-validation.
# NOTE(review): per the scikit-learn docs, SVC always trains one-vs-one internally and
# `decision_function_shape` only reshapes decision_function output — which would explain
# why the 'ovo' and 'ovr' runs report identical scores.
svc = SVC(
    kernel='linear',
    decision_function_shape='ovr',
    random_state=42,
)
parameters = {
    'C': [0.0001, 0.001, 0.1, 1, 2, 4],
}
clf = GridSearchCV(estimator=svc, param_grid=parameters, cv=10)
results = clf.fit(iris_train, iris_species_train)
print(results.best_params_)

{'C': 1}


In [79]:
# Refit the linear one-vs-rest SVC with the C reported by the grid search (C=1)
# and score it on the held-out iris test split.
svc = SVC(
    kernel='linear',
    C=1,
    decision_function_shape='ovr',
    random_state=42,
)
svc.fit(iris_train, iris_species_train)
scores_dict = dict(zip(
    ['Accuracy', 'Precision', 'Recall', 'F1'],
    get_scores(svc, iris_test, iris_species_test),
))
for name in scores_dict:
    print(name + ' = %.2f' % scores_dict[name])

Accuracy = 1.00
Precision = 1.00
Recall = 1.00
F1 = 1.00


Kernel RBF One-Versus-One

In [80]:
# Tune C for an RBF-kernel SVC (one-vs-one decision function) on the iris
# training split using 10-fold cross-validation.
svc = SVC(
    kernel='rbf',
    decision_function_shape='ovo',
    random_state=42,
)
parameters = {
    'C': [0.0001, 0.001, 0.1, 1, 2, 4, 8],
}
clf = GridSearchCV(estimator=svc, param_grid=parameters, cv=10)
results = clf.fit(iris_train, iris_species_train)
print(results.best_params_)

{'C': 4}


In [81]:
# Refit the RBF one-vs-one SVC with the C reported by the grid search (C=4)
# and score it on the held-out iris test split.
svc = SVC(
    kernel='rbf',
    C=4,
    decision_function_shape='ovo',
    random_state=42,
)
svc.fit(iris_train, iris_species_train)
scores_dict = dict(zip(
    ['Accuracy', 'Precision', 'Recall', 'F1'],
    get_scores(svc, iris_test, iris_species_test),
))
for name in scores_dict:
    print(name + ' = %.2f' % scores_dict[name])

Accuracy = 1.00
Precision = 1.00
Recall = 1.00
F1 = 1.00


Kernel RBF One-Versus-Rest

In [82]:
# Tune C for an RBF-kernel SVC (one-vs-rest decision function) on the iris
# training split using 10-fold cross-validation.
svc = SVC(
    kernel='rbf',
    decision_function_shape='ovr',
    random_state=42,
)
parameters = {
    'C': [0.0001, 0.001, 0.1, 1, 2, 4, 8],
}
clf = GridSearchCV(estimator=svc, param_grid=parameters, cv=10)
results = clf.fit(iris_train, iris_species_train)
print(results.best_params_)

{'C': 4}


In [83]:
# Refit the RBF one-vs-rest SVC with the C reported by the grid search (C=4)
# and score it on the held-out iris test split.
svc = SVC(
    kernel='rbf',
    C=4,
    decision_function_shape='ovr',
    random_state=42,
)
svc.fit(iris_train, iris_species_train)
scores_dict = dict(zip(
    ['Accuracy', 'Precision', 'Recall', 'F1'],
    get_scores(svc, iris_test, iris_species_test),
))
for name in scores_dict:
    print(name + ' = %.2f' % scores_dict[name])

Accuracy = 1.00
Precision = 1.00
Recall = 1.00
F1 = 1.00


Kernel Polynomial One-Versus-One

In [84]:
# Tune C for a polynomial-kernel SVC (one-vs-one decision function) on the iris
# training split using 10-fold cross-validation.
svc = SVC(
    kernel='poly',
    decision_function_shape='ovo',
    random_state=42,
)
parameters = {
    'C': [0.0001, 0.001, 0.1, 1, 2, 4, 8, 16, 32, 64, 72],
}
clf = GridSearchCV(estimator=svc, param_grid=parameters, cv=10)
results = clf.fit(iris_train, iris_species_train)
print(results.best_params_)

{'C': 1}


In [85]:
# Refit the polynomial one-vs-one SVC with the C reported by the grid search (C=1)
# and score it on the held-out iris test split.
svc = SVC(
    kernel='poly',
    C=1,
    decision_function_shape='ovo',
    random_state=42,
)
svc.fit(iris_train, iris_species_train)
scores_dict = dict(zip(
    ['Accuracy', 'Precision', 'Recall', 'F1'],
    get_scores(svc, iris_test, iris_species_test),
))
for name in scores_dict:
    print(name + ' = %.2f' % scores_dict[name])

Accuracy = 0.98
Precision = 0.98
Recall = 0.98
F1 = 0.98


Kernel Polynomial One-Versus-Rest

In [86]:
# Tune C for a polynomial-kernel SVC (one-vs-rest decision function) on the iris
# training split using 10-fold cross-validation.
svc = SVC(
    kernel='poly',
    decision_function_shape='ovr',
    random_state=42,
)
parameters = {
    'C': [0.0001, 0.001, 0.1, 1, 2, 4, 8, 16, 32, 64, 72],
}
clf = GridSearchCV(estimator=svc, param_grid=parameters, cv=10)
results = clf.fit(iris_train, iris_species_train)
print(results.best_params_)

{'C': 1}


In [87]:
# Refit the polynomial one-vs-rest SVC with the C reported by the grid search (C=1)
# and score it on the held-out iris test split.
svc = SVC(
    kernel='poly',
    C=1,
    decision_function_shape='ovr',
    random_state=42,
)
svc.fit(iris_train, iris_species_train)
scores_dict = dict(zip(
    ['Accuracy', 'Precision', 'Recall', 'F1'],
    get_scores(svc, iris_test, iris_species_test),
))
for name in scores_dict:
    print(name + ' = %.2f' % scores_dict[name])

Accuracy = 0.98
Precision = 0.98
Recall = 0.98
F1 = 0.98


### Glass

Kernel linear One-Versus-One

In [91]:
# Tune C for a linear-kernel SVC (one-vs-one decision function) on the glass
# training split using 10-fold cross-validation.
svc = SVC(
    kernel='linear',
    decision_function_shape='ovo',
    random_state=42,
)
parameters = {
    'C': [0.0001, 0.001, 0.1, 1, 2, 4, 8, 16],
}
clf = GridSearchCV(estimator=svc, param_grid=parameters, cv=10)
results = clf.fit(glass_train, glass_type_train)
print(results.best_params_)

{'C': 8}


In [92]:
# Refit the linear one-vs-one SVC with the C reported by the grid search (C=8)
# and score it on the held-out glass test split.
svc = SVC(
    kernel='linear',
    C=8,
    decision_function_shape='ovo',
    random_state=42,
)
svc.fit(glass_train, glass_type_train)
scores_dict = dict(zip(
    ['Accuracy', 'Precision', 'Recall', 'F1'],
    get_scores(svc, glass_test, glass_type_test),
))
for name in scores_dict:
    print(name + ' = %.2f' % scores_dict[name])

Accuracy = 0.69
Precision = 0.61
Recall = 0.60
F1 = 0.60


Kernel linear One-Versus-Rest

In [94]:
# Tune C for a linear-kernel SVC (one-vs-rest decision function) on the glass
# training split using 10-fold cross-validation.
svc = SVC(
    kernel='linear',
    decision_function_shape='ovr',
    random_state=42,
)
parameters = {
    'C': [0.0001, 0.001, 0.1, 1, 2, 4, 8, 16],
}
clf = GridSearchCV(estimator=svc, param_grid=parameters, cv=10)
results = clf.fit(glass_train, glass_type_train)
print(results.best_params_)

{'C': 8}


In [95]:
# Refit the linear one-vs-rest SVC with the C reported by the grid search (C=8)
# and score it on the held-out glass test split.
svc = SVC(
    kernel='linear',
    C=8,
    decision_function_shape='ovr',
    random_state=42,
)
svc.fit(glass_train, glass_type_train)
scores_dict = dict(zip(
    ['Accuracy', 'Precision', 'Recall', 'F1'],
    get_scores(svc, glass_test, glass_type_test),
))
for name in scores_dict:
    print(name + ' = %.2f' % scores_dict[name])

Accuracy = 0.69
Precision = 0.61
Recall = 0.60
F1 = 0.60


Kernel RBF One-Versus-One

In [97]:
# Tune C for an RBF-kernel SVC (one-vs-one decision function) on the glass
# training split using 10-fold cross-validation.
svc = SVC(
    kernel='rbf',
    decision_function_shape='ovo',
    random_state=42,
)
parameters = {
    'C': [0.0001, 0.001, 0.1, 1, 2, 4, 8, 16],
}
clf = GridSearchCV(estimator=svc, param_grid=parameters, cv=10)
results = clf.fit(glass_train, glass_type_train)
print(results.best_params_)

{'C': 8}


In [98]:
# Refit the RBF one-vs-one SVC with the C reported by the grid search (C=8)
# and score it on the held-out glass test split.
svc = SVC(
    kernel='rbf',
    C=8,
    decision_function_shape='ovo',
    random_state=42,
)
svc.fit(glass_train, glass_type_train)
scores_dict = dict(zip(
    ['Accuracy', 'Precision', 'Recall', 'F1'],
    get_scores(svc, glass_test, glass_type_test),
))
for name in scores_dict:
    print(name + ' = %.2f' % scores_dict[name])

Accuracy = 0.69
Precision = 0.60
Recall = 0.59
F1 = 0.59


Kernel RBF One-Versus-Rest

In [99]:
# Tune C for an RBF-kernel SVC (one-vs-rest decision function) on the glass
# training split using 10-fold cross-validation.
svc = SVC(
    kernel='rbf',
    decision_function_shape='ovr',
    random_state=42,
)
parameters = {
    'C': [0.0001, 0.001, 0.1, 1, 2, 4, 8, 16],
}
clf = GridSearchCV(estimator=svc, param_grid=parameters, cv=10)
results = clf.fit(glass_train, glass_type_train)
print(results.best_params_)

{'C': 8}


In [100]:
# Refit the RBF one-vs-rest SVC with the C reported by the grid search (C=8)
# and score it on the held-out glass test split.
svc = SVC(
    kernel='rbf',
    C=8,
    decision_function_shape='ovr',
    random_state=42,
)
svc.fit(glass_train, glass_type_train)
scores_dict = dict(zip(
    ['Accuracy', 'Precision', 'Recall', 'F1'],
    get_scores(svc, glass_test, glass_type_test),
))
for name in scores_dict:
    print(name + ' = %.2f' % scores_dict[name])

Accuracy = 0.69
Precision = 0.60
Recall = 0.59
F1 = 0.59


Kernel Polynomial One-Versus-One

In [106]:
# Tune C for a polynomial-kernel SVC (one-vs-one decision function) on the glass
# training split using 10-fold cross-validation.
svc = SVC(
    kernel='poly',
    decision_function_shape='ovo',
    random_state=42,
)
parameters = {
    'C': [0.0001, 0.001, 0.1, 1, 2, 4, 8, 16, 32],
}
clf = GridSearchCV(estimator=svc, param_grid=parameters, cv=10)
results = clf.fit(glass_train, glass_type_train)
print(results.best_params_)

{'C': 16}


In [107]:
# Refit the polynomial one-vs-one SVC with the C reported by the grid search (C=16)
# and score it on the held-out glass test split.
svc = SVC(
    kernel='poly',
    C=16,
    decision_function_shape='ovo',
    random_state=42,
)
svc.fit(glass_train, glass_type_train)
scores_dict = dict(zip(
    ['Accuracy', 'Precision', 'Recall', 'F1'],
    get_scores(svc, glass_test, glass_type_test),
))
for name in scores_dict:
    print(name + ' = %.2f' % scores_dict[name])

Accuracy = 0.66
Precision = 0.62
Recall = 0.64
F1 = 0.62


Kernel Polynomial One-Versus-Rest

In [108]:
# Tune C for a polynomial-kernel SVC (one-vs-rest decision function) on the glass
# training split using 10-fold cross-validation.
svc = SVC(
    kernel='poly',
    decision_function_shape='ovr',
    random_state=42,
)
parameters = {
    'C': [0.0001, 0.001, 0.1, 1, 2, 4, 8, 16, 32],
}
clf = GridSearchCV(estimator=svc, param_grid=parameters, cv=10)
results = clf.fit(glass_train, glass_type_train)
print(results.best_params_)

{'C': 16}


In [None]:
# Refit the polynomial one-vs-rest SVC and score it on the held-out glass test split.
# C must be the value selected by the grid search for this configuration ({'C': 16});
# any value outside the searched grid would make this evaluation unrelated to the tuning.
svc = SVC(random_state=42, decision_function_shape='ovr', kernel='poly', C=16)
svc.fit(glass_train, glass_type_train)
scores_dict = dict(zip(['Accuracy', 'Precision', 'Recall', 'F1'], get_scores(svc, glass_test, glass_type_test)))
for name in scores_dict:
    print(name + ' = %.2f' % scores_dict[name])