In [14]:
import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import requests

In [25]:
print('\n - Lendo o arquivo com o dataset sobre abalone')
data = pd.read_csv('abalone_dataset.csv')
print(data)


 - Lendo o arquivo com o dataset sobre abalone
     sex  length  diameter  height  whole_weight  shucked_weight  \
0      M   0.535     0.420   0.150        0.6995          0.2575   
1      I   0.510     0.380   0.115        0.5155          0.2150   
2      I   0.185     0.130   0.045        0.0290          0.0120   
3      M   0.550     0.450   0.170        0.8100          0.3170   
4      I   0.535     0.415   0.150        0.5765          0.3595   
...   ..     ...       ...     ...           ...             ...   
3127   F   0.545     0.405   0.175        0.9800          0.2585   
3128   M   0.655     0.525   0.185        1.2590          0.4870   
3129   I   0.450     0.340   0.120        0.4925          0.2410   
3130   F   0.520     0.410   0.155        0.7270          0.2910   
3131   F   0.640     0.480   0.195        1.1435          0.4915   

      viscera_weight  shell_weight  type  
0             0.1530        0.2400     3  
1             0.1135        0.1660     1  
2     

In [26]:
# Mudando a coluna "sex" de variavel categorica para variavel continua
# Sexo masculino - valor 0
# Sexo feminino - valor 1
# Sexo infantil - valor 2
data["sex"].replace({"M": 0, "F": 1, "I": 2}, inplace=True)
data["sex"].astype(int)
# Renomeando a coluna "type" para typ, pois "type" he uma palavra
# reservada do python
data.rename(columns={'type': 'typ'}, inplace=True)
print(data)

      sex  length  diameter  height  whole_weight  shucked_weight  \
0       0   0.535     0.420   0.150        0.6995          0.2575   
1       2   0.510     0.380   0.115        0.5155          0.2150   
2       2   0.185     0.130   0.045        0.0290          0.0120   
3       0   0.550     0.450   0.170        0.8100          0.3170   
4       2   0.535     0.415   0.150        0.5765          0.3595   
...   ...     ...       ...     ...           ...             ...   
3127    1   0.545     0.405   0.175        0.9800          0.2585   
3128    0   0.655     0.525   0.185        1.2590          0.4870   
3129    2   0.450     0.340   0.120        0.4925          0.2410   
3130    1   0.520     0.410   0.155        0.7270          0.2910   
3131    1   0.640     0.480   0.195        1.1435          0.4915   

      viscera_weight  shell_weight  typ  
0             0.1530        0.2400    3  
1             0.1135        0.1660    1  
2             0.0075        0.0095    1  
3  

In [27]:
# Normalizando os dados
cols_to_norm = ['length', 'diameter', 'height', 'whole_weight', 'shucked_weight', 
                'viscera_weight', 'shell_weight']
data[cols_to_norm] = data[cols_to_norm].apply(lambda x: (x - x.min()) / (x.max() - x.min()))
print(data)

      sex    length  diameter    height  whole_weight  shucked_weight  \
0       0  0.621622  0.613445  0.291262      0.247034        0.172495   
1       2  0.587838  0.546218  0.223301      0.181866        0.143914   
2       2  0.148649  0.126050  0.087379      0.009563        0.007397   
3       0  0.641892  0.663866  0.330097      0.286170        0.212508   
4       2  0.621622  0.605042  0.291262      0.203471        0.241089   
...   ...       ...       ...       ...           ...             ...   
3127    1  0.635135  0.588235  0.339806      0.346379        0.173167   
3128    0  0.783784  0.789916  0.359223      0.445192        0.326833   
3129    2  0.506757  0.478992  0.233010      0.173721        0.161399   
3130    1  0.601351  0.596639  0.300971      0.256774        0.195024   
3131    1  0.763514  0.714286  0.378641      0.404285        0.329859   

      viscera_weight  shell_weight  typ  
0           0.200790      0.237668    3  
1           0.148782      0.163926    1

In [28]:
print(' - Criando X e y para o algoritmo de aprendizagem a partir do arquivo abalone_dataset')
# Caso queira modificar as colunas consideradas basta alterar o array a seguir.
feature_cols = ['sex', 'length', 'diameter', 'height', 
                'whole_weight', 'shucked_weight', 'viscera_weight', 'shell_weight']
X = data[feature_cols]
y = data.typ

# Criando o modelo preditivo do k-NN para a base trabalhada
print(' - Criando modelo preditivo')
neigh = KNeighborsClassifier(n_neighbors=15)
neigh.fit(X, y)

# Dataset to numpy array
arr_X = X.to_numpy()
arr_y = y.to_numpy()

 - Criando X e y para o algoritmo de aprendizagem a partir do arquivo abalone_dataset
 - Criando modelo preditivo


In [29]:
# Utilizando o k-fold cross validation para avaliar o modelo gerado
# pelo k-NN
n_folds = 3
kf = KFold(n_splits=n_folds, shuffle=True)

fold = 1
for train_index, test_index in kf.split(arr_X, arr_y):
    x_train_fold, x_test_fold = arr_X[train_index], arr_X[test_index]
    y_train_fold, y_test_fold = arr_y[train_index], arr_y[test_index]
    neigh.fit(x_train_fold, y_train_fold)
    predictions = neigh.predict(x_test_fold)
    print("Fold", fold,"results: ")
    print("Classification Report:")
    print(classification_report(y_test_fold,predictions))
    fold = fold + 1
  

Fold 1 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.82      0.74      0.78       383
           2       0.47      0.62      0.53       312
           3       0.71      0.58      0.64       349

    accuracy                           0.65      1044
   macro avg       0.67      0.65      0.65      1044
weighted avg       0.68      0.65      0.66      1044

Fold 2 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.71      0.76      0.74       356
           2       0.50      0.53      0.51       345
           3       0.68      0.58      0.63       343

    accuracy                           0.63      1044
   macro avg       0.63      0.62      0.63      1044
weighted avg       0.63      0.63      0.63      1044

Fold 3 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.69      0.76      0.72       339
         

In [30]:
# Utilizando o algoritmo SVM para gerar o modelo
# e avalinado com o k-fold cross validation 
clf = svm.SVC(decision_function_shape='ovo')

fold = 1
for train_index, test_index in kf.split(arr_X, arr_y):
    x_train_fold, x_test_fold = arr_X[train_index], arr_X[test_index]
    y_train_fold, y_test_fold = arr_y[train_index], arr_y[test_index]
    clf.fit(x_train_fold, y_train_fold)
    predictions = clf.predict(x_test_fold)
    print("Fold", fold,"results: ")
    print("Classification Report:")
    print(classification_report(y_test_fold,predictions))
    fold = fold + 1



Fold 1 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.78      0.73      0.75       369
           2       0.49      0.48      0.49       334
           3       0.64      0.70      0.67       341

    accuracy                           0.64      1044
   macro avg       0.64      0.64      0.64      1044
weighted avg       0.64      0.64      0.64      1044

Fold 2 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.75      0.70      0.72       340
           2       0.48      0.57      0.52       348
           3       0.68      0.59      0.63       356

    accuracy                           0.62      1044
   macro avg       0.63      0.62      0.62      1044
weighted avg       0.63      0.62      0.62      1044

Fold 3 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.78      0.69      0.73       369
         

In [31]:
# Utilizando o algoritmo SVM para gerar o modelo
# e avalinado com o k-fold cross validation 
C = 1.0
clf2 = svm.SVC(kernel='linear', C=C)

fold = 1
for train_index, test_index in kf.split(arr_X, arr_y):
    x_train_fold, x_test_fold = arr_X[train_index], arr_X[test_index]
    y_train_fold, y_test_fold = arr_y[train_index], arr_y[test_index]
    clf2.fit(x_train_fold, y_train_fold)
    predictions = clf2.predict(x_test_fold)
    print("Fold", fold,"results: ")
    print("Classification Report:")
    print(classification_report(y_test_fold,predictions))
    fold = fold + 1

Fold 1 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.75      0.76      0.75       374
           2       0.50      0.52      0.51       337
           3       0.65      0.61      0.63       333

    accuracy                           0.63      1044
   macro avg       0.63      0.63      0.63      1044
weighted avg       0.63      0.63      0.63      1044

Fold 2 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.70      0.80      0.75       353
           2       0.50      0.57      0.53       322
           3       0.73      0.54      0.62       369

    accuracy                           0.64      1044
   macro avg       0.64      0.64      0.63      1044
weighted avg       0.65      0.64      0.63      1044

Fold 3 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.69      0.76      0.72       351
         

In [32]:
## Utilizando o algoritmo SVM para gerar o modelo
# e avalinado com o k-fold cross validation 
C = 1.0
clf3 = svm.SVC(kernel='rbf', gamma=0.7, C=C)

fold = 1
for train_index, test_index in kf.split(arr_X, arr_y):
    x_train_fold, x_test_fold = arr_X[train_index], arr_X[test_index]
    y_train_fold, y_test_fold = arr_y[train_index], arr_y[test_index]
    clf3.fit(x_train_fold, y_train_fold)
    predictions = clf3.predict(x_test_fold)
    print("Fold", fold,"results: ")
    print("Classification Report:")
    print(classification_report(y_test_fold,predictions))
    fold = fold + 1

Fold 1 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.79      0.71      0.75       358
           2       0.48      0.60      0.53       329
           3       0.71      0.62      0.66       357

    accuracy                           0.64      1044
   macro avg       0.66      0.64      0.65      1044
weighted avg       0.66      0.64      0.65      1044

Fold 2 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.77      0.68      0.72       372
           2       0.45      0.43      0.44       345
           3       0.54      0.64      0.59       327

    accuracy                           0.59      1044
   macro avg       0.59      0.58      0.58      1044
weighted avg       0.59      0.59      0.59      1044

Fold 3 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.74      0.71      0.72       348
         

In [18]:
## Utilizando o algoritmo SVM para gerar o modelo
# e avalinado com o k-fold cross validation 
clf4 = svm.LinearSVC(C=C, max_iter=10000)

fold = 1
for train_index, test_index in kf.split(arr_X, arr_y):
    x_train_fold, x_test_fold = arr_X[train_index], arr_X[test_index]
    y_train_fold, y_test_fold = arr_y[train_index], arr_y[test_index]
    clf4.fit(x_train_fold, y_train_fold)
    predictions = clf4.predict(x_test_fold)
    print("Fold", fold,"results: ")
    print("Classification Report:")
    print(classification_report(y_test_fold,predictions))
    fold = fold + 1

Fold 1 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.72      0.80      0.76       369
           2       0.50      0.42      0.46       342
           3       0.63      0.66      0.64       333

    accuracy                           0.63      1044
   macro avg       0.62      0.63      0.62      1044
weighted avg       0.62      0.63      0.62      1044

Fold 2 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.68      0.81      0.74       356
           2       0.50      0.45      0.47       316
           3       0.70      0.64      0.67       372

    accuracy                           0.64      1044
   macro avg       0.63      0.63      0.63      1044
weighted avg       0.63      0.64      0.63      1044

Fold 3 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.66      0.79      0.72       353
         

In [19]:
clf5 = MLPClassifier(solver='lbfgs', alpha=1e-5,
                     hidden_layer_sizes=(10, 5), max_iter = 10000, random_state=1)

fold = 1
for train_index, test_index in kf.split(arr_X, arr_y):
    x_train_fold, x_test_fold = arr_X[train_index], arr_X[test_index]
    y_train_fold, y_test_fold = arr_y[train_index], arr_y[test_index]
    clf5.fit(x_train_fold, y_train_fold)
    predictions = clf5.predict(x_test_fold)
    print("Fold", fold,"results: ")
    print("Classification Report:")
    print(classification_report(y_test_fold,predictions))
    fold = fold + 1

Fold 1 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.73      0.81      0.77       345
           2       0.56      0.51      0.53       359
           3       0.66      0.64      0.65       340

    accuracy                           0.65      1044
   macro avg       0.65      0.65      0.65      1044
weighted avg       0.65      0.65      0.65      1044

Fold 2 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.78      0.69      0.73       360
           2       0.48      0.54      0.51       324
           3       0.71      0.70      0.70       360

    accuracy                           0.65      1044
   macro avg       0.65      0.64      0.65      1044
weighted avg       0.66      0.65      0.65      1044

Fold 3 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.79      0.76      0.77       373
         

In [73]:
# Teste com k-NN
# Mudando a coluna "sex" de variavel categorica para variavel continua
# Sexo masculino - valor 0
# Sexo feminino - valor 1
# Sexo infantil - valor 2
data_app["sex"].replace({"M": 0, "F": 1, "I": 2}, inplace=True)
data_app["sex"].astype(int)

# Normalizando os dados
data_app[cols_to_norm] = data_app[cols_to_norm].apply(lambda x: (x - x.min()) / (x.max() - x.min()))

#realizando previsões com o arquivo de teste
print(' - Aplicando modelo e enviando para o servidor')

data_app = data_app[feature_cols]
y_pred = neigh.predict(data_app)

print(y_pred)

 - Aplicando modelo e enviando para o servidor
[3 2 3 ... 2 1 2]


In [74]:
# Enviando previsões realizadas com o modelo para o servidor
URL = "https://aydanomachado.com/mlclass/03_Validation.php"

#TODO Substituir pela sua chave aqui
DEV_KEY = "MLTL"

# json para ser enviado para o servidor
data = {'dev_key':DEV_KEY,
        'predictions':pd.Series(y_pred).to_json(orient='values')}

# Enviando requisição e salvando o objeto resposta
r = requests.post(url = URL, data = data)

# Extraindo e imprimindo o texto da resposta
pastebin_url = r.text
print(" - Resposta do servidor:\n", r.text, "\n")

 - Resposta do servidor:
 {"status":"success","dev_key":"MLTL","accuracy":0.5741626794258373,"old_accuracy":0} 



In [20]:
# Teste com NN
# Mudando a coluna "sex" de variavel categorica para variavel continua
# Sexo masculino - valor 0
# Sexo feminino - valor 1
# Sexo infantil - valor 2
data_app = pd.read_csv('abalone_app.csv')
data_app["sex"].replace({"M": 0, "F": 1, "I": 2}, inplace=True)
data_app["sex"].astype(int)

# Normalizando os dados
data_app[cols_to_norm] = data_app[cols_to_norm].apply(lambda x: (x - x.min()) / (x.max() - x.min()))

#realizando previsões com o arquivo de teste
print(' - Aplicando modelo e enviando para o servidor')

data_app = data_app[feature_cols]
y_pred = clf5.predict(data_app)

print(y_pred)

 - Aplicando modelo e enviando para o servidor
 - Aplicando modelo e enviando para o servidor
[3 2 3 ... 1 1 2]


In [21]:
# Enviando previsões realizadas com o modelo para o servidor
URL = "https://aydanomachado.com/mlclass/03_Validation.php"

#TODO Substituir pela sua chave aqui
DEV_KEY = "MLTL"

# json para ser enviado para o servidor
data = {'dev_key':DEV_KEY,
        'predictions':pd.Series(y_pred).to_json(orient='values')}

# Enviando requisição e salvando o objeto resposta
r = requests.post(url = URL, data = data)

# Extraindo e imprimindo o texto da resposta
pastebin_url = r.text
print(" - Resposta do servidor:\n", r.text, "\n")

 - Resposta do servidor:
 {"status":"success","dev_key":"MLTL","accuracy":0.583732057416268,"old_accuracy":0.57416267942584} 



In [10]:
clf6 = tree.DecisionTreeClassifier()

n_folds = 3
kf = KFold(n_splits=n_folds, shuffle=True)
fold = 1
for train_index, test_index in kf.split(arr_X, arr_y):
    x_train_fold, x_test_fold = arr_X[train_index], arr_X[test_index]
    y_train_fold, y_test_fold = arr_y[train_index], arr_y[test_index]
    clf6.fit(x_train_fold, y_train_fold)
    predictions = clf6.predict(x_test_fold)
    print("Fold", fold,"results: ")
    print("Classification Report:")
    print(classification_report(y_test_fold,predictions))
    fold = fold + 1

Fold 1 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.65      0.70      0.67       339
           2       0.44      0.43      0.44       353
           3       0.60      0.56      0.58       352

    accuracy                           0.56      1044
   macro avg       0.56      0.57      0.56      1044
weighted avg       0.56      0.56      0.56      1044

Fold 2 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.71      0.70      0.70       377
           2       0.42      0.43      0.42       324
           3       0.58      0.58      0.58       343

    accuracy                           0.58      1044
   macro avg       0.57      0.57      0.57      1044
weighted avg       0.58      0.58      0.58      1044

Fold 3 results: 
Classification Report:
              precision    recall  f1-score   support

           1       0.72      0.66      0.69       362
         