# Abalone Project - Model Classification
Maria Eugênia Fonseca\
2021/10/05

In [45]:
import pandas as pd
import mlflow

from sklearn.metrics import accuracy_score, f1_score
from sklearn.naive_bayes import CategoricalNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

In [21]:
X_train = pd.read_csv("../data/processed/abalone_xtrain_class.csv")
y_train = pd.read_csv("../data/processed/abalone_ytrain_class.csv")

X_test = pd.read_csv("../data/processed/abalone_xtest_class.csv")
y_test = pd.read_csv("../data/processed/abalone_ytest_class.csv")

In [22]:
X_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.540541,0.521008,0.09375,0.176023,0.155346,0.115866,0.153961,1.0,0.0,0.0
1,0.601351,0.621849,0.125,0.287232,0.258238,0.264648,0.227703,0.0,1.0,0.0
2,0.736486,0.714286,0.151786,0.399681,0.354741,0.348914,0.303438,1.0,0.0,0.0
3,0.378378,0.394958,0.071429,0.115282,0.089442,0.112574,0.088191,0.0,0.0,1.0
4,0.452703,0.436975,0.075893,0.098636,0.075992,0.044766,0.108122,1.0,0.0,0.0


In [23]:
y_train.head()

Unnamed: 0,age
0,middle age
1,middle age
2,middle age
3,middle age
4,middle age


#### Start mlflow experiment:

In [24]:
EXPERIMENT_NAME = "abalone_classification"

try:
    mlflow.create_experiment(EXPERIMENT_NAME)
    print("CREATING")
except:
    print("ALREADY EXISTS")

mlflow.set_experiment(EXPERIMENT_NAME)

ALREADY EXISTS


#### Function to evaluate classification and log metrics in mlflow:

In [34]:
def evaluate_classification_and_log(classification_model, X_train, y_train, X_test, y_test, params):    
    # metrics - train
    y_train_pred = classification_model.predict(X_train)
    accuracy_train = accuracy_score(y_train, y_train_pred)
    f1_train = f1_score(y_train, y_train_pred, average='weighted')

    # metrics - test
    y_test_pred = classification_model.predict(X_test)
    accuracy_test = accuracy_score(y_test, y_test_pred)
    f1_test = f1_score(y_test, y_test_pred, average='weighted')
    
    # log metrics    
    mlflow.start_run()

    mlflow.log_metric("accuracy_train", accuracy_train)
    mlflow.log_metric("f1_train", f1_train)
    mlflow.log_metric("accuracy_test", accuracy_test)
    mlflow.log_metric("f1_test", f1_test)

    mlflow.sklearn.log_model(classification_model, "model")
    mlflow.log_param("model_name", type(classification_model).__name__)
    mlflow.log_params(params)
    
    mlflow.end_run()

#### First model - CategoricalNB

In [7]:
params_pipe = {
    "minmax_scaller": "true"}

In [35]:
nb = CategoricalNB().fit(X_train, y_train)

evaluate_classification_and_log(nb, X_train, y_train, X_test, y_test, params_pipe)

  return f(*args, **kwargs)


#### Second model - KNeighborsClassifier

In [38]:
knn = KNeighborsClassifier().fit(X_train, y_train)

evaluate_classification_and_log(knn, X_train, y_train, X_test, y_test, params_pipe)

  return self._fit(X, y)


In [42]:
knn.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

#### Third model - KNeighborsClassifier with hyperparameter tuning

In [40]:
params_pipe = {
    "minmax_scaller": "true",
    "hyperparameter_tuning": "true"}

In [52]:
n_neighbors = [4, 5, 6, 7, 8]
weights = ['uniform', 'distance']

# Create the grid
search_grid = {'n_neighbors': n_neighbors,
               'weights': weights}

In [53]:
knn = KNeighborsClassifier()

# Grid search of parameters, using 3 fold cross validation, 
knn_tuning = GridSearchCV(estimator=knn, param_grid=search_grid, cv=3, n_jobs=-1)

# Fit the random search model
knn_tuning.fit(X_train, y_train)

  return self._fit(X, y)


GridSearchCV(cv=3, estimator=KNeighborsClassifier(), n_jobs=-1,
             param_grid={'n_neighbors': [4, 5, 6, 7, 8],
                         'weights': ['uniform', 'distance']})

In [54]:
knn_tuning.best_params_

{'n_neighbors': 7, 'weights': 'uniform'}

In [55]:
knn_tuned = knn_tuning.best_estimator_

evaluate_classification_and_log(knn_tuned, X_train, y_train, X_test, y_test, params_pipe)

#### Fourth model - 