In [1]:
!pip install -U scikit-learn

Requirement already up-to-date: scikit-learn in /usr/local/lib/python3.6/dist-packages (0.20.3)


In [5]:
# Code to predict class using different classifiers
# Also, tuning parameters using gridsearch and pipeline

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline

import warnings

wine = datasets.load_wine()
X = wine.data
y = wine.target
_, n_features = X.shape

# Splitting data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

# Specifying classifiers and their parameters for grid search
classifiers = [GaussianNB(), SVC(), RandomForestClassifier(), MLPClassifier()]
classifier_params = [{}, {'svc__C': [1e3, 5e3, 1e4, 5e4, 1e5], 'svc__gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1]}, {'randomforestclassifier__max_features': ['auto', 'sqrt', 'log2']}, {'mlpclassifier__hidden_layer_sizes': [(50,), (25,25), (10,10)], 'mlpclassifier__activation' : ['identity', 'logistic', 'tanh', 'relu']}]
               
for classifier, classifier_params in zip(classifiers, classifier_params):

  # Predicting class on data as is, using classifier directly
  clf = classifier
  clf.fit(X_train, y_train)
  y_pred = clf.predict(X_test)
  
  # Grid search for dimensionality reduction followed by class prediction
  params = {'pca__n_components': range(1,n_features+1)}
  params.update(classifier_params)
  pipe = make_pipeline(PCA(), classifier)
  clf_pipe = GridSearchCV(pipe, params, cv=5)
  clf_pipe.fit(X_train, y_train)
  y_pred_pipe = clf_pipe.predict(X_test)
  
  # Print classification accuracy results
  print(pipe.steps[1][0].upper(), ':', clf_pipe.best_params_)
  print("\nPrecision: %1.3f \tRecall: %1.3f \t\tF1: %1.3f" % (precision_score(y_test, y_pred, average='macro'), recall_score(y_test, y_pred, average='macro'), f1_score(y_test, y_pred, average='macro')))
  print("Precision: %1.3f \tRecall: %1.3f \t\tF1: %1.3f\n" % (precision_score(y_test, y_pred_pipe, average='macro'), recall_score(y_test, y_pred_pipe, average='macro'), f1_score(y_test, y_pred_pipe, average='macro')))
  
warnings.filterwarnings("ignore")

GAUSSIANNB : {'pca__n_components': 7}

Precision: 0.980 	Recall: 0.985 		F1: 0.982
Precision: 0.963 	Recall: 0.964 		F1: 0.962

SVC : {'pca__n_components': 7, 'svc__C': 1000.0, 'svc__gamma': 0.0001}

Precision: 0.769 	Recall: 0.369 		F1: 0.225
Precision: 0.900 	Recall: 0.902 		F1: 0.900

RANDOMFORESTCLASSIFIER : {'pca__n_components': 12, 'randomforestclassifier__max_features': 'auto'}

Precision: 0.964 	Recall: 0.964 		F1: 0.964
Precision: 0.917 	Recall: 0.907 		F1: 0.905

MLPCLASSIFIER : {'mlpclassifier__activation': 'tanh', 'mlpclassifier__hidden_layer_sizes': (50,), 'pca__n_components': 13}

Precision: 0.947 	Recall: 0.949 		F1: 0.945
Precision: 0.965 	Recall: 0.958 		F1: 0.961

