<a href="https://colab.research.google.com/github/rahiakela/mlops-research-and-practice/blob/main/machine-learning-engineering-with-python/03-model-factory/01_feature_engineering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setup

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.linear_model import RidgeClassifier
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
from sklearn.pipeline import make_pipeline

## Dataset

In [2]:
# Load the wine dataset directly as a feature matrix X and a label vector y
X, y = load_wine(return_X_y=True)

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)

## Modeling

In [3]:
# Baseline: fit a ridge classifier on the raw (unscaled) features.
# The "sag" solver shuffles the data stochastically, so random_state is pinned
# to make the fitted model and its predictions reproducible on a fresh kernel.
no_scale_clf = make_pipeline(
    RidgeClassifier(tol=1e-2, solver="sag", random_state=42)
)
no_scale_clf.fit(X_train, y_train)

# Predictions on the held-out test split, used for evaluation downstream
y_pred_no_scale = no_scale_clf.predict(X_test)

In [4]:
# Fit the same ridge classifier, but standardize the features first.
# StandardScaler is fitted inside the pipeline on the training data only.
# The "sag" solver shuffles the data stochastically, so random_state is pinned
# to make the fitted model and its predictions reproducible on a fresh kernel.
std_scale_clf = make_pipeline(
    StandardScaler(),
    RidgeClassifier(tol=1e-2, solver="sag", random_state=42),
)
std_scale_clf.fit(X_train, y_train)

# Predictions on the held-out test split, used for evaluation downstream
y_pred_std_scale = std_scale_clf.predict(X_test)

## Prediction

In [10]:
# Report accuracy and the per-class classification report for each model.
# Both sets of predictions are scored against the same held-out labels, so
# iterate over (label, predictions) pairs instead of repeating the prints.
evaluated_predictions = [
    ('no scaling', y_pred_no_scale),
    ('scaling', y_pred_std_scale),
]

for position, (label, y_pred) in enumerate(evaluated_predictions):
    if position > 0:
        # Visual separator between consecutive model reports
        print("--" * 30)

    print('\nAccuracy [{}]'.format(label))
    print('{:.2%}\n'.format(metrics.accuracy_score(y_test, y_pred)))

    print('\nClassification Report [{}]'.format(label))
    print(metrics.classification_report(y_test, y_pred))


Accuracy [no scaling]
75.93%


Classification Report [no scaling]
              precision    recall  f1-score   support

           0       0.90      1.00      0.95        19
           1       0.66      1.00      0.79        21
           2       1.00      0.07      0.13        14

    accuracy                           0.76        54
   macro avg       0.85      0.69      0.63        54
weighted avg       0.83      0.76      0.68        54

------------------------------------------------------------

Accuracy [scaling]
98.15%


Classification Report [scaling]
              precision    recall  f1-score   support

           0       0.95      1.00      0.97        19
           1       1.00      0.95      0.98        21
           2       1.00      1.00      1.00        14

    accuracy                           0.98        54
   macro avg       0.98      0.98      0.98        54
weighted avg       0.98      0.98      0.98        54

