In [1]:
# Import numpy, pandas and pyplot
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Import data

In [2]:
# Read the CSV file into a DataFrame: df
df = pd.read_csv('white-wine.csv')

# Target vector: the 'quality' column; feature matrix: every remaining column
y = df['quality'].values
X = df.drop('quality', axis=1).values

# Preview the first rows. This has to be the cell's final expression: Jupyter
# only renders the value of the last expression, so a mid-cell df.head() is
# evaluated and silently discarded.
df.head()

# Pipeline for classification I

In [3]:
# Import the necessary modules
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report
# Two-stage pipeline: standardise the features, then classify with k-NN
steps = [
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier()),
]
pipeline = Pipeline(steps)

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fitted pipeline (scaling + k-NN): knn_scaled
knn_scaled = pipeline.fit(X_train, y_train)

# Baseline: the same classifier trained directly on the unscaled features
knn_unscaled = KNeighborsClassifier()
knn_unscaled.fit(X_train, y_train)

# Score both models on the same held-out test set and report the results
scaled_accuracy = knn_scaled.score(X_test, y_test)
unscaled_accuracy = knn_unscaled.score(X_test, y_test)
print('Accuracy with Scaling: {}'.format(scaled_accuracy))
print('Accuracy without Scaling: {}'.format(unscaled_accuracy))

Accuracy with Scaling: 0.5496598639455782
Accuracy without Scaling: 0.47551020408163264


# Pipeline for classification II

In [4]:
# Hold out 20% of the samples for the final evaluation (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=21
)

# Pipeline: standardise the features, then fit a support vector classifier
steps = [
    ('scaler', StandardScaler()),
    ('SVM', SVC()),
]
pipeline = Pipeline(steps)

# Hyperparameter grid. The 'SVM__' prefix addresses the pipeline step named 'SVM'.
# C controls the regularization strength; gamma controls the kernel coefficient.
parameters = {
    'SVM__C': [1, 10, 100],
    'SVM__gamma': [0.1, 0.01],
}

# Grid search over the whole pipeline, fitted on the training set only: cv
cv = GridSearchCV(pipeline, param_grid=parameters)
cv.fit(X_train, y_train)

# Predicted labels for the held-out test set: y_pred
y_pred = cv.predict(X_test)

# Report test accuracy, the per-class metrics and the selected hyperparameters
test_accuracy = cv.score(X_test, y_test)
print("Accuracy: {}".format(test_accuracy))
print(classification_report(y_test, y_pred))
print("Tuned Model Parameters: {}".format(cv.best_params_))

Accuracy: 0.5857142857142857
             precision    recall  f1-score   support

          3       0.00      0.00      0.00         1
          4       0.40      0.29      0.33        28
          5       0.62      0.59      0.60       289
          6       0.60      0.72      0.65       447
          7       0.53      0.38      0.44       177
          8       0.33      0.13      0.19        38

avg / total       0.57      0.59      0.57       980

Tuned Model Parameters: {'SVM__C': 10, 'SVM__gamma': 0.1}
