In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

In [2]:
data = pd.read_csv('winequality-red.csv')

In [3]:
x = data.drop(columns=['quality'])
y = data.quality

In [4]:
x_train, x_test, y_train, y_test = train_test_split(x,y, random_state=20)

In [5]:
x_train.shape, y_train.shape

((1199, 11), (1199,))

In [6]:
knn = KNeighborsClassifier()

In [7]:
knn.fit(x_train, y_train)

KNeighborsClassifier()

In [8]:
knn.score(x_test, y_test)

0.5175

In [9]:
grid_params = {
    "n_neighbors":[3,5,7,9,12,13,15,17,21],
    'algorithm':['auto','ball_tree', 'kd_tree','brute'],
    'leaf_size':[10,15,20,25,30,45,50],
    'p':[1,2],
    'weights':['uniform','distance']
}

In [10]:
grid_cv = GridSearchCV(knn, param_grid=grid_params, cv=7)


In [11]:
grid_cv.fit(x_train,y_train)

GridSearchCV(cv=7, estimator=KNeighborsClassifier(),
             param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                         'leaf_size': [10, 15, 20, 25, 30, 45, 50],
                         'n_neighbors': [3, 5, 7, 9, 12, 13, 15, 17, 21],
                         'p': [1, 2], 'weights': ['uniform', 'distance']})

In [12]:
grid_cv.best_params_

{'algorithm': 'auto',
 'leaf_size': 10,
 'n_neighbors': 17,
 'p': 1,
 'weights': 'distance'}

In [13]:
knn_new = KNeighborsClassifier(algorithm='auto', leaf_size=10,n_neighbors=21,p=1,weights='distance')

In [14]:
knn_new.fit(x_train,y_train)

KNeighborsClassifier(leaf_size=10, n_neighbors=21, p=1, weights='distance')

In [15]:
knn_new.score(x_test, y_test)

0.6325

In [16]:
# We are able to improvise the result.

In [17]:
import pickle
pickle.dump(knn, open('knn1.pkl','wb'))

In [18]:
pickle.dump(knn_new,open('knn_new','wb'))

In [19]:
knn.predict(x_test)

array([6, 5, 5, 5, 6, 5, 6, 6, 5, 6, 6, 5, 6, 5, 5, 5, 5, 5, 6, 6, 4, 6,
       6, 5, 7, 5, 6, 5, 5, 5, 6, 5, 5, 6, 5, 7, 6, 5, 5, 6, 5, 5, 6, 5,
       7, 6, 5, 5, 5, 6, 6, 5, 5, 5, 6, 6, 5, 6, 5, 6, 6, 5, 6, 5, 5, 5,
       5, 5, 7, 5, 5, 5, 6, 6, 5, 5, 6, 5, 6, 5, 5, 6, 6, 5, 4, 6, 5, 5,
       5, 5, 5, 6, 5, 6, 6, 6, 5, 6, 5, 6, 5, 5, 7, 4, 6, 6, 6, 5, 7, 6,
       5, 6, 7, 5, 5, 6, 6, 5, 6, 6, 5, 5, 5, 6, 5, 5, 5, 5, 5, 6, 5, 5,
       5, 5, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6, 5, 6, 4, 6, 5, 6, 6, 5, 6, 6,
       6, 5, 5, 6, 5, 6, 6, 5, 6, 5, 5, 7, 5, 5, 6, 5, 5, 7, 5, 6, 6, 7,
       5, 7, 5, 5, 7, 7, 7, 5, 6, 6, 6, 5, 7, 5, 6, 5, 6, 5, 5, 5, 5, 5,
       6, 6, 6, 5, 7, 5, 6, 5, 6, 6, 5, 5, 6, 6, 5, 5, 6, 5, 5, 6, 5, 6,
       5, 6, 6, 6, 6, 7, 5, 5, 6, 6, 5, 6, 7, 6, 6, 5, 7, 5, 6, 5, 5, 7,
       6, 5, 5, 6, 5, 6, 5, 5, 5, 5, 6, 5, 5, 6, 6, 6, 5, 6, 6, 5, 5, 5,
       6, 5, 5, 5, 6, 3, 6, 6, 7, 6, 5, 7, 7, 5, 5, 5, 6, 6, 6, 6, 5, 5,
       5, 6, 5, 6, 6, 6, 5, 7, 6, 6, 6, 5, 6, 6, 5,

In [20]:
confusion_matrix(y_test, knn.predict(x_test))

array([[  0,   0,   1,   1,   0,   0],
       [  0,   0,   6,   4,   0,   0],
       [  1,   4, 111,  51,   3,   0],
       [  0,   3,  70,  80,  12,   0],
       [  0,   0,  12,  23,  16,   0],
       [  0,   0,   0,   2,   0,   0]], dtype=int64)

In [21]:
 from sklearn.svm import SVC
    

In [22]:
svc = SVC()
svc.fit(x_train, y_train)

SVC()

In [23]:
svc.score(x_test, y_test)

0.5

In [28]:
param=  {
    "kernel":['linear', 'poly', 'rbf', 'sigmoid'],
    "C":[.1,.4,.6,1,2,3,100,200,500],
    'gamma':[.001, .1,.4,.004,.003]
}

svm_grid = GridSearchCV(svc, param_grid=param,verbose =3)

In [None]:
svm_grid.fit(x_train, y_train)

Fitting 5 folds for each of 180 candidates, totalling 900 fits
[CV 1/5] END .C=0.1, gamma=0.001, kernel=linear;, score=0.600 total time=   0.1s
[CV 2/5] END .C=0.1, gamma=0.001, kernel=linear;, score=0.525 total time=   0.3s
[CV 3/5] END .C=0.1, gamma=0.001, kernel=linear;, score=0.575 total time=   0.1s
[CV 4/5] END .C=0.1, gamma=0.001, kernel=linear;, score=0.600 total time=   0.2s
[CV 5/5] END .C=0.1, gamma=0.001, kernel=linear;, score=0.548 total time=   0.3s
[CV 1/5] END ...C=0.1, gamma=0.001, kernel=poly;, score=0.517 total time=   0.0s
[CV 2/5] END ...C=0.1, gamma=0.001, kernel=poly;, score=0.500 total time=   0.1s
[CV 3/5] END ...C=0.1, gamma=0.001, kernel=poly;, score=0.479 total time=   0.1s
[CV 4/5] END ...C=0.1, gamma=0.001, kernel=poly;, score=0.483 total time=   0.0s
[CV 5/5] END ...C=0.1, gamma=0.001, kernel=poly;, score=0.510 total time=   0.0s
[CV 1/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=0.529 total time=   0.0s
[CV 2/5] END ....C=0.1, gamma=0.001, kernel=rb

# Stacking

In [21]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, SVR
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier


In [3]:
df = pd.read_csv('winequality-red.csv')
x = df.drop(columns=['quality'])
y = df.quality

In [12]:
train, val_train,test,val_test= train_test_split(x,y,test_size=.50,random_state=30)

In [13]:
x_train ,x_test ,y_train ,y_test = train_test_split(train,test,test_size=.20, random_state =30)

In [14]:
knn = KNeighborsClassifier()
knn.fit(x_train ,y_train)

KNeighborsClassifier()

In [15]:
knn.score(x_test, y_test)

0.54375

In [16]:
svc = SVC()
svc.fit(x_train,y_train)

SVC()

In [17]:
svc.score(x_test,y_test)

0.4875

In [18]:
prediction_knn = knn.predict(val_train)
prediction_svc = svc.predict(val_train)

In [25]:
input_3 = np.column_stack((prediction_knn, prediction_knn))

In [26]:
output = val_test

In [33]:
stack_rf = RandomForestClassifier()
stack_rf.fit(input_3, output)

RandomForestClassifier()

In [34]:
# 10% of the dataset will be used for validation purpose

In [None]:
output_stack1 = np.column_stack((knn_output, svc_output))