In [21]:
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

In [23]:
# Read the CSV (path is relative to the notebook's working directory) into a DataFrame.
DATA_FILE = "winequality-red.csv"
df = pd.read_csv(DATA_FILE)

In [25]:
# Per-column count of missing values (isna is the canonical name for isnull).
df.isna().sum()

fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
dtype: int64

In [26]:
# (rows, columns) of the loaded frame.
df.shape

(1599, 12)

In [27]:
# Target is the "quality" column; every other column is used as a feature.
y = df["quality"]
X = df.drop(columns=["quality"])

In [28]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=4242,
)

In [29]:
# Feature scaler with default settings; it is fitted on the training split in the next cell.
ss = StandardScaler()

In [30]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits so the test data does not influence the fit.
ss.fit(X_train)
X_train_scaled = ss.transform(X_train)
X_test_scaled = ss.transform(X_test)

In [31]:
# Baseline: SVC with default hyperparameters, scored on the held-out split.
svc_model = SVC().fit(X_train_scaled, y_train)
y_pred = svc_model.predict(X_test_scaled)
svc_accuracy = accuracy_score(y_test, y_pred)
print("SVC: ", svc_accuracy)

SVC:  0.584375


In [32]:
# Baseline: k-nearest-neighbours with default hyperparameters, scored on the held-out split.
knn_model = KNeighborsClassifier().fit(X_train_scaled, y_train)
y_pred = knn_model.predict(X_test_scaled)
knn_accuracy = accuracy_score(y_test, y_pred)
print("KNN: ", knn_accuracy)

KNN:  0.5375


In [33]:
# 10-fold cross-validated grid search over C and tol for an otherwise default SVC,
# using all available cores (n_jobs=-1).
svc = SVC()
svc_params = {"C": list(range(1, 6)), "tol": [0.001, 0.005]}

svc_cv_model = GridSearchCV(estimator=svc, param_grid=svc_params, cv=10, n_jobs=-1)
svc_cv_model.fit(X_train_scaled, y_train)

# Last expression of the cell: display the winning parameter combination.
svc_cv_model.best_params_



{'C': 1, 'tol': 0.001}

In [48]:
# Final SVC with hand-set hyperparameters, evaluated on the held-out split.
# NOTE(review): C=15 is outside the grid searched above (C in 1..5) and differs
# from the reported best_params_ (C=1). If this value was chosen by watching the
# test accuracy, the score printed here is optimistically biased -- confirm how
# C was selected, or widen the grid and use svc_cv_model.best_estimator_.
svc_tuned = SVC(kernel="rbf", C=15, tol=0.001).fit(X_train_scaled, y_train)
y_pred = svc_tuned.predict(X_test_scaled)
tuned_accuracy = accuracy_score(y_test, y_pred)
print("SVC: ", tuned_accuracy)

SVC:  0.63125


In [49]:
# Persist the tuned classifier to SVC.pkl. The context manager guarantees the
# file handle is flushed and closed even if pickling raises (the bare open()
# used previously left the handle to be closed by garbage collection).
# NOTE(review): svc_tuned was trained on StandardScaler-transformed features,
# but only the classifier is written here; the fitted scaler `ss` is not
# persisted in this cell -- whoever loads SVC.pkl presumably needs the same
# scaling applied to their inputs. Confirm and save `ss` alongside if so.
with open("SVC.pkl", "wb") as model_file:
    pickle.dump(svc_tuned, model_file)