In [58]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, log_loss
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline

# import warnings
# warnings.filterwarnings("ignore")

In [59]:
# Read the three CSV inputs; train/test use their first column as the index.
train = pd.read_csv("train.csv", index_col=0)
test = pd.read_csv("test.csv", index_col=0)
sub = pd.read_csv("sample_submission.csv")

# Separate the "quality" column (y) from the remaining columns (X).
y = train["quality"]
X = train.drop(columns="quality")

In [60]:
# Shared 5-fold stratified, shuffled splitter used by the grid searches below.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)

# 70/30 hold-out split, stratified on y.
# NOTE(review): the grid-search cells visible in this notebook fit on the full
# X, y and never read these four variables — confirm whether they are still needed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=23
)

Linear SVM with Standard Scaler

In [49]:
# Grid-search a linear-kernel SVC on standardised features, scored by negative log-loss.
scaler = StandardScaler()
svm = SVC(kernel="linear", probability=True, random_state=23)
pipe = Pipeline(steps=[("SCL", scaler), ("SVM", svm)])

# Ten evenly spaced C values in [0.001, 5], crossed with both decision-function shapes.
params = dict(
    SVM__C=np.linspace(0.001, 5, 10),
    SVM__decision_function_shape=["ovo", "ovr"],
)

gcv = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring="neg_log_loss",
    cv=kfold,
    verbose=3,
)
gcv.fit(X, y)
print(gcv.best_params_, gcv.best_score_, sep="\n")

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-1.094 total time=   0.4s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-1.049 total time=   0.4s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-1.058 total time=   0.5s
[CV 4/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-1.061 total time=   0.4s
[CV 5/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-1.058 total time=   0.4s
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-1.094 total time=   0.3s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-1.049 total time=   0.3s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-1.058 total time=   0.3s
[CV 4/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-1.061 total time=   0.3s
[CV 5/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-1.058 total time=   0.3

KeyboardInterrupt: 

Linear SVM with MinMax Scaler

In [None]:
# Grid-search a linear-kernel SVC on min-max scaled features, scored by negative log-loss.
scaler = MinMaxScaler()
svm = SVC(kernel="linear", probability=True, random_state=23)
pipe = Pipeline(steps=[("SCL", scaler), ("SVM", svm)])

# The single-element "SCL" entry keeps the scaler visible in best_params_ and in the fit log.
params = dict(
    SCL=[scaler],
    SVM__C=np.linspace(0.001, 5, 10),
    SVM__decision_function_shape=["ovo", "ovr"],
)

gcv = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring="neg_log_loss",
    cv=kfold,
    verbose=3,
)
gcv.fit(X, y)
print(gcv.best_params_, gcv.best_score_, sep="\n")

Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END SCL=MinMaxScaler(), SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-1.116 total time=   0.3s
[CV 2/5] END SCL=MinMaxScaler(), SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-1.060 total time=   0.4s
[CV 3/5] END SCL=MinMaxScaler(), SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-1.094 total time=   0.4s
[CV 4/5] END SCL=MinMaxScaler(), SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-1.082 total time=   0.5s
[CV 5/5] END SCL=MinMaxScaler(), SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-1.063 total time=   0.4s
[CV 1/5] END SCL=MinMaxScaler(), SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-1.116 total time=   0.3s
[CV 2/5] END SCL=MinMaxScaler(), SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-1.060 total time=   0.4s
[CV 3/5] END SCL=MinMaxScaler(), SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-1.094 total time=   0.6s
[CV 4/5] END SCL=MinMaxSca

Radial SVM with Standard Scaler

In [None]:
# Grid-search an RBF-kernel SVC on standardised features, scored by negative log-loss.
scaler = StandardScaler()
svm = SVC(kernel="rbf", probability=True, random_state=23)
pipe = Pipeline(steps=[("SCL", scaler), ("SVM", svm)])

# gamma candidates: ten evenly spaced values in [0.001, 5] plus the "scale" and "auto" options.
params = dict(
    SVM__C=np.linspace(0.001, 5, 10),
    SVM__gamma=[*np.linspace(0.001, 5, 10), "scale", "auto"],
    SVM__decision_function_shape=["ovo", "ovr"],
)

gcv = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring="neg_log_loss",
    cv=kfold,
    verbose=5,
)
gcv.fit(X, y)
print(gcv.best_params_, gcv.best_score_, sep="\n")

Fitting 5 folds for each of 240 candidates, totalling 1200 fits
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.001;, score=-1.174 total time=   1.0s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.001;, score=-1.162 total time=   0.8s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.001;, score=-1.125 total time=   0.7s
[CV 4/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.001;, score=-1.177 total time=   0.7s
[CV 5/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.001;, score=-1.249 total time=   0.7s
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.5564444444444444;, score=-1.176 total time=   0.7s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.5564444444444444;, score=-1.106 total time=   0.7s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovo, SVM__gamma=0.5564444444444444;, score=-1.077 total time=   0.7s
[

Radial SVM with MinMax Scaler

In [None]:
# Grid-search an RBF-kernel SVC on min-max scaled features, scored by negative log-loss.
# The splitter is re-created here, so this cell does not depend on the earlier kfold cell.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)

scaler = MinMaxScaler()
svm = SVC(kernel="rbf", probability=True, random_state=23)
pipe = Pipeline(steps=[("SCL", scaler), ("SVM", svm)])

# gamma candidates: ten evenly spaced values in [0.001, 5] plus the "scale" and "auto" options.
params = dict(
    SVM__C=np.linspace(0.001, 5, 10),
    SVM__gamma=[*np.linspace(0.001, 5, 10), "scale", "auto"],
    SVM__decision_function_shape=["ovo", "ovr"],
)

# Runs with five parallel jobs and no verbose logging.
gcv = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring="neg_log_loss",
    cv=kfold,
    n_jobs=5,
)
gcv.fit(X, y)
print(gcv.best_params_, gcv.best_score_, sep="\n")

# Keep the full per-candidate results table for inspection.
pd_cv = pd.DataFrame(gcv.cv_results_)

{'SVM__C': 5.0, 'SVM__decision_function_shape': 'ovo', 'SVM__gamma': 0.5564444444444444}
-1.0312897379357935


In [61]:
# Final search: compare both scalers and both kernels with C fixed at 5,
# scored by negative log-loss on the shared stratified folds.
#
# The scaler step is instantiated here instead of being read from a `scaler`
# variable left behind by an earlier cell, so this cell also runs when the
# earlier search cells are skipped. The instance is only a placeholder: the
# grid's "SCL" entry replaces the step for every candidate.
svm = SVC(kernel="rbf", probability=True, random_state=23)
pipe = Pipeline([("SCL", StandardScaler()), ("SVM", svm)])

# Settings common to both kernels.
common_grid = {
    "SCL": [MinMaxScaler(), StandardScaler()],
    "SVM__C": [5],
    "SVM__decision_function_shape": ["ovo", "ovr"],
}

# gamma is a coefficient of the RBF kernel and is ignored by the linear kernel,
# so crossing the two refits the same linear model once per gamma value.
# Two sub-grids search gamma only where it matters; an RBF winner still reports
# the same best_params_ keys as before.
params = [
    {**common_grid, "SVM__kernel": ["linear"]},
    {
        **common_grid,
        "SVM__kernel": ["rbf"],
        "SVM__gamma": [*np.linspace(0.001, 5, 10), "scale", "auto"],
    },
]

gcv = GridSearchCV(pipe, param_grid=params, n_jobs=5, cv=kfold, scoring="neg_log_loss")
gcv.fit(X, y)
print(gcv.best_params_)
print(gcv.best_score_)

{'SCL': MinMaxScaler(), 'SVM__C': 5, 'SVM__decision_function_shape': 'ovo', 'SVM__gamma': 0.5564444444444444, 'SVM__kernel': 'rbf'}
-1.0312897379357935


In [65]:
# Predict on the test features with the fitted grid search and write the
# predictions into the submission template's "quality" column.
y_pred = gcv.predict(test)
sub = sub.assign(quality=y_pred)
sub.to_csv("submission_pure.csv", index=False)