### Imports 

In [34]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

In [35]:
df = pd.read_csv('winequality.csv')
df.head()

Unnamed: 0,type,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,0,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,0,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,0,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,0,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


### Transform and split data

In [36]:
def data_preprocess(df):
    new_df = df.copy()
    new_df.replace(np.nan, 0, inplace = True)
    
    new_df['quality'] = new_df['quality'].apply(lambda x: 0 if x <= 5 else x)
    new_df['quality'] = new_df['quality'].apply(lambda x: 1 if x >= 6 else x)
    
    X = np.array(new_df.drop('quality', axis=1))
    y = np.array(new_df['quality'])
    
    ss_scaler = preprocessing.StandardScaler()
    pred = ss_scaler.fit_transform(X)
    
    X_train, X_test, y_train, y_test = train_test_split(pred, y, test_size = 0.25, random_state = 42)
    return (X_train, y_train), (X_test, y_test)

In [37]:
(X_train, y_train), (X_test, y_test) = data_preprocess(df)

In [38]:
print(X_train[:2])

[[-0.57136659  0.07127869 -0.48054096  1.17914161 -0.09303318 -0.79974133
   0.0830898  -0.15472329 -0.36573452  0.13010447  0.06101473  0.25842195]
 [-0.57136659  1.50396711 -0.72301571  0.56008035 -0.63948302 -0.05776881
  -0.70572997  0.62379657  0.16787589 -0.86828773 -0.47467813 -0.99931317]]


### Train model SVC

In [39]:
def train_SVC_model(X_train,y_train):
    svcm = SVC(gamma = 'auto', random_state = 42)
    svcm.fit(X_train,y_train)
    return svcm

In [40]:
svc = train_SVC_model(X_train,y_train)
svc.classes_

array([0, 1], dtype=int64)

### Test model with log loss metric

In [41]:
def custom_scoring_function(y_true, y_pred):
    epsilon = 1e-15 
    y_pred = np.maximum(epsilon, y_pred)
    y_pred = np.minimum(1-epsilon, y_pred)
    loss_sum = 0
    n = len(y_true)
    for entry in range(len(y_true)):
        p = y_pred[entry]
        y = y_true[entry]
        loss_sum += -y * np.log(p) - (1-y) * np.log(1-p)
    return round(float(-loss_sum / -n), 7)

In [42]:
y_pred = svc.predict(X_test)
print('Log Loss value: ', custom_scoring_function(y_test, y_pred))
print('Accuracy: ',round(accuracy_score(y_test,y_pred),4))

Log Loss value:  7.3329468
Accuracy:  0.7877


### Get hyperparameters

In [43]:
def get_model_hyperparams(model):
    param = []
    for (key, value) in model.get_params().items():
        param.append(key)
    return param

In [44]:
get_model_hyperparams(svc)

['C',
 'cache_size',
 'class_weight',
 'coef0',
 'decision_function_shape',
 'degree',
 'gamma',
 'kernel',
 'max_iter',
 'probability',
 'random_state',
 'shrinking',
 'tol',
 'verbose']

### Hyperparameter search

In [45]:
def tune_SVC_model(X_train, y_train):
    from sklearn.metrics import make_scorer, fbeta_score
    scorer = make_scorer(fbeta_score, beta = 2)
    D = {'C':[0.1, 1, 10], 'gamma': [0.01, 0.1, 1]}
    grid = GridSearchCV(SVC(), param_grid = D, scoring = scorer)
    return grid.fit(X_train, y_train)

In [46]:
y_pred = svc.predict(X_test)
print('Log Loss value: ',custom_scoring_function(y_test,y_pred))
print('Accuracy: ',round(accuracy_score(y_test,y_pred),4))

Log Loss value:  7.3329468
Accuracy:  0.7877


### Return best parameters

In [47]:
def get_best_params(model):
    return model.best_param_

In [48]:
get_best_params(svc)

NameError: name 'svc_tuned' is not defined