In [1]:
# imports: pandas for loading the data, scikit-learn for splitting, scaling and the classifiers
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

In [2]:
# Load the raisin dataset. Every column except the last is used as a feature;
# the last column holds the class name.
raisins = pd.read_csv('./kylemontgomery/Raisin_Dataset.csv')
data = raisins.to_numpy()  # raw values, left untouched by the encoding below

# Cast the features to float up front so X is a numeric array rather than an
# object array (the mixed-type frame would otherwise yield dtype=object).
X = raisins.iloc[:, :-1].to_numpy(dtype='float')

# encoding the target column: Kecimen -> 1, Besni -> -1
# The mapping is applied to a separate Series, so `data` is not modified
# through a view, and unmapped labels become NaN instead of slipping through.
label_codes = {'Kecimen': 1, 'Besni': -1}
encoded = raisins.iloc[:, -1].map(label_codes)
if encoded.isna().any():
    unknown = raisins.iloc[:, -1][encoded.isna()].unique().tolist()
    raise ValueError(f'Unexpected class labels in target column: {unknown}')
y = encoded.to_numpy(dtype='int')

In [3]:
# Hold out 20% of the samples as a test set. The split is stratified on y and
# uses a fixed seed so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=33,
)

In [4]:
# Standardise the features: the scaler is fitted on the training split only
# and the same fitted transform is then applied to both splits.
# NOTE(review): the scaler is fitted before GridSearchCV runs, so each CV
# validation fold has already contributed to the scaling statistics. Consider
# moving the scaler into a Pipeline if that matters here.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Linear-kernel SVC: grid-search the regularisation parameter C with 5-fold CV
svc = SVC(kernel='linear')
params = dict(C=[0.001, 0.01, 0.1, 0.7, 1, 2, 3])
search = GridSearchCV(
    estimator=svc,
    param_grid=params,
    cv=5,
    n_jobs=-1,  # use every available core
    verbose=4,
).fit(X_train, y_train)

# Report the selected setting, its cross-validation score and the test-set score
print('Best parameters: ', search.best_params_)
print('Best cross-validation score:', search.best_score_)
print('Test set score: ', search.score(X_test, y_test))

Fitting 5 folds for each of 7 candidates, totalling 35 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 128 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  35 | elapsed:   13.0s remaining:  1.3min
[Parallel(n_jobs=-1)]: Done  14 out of  35 | elapsed:   13.3s remaining:   20.0s
[Parallel(n_jobs=-1)]: Done  23 out of  35 | elapsed:   13.4s remaining:    7.0s
[Parallel(n_jobs=-1)]: Done  32 out of  35 | elapsed:   13.5s remaining:    1.3s
[Parallel(n_jobs=-1)]: Done  35 out of  35 | elapsed:   13.5s finished


Best parameters:  {'C': 3}
Best cross-validation score: 0.875
Test set score:  0.85


In [6]:
# Polynomial-kernel SVC: grid-search C, degree, gamma and coef0 with 5-fold CV
svc = SVC(kernel='poly')
params = {
    'C': [0.001, 0.01, 0.1, 0.7, 1, 2, 3],
    'degree': [2, 3, 4],
    'gamma': [0.01, 0.1, 1, 'scale'],
    'coef0': [0, 1],
}
search = GridSearchCV(
    estimator=svc,
    param_grid=params,
    cv=5,
    n_jobs=-1,  # use every available core
    verbose=4,
).fit(X_train, y_train)

# Report the selected setting, its cross-validation score and the test-set score
print('Best parameters: ', search.best_params_)
print('Best cross-validation score:', search.best_score_)
print('Test set score: ', search.score(X_test, y_test))

Fitting 5 folds for each of 168 candidates, totalling 840 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 128 concurrent workers.
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 585 out of 840 | elapsed:    0.7s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done 796 out of 840 | elapsed:    8.7s remaining:    0.5s


Best parameters:  {'C': 0.7, 'coef0': 1, 'degree': 3, 'gamma': 0.1}
Best cross-validation score: 0.8763888888888889
Test set score:  0.8444444444444444


[Parallel(n_jobs=-1)]: Done 840 out of 840 | elapsed:    8.9s finished


In [7]:
# RBF-kernel SVC: grid-search C and gamma with 5-fold CV
svc = SVC(kernel='rbf')
params = {
    'C': [0.001, 0.01, 0.1, 0.7, 1, 2, 3],
    'gamma': [0.01, 0.1, 1, 'scale'],
}
search = GridSearchCV(
    estimator=svc,
    param_grid=params,
    cv=5,
    n_jobs=-1,  # use every available core
    verbose=4,
).fit(X_train, y_train)

# Report the selected setting, its cross-validation score and the test-set score
print('Best parameters: ', search.best_params_)
print('Best cross-validation score:', search.best_score_)
print('Test set score: ', search.score(X_test, y_test))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 128 concurrent workers.
[Parallel(n_jobs=-1)]: Done  29 out of 140 | elapsed:    0.1s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  65 out of 140 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done 101 out of 140 | elapsed:    0.2s remaining:    0.1s


Fitting 5 folds for each of 28 candidates, totalling 140 fits
Best parameters:  {'C': 0.1, 'gamma': 0.1}
Best cross-validation score: 0.875
Test set score:  0.8611111111111112


[Parallel(n_jobs=-1)]: Done 137 out of 140 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 140 out of 140 | elapsed:    0.2s finished


In [8]:
# Logistic regression: grid-search the inverse regularisation strength C with 5-fold CV
lr = LogisticRegression()
params = dict(C=[0.001, 0.01, 0.1, 0.7, 1, 2, 3])
search = GridSearchCV(
    estimator=lr,
    param_grid=params,
    cv=5,
    n_jobs=-1,  # use every available core
    verbose=4,
).fit(X_train, y_train)

# Report the selected setting, its cross-validation score and the test-set score
print('Best parameters: ', search.best_params_)
print('Best cross-validation score:', search.best_score_)
print('Test set score: ', search.score(X_test, y_test))

Fitting 5 folds for each of 7 candidates, totalling 35 fits
Best parameters:  {'C': 3}
Best cross-validation score: 0.8680555555555557
Test set score:  0.8388888888888889


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 128 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  35 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  14 out of  35 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  23 out of  35 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  32 out of  35 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  35 out of  35 | elapsed:    0.0s finished
