In [None]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.datasets import load_wine
import dalex
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')

# Zbiór danych apartments

In [None]:
# Load the apartments dataset shipped with dalex.
aparts = dalex.datasets.load_apartments()

# info() writes its summary to stdout by itself and returns None,
# so it must not be wrapped in print() (that would emit an extra "None").
aparts.info()

# Preview of the first rows (rendered as the cell output).
aparts.head()

### Encoding i podział na zbiory treningowy i testowy

In [None]:
# Replace the 'district' column with integer codes; it is the classification target.
le = LabelEncoder()
aparts['district'] = le.fit_transform(aparts['district'])

# Target: the encoded district. Features: every remaining column.
y = aparts['district']
X = aparts.drop(columns='district')

# 70/30 train/test split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=29,
)

### Prosty model bez skalowania

In [None]:
# Baseline: SVC with default hyperparameters, fitted on the unscaled features.
model_svc = SVC().fit(X_train, y_train)
y_pred = model_svc.predict(X_test)

# Accuracy of the baseline on the held-out test set.
simple_model = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy score dla prostego modelu bez skalowania i strojenia hiperparametrów wynosi: ',  simple_model)

### Strojenie hiperparametrów bez skalowania

In [None]:
model_svc = SVC()

# Candidate values for the regularization strength.
# SVC requires C to be strictly positive, so the range starts at 1
# (range(10) would include the invalid value 0 and waste fits on failures).
C = [*range(1, 11), 20, 50]

parameters = {
    'C' : C,
    'gamma' : ['scale', 'auto', 3, 10],
    # SVC expects an integer degree; np.linspace would produce floats.
    # The kernel is left at its default here, so degree only matters
    # if a 'poly' kernel is added to the search space.
    'degree': list(range(1, 6)),
}

# Every entry is a finite list, so the search space has 12 * 4 * 5 = 240
# combinations, fewer than n_iter; the search is therefore expected to try
# each combination at most once. random_state fixes the sampling order so
# re-running the notebook reproduces the same result.
svc_rand = RandomizedSearchCV(model_svc, parameters, n_iter=1000, n_jobs=-1, random_state=29)
best_svc = svc_rand.fit(X_train ,y_train)
best_svc.best_estimator_

In [None]:
# Take the best estimator found by the search on unscaled data,
# fit it on the training split and score it on the test split.
model_svc = best_svc.best_estimator_.fit(X_train, y_train)
y_pred = model_svc.predict(X_test)

hiper_model = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy score dla modelu bez skalowania ze strojeniem hiperparametrów wynosi: ',  hiper_model)

### Skalowanie bez strojenia hiperparametrów

In [None]:
# Standardize the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split; calling fit_transform on the
# test data would re-estimate the scaling from the test set and put the two
# splits on different scales.
# NOTE: X_train / X_test are overwritten with the scaled arrays, so the
# cells that follow operate on scaled data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Default SVC on the standardized features.
model_svc = SVC()

model_svc.fit(X_train, y_train)
y_pred = model_svc.predict(X_test)

scal_model = accuracy_score(y_test, y_pred)
print('Accuracy score dla prostego modelu ze skalowaniem i bez strojenia hiperparametrów wynosi: ',  scal_model)

### Strojenie hiperparametrów ze skalowaniem

In [None]:
# Randomized hyperparameter search on the scaled training data.
# `parameters` is the search space defined in the earlier tuning cell.
model_svc = SVC()

# random_state makes the sampled candidates (and therefore the selected
# estimator) reproducible across notebook runs.
svc_rand = RandomizedSearchCV(model_svc, parameters, n_iter=1000, n_jobs=-1, random_state=29)
best_svm = svc_rand.fit(X_train ,y_train)
best_svm.best_estimator_

In [None]:
# Evaluate the best estimator found by the search on scaled data.
model_svc = best_svm.best_estimator_

model_svc.fit(X_train, y_train)

y_pred = model_svc.predict(X_test)

# This is a classification task, so the model is scored with accuracy, the
# same metric as every other variant in this notebook. An error metric on
# the encoded class labels would not be comparable with those scores.
hiper_scal_model = accuracy_score(y_test, y_pred)
print('Accuracy score dla modelu ze skalowaniem i strojeniem hiperparametrów wynosi: ',  hiper_scal_model)

### Podsumowanie
Skalowanie poprawiło wyniki modelu. Strojenie hiperparametrów bez skalowania nie poprawiło wyniku, a po skalowaniu nawet go pogorszyło. Może to wynikać z tego, że RandomizedSearchCV nie trafił w optymalną kombinację hiperparametrów.

# Zbiór danych Wine

In [None]:
# Load the Wine dataset; later cells read its 'data', 'feature_names' and 'target' entries.
wine_dict = load_wine()

In [None]:
# Class labels and the feature matrix (as a DataFrame with named columns).
Y = wine_dict['target']
X = pd.DataFrame(data=wine_dict['data'], columns=wine_dict['feature_names'])

# Hold out 30% of the samples for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=29,
)

### Prosty model bez skalowania

In [None]:
# Baseline for Wine: default SVC (seeded) fitted on the unscaled features.
model_svc = SVC(random_state=29).fit(X_train, y_train)
y_pred = model_svc.predict(X_test)

# Accuracy of the baseline on the held-out test set.
simple_model = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy score dla prostego modelu bez skalowania i strojenia hiperparametrów wynosi: ',  simple_model)

### Strojenie hiperparametrów bez skalowania

In [None]:
model_svc = SVC()

# Search space for the randomized search.
parameters = {
    # Flat list of scalar candidates. Nesting np.linspace(...) inside the list
    # would make the whole array a single (invalid) candidate value for C.
    'C' : [*range(1, 11), 20, 50],
    'gamma' : ['scale', 'auto', 3, 10],
    # SVC expects an integer degree; np.linspace would produce floats.
    # degree is only used by the 'poly' kernel.
    'degree': list(range(1, 6)),
    'kernel': ['linear', 'poly', 'rbf']
}

# 100 candidates are sampled from the space above; random_state makes the
# sample (and therefore the selected estimator) reproducible.
svc_rand = RandomizedSearchCV(model_svc, parameters,  n_iter=100, random_state=29)
best_svc = svc_rand.fit(X_train ,y_train)
best_svc.best_estimator_

In [None]:
# Take the best estimator found by the search on unscaled Wine data,
# fit it on the training split and score it on the test split.
model_svc = best_svc.best_estimator_.fit(X_train, y_train)
y_pred = model_svc.predict(X_test)

hiper_model = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy score dla modelu bez skalowania ze strojeniem hiperparametrów wynosi: ',  hiper_model)

### Skalowanie bez strojenia hiperparametrów

In [None]:
# Standardize the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split; calling fit_transform on the
# test data would re-estimate the scaling from the test set and put the two
# splits on different scales.
# NOTE: X_train / X_test are overwritten with the scaled arrays, so the
# cells that follow operate on scaled data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Default SVC (seeded) on the standardized features.
model_svc = SVC(random_state=29)

model_svc.fit(X_train, y_train)
y_pred = model_svc.predict(X_test)

scal_model = accuracy_score(y_test, y_pred)
print('Accuracy score dla prostego modelu ze skalowaniem i bez strojenia hiperparametrów wynosi: ',  scal_model)

### Strojenie hiperparametrów ze skalowaniem

In [None]:
# Seeded randomized search over `parameters`, run on the training data
# that the preceding cell standardized.
model_svc = SVC()
svc_rand = RandomizedSearchCV(
    estimator=model_svc,
    param_distributions=parameters,
    n_iter=100,
    random_state=29,
)
best_svc = svc_rand.fit(X_train, y_train)
best_svc.best_estimator_

In [None]:
# Take the best estimator found by the search on scaled Wine data,
# fit it on the training split and score it on the test split.
model_svc = best_svc.best_estimator_.fit(X_train, y_train)
y_pred = model_svc.predict(X_test)

hiper_model = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy score dla modelu ze skalowaniem i strojeniem hiperparametrów wynosi: ',  hiper_model)

### Podsumowanie
Dzięki skalowaniu model osiąga prawie 98% dokładności (accuracy), jednak strojenie hiperparametrów nie poprawiło wyników.