<a href="https://colab.research.google.com/github/olhaishchenko/-data_science/blob/master/H_w__5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [73]:
import numpy as np
import pandas as pd

from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.model_selection import GridSearchCV


In [74]:
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/df_accelerometer.csv", delimiter=",")

In [75]:
df_cut = df.drop("kinds", axis=1)

In [76]:
X_train, X_test, y_train, y_test = train_test_split(df_cut, df["kinds"], test_size=0.2, shuffle=True)

In [77]:

model = RandomForestClassifier(n_estimators=10)# n_estimators - кількість дерев

In [78]:
model.fit(X_train, y_train)

In [79]:
f1_score_train_micro = f1_score(y_train, model.predict(X_train), average="micro")
f1_score_test_micro = f1_score(y_test, model.predict(X_test), average="micro")
print(f'{f1_score_train_micro = }, {f1_score_test_micro = }')

f1_score_train_micro = 1.0, f1_score_test_micro = 0.9992266047950503


In [80]:
f1_score_train_macro = f1_score(y_train, model.predict(X_train), average="macro")
f1_score_test_macro = f1_score(y_test, model.predict(X_test), average="macro")
print(f'{f1_score_train_macro = }, {f1_score_test_macro = }')

f1_score_train_macro = 1.0, f1_score_test_macro = 0.994355753534571


In [81]:
f1_score_train_weighted = f1_score(y_train, model.predict(X_train), average="weighted")
f1_score_test_weighted = f1_score(y_test, model.predict(X_test), average="weighted")
print(f'{f1_score_train_weighted = }, {f1_score_test_weighted = }')

f1_score_train_weighted = 1.0, f1_score_test_weighted = 0.9992343295458731


In [82]:
X_train, X_valid, y_train, y_valid = train_test_split(df_cut, df["kinds"], test_size=0.2, shuffle=True)

**SVC rbf**

In [83]:

svc_rbf = SVC(kernel='rbf', C=1, probability=True).fit(X_train, y_train)

In [84]:
y_valid_pred__rbf = svc_rbf.predict(X_valid)
y_train_pred__rbf = svc_rbf.predict(X_train)


In [85]:
ac_train_rbf = accuracy_score(y_train, y_train_pred__rbf >= 0.5)
ac_valid_rbf = accuracy_score(y_valid, y_valid_pred__rbf >= 0.5)

In [86]:
f1_sc_train_rbf = f1_score(y_train, y_train_pred__rbf , average="macro")
f1_sc_valid_rbf = f1_score(y_valid, y_valid_pred__rbf, average="macro")

In [87]:
y_valid_pred_rbf = svc_rbf.predict_proba(X_valid)
y_train_pred_rbf = svc_rbf.predict_proba(X_train)

**SVC linear**

In [88]:
svc_linear = SVC(kernel='linear', C=2, probability=True).fit(X_train, y_train)

In [89]:
y_valid_pred_linear = svc_linear.predict_proba(X_valid)[:, 1]
y_train_pred_linear = svc_linear.predict_proba(X_train)[:, 1]

In [90]:
ac_train_linear = accuracy_score(y_train, y_train_pred_linear >= 0.5)
ac_valid_linear = accuracy_score(y_valid, y_valid_pred_linear >= 0.5)

In [91]:
f1_sc_train_linear = f1_score(y_train, y_train_pred_linear >= 0.5, average="macro")
f1_sc_valid_linear = f1_score(y_valid, y_valid_pred_linear >= 0.5, average="macro")

**SVC poly**

In [102]:
svc_poly = SVC(kernel='poly', degree=2, C=100, probability=True).fit(X_train, y_train)

In [103]:
y_valid_pred_poly = svc_poly.predict_proba(X_valid)[:, 1]
y_train_pred_poly = svc_poly.predict_proba(X_train)[:, 1]

In [104]:
ac_train_poly = accuracy_score(y_train, y_train_pred_poly>0.5)
ac_valid_poly = accuracy_score(y_valid, y_valid_pred_poly>0.5)

In [105]:
f1_sc_train_poly = f1_score(y_train, y_train_pred_poly >= 0.5, average="macro")
f1_sc_valid_poly = f1_score(y_valid, y_valid_pred_poly >= 0.5, average="macro")

In [106]:
print(f'{ac_train_rbf = }, {ac_valid_rbf = }')
print(f'{f1_sc_train_rbf = }, {f1_sc_valid_rbf = }')

print(f'{ac_train_linear = }, {ac_valid_linear = }')
print(f'{f1_sc_train_linear = }, {f1_sc_valid_linear = }')

print(f'{ac_train_poly = }, {ac_valid_poly = }')
print(f'{f1_sc_train_poly = }, {f1_sc_valid_poly = }')


ac_train_rbf = 0.16270071580576514, ac_valid_rbf = 0.1531322505800464
f1_sc_train_rbf = 0.762124275290761, f1_sc_valid_rbf = 0.8012495999158433
ac_train_linear = 0.0, ac_valid_linear = 0.0
f1_sc_train_linear = 0.0, f1_sc_valid_linear = 0.0
ac_train_poly = 0.0, ac_valid_poly = 0.0
f1_sc_train_poly = 0.0, f1_sc_valid_poly = 0.0


# підбір параметрів

In [97]:
svc_poly = SVC(kernel='poly', probability=True).fit(X_train, y_train)
params = {
    'C': [0.01, 0.1, 1., 10., 100.],
    'degree': [1, 2, 3, 4, 5]
}
# {'C': 100.0, 'degree': 3}

In [98]:
model = GridSearchCV(svc_poly, params, cv=5, verbose=True).fit(X_train, y_train)

Fitting 5 folds for each of 25 candidates, totalling 125 fits


In [99]:
model.best_params_, model.best_score_

({'C': 100.0, 'degree': 2}, 0.9978714042028907)

In [100]:
best_model = model.best_estimator_
best_model