In [36]:
from sklearn.datasets import load_iris
iris = load_iris()
x, y = iris.data, iris.target
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
svc = SVC(kernel='linear')
scores = cross_val_score(svc, x, y, cv=5)
print(scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

[0.96666667 1.         0.96666667 0.96666667 1.        ]
Accuracy: 0.98 (+/- 0.03)


In [37]:
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
x, y = cancer.data, cancer.target
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
params_tree = {'max_depth': range(5, 15), 'criterion': ['gini', 'entropy'], 'min_samples_split': range(2, 10)}
kf = KFold(n_splits=10, shuffle=False)
grid_tree = GridSearchCV(dt, params_tree, cv=kf)
grid_tree.fit(x, y)
print(grid_tree.best_params_)
print(grid_tree.best_score_)
from sklearn.metrics import classification_report
print(classification_report(y, grid_tree.predict(x)))

{'criterion': 'entropy', 'max_depth': 14, 'min_samples_split': 8}
0.9455200501253131
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       212
           1       1.00      0.99      0.99       357

    accuracy                           0.99       569
   macro avg       0.99      0.99      0.99       569
weighted avg       0.99      0.99      0.99       569


In [38]:
from sklearn.datasets import load_breast_cancer
breast_cancer = load_breast_cancer()
x, y = breast_cancer.data, breast_cancer.target
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [39]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(max_iter=1000)
lr.fit(x_train, y_train)
lr_y_pred = lr.predict(x_test)
from sklearn.metrics import classification_report
print(classification_report(y_test, lr_y_pred))

              precision    recall  f1-score   support

           0       0.97      0.98      0.98        63
           1       0.99      0.98      0.99       108

    accuracy                           0.98       171
   macro avg       0.98      0.98      0.98       171
weighted avg       0.98      0.98      0.98       171


In [40]:
from sklearn.model_selection import GridSearchCV
params_lr = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'penalty': ['l2'], 'tol': [1e-6, 1e-5, 1e-4]}
from sklearn.model_selection import KFold
kf = KFold(n_splits=10, shuffle=False)
grid_lr = GridSearchCV(lr, params_lr, cv=kf)
grid_lr.fit(x_train, y_train)
print(grid_lr.best_params_)
print(grid_lr.best_score_)
from sklearn.metrics import classification_report
print(classification_report(y_test, grid_lr.predict(x_test)))

{'C': 0.1, 'penalty': 'l2', 'tol': 1e-06}
0.9748717948717948
              precision    recall  f1-score   support

           0       1.00      0.97      0.98        63
           1       0.98      1.00      0.99       108

    accuracy                           0.99       171
   macro avg       0.99      0.98      0.99       171
weighted avg       0.99      0.99      0.99       171


In [41]:
from sklearn.svm import SVC
svc = SVC()
svc.fit(x_train, y_train)
svc_y_pred = svc.predict(x_test)
from sklearn.metrics import classification_report
print(classification_report(y_test, svc_y_pred))

              precision    recall  f1-score   support

           0       0.97      0.97      0.97        63
           1       0.98      0.98      0.98       108

    accuracy                           0.98       171
   macro avg       0.97      0.97      0.97       171
weighted avg       0.98      0.98      0.98       171


In [42]:
from sklearn.model_selection import GridSearchCV
params_lsvc = {'C': [4.5, 5, 5.5, 6], 'gamma': [0.0009, 0.001, 0.0011, 0.002]}
from sklearn.model_selection import KFold
kf = KFold(n_splits=10, shuffle=False)
grid_svc = GridSearchCV(svc, params_lsvc, cv=kf)
grid_svc.fit(x_train, y_train)
print(grid_svc.best_params_)
print(grid_svc.best_score_)
from sklearn.metrics import classification_report
print(classification_report(y_test, grid_svc.predict(x_test)))

{'C': 5.5, 'gamma': 0.002}
0.9697435897435897
              precision    recall  f1-score   support

           0       1.00      0.97      0.98        63
           1       0.98      1.00      0.99       108

    accuracy                           0.99       171
   macro avg       0.99      0.98      0.99       171
weighted avg       0.99      0.99      0.99       171


In [43]:
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
dt.fit(x_train, y_train)
dt_y_pred = dt.predict(x_test)
from sklearn.metrics import classification_report
print(classification_report(y_test, dt_y_pred))

              precision    recall  f1-score   support

           0       0.87      0.95      0.91        63
           1       0.97      0.92      0.94       108

    accuracy                           0.93       171
   macro avg       0.92      0.93      0.93       171
weighted avg       0.93      0.93      0.93       171


In [45]:
from sklearn.model_selection import GridSearchCV
params_dt = {'max_depth': range(5, 15), 'criterion': ['gini', 'entropy'], 'min_samples_split': range(2, 10)}
from sklearn.model_selection import KFold
kf = KFold(n_splits=10, shuffle=False)
grid_dt = GridSearchCV(dt, params_dt, cv=kf)
grid_dt.fit(x_train, y_train)
print(grid_dt.best_params_)
print(grid_dt.best_score_)
from sklearn.metrics import classification_report
print(classification_report(y_test, grid_dt.predict(x_test)))

{'criterion': 'entropy', 'max_depth': 11, 'min_samples_split': 9}
0.9422435897435898
              precision    recall  f1-score   support

           0       0.95      0.90      0.93        63
           1       0.95      0.97      0.96       108

    accuracy                           0.95       171
   macro avg       0.95      0.94      0.94       171
weighted avg       0.95      0.95      0.95       171


In [46]:
x_try = x[30].reshape(1, -1)
grid_search_y_lr_pred = grid_lr.predict(x_try)
print(y[30], grid_search_y_lr_pred)

0 [0]
