In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_breast_cancer
import matplotlib.pyplot as plt
from sklearn.svm import SVC
import numpy as np

In [9]:
data = load_breast_cancer()
print(type(data.data))
print(data.data.shape)
print(type(data.target))
print(data.target.shape)

<class 'numpy.ndarray'>
(569, 30)
<class 'numpy.ndarray'>
(569,)


In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data.data,data.target,test_size=0.3)

Logistic Regression

In [11]:
lr = LogisticRegression(solver='liblinear', multi_class='ovr')
lr.fit(X_train, y_train)
lr.score(X_test, y_test)



0.9590643274853801

SVM

In [12]:
svm = SVC(gamma='auto')
svm.fit(X_train, y_train)
svm.score(X_test, y_test)

0.6549707602339181

RandomForest

In [13]:
rf = RandomForestClassifier(n_estimators=40)
rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.9707602339181286

KFold Cross Validation

In [14]:
from sklearn.model_selection import StratifiedKFold

In [15]:
def get_score(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)

In [16]:
folds = StratifiedKFold(n_splits=3)

scores_logistic = []
scores_svm = []
scores_rf = []

for train_index, test_index in folds.split(data.data,data.target):
    X_train, X_test, y_train, y_test = data.data[train_index], data.data[test_index], \
                                       data.target[train_index], data.target[test_index]
    scores_logistic.append(get_score(LogisticRegression(solver='liblinear',multi_class='ovr'), X_train, X_test, y_train, y_test))
    scores_svm.append(get_score(SVC(gamma='auto'), X_train, X_test, y_train, y_test))
    scores_rf.append(get_score(RandomForestClassifier(n_estimators=40), X_train, X_test, y_train, y_test))



In [18]:
print("Logistic Regression scores:\n", scores_logistic)
print("SVM scores:\n", scores_svm)
print("Random Forest scores:\n", scores_rf)

Logistic Regression scores:
 [0.9368421052631579, 0.968421052631579, 0.9417989417989417]
SVM scores:
 [0.6263157894736842, 0.6263157894736842, 0.6296296296296297]
Random Forest scores:
 [0.9368421052631579, 0.9789473684210527, 0.9682539682539683]


Cross Validation Score Function

In [None]:
from sklearn.model_selection import cross_val_score

Logistic regression with cv score function

In [21]:
cross_val_score(LogisticRegression(solver='liblinear', multi_class='ovr'), data.data, data.target, cv=3)



array([0.93684211, 0.96842105, 0.94179894])

SVM with cv score function

In [22]:
cross_val_score(SVC(gamma='auto'), data.data, data.target,cv=3)

array([0.62631579, 0.62631579, 0.62962963])

Random Forest with cv score function

In [23]:
cross_val_score(RandomForestClassifier(n_estimators=40),data.data, data.target,cv=3)

array([0.92105263, 0.96842105, 0.95767196])