In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression, Perceptron, SGDClassifier
from sklearn.model_selection import train_test_split

In [2]:
# Fetch the breast-cancer dataset as a (features, labels) pair rather than a
# Bunch object, and unpack it into the feature matrix X and the label vector y.
(X, y) = load_breast_cancer(
    return_X_y=True,
)

In [3]:
# Hold out 20% of the samples for testing; the fixed seed makes the split
# reproducible across runs.
trainX, testX, trainY, testY = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Shapes are printed in the order: train features, train labels,
# test features, test labels.
print(*(part.shape for part in (trainX, trainY, testX, testY)))

(455, 30) (455,) (114, 30) (114,)


In [4]:
# Ground-truth labels of the held-out test split, shown for visual comparison
# with the label predictions printed by the model cells further down.
print(testY)

[1 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0
 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0
 1 1 1 0 1 1 0 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0
 1 1 0]


## Logistic Regression

In [5]:
# Fit a logistic-regression classifier on the training split with the
# liblinear solver; random_state is pinned so the fit is reproducible.
clf = LogisticRegression(random_state=42, solver='liblinear')
clf.fit(trainX, trainY)
# Predicted class labels for the held-out test samples.
out_y = clf.predict(testX)
print(out_y)

[1 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0
 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0
 1 1 1 1 1 1 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0
 1 0 0]


In [6]:
# Per-class probability estimates from the logistic-regression model fitted
# in the previous cell, computed for every test sample.
out_prob = clf.predict_proba(testX)
# Show only the first test sample's probabilities.
print(out_prob[0, :])

[0.16491424 0.83508576]


In [7]:
# Mean accuracy on the held-out split. `clf` is still the LogisticRegression
# model fitted earlier in this section.
out_score = clf.score(X=testX, y=testY)
print('Logistic score:', out_score)

Logistic score: 0.956140350877193


## Perceptron

In [8]:
# Fit a perceptron (default hyperparameters, fixed seed) on the training split.
# This rebinds `clf`, so every later cell that uses `clf` now refers to the
# perceptron rather than the logistic-regression model.
# NOTE(review): the features are passed in unscaled; scikit-learn's SGD guide
# recommends standardising inputs for SGD-trained linear models, so consider a
# StandardScaler pipeline before comparing accuracies across models.
clf = Perceptron(random_state=42)
clf.fit(trainX, trainY)
# Predicted class labels for the held-out test samples.
out_y = clf.predict(testX)
print(out_y)

[0 0 0 1 1 0 0 0 1 0 1 0 1 0 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 1 1 0 1 1 0
 0 0 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0 1 1 0 0 1 0
 1 1 0 0 1 1 0 1 0 0 0 0 0 0 1 1 1 0 0 1 1 1 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0
 1 0 0]


In [9]:
# Mean accuracy on the held-out split. `clf` is the Perceptron fitted in the
# previous cell.
out_score = clf.score(X=testX, y=testY)
print('Perceptron score:', out_score)

Perceptron score: 0.868421052631579


## SGD

In [10]:
# SGDClassifier trains a linear model with stochastic gradient descent.
# Losses:
#   'log_loss'   gives logistic regression (spelled 'log' before scikit-learn 1.1),
#   'perceptron' is the linear loss used by the perceptron algorithm,
#   'hinge'      means a linear SVM (wait for the next lecture): max(0, 1 - <w,x>y)
# 'optimal' learning rate: eta = 1.0 / (alpha * (t + t0)), where t0 is picked
# by a heuristic. alpha is the L2 penalty strength and is set to 1.0 here, so
# it is also the factor in that schedule (not the library default of 0.0001).
# eta0 is not used by the 'optimal' schedule, so it is not passed.
clf = SGDClassifier(
    loss='hinge',
    penalty='l2',
    alpha=1.0,
    learning_rate='optimal',
    random_state=42,
)
clf.fit(trainX, trainY)
# Predicted class labels for the held-out test samples.
out_y = clf.predict(testX)
print(out_y)

[0 0 0 1 1 0 0 0 1 1 1 0 1 1 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 1 1 0 1 1 0
 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0 1 1 1 0 0 1 1 0 0 1 1
 1 1 1 0 1 1 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0
 1 1 0]


In [11]:
# Mean accuracy on the held-out split. `clf` is the hinge-loss SGDClassifier
# fitted in the previous cell.
out_score = clf.score(X=testX, y=testY)
print('Hinge SGD:', out_score)

Hinge SGD: 0.9473684210526315


Смотрите другие параметры на страничке класса: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html