In [64]:
"""
Following up on our from-scratch implementations of linear classification models,
we will now use scikit-learn's linear classification models and see how they fare
compared to ours.
"""
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Perceptron
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler

In [65]:
## Load the iris dataset and pull out the feature matrix and the class labels
iris = load_iris()
features, targets = iris.data, iris.target

In [66]:
## Train, test split
## 20% of the samples are held out for testing; stratify=targets keeps the class
## proportions the same in both parts. random_state pins the shuffle so that the
## split (and every score computed from it) is the same on each rerun.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    targets,
    test_size=0.2,
    stratify=targets,
    random_state=1,
)

In [67]:
## Training and testing the sklearn Perceptron
## fit() returns the estimator itself, so construction and training are chained.
## NOTE(review): the model is trained on the raw (unscaled) features here - confirm
## this is intended, since the scaled copies are only built for logistic regression.
ppn = Perceptron(random_state=1, max_iter=500, eta0=4).fit(x_train, y_train)
## Accuracy on the held-out test split (displayed as the cell output)
ppn.score(x_test, y_test)

0.9666666666666667

In [68]:
## Scaling data for logistic regression
## The scaler learns its statistics from the training split only and the same
## transformation is then applied to the test split.
sc = StandardScaler()
x_train_sc = sc.fit_transform(x_train)
x_test_sc = sc.transform(x_test)

In [69]:
## Training and testing the sklearn LogisticRegression
## Trained on the standardized features; fit() returns the estimator itself.
lrg = LogisticRegression(random_state=1, max_iter=50).fit(x_train_sc, y_train)
## Accuracy on the standardized held-out test split (displayed as the cell output)
lrg.score(x_test_sc, y_test)

0.9666666666666667

In [70]:
## Sklearn models work fine for iris, what about for breast cancer?
## From here on, `features` and `targets` refer to the breast cancer dataset.
breast_cancer = load_breast_cancer()
features, targets = breast_cancer.data, breast_cancer.target

In [71]:
## Train, test split
## 30% of the samples are held out for testing; stratify=targets keeps the class
## proportions the same in both parts. random_state pins the shuffle so that the
## split (and every score computed from it) is the same on each rerun.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    targets,
    test_size=0.3,
    stratify=targets,
    random_state=1,
)

In [72]:
## Scaling for logistic regression
## A fresh scaler is created for this dataset instead of re-fitting the object left
## over from the iris section, so this cell does not depend on that earlier cell.
## Statistics are learned from the training split only, then applied to both splits.
sc = StandardScaler().fit(x_train)
x_train_sc = sc.transform(x_train)
x_test_sc = sc.transform(x_test)

In [73]:
## Testing the perceptron
## fit() returns the estimator itself, so construction and training are chained.
## NOTE(review): trained on the raw (unscaled) features and with max_iter=50, whereas
## the iris run used max_iter=500 - confirm both choices are intended before
## comparing this score with the logistic regression one.
ppn = Perceptron(random_state=1, max_iter=50, eta0=4).fit(x_train, y_train)
## Accuracy on the held-out test split (displayed as the cell output)
ppn.score(x_test, y_test)

0.8947368421052632

In [74]:
## Testing logistic regression
## Trained on the standardized features; fit() returns the estimator itself.
lrg = LogisticRegression(random_state=1, max_iter=10000).fit(x_train_sc, y_train)
## Accuracy on the standardized held-out test split (displayed as the cell output)
lrg.score(x_test_sc, y_test)

0.9707602339181286