In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import mglearn
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Classification example: the "forge" toy dataset, whose target has two classes.
X, y = mglearn.datasets.make_forge()

# Scatter the first feature column against the second; y is handed to the
# mglearn helper so the points can be told apart by class.
first_feature, second_feature = X[:, 0], X[:, 1]
mglearn.discrete_scatter(first_feature, second_feature, y)
plt.xlabel("First feature")
plt.ylabel("Second feature")
# loc=4 is matplotlib's numeric code for the lower-right corner.
plt.legend(["Class 0", "Class 1"], loc=4)

# Report the dimensions of X.
print("X.shape: {}".format(X.shape))

# Hold out a test split (seeded so the split is repeatable), then fit a
# 3-nearest-neighbours classifier on the training portion.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Evaluate on the held-out samples: predicted labels and mean accuracy.
test_predictions = clf.predict(X_test)
test_accuracy = clf.score(X_test, y_test)
print("test set predictions: {}".format(test_predictions))
print("test set accuracy: {:.2f}".format(test_accuracy))

In [None]:
# Regression example: the "wave" dataset (40 samples) with a continuous target.
X, y = mglearn.datasets.make_wave(n_samples=40)

# The "o" format string draws each sample as a circle marker with no connecting line.
plt.plot(X, y, "o")
plt.xlabel("feature")
# Fix the vertical range of the plot to [-3, 3].
plt.ylim(-3, 3)
plt.ylabel("target")

In [None]:
import numpy as np
from sklearn.datasets import load_breast_cancer

# Load scikit-learn's breast cancer dataset and print a short summary of it.
cancer = load_breast_cancer()
print("cancer.keys(): {}".format(cancer.keys()))
print("shape of cancer data: {}".format(cancer.data.shape))

# np.bincount tallies how often each integer label occurs in cancer.target;
# zipping the tallies with target_names yields a name -> count mapping.
class_counts = dict(zip(cancer.target_names, np.bincount(cancer.target)))
print("sample counts per class: {}".format(class_counts))

In [None]:
# `load_boston` is not called in this cell, and scikit-learn 1.2 removed it, so an
# unconditional import aborts the cell with ImportError on current releases.
# The guard keeps the name importable on older installs without breaking newer ones.
try:
    from sklearn.datasets import load_boston
except ImportError:
    load_boston = None  # not provided by the installed scikit-learn version

# Load mglearn's extended Boston dataset and report the dimensions of X.
# NOTE(review): older mglearn releases may themselves rely on sklearn's
# load_boston internally — confirm the installed mglearn version if this fails.
X, y = mglearn.datasets.load_extended_boston()
print("X.shape: {}".format(X.shape))

In [None]:
from sklearn.datasets import make_blobs
from sklearn.svm import LinearSVC

# Toy data with two features and three classes; the seed makes it repeatable.
X, y = make_blobs(random_state=42)

# Scatter feature 0 against feature 1; y is handed to the mglearn helper so the
# points can be told apart by class.
feature_0, feature_1 = X[:, 0], X[:, 1]
mglearn.discrete_scatter(feature_0, feature_1, y)
plt.legend(["class 0", "class 1", "class 2"])
plt.xlabel("feature 0")
plt.ylabel("feature 1")

# Fit a linear SVM on the blobs data. Its coef_ and intercept_ are walked in
# lockstep below, producing one plotted line per (coefficients, intercept) pair.
linear_svm = LinearSVC()
linear_svm.fit(X, y)

line = np.linspace(-15, 15)
line_colors = ['b', 'r', 'g']
for coef, intercept, color in zip(linear_svm.coef_, linear_svm.intercept_, line_colors):
    # Points where coef[0] * x0 + coef[1] * x1 + intercept == 0, solved for x1.
    boundary = -(line * coef[0] + intercept) / coef[1]
    plt.plot(line, boundary, c=color)

# Restrict the visible window and label the axes.
plt.xlim(-10, 8)
plt.ylim(-10, 15)
plt.xlabel("feature 0")
plt.ylabel("feature 1")

# Six legend entries: three for the scattered classes, three for the lines.
# The (x, y) tuple places the legend just outside the right edge of the axes.
legend_labels = [
    "class 0", "class 1", "class 2",
    "line class 0", "line class 1", "line class 2",
]
plt.legend(legend_labels, loc=(1.01, 0.3))

# mglearn helper called with the fitted classifier and the data; presumably it
# fills the plane by predicted class (fill=True) — confirm against mglearn.
mglearn.plots.plot_2d_classification(linear_svm, X, fill=True, alpha=.7)


In [None]:
## Decision trees
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
import graphviz

# Train a depth-2 decision tree on the breast cancer data and render it with Graphviz.
filename = "breast_cancer_decision_tree.dot"
cancer = load_breast_cancer()

# Stratify on the target so the train and test splits keep the class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    stratify=cancer.target,
    random_state=42,
)
tree = DecisionTreeClassifier(max_depth=2, random_state=0).fit(X_train, y_train)

# Write the fitted tree to a DOT file; impurity=False omits the impurity value
# from each node's label.
export_graphviz(
    tree,
    out_file=filename,
    class_names=["malignant", "benign"],
    feature_names=cancer.feature_names,
    impurity=False,
)

# Read the DOT text back from disk so it can be shown inline.
with open(filename) as f:
    dot_graph = f.read()

# Last expression of the cell: the notebook displays the resulting Source object.
graphviz.Source(dot_graph)