In [None]:
# Optional: uncomment to upgrade scikit-learn (%pip installs into the running kernel's environment).
# %pip install --upgrade scikit-learn

In [None]:
# Report the versions of the libraries this notebook relies on.
import IPython
import numpy
import scipy
import matplotlib
import sklearn

for label, module in [('IPython:', IPython), ('numpy:', numpy), ('scipy:', scipy),
                      ('matplotlib:', matplotlib), ('scikit-learn:', sklearn)]:
    print(label, module.__version__)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Make every figure in this notebook 10 x 10 inches by default.
plt.rc("figure", figsize=(10, 10))

# Example of Classification

In [None]:
from sklearn.linear_model import SGDClassifier
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html
# make_blobs is imported from the public sklearn.datasets package: the old
# sklearn.datasets.samples_generator module path was removed in scikit-learn 0.24.
from sklearn.datasets import make_blobs
# https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_blobs.html

In [None]:
# Generate 50 labelled 2-D points grouped around 2 centers (fixed seed).
X, Y = make_blobs(n_samples=50, centers=2, cluster_std=0.5, random_state=0)

# Scatter the two feature columns, colored by cluster label.
plt.scatter(*X.T, c=Y, cmap=plt.cm.Paired)

plt.xlim(-1, 4)
plt.ylim(-1, 6)

In [None]:
# Fit a linear classifier with hinge loss using stochastic gradient descent.
# random_state pins the data shuffling so a fresh "Restart & Run All"
# reproduces the same decision boundary.
clf = SGDClassifier(loss="hinge", alpha=0.01,
                    max_iter=1000, fit_intercept=True,
                    random_state=0)
clf.fit(X, Y)

# Build a grid that spans the full plotted area (x: -1..5, y: -1..6) so the
# contour lines are not cut off before the top of the axes.
xx = np.linspace(-1, 5, 10)
yy = np.linspace(-1, 6, 10)
X_t, Y_t = np.meshgrid(xx, yy)

# Score every grid point in one vectorized call instead of one call per point.
grid_points = np.column_stack([X_t.ravel(), Y_t.ravel()])
Z = clf.decision_function(grid_points).reshape(X_t.shape)

# Level 0 is the decision boundary (solid); levels -1 and +1 are drawn dashed.
levels = [-1.0, 0.0, 1.0]
linestyles = ['dashed', 'solid', 'dashed']
colors = 'k'

ax = plt.axes()
ax.contour(X_t, Y_t, Z, levels, colors=colors, linestyles=linestyles)
ax.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)

ax.axis('tight')
ax.set_xlim(-1,5)
ax.set_ylim(-1,6)

In [None]:
# Draw a random 2-D point with each coordinate in [-1, 5).
unseen_point = 6 * np.random.random(2) - 1
print(unseen_point)

In [None]:
p = clf.decision_function(np.array([unseen_point]))
# Confidence scores per (sample, class) combination. In the binary case, confidence score for self.classes_[1] where >0 means this class would be predicted.
print(p)
# A positive score selects clf.classes_[1] (label 1); otherwise clf.classes_[0] (label 0).
# Index the single score explicitly rather than relying on the truth value of an array.
print("1" if p[0] > 0 else "0")

In [None]:
# Ask the fitted classifier for the label of the single unseen point (passed as a 1 x 2 array).
clf.predict(unseen_point.reshape(1, -1))

# Example of Regression

In [None]:
from sklearn.linear_model import LinearRegression
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html

In [None]:
# True slope and intercept used to generate the synthetic data.
a = 0.5
b = 1.0

# 20 random x values in [0, 30)
x = 30 * np.random.random(20)
print(x)
# y = a*x + b with standard-normal noise
y = a * x + b + np.random.normal(size=x.shape)

# Fit a linear regression model. The estimator takes a 2-D
# (n_samples, n_features) input, so x is passed as a single column.
# The name `clf` is kept because later cells print it.
clf = LinearRegression()
clf.fit(x[:, None], y)

# predict y on an evenly spaced grid over the same x range
x_new = np.linspace(0, 30, 100)
y_new = clf.predict(x_new[:, None])

# plot the noisy samples and the fitted line
ax = plt.axes()
ax.scatter(x, y)
ax.plot(x_new, y_new)

ax.set_xlabel('x')
ax.set_ylabel('y')

ax.axis('tight')

In [None]:
# Show the fitted LinearRegression estimator.
print(clf)

In [None]:
# Fitted slope(s) and intercept; compare with a = 0.5 and b = 1.0 used to generate the data.
print(clf.coef_, clf.intercept_)

# Representation of Data in Scikit-learn

![data-layout](data-layout.png)

# A Simple Example: the Iris Dataset

In [None]:
from sklearn.datasets import load_iris
# Load the Iris dataset; the following cells read its 'data', 'target',
# 'feature_names' and 'target_names' entries.
iris = load_iris()

In [None]:
# List the entries available on the loaded dataset object.
iris.keys()

In [None]:
# Dimensions of the feature matrix.
iris.data.shape

In [None]:
# Names of the feature columns.
print(iris.feature_names)

In [None]:
# Names of the target classes.
print(iris.target_names)

In [None]:
# Integer class label of every sample.
print(iris.target)

In [None]:
# this formatter will label the colorbar with the correct target names
formatter = plt.FuncFormatter(lambda i, *args: iris.target_names[int(i)])

# Which two feature columns to plot against each other.
x_index = 0
y_index = 1

# plt.get_cmap is used because matplotlib.cm.get_cmap (plt.cm.get_cmap) was
# deprecated and is no longer available in recent Matplotlib releases.
plt.scatter(iris.data[:, x_index], iris.data[:, y_index],
            c=iris.target, cmap=plt.get_cmap('Set1', 3))

# One tick per class; the color limits center each of the 3 colors on its integer label.
plt.colorbar(ticks=[0, 1, 2], format=formatter)
plt.clim(-0.5, 2.5)

plt.xlabel(iris.feature_names[x_index])
plt.ylabel(iris.feature_names[y_index]);

# Other datasets

[https://scikit-learn.org/stable/datasets/index.html](https://scikit-learn.org/stable/datasets/index.html)