In [None]:
# Import modules
%matplotlib inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Random seed: passed to make_classification below so that the generated
# data set is the same on every run
random_state = 0

We use scikit-learn to generate a toy 2D data set (two features $x_1$ and $x_2$) for binary classification (two classes):
 - each sample $(x_1,x_2)$ in the dataset is plotted as a 2D point where the two features $x_1$ and $x_2$ are displayed along the abscissa and ordinate axes respectively
 - the corresponding class label $y$ is displayed as a color mark (e.g., yellow or purple)

In [None]:
from sklearn.datasets import make_classification

# Toy problem: 150 samples described by 2 informative features, one cluster per class.
# X holds the features (aka inputs); y holds the labels (aka responses, targets, outputs).
X, y = make_classification(
    n_samples=150,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=random_state,
)
# Recode the label 0 as -1 so that the class labels are +1 or -1
y = np.where(y == 0, -1, y)
# Scatter plot of the samples, coloured by class label
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.grid(True)
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')
#plt.savefig("2d_binary_classif.pdf")

Then, a linear model is used to learn the classification function/rule.

In [None]:
from sklearn import linear_model
# Train a linear model (RidgeClassifier) on the toy data set.
# alpha=0 is the particular case that corresponds to standard linear regression.
model = linear_model.RidgeClassifier(alpha=0)
model.fit(X,y) # training step: fits the model on features X and labels y

In [None]:
# Show the fitted coefficients, then the intercept, of the linear model
print(model.coef_, model.intercept_, sep='\n')

In [None]:
# Print the built-in documentation for the fitted model object
help(model)

In [None]:
# Plot the decision boundary on top of the training samples.
# Build a regular 200 x 200 grid spanning the observed range of each feature.
XX, YY = np.meshgrid(np.linspace(X[:,0].min(), X[:,0].max(),200),
                     np.linspace(X[:,1].min(), X[:,1].max(),200))
XY = np.vstack([XX.flatten(), YY.flatten()]).T # grid points as rows (x_1, x_2). Shape (40000, 2)
yp = model.predict(XY) # predicted class label at every grid point
# contour draws level sets of the predicted labels reshaped back onto the grid
plt.contour(XX,YY,yp.reshape(XX.shape))
plt.scatter(X[:,0], X[:,1], c=y)
# Pass a real boolean: matplotlib documents this argument as a bool, and a
# string such as "on" only enables the grid because it is truthy.
plt.grid(True)
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')

In [None]:
# Shape of the evaluation grid: one row per grid point, one column per feature
XY.shape

In [None]:
# What are the parameter values of the linear boundary equation x_2 = a x_1 + b?
# coef_ and intercept_ are defined such that c^T x + c_0 = 0 on the boundary,
# with c = model.coef_[0] and c_0 = model.intercept_[0] (c_0 is NOT the b below).
# Solving c_1 x_1 + c_2 x_2 + c_0 = 0 for x_2 gives the slope a and the offset b;
# this assumes c_2 != 0, i.e. the boundary is not a vertical line.
a = -model.coef_[0][0]/model.coef_[0][1]
b = -model.intercept_[0]/model.coef_[0][1]
print('boundary equation x_2={} x_1 + {}'.format(a,b))

### Exercise
Draw the boundary line $x_2 = a x_1 + b$ directly on the scatter plot using matplotlib's `plot` function.

In [None]:
## EXO: draw a line y=ax+b
# Scaffold for the exercise: the samples are plotted here; the line itself is
# left for the reader to add with plt.plot.
plt.scatter(X[:,0], X[:,1], c=y)
# Pass a real boolean: matplotlib documents this argument as a bool, and a
# string such as "on" only enables the grid because it is truthy.
plt.grid(True)
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')

In [None]:
# Display the fitted coefficients of the linear model
model.coef_

### Exercise
Change the number of informative features from `n_informative=2` to `n_informative=1` in the `make_classification()` call, regenerate the data set and fit the classification rule again. Now interpret the new decision boundary: are the two variables of equal importance in predicting the class of the data?

In [None]:
# Get the documentation for the sklearn RidgeClassifier class
help(linear_model.RidgeClassifier)

In [None]:
# Get the documentation for make_classification (lists the n_informative parameter used in the exercise)
help(make_classification)