# Data Science Bootcamp
# <center> **Aula 19 -- Logistic Classifier** </center>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_iris

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

## Load data

In [None]:
# Load the Iris dataset and pull out the feature matrix and the class labels.
iris = load_iris()
X, y = iris['data'], iris['target']

In [None]:
# One display color per class label, in the same order as the samples in y.
colors = {0: 'red', 1: 'blue', 2: 'green'}
c = [colors[label] for label in y]

In [None]:
# Scatter plot of the first two feature columns, colored by class label.
plt.figure()
plt.scatter(X[:,0], X[:,1], c=c)
plt.show()

In [None]:
# Split the samples into classes 0, 1, 2.
# np.argwhere returns an (n, 1) index array, so X[y_k] has shape (n, 1, d);
# the reshape applied when stacking flattens it back to (n, d).
y0 = np.argwhere(y==0)
y1 = np.argwhere(y==1)
y2 = np.argwhere(y==2)

X0 = X[y0]
X1 = X[y1]
X2 = X[y2]

# extract class 0 and 1
# Label counts and feature count are taken from the data rather than
# hard-coded, so the labels always line up with the stacked rows.
X01 = np.vstack([X0,X1]).reshape(-1, X.shape[1])
y01 = np.repeat([0, 1], [len(X0), len(X1)])

# extract class 1 and 2
X12 = np.vstack([X1,X2]).reshape(-1, X.shape[1])
y12 = np.repeat([1, 2], [len(X1), len(X2)])

In [None]:
# Sanity check: each feature matrix should have one row per label.
print(X01.shape, y01.shape)
print(X12.shape, y12.shape)

## Clasificador logístico

In [None]:
def sigmoid(X, w):
    ''' Computes the sigmoid (logistic) function of the linear scores X @ w.
        Inputs:  X = data, as numpy array of shape (n,d+1).
                 w = vector of coefficients, as numpy array of shape (d+1,).
        Outputs: y = sigmoid function evaluated on each entry of X @ w.
    '''
    # 1/(1 + exp(-z)) == exp(-log(1 + exp(-z))) == exp(-logaddexp(0, -z)).
    # np.logaddexp evaluates the log-sum stably, so very negative scores no
    # longer overflow np.exp as they do in the naive formula.
    z = X @ w
    return np.exp(-np.logaddexp(0., -z))


def fit(X, y, w0, alpha, maxIter=1000, eps=1e-3):
    ''' Trains the logistic classifier by (full-batch) gradient descent.
        Inputs:  X  = data, as numpy array of shape (n,d).
                 y  = vector of class labels (0 and 1), as numpy array of shape (n,).
                 w0 = initial vector of coefficients, of shape (d+1,)
                      (bias first); it is not modified.
                 alpha = step size.
                 maxIter = max number of iterations in gradient descent.
                 eps = convergence tolerance on the L1 change of w per step.
        Outputs: w     = optimal vector of coefficients.
                 conv  = convergence flag (1 = method converged; 0 = not).
                 error = error after last step.
                 i     = number of total iterations.
    '''
    n = X.shape[0]

    # Add 1 dimension for the bias term
    X = np.concatenate([np.ones((n,1)), X], axis=1)

    # init data; a float copy keeps the in-place update valid for an integer
    # w0 and leaves the caller's array untouched
    w = np.array(w0, dtype=float)
    error = 10.*eps
    i = 0
    conv = 0

    # main iteration: at most maxIter steps (strict '<', so the cap is exact)
    while ((i < maxIter) and (error > eps)):
        wold = w.copy()
        # sum over samples of (y_j - sigmoid(x_j . w)) * x_j, evaluated at the
        # weights from the start of the step, as a single matrix product
        w += alpha*(X.T @ (y - sigmoid(X, w)))
        error = (np.abs(w - wold)).sum()
        # same threshold as the loop condition, so conv == 1 exactly when the
        # loop stops because of the tolerance
        conv = 1 if error <= eps else 0
        i = i+1
    return w, conv, error, i

    
def predict(X, w):
    ''' Applies a fitted logistic classifier to a dataset.
        Inputs:  X = data, as numpy array of shape (n,d).
                 w = fitted coefficient vector, of shape (d+1,).
        Outputs: y    = real-valued outputs of the logistic model,
                 yhat = binary predictions (1 if y > 0.5, 0 otherwise), as uint8.
    '''
    # Prepend the column of ones that multiplies the bias coefficient
    ones = np.ones((X.shape[0], 1))
    augmented = np.hstack([ones, X])

    probs = sigmoid(augmented, w)
    # Threshold at 0.5 to get hard labels
    labels = np.where(probs > 0.5, 1, 0).astype(np.uint8)
    return probs, labels

### Experimento 1: Clase 0 y 1 (linealmente separable)

In [None]:
# Gradient-descent settings for experiment 1
alpha   = 1e-3   # step size
maxIter = 5000   # iteration cap
eps     = 1e-3   # convergence tolerance
#w0 = np.zeros(3)
# Random initial weights: bias + 2 features. Not seeded, so runs differ.
w0 = np.random.rand(3,)

In [None]:
# Training (gradient descent) on the first two feature columns of X01

w, conv, err, it = fit(X01[:,[0,1]], y01, w0, alpha, maxIter, eps)
# Fitted weights, convergence flag, last-step error, iteration count
print(w, conv, err, it)

In [None]:
# Model outputs and thresholded 0/1 labels on the training samples
yprob, yhat = predict(X01[:,[0,1]], w)
print(yprob)
print(yhat)

In [None]:
# Confusion matrix of the true labels (y01) against the predictions (yhat)
cfmatrix = confusion_matrix(y01, yhat)
print(cfmatrix)

In [None]:
# Draw the confusion matrix as an annotated heatmap
plt.figure()
sns.heatmap(cfmatrix, annot=True)
plt.show()

In [None]:
# Text classification report for the custom classifier's predictions
print(classification_report(y01, yhat))

## Regiones de clasificación

In [None]:
# Grid for the decision regions: spans the first two features of X01,
# padded by 1 on every side, sampled with spacing h.
h = 0.01
x_min = X01[:,0].min() - 1
x_max = X01[:,0].max() + 1
y_min = X01[:,1].min() - 1
y_max = X01[:,1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

In [None]:
plt.figure(figsize=(6,6))
# Classify every grid point; Zp holds the model outputs, Z the 0/1 labels
Zp, Z = predict(np.c_[xx.ravel(), yy.ravel()], w)
Z = Z.reshape(xx.shape)
# Shade the predicted regions, then overlay the training samples
plt.contourf(xx, yy, Z, alpha=0.4)
plt.scatter(X01[:,0], X01[:,1], c=y01, s=20, edgecolor='k')
plt.title('Clasificador Logístico')
#plt.h([0,0], w, 'k-')
plt.xlim([x_min,x_max])
plt.ylim([y_min,y_max])
plt.show()

### Experimento 2: Clase 1 y 2 (no separable)

In [None]:
# Gradient-descent settings for experiment 2
alpha   = 1e-3   # step size
maxIter = 5000   # iteration cap
eps     = 1e-3   # convergence tolerance
#w0 = np.zeros(3)
# Random initial weights: bias + 2 features. Not seeded, so runs differ.
w0 = np.random.rand(3,)

In [None]:
# Training (gradient descent) on the first two feature columns of X12.
# y01 (zeros then ones) is passed as the target on purpose: X12 stacks the
# class-1 rows before the class-2 rows, so y01 relabels classes 1/2 as the
# 0/1 labels that fit() expects.

w, conv, err, it = fit(X12[:,[0,1]], y01, w0, alpha, maxIter, eps)
# Fitted weights, convergence flag, last-step error, iteration count
print(w, conv, err, it)

In [None]:
# Model outputs and thresholded 0/1 labels on the training samples
yprob, yhat = predict(X12[:,[0,1]], w)
print(yprob)
print(yhat)

In [None]:
# Confusion matrix against y01, the 0/1 labels used as the training target
cfmatrix = confusion_matrix(y01, yhat)
print(cfmatrix)

In [None]:
# Draw the confusion matrix as an annotated heatmap
plt.figure()
sns.heatmap(cfmatrix, annot=True)
plt.show()

In [None]:
# Text classification report for the custom classifier's predictions
print(classification_report(y01, yhat))

## Regiones de clasificación

In [None]:
# Grid for the decision regions: spans the first two features of X12,
# padded by 1 on every side, sampled with spacing h.
h = 0.01
x_min = X12[:,0].min() - 1
x_max = X12[:,0].max() + 1
y_min = X12[:,1].min() - 1
y_max = X12[:,1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

In [None]:
plt.figure(figsize=(6,6))
# Classify every grid point; Zp holds the model outputs, Z the 0/1 labels
Zp, Z = predict(np.c_[xx.ravel(), yy.ravel()], w)
Z = Z.reshape(xx.shape)
# Shade the predicted regions, then overlay the samples (colored by y01,
# the 0/1 labels used as the training target)
plt.contourf(xx, yy, Z, alpha=0.4)
plt.scatter(X12[:,0], X12[:,1], c=y01, s=20, edgecolor='k')
plt.title('Clasificador Logístico')
#plt.h([0,0], w, 'k-')
plt.xlim([x_min,x_max])
plt.ylim([y_min,y_max])
plt.show()

## Sklearn

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# scikit-learn logistic regression with all-default arguments.
model = LogisticRegression()
# Constructor signature pasted from the scikit-learn docs for reference
# (may differ between library versions):
#penalty='l2', *, dual=False, tol=0.0001, C=1.0, fit_intercept=True,
#intercept_scaling=1, class_weight=None, random_state=None, solver='lbfgs',
#max_iter=100, multi_class='auto', verbose=0, warm_start=False, n_jobs=None,
#l1_ratio=None)[source]

In [None]:
# Fit on the first two feature columns of the class 0/1 subset
model.fit(X01[:,[0,1]], y01)

In [None]:
# Display the fitted coefficients
model.coef_

In [None]:
# Predicted labels on the training samples
ypred = model.predict(X01[:,[0,1]])
print(ypred)

In [None]:
# Confusion matrix of the true labels (y01) against the sklearn predictions
cfmatrix = confusion_matrix(y01, ypred)
print(cfmatrix)

In [None]:
# Draw the confusion matrix as an annotated heatmap
plt.figure()
sns.heatmap(cfmatrix, annot=True)
plt.show()

In [None]:
# Text classification report for the sklearn model's predictions
print(classification_report(y01, ypred))

## Regiones de clasificación

In [None]:
# Grid for the decision regions: spans the first two features of X01,
# padded by 1 on every side, sampled with spacing h.
h = 0.01
x_min = X01[:,0].min() - 1
x_max = X01[:,0].max() + 1
y_min = X01[:,1].min() - 1
y_max = X01[:,1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

In [None]:
plt.figure(figsize=(6,6))
# Predict a label for every grid point with the sklearn model
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Shade the predicted regions, then overlay the training samples
plt.contourf(xx, yy, Z, alpha=0.4)
plt.scatter(X01[:,0], X01[:,1], c=y01, s=20, edgecolor='k')
plt.title('Clasificador Logistico')
#plt.h([0,0], w, 'k-')
plt.xlim([x_min,x_max])
plt.ylim([y_min,y_max])
plt.show()

In [None]:
# Second sklearn model, for the class 1/2 subset
model2 = LogisticRegression()

In [None]:
# y01 (zeros then ones) is the target here: X12 stacks the class-1 rows
# before the class-2 rows, so the model learns 0/1 in place of 1/2.
model2.fit(X12[:,[0,1]], y01)

In [None]:
# Display the fitted coefficients
model2.coef_

In [None]:
# Predicted labels on the training samples
ypred2 = model2.predict(X12[:,[0,1]])
print(ypred2)

In [None]:
# Confusion matrix against y01, the 0/1 labels model2 was fitted on
cfmatrix2 = confusion_matrix(y01, ypred2)
print(cfmatrix2)

In [None]:
# Draw the confusion matrix as an annotated heatmap
plt.figure()
sns.heatmap(cfmatrix2, annot=True)
plt.show()

In [None]:
# Text classification report for model2's predictions
print(classification_report(y01, ypred2))

## Regiones de clasificación

In [None]:
# Grid for the decision regions: spans the first two features of X12,
# padded by 1 on every side, sampled with spacing h.
h = 0.01
x_min = X12[:,0].min() - 1
x_max = X12[:,0].max() + 1
y_min = X12[:,1].min() - 1
y_max = X12[:,1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

In [None]:
plt.figure(figsize=(8,8))
# Predict a label for every grid point with model2
Z = model2.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Shade the predicted regions, then overlay the samples (colored by y01,
# the 0/1 labels model2 was fitted on)
plt.contourf(xx, yy, Z, alpha=0.4)
plt.scatter(X12[:,0], X12[:,1], c=y01, s=20, edgecolor='k')
plt.title('Clasificador Logistico')
#plt.h([0,0], w, 'k-')
plt.xlim([x_min,x_max])
plt.ylim([y_min,y_max])
plt.show()