<a href="https://colab.research.google.com/github/micheldc55/Deep-Learning/blob/main/05_mi_mlp_clasificacion_XOR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, confusion_matrix

%matplotlib inline

In [None]:
from sklearn.datasets import make_blobs

# Four tight Gaussian blobs, one centred on each corner of the unit square.
centers = np.array([[0,0],[1,1],[0,1],[1,0]])
# random_state pins the sampled points so that "Restart & Run All" reproduces the
# same dataset (the train/test split further down is already seeded with 1).
X, y = make_blobs(n_samples=1000, n_features=2, centers=centers, cluster_std=0.1,
                  random_state=1)
# make_blobs labels every point with the index of its centre (0..3). Collapse
# those into the two XOR classes: centres (0,0),(1,1) -> 0 and (0,1),(1,0) -> 1.
y[y==1] = 0
y[y>1]  = 1

plt.figure(figsize=(5,5))
plt.scatter(X[y==0,0], X[y==0,1], label='clase 0')
plt.scatter(X[y==1,0], X[y==1,1], label='clase 1')
plt.legend();

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; random_state fixes the shuffle so the
# same partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.3,
                                                    random_state=1)

### Primero estandarizo

In [None]:
from sklearn.preprocessing import StandardScaler

# The scaling statistics are learned from the training split only and then
# reused, unchanged, on the test split.
sc = StandardScaler()
X_train_sc = sc.fit_transform(X_train)
X_test_sc  = sc.transform(X_test)

### ¿Puede resolver este problema una regresión logística?

In [None]:
from sklearn.linear_model import LogisticRegression

# Linear baseline fitted on the standardized training data, then used to
# predict hard labels for both splits.
model = LogisticRegression().fit(X_train_sc, y_train)
y_train_pred, y_test_pred = model.predict(X_train_sc), model.predict(X_test_sc)

In [None]:
from sklearn.metrics import accuracy_score

# First line printed: training accuracy; second line: test accuracy.
print(accuracy_score(y_train, y_train_pred),
      accuracy_score(y_test,  y_test_pred),
      sep="\n")

### **MLP implementado a mano**

In [None]:
# Scratch check: a 1-D array whose elements are themselves arrays of different
# lengths -- the same container layout used further down for the network weights.
# dtype=object is mandatory here: NumPy >= 1.24 raises ValueError when asked to
# build an array from ragged sequences without it (older versions only warned).
a = np.array([np.array([1]),np.array([2]),np.array([3,4])], dtype=object)
a

In [None]:
# Scratch check: arithmetic between two such containers is applied to each
# stored sub-array in turn.
a-a

In [None]:
# Scratch check: in-place scaled update of every element, the same form as the
# gradient step `w -= alpha_norm*grad` used in the training loop.
a += .1*a

In [None]:
# Display the container after the in-place update.
a

In [None]:
# Clip with lower bound 0 and no upper bound: negative entries become 0, the
# same effect as the np.maximum(..., 0) ReLU used in my_model.
np.clip([0,-1,2], 0, None)

In [None]:
def my_model(x, W):
    """Forward pass of a one-hidden-layer network with a sigmoid output.

    Parameters
    ----------
    x : numpy.ndarray
        Input samples; multiplied on the right by the input weight matrix.
    W : sequence of four arrays
        ``(bI, WI, bH, WH)``: hidden-layer bias, input-to-hidden weights,
        output bias and hidden-to-output weights, in that order.

    Returns
    -------
    numpy.ndarray
        Sigmoid of the output unit's pre-activation.
    """
    hidden_bias, input_weights, output_bias, output_weights = W
    # Hidden layer: affine map followed by ReLU.
    hidden_pre = x.dot(input_weights) + hidden_bias
    hidden_out = np.maximum(hidden_pre, 0)
    # Output unit: affine map followed by the logistic sigmoid.
    logit = hidden_out.dot(output_weights) + output_bias
    return 1 / (1 + np.exp(-logit))

In [None]:
# Number of hidden units.
Noc = 2
# Random initial weights stored as an object array in the order my_model unpacks
# them: bI (Noc,), WI (n_features, Noc), bH (1,), WH (Noc,).
w = np.array([np.random.randn(Noc),
              np.random.randn(X.shape[1], Noc),
              np.random.randn(1),
              np.random.randn(Noc)], dtype=object)
w

In [None]:
# Smoke test: forward pass on the first 10 standardized training samples.
my_model(X_train_sc[:10], w)

In [None]:
from sklearn.metrics import log_loss

def mi_funcion_error(data, w, l2=0.0):
    """Binary cross-entropy of ``my_model`` on a batch, with an optional L2 penalty.

    Single definition of the loss (a function defined twice in one cell is
    silently replaced by its last definition). With the default ``l2=0.0`` the
    result is exactly ``log_loss`` of the model predictions.

    Parameters
    ----------
    data : tuple
        ``(X, y)``: input samples and their 0/1 labels.
    w : sequence of arrays
        Network weights ``[bI, WI, bH, WH]`` as expected by ``my_model``.
    l2 : float, default 0.0
        Weight of the penalty ``l2 * sum of squared weights``. The sum is taken
        inside each weight array separately; summing the object array directly
        would broadcast the arrays against each other and count some entries
        several times.

    Returns
    -------
    float
        Mean cross-entropy over the batch, plus the penalty when ``l2`` is non-zero.
    """
    X, y = data
    ypred = my_model(X, w)
    # labels=[0, 1] keeps log_loss usable on a batch that happens to contain a
    # single class; without it scikit-learn raises ValueError in that case.
    error = log_loss(y, ypred, labels=[0, 1])
    if l2:
        error += l2 * sum((np.asarray(p, dtype=float) ** 2).sum() for p in w)
    return error


In [None]:
def calcula_grad(funcion, data, W, eps=1e-4):
    """Approximate the gradient of ``funcion(data, W)`` by forward differences.

    Every scalar weight is nudged by ``eps`` in turn, ``funcion`` is evaluated
    again and the slope ``(f(perturbed) - f(W)) / eps`` is stored at the same
    position of the matching gradient array.

    Parameters
    ----------
    funcion : callable
        Called as ``funcion(data, weights)`` where ``weights`` is a 1-D object
        array holding one array per entry of ``W``. Must return a scalar or a
        one-element array.
    data : object
        Passed through to ``funcion`` untouched.
    W : sequence of array-like
        Weight arrays, e.g. ``[bI, WI, bH, WH]``. Any number of arrays of any
        shape is accepted. ``W`` and the arrays it holds are never modified.
    eps : float, default 1e-4
        Size of the perturbation.

    Returns
    -------
    numpy.ndarray
        1-D object array with one gradient array per entry of ``W``, each of
        the same shape as the corresponding weight array.
    """
    def pack(arrays):
        # Fill slot by slot: np.array(list, dtype=object) would try to stack the
        # arrays into one multi-dimensional array whenever their shapes agree.
        packed = np.empty(len(arrays), dtype=object)
        for k, arr in enumerate(arrays):
            packed[k] = arr
        return packed

    def evaluate(weights):
        # .item() accepts Python floats, 0-d arrays and one-element arrays alike.
        return np.asarray(funcion(data, pack(weights))).item()

    # Work on private copies. Copying the object array alone would be shallow:
    # the inner arrays would still be the caller's, and an exception raised by
    # funcion mid-way would leave them perturbed.
    weights = []
    for p in W:
        private = np.array(p)  # np.array copies its input by default
        if not np.issubdtype(private.dtype, np.floating):
            # An integer array could not hold `value + eps`.
            private = private.astype(float)
        weights.append(private)

    fw = evaluate(weights)
    grads = [np.zeros_like(p) for p in weights]
    for p, g in zip(weights, grads):
        # np.ndindex walks every position whatever the array's rank, so biases
        # of any length and weight matrices are all handled by the same loop.
        for idx in np.ndindex(p.shape):
            value = p[idx]
            p[idx] = value + eps
            g[idx] = (evaluate(weights) - fw) / eps
            p[idx] = value  # restore before perturbing the next weight
    return pack(grads)

In [None]:
# my_model has the same (data, W) call shape that calcula_grad expects, so this
# differentiates the network output itself for the first training sample.
calcula_grad(my_model, X_train_sc[:1], w)

In [None]:
# First standardized training sample.
X_train_sc[0]

In [None]:
#calcula_grad(my_model, X_train_sc[1], w)

In [None]:
# Second standardized training sample.
X_train_sc[1]

In [None]:
# Current weights, in the order [bI, WI, bH, WH].
w

In [None]:
# Earlier call form kept for reference (it passes the model as an extra first
# argument and uses y_train_sc, which no visible cell defines):
#mi_funcion_error(my_model, (X_train_sc[:10], y_train_sc[:10]), w)
# Loss on the first 10 training samples.
mi_funcion_error((X_train_sc[:10], y_train[:10]), w)

In [None]:
# NOTE: the first .sum() reduces the object array by adding the squared weight
# arrays to each other, which broadcasts them to a common shape; the second
# .sum() then reduces that array. With weight arrays of different shapes the
# broadcast entries are counted more than once, so this is not the plain sum
# of squared weights.
(w**2).sum().sum()

In [None]:
# Numerical gradient of the loss on the first 10 training samples.
calcula_grad(mi_funcion_error, (X_train_sc[0:10], y_train[0:10]), w)

### Entrenamiento con batch

In [None]:
# Hyper-parameters of the mini-batch gradient descent.
alpha = 2
nepocas = 100
batch_size = 20
Ntr = len(X_train_sc)

# Fresh random weights [bI, WI, bH, WH] for a network with Noc hidden units.
# dtype=object is required: the four arrays have different shapes, and
# NumPy >= 1.24 raises ValueError when building such a ragged array without it.
Noc = 2
w = np.array([np.random.randn(Noc),
              np.random.randn(X.shape[1], Noc),
              np.random.randn(1),
              np.random.randn(Noc)], dtype=object)

errores_tr = []
# Step size actually applied to each mini-batch gradient.
alpha_norm = alpha/batch_size

for e in range(nepocas):
    # One epoch: walk the training set in consecutive slices of batch_size rows.
    for b in range(0, Ntr, batch_size):
        X_lote = X_train_sc[b:(b+batch_size)]
        y_lote = y_train   [b:(b+batch_size)]
        grad = calcula_grad(mi_funcion_error, (X_lote,y_lote), w)
        # Gradient-descent step applied to every weight array at once.
        w -= alpha_norm*grad

    # Track the loss over the whole training set after each epoch.
    error_total_tr = mi_funcion_error((X_train_sc,y_train), w)
    print("Error en training en época {}: {}".format(e, error_total_tr))
    errores_tr.append(error_total_tr)

In [None]:
# Learning curve: full-training-set loss recorded at the end of each epoch.
plt.figure(figsize=(12,3))
plt.plot(errores_tr, label='error_total_tr')
plt.xlabel("época", fontsize=14)
plt.ylabel("error", fontsize=14)
plt.legend();

In [None]:
# Numerical gradient of the loss over the whole training set at the current weights.
calcula_grad(mi_funcion_error, (X_train_sc, y_train), w)

In [None]:
# Turn the predicted probabilities into hard 0/1 labels with a 0.5 threshold.
y_tr_preds = (my_model(X_train_sc, w) > 0.5).astype(int)
y_te_preds = (my_model(X_test_sc, w) > 0.5).astype(int)

y_tr_preds

In [None]:
# First line printed: training accuracy of the MLP; second line: test accuracy.
print(accuracy_score(y_train, y_tr_preds),
      accuracy_score(y_test,  y_te_preds),
      sep="\n")