<a href="https://colab.research.google.com/github/micheldc55/Deep-Learning/blob/main/07_mi_mlp_clasificacion_optimizada.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Toggle: when True, the following cell downloads the helper module and the dataset from Google Drive.
COLAB = True

In [None]:
if COLAB:
    from google_drive_downloader import GoogleDriveDownloader as gdd

    # (Google Drive file id, local destination path) pairs, fetched in this order.
    drive_files = (
        ('1q2shS6fBq_pSI8CBrjzdgg6DTTZfZ2mJ',
         './funciones_auxiliares.py'),
        ('1o2ZUFTRmKf9N8Dx4WDzF2fzaYGErpTlF',
         './datasets/fuga_clientes/fuga_clientes_empresa_telefonica_construccion.csv'),
    )
    for drive_id, local_path in drive_files:
        gdd.download_file_from_google_drive(file_id=drive_id, dest_path=local_path)

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, confusion_matrix

%matplotlib inline

In [None]:
FUGA_CLIENTES = True

if not FUGA_CLIENTES:
    # Alternative problem: scikit-learn's breast-cancer dataset, with the label
    # appended to the frame as a "target" column.
    from sklearn.datasets import load_breast_cancer

    a = load_breast_cancer()
    X, y = a.data, a.target
    X_df = pd.DataFrame(data=X, columns=a.feature_names).assign(target=y)
else:
    # Customer-churn CSV: discard the identifier column, then separate the
    # "Churn Status" label from the remaining feature columns.
    path = './datasets/fuga_clientes/'
    X_df = pd.read_csv(path + 'fuga_clientes_empresa_telefonica_construccion.csv').drop(columns='Customer ID')
    y = X_df["Churn Status"].values
    X = X_df.drop(columns="Churn Status").values

# Preview the first five rows of the frame.
X_df.head()

In [None]:
# Distinct label values present in the target vector.
np.unique(y)

In [None]:
# Dimensions of the feature matrix.
X.shape

In [None]:
# Dimensions of the target vector.
y.shape

In [None]:
# Redundant re-import: seaborn is already imported in the imports cell above.
import seaborn as sns

# Clustered heat map of the pairwise correlations between the columns of X_df
# (the label column is still part of the frame); colour scale pinned to [-1, 1]
# with the diverging 'bwr' colour map. The trailing ';' hides the object repr.
sns.clustermap(X_df.corr(), vmin=-1, vmax=1, cmap='bwr');

In [None]:
# NOTE(review): the two commented-out drops below look like leftovers from a
# different dataset — TODO confirm and delete.
#X_df.drop('AveBedrms', axis=1, inplace=True)
#X_df.drop(['Latitude', 'Longitude'], axis=1, inplace=True)

# Preview the first five rows of the frame.
X_df[:5]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples as the test split; the fixed random_state makes
# the partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.3,
                                                    random_state=1)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics on the training split only and reuse them,
# unchanged, on the test split.
sc = StandardScaler()
X_train_sc = sc.fit_transform(X_train)
X_test_sc = sc.transform(X_test)

# Baseline classifier with default hyper-parameters; echoed as the cell output.
model = LogisticRegression()
model

In [None]:
# Fit the logistic regression on the standardised training data, then predict
# hard labels for both the training and the test split.
model.fit(X_train_sc, y_train)
y_train_pred = model.predict(X_train_sc)
y_test_pred  = model.predict(X_test_sc)

### Primero estandarizo

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
sc = StandardScaler().fit(X_train)
X_train_sc, X_test_sc = sc.transform(X_train), sc.transform(X_test)

### Pruebo una regresión logística

In [None]:
from sklearn.linear_model import LogisticRegression

# Create a logistic regression with default hyper-parameters, fit it on the
# standardised training data and predict hard labels for both splits.
model = LogisticRegression()
model.fit(X_train_sc, y_train)
y_train_pred = model.predict(X_train_sc)
y_test_pred  = model.predict(X_test_sc)

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the logistic-regression predictions: training split first, then test split.
print(accuracy_score(y_train, y_train_pred))
print(accuracy_score(y_test,  y_test_pred))

### MLP implementado a mano

In [None]:
# Clipping with a lower bound of 0 and no upper bound zeroes the negative
# entries — the same element-wise operation as a ReLU.
np.clip([0,-1,2], 0, None)

In [None]:
def my_model(x, W, hidden_state=False):
    """Forward pass of a one-hidden-layer MLP (ReLU hidden units, sigmoid output).

    Parameters
    ----------
    x : ndarray
        Input sample(s). ``x.dot(WI)`` must be defined, so the last axis of
        ``x`` must match the first axis of ``WI``.
    W : sequence of four arrays ``(bI, WI, bH, WH)``
        Hidden-layer bias, input-to-hidden weights, output bias and
        hidden-to-output weights, in that order.
    hidden_state : bool, default False
        When True, the hidden-layer activations are returned as well.

    Returns
    -------
    ndarray or tuple of ndarray
        The sigmoid output, or ``(output, hidden activations)`` when
        ``hidden_state`` is True.
    """
    bI, WI, bH, WH = W
    salida_oculta = np.maximum(bI + x.dot(WI), 0)  # ReLU
    aux = bH + salida_oculta.dot(WH)               # pre-sigmoid output

    # Numerically stable sigmoid: only non-positive values are exponentiated,
    # so np.exp cannot overflow for strongly negative pre-activations.
    # For aux >= 0 this is exactly 1/(1+exp(-aux)); for aux < 0 it is the
    # algebraically equal form exp(aux)/(1+exp(aux)).
    e = np.exp(-np.abs(aux))
    salida = np.where(aux >= 0, 1/(1 + e), e/(1 + e))

    if hidden_state:
        return salida, salida_oculta
    return salida


In [None]:
# Number of hidden units for this first experiment.
Noc = 2
# Random initial parameters in the order my_model unpacks them: (bI, WI, bH, WH).
# dtype=object because the four pieces have different shapes.
w = np.array([np.random.randn(Noc),
              np.random.randn(X.shape[1], Noc),
              np.random.randn(1),
              np.random.randn(Noc)], dtype=object)
w

In [None]:
# Shape of one (unscaled) training sample versus the shape of the parameter container w.
print(X_train[0].shape)
print(w.shape)

In [None]:
# Forward pass on the first ten standardised training samples with the current weights.
my_model(X_train_sc[0:10], w)

In [None]:
def calcula_grad_mlp(data, w):
    """Gradient of the MLP parameters for a batch, accumulated sample by sample.

    NOTE: a later cell defines another function with this same name; once that
    cell runs, it replaces this definition.

    Parameters
    ----------
    data : tuple ``(X, y)``
        Inputs and targets of the batch; they are iterated together row by row.
    w : sequence of four arrays ``(W0, W1, W2, W3)``
        Parameters in the order ``my_model`` unpacks them (hidden bias,
        input-to-hidden weights, output bias, hidden-to-output weights).

    Returns
    -------
    ndarray of dtype object
        ``[grad_W0, grad_W1, grad_W2, grad_W3]``, each summed over the batch
        and shaped like the corresponding parameter.
    """
    W0, W1, W2, W3 = w
    X, y = data
    y_preds, salidas_ocultas = my_model(X, w, hidden_state=True)

    # Zero accumulators with the shape and dtype of each parameter.
    grad_W0 = np.zeros_like(W0)
    grad_W1 = np.zeros_like(W1)
    grad_W2 = np.zeros_like(W2)
    grad_W3 = np.zeros_like(W3)

    Nocultas = len(grad_W0)

    for x_i, y_i, y_pred_i, hidden in zip(X, y, y_preds, salidas_ocultas):
        # (prediction - target): derivative of the binary cross-entropy with
        # respect to the pre-sigmoid output.
        aux = y_pred_i - y_i
        grad_W2 += aux
        grad_W3 += aux*hidden
        for h in range(Nocultas):
            # Only hidden units with an active ReLU pass gradient backwards.
            if hidden[h] > 0:
                grad_W0[h] += aux*W3[h]
                grad_W1[:, h] += aux*W3[h]*x_i

    # dtype=object is required because the four gradients have different
    # shapes; without it recent NumPy versions raise ValueError.
    return np.array([grad_W0, grad_W1, grad_W2, grad_W3], dtype=object)

In [None]:
def calcula_grad_mlp2(data, W):
    """Reference gradient of the MLP parameters, accumulated one sample at a time.

    Parameters
    ----------
    data : tuple ``(X, y)``
        Inputs and targets of the batch; they are iterated together row by row.
    W : sequence of four arrays ``(bI, WI, bH, WH)``
        Parameters in the order ``my_model`` unpacks them.

    Returns
    -------
    ndarray of dtype object
        ``[grad_bI, grad_WI, grad_bH, grad_WH]``, each summed over the batch
        and shaped like the corresponding parameter.
    """
    bI, WI, bH, WH = W
    X, y = data
    probs, activations = my_model(X, W, hidden_state=True)

    # Zero-valued accumulators shaped like each parameter.
    acc_bI, acc_WI, acc_bH, acc_WH = bI*0, WI*0, bH*0, WH*0

    for sample, target, prob, hidden_act in zip(X, y, probs, activations):
        delta = prob - target
        acc_bH += delta
        acc_WH += delta*hidden_act
        # Visit, in ascending order, only the hidden units whose ReLU is active.
        for unit in np.flatnonzero(hidden_act > 0):
            back = delta*WH[unit]
            acc_bI[unit] += back
            acc_WI[:, unit] += back*sample

    return np.array([acc_bI, acc_WI, acc_bH, acc_WH], dtype=object)


def calcula_grad_mlp(data, W):
    """Vectorised gradient of the MLP parameters for a batch.

    Parameters
    ----------
    data : tuple ``(X, y)``
        Batch inputs (one sample per row) and their targets.
    W : sequence of four arrays ``(bI, WI, bH, WH)``
        Parameters in the order ``my_model`` unpacks them.

    Returns
    -------
    ndarray of dtype object
        ``[grad_bI, grad_WI, grad_bH, grad_WH]``, each summed over the batch
        and shaped like the corresponding parameter.
    """
    bI, WI, bH, WH = W
    X, y = data
    y_preds, salidas_ocultas = my_model(X, W, hidden_state=True)

    # (prediction - target) per sample.
    aux = y_preds - y
    # Vectorised reduction instead of the builtin sum(), which would iterate
    # the array element by element in the interpreter.
    grad_bH = np.array([aux.sum()])
    grad_WH = aux.dot(salidas_ocultas)

    # Error signal reaching each hidden unit; units whose ReLU output is not
    # positive pass no gradient back.
    delta_oculta = np.outer(aux, WH)
    delta_oculta[salidas_ocultas <= 0] = 0
    grad_WI = X.T.dot(delta_oculta)
    grad_bI = delta_oculta.sum(axis=0)

    # dtype=object because the four gradients have different shapes.
    return np.array([grad_bI, grad_WI, grad_bH, grad_WH], dtype=object)


In [None]:
# First (up to) four hidden-to-output weights, used as a small example vector.
a=w[3][:4]
a

In [None]:
# Outer product of a with itself: entry (i, j) equals a[i]*a[j].
np.outer(a,a)

In [None]:
# Gradient on the full standardised training set from calcula_grad_mlp (this rebinds `a`).
a = calcula_grad_mlp((X_train_sc, y_train), w)

In [None]:
# Same gradient from the explicit-loop implementation, kept as a reference for comparison.
b = calcula_grad_mlp2((X_train_sc, y_train), w)

In [None]:
# Total absolute difference between the two gradient implementations (a vs b),
# summed component by component; it should be ~0. Summing the object array
# directly would add the differently shaped pieces by broadcasting and
# inflate the reported total.
sum(np.abs(g_a - g_b).sum() for g_a, g_b in zip(a, b))

### Entrenamiento con batch

In [None]:
# Dimensions of the standardised training matrix.
X_train_sc.shape

In [None]:
from sklearn.metrics import log_loss

In [None]:
# --- Hyper-parameters -------------------------------------------------------
alpha = 0.5        # learning rate (divided by batch_size below)
nepocas = 1500     # number of passes over the training set
batch_size = 100   # samples per mini-batch (the last batch may be shorter)
Ntr = len(X_train_sc)

# Seed the global NumPy generator so the random initialisation, and therefore
# the whole training run, is repeatable after Restart & Run All.
np.random.seed(1)

# --- Random initial parameters, in the order my_model unpacks them ----------
Noc = 2*5          # number of hidden units
w = np.array([np.random.randn(Noc),
              np.random.randn(X.shape[1], Noc),
              np.random.randn(1),
              np.random.randn(Noc)], dtype=object)

errores_tr = []
# calcula_grad_mlp returns a gradient summed over the batch, so the step is
# scaled by the batch size.
alpha_norm = alpha/batch_size

for e in range(nepocas):
    # The loop variable is deliberately not called `b`, so the reference
    # gradient stored in `b` by an earlier cell is not overwritten.
    for ini in range(0, Ntr, batch_size):
        X_lote = X_train_sc[ini:(ini+batch_size)]
        y_lote = y_train[ini:(ini+batch_size)]
        grad = calcula_grad_mlp((X_lote, y_lote), w)
        w -= alpha_norm*grad

    # Log loss over the whole training set at the end of each epoch.
    error_total_tr = log_loss(y_train, my_model(X_train_sc, w))
    print("Error en training en época {}: {}".format(e, error_total_tr))
    errores_tr.append(error_total_tr)

In [None]:
# Shape of the gradient w.r.t. the input-to-hidden weights from the last mini-batch.
grad[1].shape

In [None]:
# Convert the per-epoch loss list to an ndarray so it can be sliced and subtracted element-wise.
errores_tr = np.array(errores_tr)

In [None]:
# First figure: training loss recorded at the end of every epoch.
fig_err, ax_err = plt.subplots(figsize=(12, 3))
ax_err.plot(errores_tr, label='error_total_tr')
ax_err.set_xlabel("época", fontsize=14)
ax_err.set_ylabel("error", fontsize=14)
ax_err.legend()
plt.show()

# Second figure: change in the loss across a lag of 100 epochs, with a red
# zero line for reference.
aux = errores_tr[100:] - errores_tr[:-100]
fig_diff, ax_diff = plt.subplots(figsize=(12, 3))
ax_diff.plot(aux, label='diff_error_total_tr')
ax_diff.plot([0, len(aux)], [0, 0], 'r')
ax_diff.set_xlabel("época", fontsize=14)
ax_diff.set_ylabel("diff. error", fontsize=14)
ax_diff.legend();

In [None]:
# Threshold the MLP outputs at 0.5 to turn them into hard 0/1 labels.
y_tr_preds = (my_model(X_train_sc, w) > 0.5).astype(int)
y_te_preds = (my_model(X_test_sc, w) > 0.5).astype(int)

y_tr_preds

In [None]:
# Accuracy of the hand-written MLP predictions: training split first, then test split.
print(accuracy_score(y_train, y_tr_preds))
print(accuracy_score(y_test,  y_te_preds))