<a href="https://colab.research.google.com/github/micheldc55/Deep-Learning/blob/main/04_mi_clasificador_caras_optimizado.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# When True, the next cell downloads the helper modules and the dataset
# from Google Drive. Presumably set to False when running locally with
# those files already in place — TODO confirm.
COLAB = True

In [None]:
if COLAB:
    from google_drive_downloader import GoogleDriveDownloader as gdd
    gdd.download_file_from_google_drive(file_id='1q2shS6fBq_pSI8CBrjzdgg6DTTZfZ2mJ',
                                        dest_path='./funciones_auxiliares.py')
    gdd.download_file_from_google_drive(file_id='1qV3hqJFajv6iYLVqwtMIvr6Mo_GOOdQv',
                                        dest_path='./caras_aux.py')
    gdd.download_file_from_google_drive(file_id='1d_94YSDZu662q3JJ17r1dC1BHfOCTKZU',
                                        dest_path='./datasets/gender.tgz')
    !tar -xvf datasets/gender.tgz
    !mv gender datasets

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, confusion_matrix

%matplotlib inline

La base de datos usada es un subconjunto de la base de datos
"Labeled Faces in the Wild" ("LFW"):

  http://vis-www.cs.umass.edu/lfw/lfw-funneled.tgz

  http://vis-www.cs.umass.edu/lfw/

La separación en carpetas "female" / "male" se ha realizado usando
un código basado en:
https://github.com/Pletron/LFWgender

In [None]:
from caras_aux import loadImages, plot_gallery

# Load the images and build the arrays used by the rest of the notebook.
# `factor` is the resolution reduction factor: it must be > 0 and <= 1
# (with 1 the original 250x250 resolution is kept).
factor = 0.3
X_clase0, height, width = loadImages('./datasets/gender/female',
                                     factor=factor, nimages=1000)
X_clase1, height, width = loadImages('./datasets/gender/male',
                                     factor=factor, nimages=1000)
N0, N1 = len(X_clase0), len(X_clase1)

# Stack both classes: label 0 for the "female" rows, label 1 for the "male" rows.
X = np.concatenate((X_clase0, X_clase1))
y = np.array(N0*[0] + N1*[1])
n_samples, n_features = X.shape

target_names = np.array(['female', 'male'])
n_classes = len(target_names)

print('\nEstadísticas básicas de la base de datos leída:')
print('  # clases: %d' % n_classes)
print('  # patrones: %d' % n_samples)
print('  # atributos (features): %d' % n_features)

# Show the first few images of each class, reshaped back to 2-D.
n_show = 12
galeria = (
    ('\nAlgunas imágenes de la clase "female":', 'female', X_clase0),
    ('\nAlgunas imágenes de la clase "male":', 'male', X_clase1),
)
for encabezado, etiqueta, X_clase in galeria:
    print(encabezado)
    plot_gallery(X_clase[:n_show].reshape((n_show, height, width)),
                 n_show*[etiqueta], cmap=plt.cm.gray)

In [None]:
# Sanity check: shapes of the feature matrix and the label vector,
# followed by the feature matrix itself.
print(X.shape)
print(y.shape)
print(X)

In [None]:
# Smallest and largest pixel value in the feature matrix.
X.min(), X.max()

In [None]:
# Scale pixel intensities to [0, 1] (assumes 8-bit intensities in [0, 255]
# — TODO confirm against the min/max shown above).
# The guard keeps the cell idempotent: re-running it no longer divides the
# data by 255 a second time. Dividing out of place also works when X has an
# integer dtype, where the in-place `/=` would raise.
if X.max() > 1:
    X = X / 255

In [None]:
# Distinct label values present in y.
np.unique(y)

In [None]:
# (number of samples, number of features per sample)
X.shape

In [None]:
# One label per sample.
y.shape

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 200 samples as the test set; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=200, random_state=42)

In [None]:
from caras_aux import report_base_error

# Class frequencies (in %) of the training and test partitions, side by side.
plt.figure(figsize=(10,3))

# Left panel: training partition.
ax = plt.subplot(1,2,1)
clases, counts = np.unique(y_train, return_counts=True)
pct = 100*counts/len(y_train)
plt.bar(clases[0], pct[0])
plt.bar(clases[1], pct[1])
plt.title('Training')
plt.xlabel('Clase')
plt.ylabel('Frequency (%)')
ax.set_xticks(clases)

# Right panel: test partition (bars placed at the class values found in training).
ax = plt.subplot(1,2,2)
none, counts = np.unique(y_test, return_counts=True)
pct = 100*counts/len(y_test)
plt.bar(clases[0], pct[0])
plt.bar(clases[1], pct[1])
plt.title('Test')
plt.xlabel('Clase')
ax.set_xticks(clases)
plt.show()

report_base_error(X_train, y_train, X_test, y_test)

Otra forma de hacer el mismo cálculo:

In [None]:
from sklearn.dummy import DummyClassifier

# Baseline classifier using the 'prior' strategy, fitted on the training split.
clf = DummyClassifier(strategy='prior').fit(X_train, y_train)
# X_test is required by the API but is not actually used by this baseline.
dummy_score = clf.score(X_test, y_test)
print('Score del clasificador Dummy en test:', dummy_score)

In [None]:
from sklearn.linear_model import LogisticRegression

# Reference model: scikit-learn's logistic regression with default settings.
model = LogisticRegression()
# Bare expression so the notebook displays the estimator.
model

In [None]:
# Fit the reference model on the training split, then predict hard labels
# for both splits.
model.fit(X_train, y_train)
y_train_pred, y_test_pred = model.predict(X_train), model.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the scikit-learn model: first on training, then on test.
print(accuracy_score(y_train, y_train_pred))
print(accuracy_score(y_test,  y_test_pred))

In [None]:
from funciones_auxiliares import analisis_roc, plot_confusion_matrix

In [None]:
# Detailed results on the test split for the scikit-learn model.
# Each helper draws into the subplot selected just before it is called
# (presumably via pyplot's current axes — helpers live in funciones_auxiliares).
plt.figure(figsize=(10,5))
plt.subplot(1,2,1)
# Left: ROC analysis from the predicted probability of class 1.
analisis_roc(y_test, model.predict_proba(X_test)[:,1])
plt.subplot(1,2,2)
# Right: confusion matrix from the hard predictions.
plot_confusion_matrix(y_test, y_test_pred)

### Regresión logística implementada a mano

In [None]:
# NOTE(review): hard-coded 37x37 pixel count; presumably the image size for an
# earlier value of `factor` — compare with height*width / n_features and confirm.
37*37

In [None]:
def my_model(x, w):
    """Logistic-regression model: predicted probability of class 1.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_features)
        Input patterns, one per row. A single 1-D sample of length
        n_features is also accepted and yields a scalar.
    w : array-like of length n_features + 1
        Parameters: ``w[0]`` is the bias and ``w[1:]`` holds one weight
        per feature.

    Returns
    -------
    Sigmoid of ``w[0] + x . w[1:]``, one value in [0, 1] per sample.
    """
    # The output must be a plain probability so that it matches the y_hat
    # used in the log-loss gradient. A penalty on the size of `w` belongs in
    # the loss/gradient, not in the prediction (it used to be added here,
    # which pushed the outputs above 1).
    z = w[0] + np.dot(x, w[1:])
    return 1/(1 + np.exp(-z))

In [None]:
# Random parameter vector with one entry more than the number of features
# (the extra entry, w[0], is the bias used by my_model).
w = np.random.randn(X.shape[1]+1)
w

In [None]:
# Shape of one training sample vs. shape of the parameter vector
# (w has one extra entry).
print(X_train[0].shape)
print(w.shape)

In [None]:
# Evaluate the hand-written model on the first 10 training samples.
my_model(X_train[0:10], w)

**Función de error:**

\begin{equation}
logloss(data,w) = - \sum_i y_i \cdot \log {\hat y}_i - \sum_i (1-y_i) \cdot \log (1-{\hat y}_i)
\end{equation}

**Notas:**

* $0 \cdot \log 0$ se toma como 0
* Clases binarias: la clase real $y_i$ sólo puede ser 0 o 1
* ${\hat y}_i$ es la predicción del modelo para el caso $i$. Dicha predicción es una probabilidad, y depende de $w$ y de $X_i$

**Sensibilidad** (gradiente) de logloss a una perturbación en el parámetro $w_k$:

\begin{equation}
\frac{\partial}{\partial \, w_k} \, logloss(data,w) = - \frac{\partial}{\partial \, w_k} \sum_i y_i \cdot \log {\hat y}_i - \frac{\partial}{\partial \, w_k} \sum_i (1-y_i) \cdot \log (1-{\hat y}_i)
\end{equation}
$$
= - \sum_i \frac{\partial}{\partial \, w_k} \left[ y_i \cdot \log {\hat y}_i \right] - \sum_i \frac{\partial}{\partial \, w_k} \left[ (1-y_i) \cdot \log (1-{\hat y}_i) \right]
$$
$$
= - \sum_i  y_i \cdot \frac{\partial}{\partial \, w_k} \log {\hat y}_i  - \sum_i (1-y_i) \cdot \frac{\partial}{\partial \, w_k} \log (1-{\hat y}_i)
$$

Por otra parte:

$$
{\hat y}_i = \frac{1}{1 + e^{-z_i}}
$$
con
$$
z_i = w_0 + \sum_{k=1}^{d} w_k \cdot X_{i,k-1} \qquad (d = \text{número de atributos})
$$

La magnitud ${\hat y}_i$ depende de $z_i$, que a su vez depende de $w$, por lo que según la regla de la cadena de derivación:

$$
\frac{\partial}{\partial \, w_k} \log {\hat y}_i =
\left( \frac{d}{d \, z_i} \log {\hat y}_i \right) \cdot
\left( \frac{\partial}{\partial \, w_k} z_i \right)
$$

Si se desarrolla un poco más:

$$
\frac{\partial}{\partial \, w_k} \log {\hat y}_i =
\left( \frac{1}{{\hat y}_i} \cdot \frac{d}{d \, z_i} {\hat y}_i \right) \cdot
\left( \frac{\partial}{\partial \, w_k} z_i \right)
$$

Por una parte,

$$
\frac{d}{d \, z_i} {\hat y}_i =
\frac{d}{d \, z_i} \left( \frac{1}{1 + e^{-z_i}} \right) =
\frac{e^{-z_i}}{\left( 1 + e^{-z_i} \right)^2} =
{\hat y}_i \cdot (1 - {\hat y}_i)
$$

Y por otra:

* $\frac{\partial}{\partial \, w_0} z_i = 1$

* $\frac{\partial}{\partial \, w_k} z_i = X_{i,k-1}$ para $k>0$

Utilizando todo esto en:

\begin{equation}
\frac{\partial}{\partial \, w_k} \, logloss(data,w) = - \sum_i  y_i \cdot \frac{\partial}{\partial \, w_k} \log {\hat y}_i  - \sum_i (1-y_i) \cdot \frac{\partial}{\partial \, w_k} \log (1-{\hat y}_i)
\end{equation}

nos queda:

$$
\frac{\partial}{\partial \, w_0} \, logloss(data,w) = - \sum_i y_i \cdot (1 - {\hat y}_i) + \sum_i (1 - y_i) \cdot {\hat y}_i = \sum_i \left( {\hat y}_i - y_i \right)
$$
$$
\frac{\partial}{\partial \, w_k} \, logloss(data,w) = \sum_i \left( {\hat y}_i - y_i \right) \cdot X_{i,k-1}
$$

Las ecuaciones para cálculo de gradiente son entonces:

$$
\frac{\partial}{\partial \, w_0} \, logloss(data,w) = \sum_i \left( {\hat y}_i - y_i \right)
$$
$$
\frac{\partial}{\partial \, w_k} \, logloss(data,w) = \sum_i \left( {\hat y}_i - y_i \right) \cdot X_{i,k-1} \qquad (k > 0)
$$

In [None]:
def calcula_grad_logreg_logloss(data, w):
    """Gradient of the log-loss of the logistic-regression model w.r.t. `w`.

    `data` is an ``(X, y)`` pair and predictions come from ``my_model(X, w)``.
    Returns an array as long as `w`: entry 0 is the sum of the residuals
    (prediction minus target) and entries 1.. are the residuals multiplied
    by the feature matrix (one value per feature column).
    """
    features, targets = data
    residuals = my_model(features, w) - targets

    gradient = np.empty(len(w))
    gradient[0] = residuals.sum()        # bias component
    gradient[1:] = residuals @ features  # one component per feature
    # A regularisation term (+ 2*0.5*w) was left disabled in the original.
    return gradient

In [None]:
# Gradient computed from the first training sample only
# (slicing with [:1] keeps X two-dimensional).
calcula_grad_logreg_logloss((X_train[:1],y_train[:1]), w)

In [None]:
# Feature vector of the first training sample.
X_train[0]

In [None]:
# NOTE(review): leftover scratch call kept disabled; `calcula_grad` and
# `X_train_sc` are not defined anywhere in the visible notebook.
#calcula_grad(my_model, X_train_sc[1], w)

In [None]:
# Feature vector of the second training sample.
X_train[1]

### Entrenamiento con batch

In [None]:
from sklearn.metrics import log_loss

In [None]:
# Mini-batch gradient descent for the hand-written logistic regression.
alpha = 0.01        # learning rate, divided by the lot size below
nepocas = 500       # number of passes over the training set
#batch_size = len(X_train) # 20 # 20 worked very well
batch_size = 50
print_every = 50    # report the loss only every this many epochs
Ntr = len(X_train)
# Start from all-zero parameters. np.zeros states this directly instead of
# drawing random numbers and multiplying them by 0.
w = np.zeros(X.shape[1]+1)
errores_tr = []
# calcula_grad_logreg_logloss returns a gradient summed over the lot, so the
# step is scaled by the lot size.
alpha_norm = alpha/batch_size

for e in range(nepocas):
    # One epoch: sweep the training set in consecutive lots of `batch_size`.
    for b in range(0, Ntr, batch_size):
        X_lote = X_train[b:(b+batch_size)]
        y_lote = y_train[b:(b+batch_size)]
        grad = calcula_grad_logreg_logloss((X_lote,y_lote), w)
        w -= alpha_norm*grad

    # Log-loss over the whole training set after this epoch.
    error_total_tr = log_loss(y_train, my_model(X_train, w))
    errores_tr.append(error_total_tr)
    # The full curve is kept in errores_tr, so only a sparse progress log
    # is printed instead of one line per epoch.
    if e % print_every == 0 or e == nepocas - 1:
        print("Error en training en época {}: {}".format(e, error_total_tr))

In [None]:
# Training log-loss recorded after each epoch.
fig, ax = plt.subplots(figsize=(12,3))
ax.plot(errores_tr, label='error_total_tr')
ax.set_xlabel("época", fontsize=14)
ax.set_ylabel("error", fontsize=14)
ax.legend();

In [None]:
# Intercept and coefficients learned by the scikit-learn model.
print(model.intercept_)
print(model.coef_)

In [None]:
# Bias (w[0]) and per-feature weights (w[1:]) of the hand-trained model.
print(w[0])
print(w[1:])

In [None]:
# Outputs of the hand-written model for both splits.
y_proba_tr_preds, y_proba_te_preds = my_model(X_train, w), my_model(X_test, w)

# Hard labels: 1 where the output is strictly above 0.5, otherwise 0.
y_tr_preds = (y_proba_tr_preds > 0.5).astype(int)
y_te_preds = (y_proba_te_preds > 0.5).astype(int)

y_tr_preds

In [None]:
# Accuracy of the hand-trained model: first on training, then on test.
print(accuracy_score(y_train, y_tr_preds))
print(accuracy_score(y_test,  y_te_preds))

In [None]:
# Detailed results on the test split for the hand-trained model.
# Each helper draws into the subplot selected just before it is called
# (presumably via pyplot's current axes — helpers live in funciones_auxiliares).
plt.figure(figsize=(10,5))
plt.subplot(1,2,1)
# Left: ROC analysis from the model outputs on the test split.
analisis_roc(y_test, y_proba_te_preds)
plt.subplot(1,2,2)
# Right: confusion matrix from the thresholded predictions.
plot_confusion_matrix(y_test, y_te_preds)

In [None]:
from caras_aux import representa_algunas_predicciones

plt.figure(figsize=(5, 5))
# Scale the per-pixel weights by their largest magnitude so they fall in
# [-1, 1], then lay them out as an image.
aux = np.abs(w[1:]).max()
w_show = w[1:].reshape((height, width))/aux
plt.imshow(w_show, plt.cm.bwr, vmin=-1, vmax=1)
plt.title('Sensibilidad al pixel de entrada (heatmap)', size=12)
# Passing an empty list hides the ticks; xticks()/yticks() without
# arguments only return the current ticks and change nothing.
plt.xticks([]); plt.yticks([])

# The predictions shown are those of the hand-written logistic regression
# (y_te_preds), so the message names that model.
print('\nClasificaciones realizadas en test por la regresión logística implementada a mano:')
inds = representa_algunas_predicciones(X_test, y_test, y_te_preds,
                                       target_names,
                                       height, width,
                                       show_only_errors=False)

In [None]:
# RGB rendering of the scaled weights:
#   positive weights -> red channel at 1, green/blue fade as the weight grows;
#   negative weights -> blue channel at 1, red/green fade as the weight shrinks;
#   weights exactly equal to 0 match neither mask and stay black.
positivos = w_show > 0
negativos = w_show < 0
desvanecido_pos = positivos*(1-w_show)
desvanecido_neg = negativos*(1+w_show)

w_rgb = np.zeros((height, width, 3))
w_rgb[:,:,0] = 1*positivos + desvanecido_neg
w_rgb[:,:,1] = desvanecido_pos + desvanecido_neg
w_rgb[:,:,2] = desvanecido_pos + 1*negativos


In [None]:
# Display the RGB rendering of the weights (trailing ';' hides the repr).
plt.imshow(w_rgb);

In [None]:
# Take the first index returned by representa_algunas_predicciones and turn
# that test image into RGB by copying it into the three colour channels.
ind = inds[0]
imagen_gris = X_test[ind].reshape((height, width))
imagen_rgb = np.zeros((height, width, 3))
imagen_rgb[:] = imagen_gris[:, :, np.newaxis]

In [None]:
# Overlay the weight map on the face image. The weighted sum can reach 1.2,
# so it is clipped explicitly to the [0, 1] range that imshow accepts for
# float RGB data (same picture, without the out-of-range clipping warning).
plt.imshow(np.clip(0.7*imagen_rgb + 0.5*w_rgb, 0, 1));