# Autoencoders

En esta rutina utilizaremos autoencoders para reducir la dimensión de nuestros datos. Como trabajaremos en Colab, primero ejecutamos las siguientes instrucciones para instalar TensorFlow y clonar el repositorio con los datos:

In [None]:
# Colab setup: install the TensorFlow 2.0 GPU nightly preview build.
# NOTE(review): the package is unpinned and force-reinstalled, so different
# runs may end up with different builds -- confirm this is intended.
!pip install --upgrade --force-reinstall tf-nightly-gpu-2.0-preview
# Clone the repository; a later cell changes into its data/ directory to read the CSV.
!git clone https://github.com/milocortes/Autoencoder

Importamos las librerías necesarias:

In [None]:
import os
import sys
# scientific python stack
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# ML/DL
import sklearn
import sklearn.model_selection
from sklearn.model_selection import train_test_split

import tensorflow as tf
import tensorflow.keras as tfk
import tensorflow.keras.layers as tkl
import tensorflow_probability as tfp
# Short alias for the TensorFlow Probability distributions module.
tfd = tfp.distributions

# Report the version of each deep-learning library, in the same order and
# with the same output format as three separate print calls.
for template, library in (('Tensorflow:{}', tf), ('Keras:{}', tfk), ('Tf-prob:{}', tfp)):
    print(template.format(library.__version__))

Cambiamos el directorio para poder cargar nuestros datos:

In [None]:
cd /content/Autoencoder/data/

Cargamos los datos:

In [None]:
# Read the dataset from the current working directory.
df = pd.read_csv('muni_2015_ine_inegi.csv')

# Discard the first column, building a new frame rather than mutating in place.
df = df.drop(columns=df.columns[[0]])

print(df.shape)
df.head()

Generamos la matriz de diseño:

In [None]:
# Fixed seed so the train/test split (and every result derived from it)
# is reproducible across kernel restarts.
SEED = 42

# Design matrix: all columns of df converted to float32, one row per record.
x = df.to_numpy(dtype=np.float32)

# Split row positions rather than index labels, so the fancy indexing below
# stays correct even if df does not carry a default 0..n-1 index.
train_index, test_index = train_test_split(
    np.arange(x.shape[0]), test_size=0.2, random_state=SEED)
x_train, x_test = x[train_index], x[test_index]

print(x.shape)

## Reducimos a 2 dimensiones con PCA

In [None]:
from sklearn.decomposition import PCA

# Fit a two-component PCA on the held-out samples and project them.
model = PCA(n_components=2)
x_pca = model.fit_transform(x_test)

# Scatter plot of the two principal components on an 8x8 inch figure.
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(x_pca[:, 0], x_pca[:, 1], s=1)
plt.show()

## Construimos un PCA con un autoencoder lineal

In [None]:
tfkl = tf.keras.layers

# Two-unit bottleneck; the input width is taken from the design matrix.
latent_dim = 2
input_dim = x.shape[-1]

# Encoder: one dense layer with no activation, input_dim -> latent_dim.
encoder = tf.keras.Sequential([
    tfkl.InputLayer(input_shape=[input_dim]),
    tfkl.Dense(units=latent_dim, activation=None),
])

# Decoder: one dense layer with no activation, latent_dim -> input_dim.
decoder = tf.keras.Sequential([
    tfkl.InputLayer(input_shape=[latent_dim]),
    tfkl.Dense(units=input_dim, activation=None),
])

# Chain both halves into a single model that maps x to its reconstruction.
ae = tfk.Model(inputs=encoder.inputs, outputs=decoder(encoder.outputs))
ae.compile(optimizer='adam', loss='mse')
ae.summary()

# The training target is the input itself.
ae.fit(x=x_train, y=x_train, batch_size=64, epochs=100)

## Encodificar, decodificar

In [None]:
# Encode the held-out samples and reconstruct them from their latent codes.
z = encoder.predict(x_test)
recon_x = decoder.predict(z)

# Compare the first reconstruction with the sample it was built from.
# recon_x is row-aligned with x_test (not with the full matrix x), so the
# reference row must come from x_test.
reconstruction_error = recon_x[0] - x_test[0]
print(np.abs(reconstruction_error))
print(np.linalg.norm(reconstruction_error))

## Visualizamos el espacio latente

In [None]:
# Scatter the two latent coordinates of the encoded samples on an 8x8 inch figure.
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(z[:, 0], z[:, 1], s=1)
plt.show()

# Stacked Autoencoders

### Espacio latente $z$ de dos dimensiones

In [None]:

tfkl = tf.keras.layers

latent_dim = 2
input_dim = x.shape[-1]

# Encoder: progressively narrower ReLU layers ending in a linear bottleneck.
encoder = tf.keras.Sequential([
        tfkl.InputLayer(input_shape=[input_dim]),
        tfkl.Dense(150, activation='relu', kernel_initializer='normal'),
        tfkl.Dense(100, activation='relu', kernel_initializer='normal'),
        tfkl.Dense(50, activation='relu', kernel_initializer='normal'),
        tfkl.Dense(10, activation='relu', kernel_initializer='normal'),
        tfkl.Dense(latent_dim, activation=None)])

# Decoder: mirror image of the encoder, back up to the input width.
decoder = tf.keras.Sequential([
        tfkl.InputLayer(input_shape=[latent_dim]),
        tfkl.Dense(10, activation='relu', kernel_initializer='normal'),
        tfkl.Dense(50, activation='relu', kernel_initializer='normal'),
        tfkl.Dense(100, activation='relu', kernel_initializer='normal'),
        tfkl.Dense(150, activation='relu', kernel_initializer='normal'),
        tfkl.Dense(input_dim, activation=None)])

# An encoder with freshly initialised weights only yields a random projection,
# so train encoder and decoder jointly to reconstruct the input before encoding.
ae = tf.keras.Model(inputs=encoder.input, outputs=decoder(encoder.output))
ae.compile('adam', loss='mse')
ae.fit(x_train, x_train, batch_size=64, epochs=100)

# Encode the held-out samples with the trained encoder.
z = encoder.predict(x_test)

###  Visualizamos el espacio latente

In [None]:
# Draw the first latent coordinate against the second, one small marker per sample.
latent_fig, latent_ax = plt.subplots(figsize=(8, 8))
latent_ax.scatter(z[:, 0], z[:, 1], s=1)
plt.show()

### Espacio latente $z$ de tres dimensiones

In [None]:
tfkl = tf.keras.layers

# Size of the latent space and width of the input.
latent_dim = 3
input_dim = x.shape[-1]

# Encoder: three hidden layers followed by a linear projection to latent_dim.
# The final projection is required: it closes the layer list and makes the
# output three-dimensional, which the 3-D scatter plot relies on.
encoder = tf.keras.Sequential([
        tfkl.InputLayer(input_shape=[input_dim]),
        tfkl.Dense(150, activation=tf.nn.leaky_relu, kernel_initializer='normal'),
        tfkl.Dense(100, activation=tf.nn.leaky_relu, kernel_initializer='normal'),
        tfkl.Dense(100, activation='relu', kernel_initializer='normal'),
        tfkl.Dense(latent_dim, activation=None)])

# Decoder: mirror image of the encoder, back up to the input width.
decoder = tf.keras.Sequential([
        tfkl.InputLayer(input_shape=[latent_dim]),
        tfkl.Dense(100, activation='relu', kernel_initializer='normal'),
        tfkl.Dense(100, activation=tf.nn.leaky_relu, kernel_initializer='normal'),
        tfkl.Dense(150, activation=tf.nn.leaky_relu, kernel_initializer='normal'),
        tfkl.Dense(input_dim, activation=None)])

# An encoder with freshly initialised weights only yields a random projection,
# so train encoder and decoder jointly to reconstruct the input before encoding.
ae = tf.keras.Model(inputs=encoder.input, outputs=decoder(encoder.output))
ae.compile('adam', loss='mse')
ae.fit(x_train, x_train, batch_size=64, epochs=100)

# Encode the held-out samples with the trained encoder.
z = encoder.predict(x_test)

### Visualizamos el espacio latente

In [None]:
from matplotlib import pyplot
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- registers the '3d' projection on older matplotlib
import random

# Create the 3-D axes through the figure so they are attached to it.
# Instantiating Axes3D(fig) directly no longer adds the axes to the figure
# on recent matplotlib releases, which leaves the plot empty.
fig = pyplot.figure()
ax = fig.add_subplot(111, projection='3d')

# One latent coordinate per plot axis.
x_vals = z[:, 0]
y_vals = z[:, 1]
z_vals = z[:, 2]

ax.scatter(x_vals, y_vals, z_vals)
pyplot.show()