In [None]:
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

## Definición de las funciones

In [None]:
def evaluar_grad(x, y):
  R = np.sqrt(x**2 + y**2)
  grad_x = -np.cos(R) * (x / R)
  grad_y = -np.cos(R) * (y / R)
  return np.array([grad_x, grad_y])

def gd(theta, epochs, eta):
  for i in range(epochs):
    x, y = theta
    gradient = evaluar_grad(x,y)
    theta -= eta * gradient
  dist = np.linalg.norm(theta)
  return theta, dist

def sgd(theta, data_train, epochs, eta):
  for i in range(epochs):
    np.random.shuffle(data_train)
    for example in data_train:
      x, y = example
      gradient = evaluar_grad(x, y)
      theta = theta - eta * gradient
    dist = np.linalg.norm(theta)
  return theta, dist

def rmsprop(theta, data_train, epochs, eta, decay, epsilon):
  E_g2 = np.zeros_like(theta)
  for i in range(epochs):
    np.random.shuffle(data_train)
    for example in data_train:
      x, y = example
      gradient = evaluar_grad(x, y)
      E_g2 = decay * E_g2 + (1 - decay) * gradient**2
      theta -= eta / (np.sqrt(E_g2) + epsilon) * gradient
    dist = np.linalg.norm(theta)
  return theta, dist

def adam(theta, data_train, epochs, alpha, beta1, beta2, epsilon):
  m = np.zeros_like(theta)
  v = np.zeros_like(theta)
  t = 0

  for epoch in range(epochs):
    np.random.shuffle(data_train)
    for example in data_train:
      x, y = example
      t += 1
      gradient = evaluar_grad(x, y)
      m = beta1 * m + (1 - beta1) * gradient
      v = beta2 * v + (1 - beta2) * (gradient**2)
      m_hat = m / (1 - beta1**t)
      v_hat = v / (1 - beta2**t)
      theta -= alpha * m_hat / (np.sqrt(v_hat) + epsilon)
    dist = np.linalg.norm(theta)
  return theta, dist

## Primera parte, promedio de los algoritmos

In [None]:
np.random.seed(1001300296)
theta_init = np.array([2.0,2.0])
x_train = np.random.uniform(-6.5, 6.5, 100)
y_train = np.random.uniform(-6.5, 6.5, 100)
data_train = list(zip(x_train, y_train))

theta1, dist1 = gd(theta_init, 1000, 0.1)
theta2, dist2 = sgd(theta_init, data_train, 100, 0.01)
theta3, dist3 = rmsprop(theta_init, data_train, 100, 0.001, 0.9, 1e-8)
theta4, dist4 = adam(theta_init, data_train, 100, 0.001, 0.9, 0.999, 1e-8)

distancias = pd.DataFrame({"Gradient descent":[dist1],
                           "Stochastic Gradient Descent":[dist2],
                           "RMSPROP":[dist3],
                           "Adam":[dist4]})
promedios = np.mean(distancias, axis = 0)
print(f"El mejor optimizador es el {promedios.idxmin()}")
promedios

El mejor optimizador es el Gradient descent


Unnamed: 0,0
Gradient descent,1.570796
Stochastic Gradient Descent,8.674605
RMSPROP,2.279422
Adam,3.680392


## Segunda parte, tabla de frecuencias

In [None]:
%%time
iter = 10000
distancias = np.zeros((iter,4))
distancias = pd.DataFrame(distancias)
distancias.columns = ["Gradient descent","Stochastic Gradient Descent","RMSPROP","Adam"]

theta_init = np.array([2.0,2.0])
for i in range(iter):
  x_train = np.random.uniform(-6.5, 6.5, 100)
  y_train = np.random.uniform(-6.5, 6.5, 100)
  data_train = list(zip(x_train, y_train))
  theta1, distancias["Gradient descent"][i] = gd(theta_init, 1000, 0.1)
  theta2, distancias["Stochastic Gradient Descent"][i] = sgd(theta_init, data_train, 100, 0.01)
  theta3, distancias["RMSPROP"][i] = rmsprop(theta_init, data_train, 100, 0.001, 0.9, 1e-8)
  theta4, distancias["Adam"][i] = adam(theta_init, data_train, 100, 0.001, 0.9, 0.999, 1e-8)

distancias["Mejor_Modelo"] = distancias.idxmin(axis=1)
tabla = distancias["Mejor_Modelo"].value_counts()
porcentajes = tabla/iter
tabla = pd.concat([tabla, porcentajes], axis=1)
tabla.columns = ["Frecuencia", "Porcentaje"]

CPU times: user 1h 12min 32s, sys: 19.8 s, total: 1h 12min 52s
Wall time: 1h 12min 58s


# TABLA FRECUENCIAS MEJORES OPTIMIZADORES

In [None]:
tabla

Unnamed: 0_level_0,Frecuencia,Porcentaje
Mejor_Modelo,Unnamed: 1_level_1,Unnamed: 2_level_1
Gradient descent,4907,0.4907
Stochastic Gradient Descent,4843,0.4843
Adam,143,0.0143
RMSPROP,107,0.0107
