**Algoritmos de optimización - Aprendizaje Profundo | Dr. Mariano Rivera**

*Por: Ixhel Escamilla Illescas*

* Data: Los datos representan una serie de tiempo histórica diaria para una moneda digital (BTC)

* Rango de fecha de los datos: 2018-05-11 al 2021-01-30

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import matplotlib as mpl

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Column names applied to the CSV; the file's own first row is skipped below.
columns = ['date','open','high','low','close','volume']
# Read the price history, parsing `date` as datetimes and using it as the index.
data = pd.read_csv("/kaggle/input/bitcoin-time-series/bitcoin_usd.csv", names=columns, header='infer', skiprows=1, parse_dates=['date'], index_col='date')
# Move `date` back from the index to an ordinary column, leaving a default
# integer index on `data`.
data = data.rename_axis('date').reset_index()
# Last expression of the cell: shows the first rows in the notebook output.
data.head()

In [None]:
from statsmodels.nonparametric.smoothers_lowess import lowess
import scipy.stats as st
from scipy.stats import norm
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

def plot(data, x, y, title="", xLabel="" , yLabel='Precio USD', dpi=100):
    """Draw ``y`` against ``x`` as a single red line and show the figure.

    Args:
        data: Not used by the function; kept so existing calls keep working.
        x: Values for the horizontal axis.
        y: Values for the vertical axis.
        title: Axes title.
        xLabel: Label of the horizontal axis.
        yLabel: Label of the vertical axis.
        dpi: Resolution passed to ``plt.figure``.
    """
    plt.figure(figsize=(20, 7), dpi=dpi)
    plt.plot(x, y, color='tab:red')
    # Axes.set() maps each keyword to a set_<name> method. Matplotlib stopped
    # lowercasing keyword names (deprecated in 3.3), so the camelCase
    # spellings `xLabel`/`yLabel` fail on current releases; pass the real
    # property names instead.
    plt.gca().set(title=title, xlabel=xLabel, ylabel=yLabel)
    plt.show()

plot(data, x=data.date, y=data.high, title='Bitcoin - Precio mas alto en el mercado de Julio de 2018 a Enero de 2021', xLabel='n = ' + str(data.high.count()))

# Sub-sampling set-up: every j-th row of the series is kept.
# NOTE: with fewer than 20 non-null `high` values `m` becomes 0 and the
# division below raises ZeroDivisionError.
batch_size = int(data.high.count())  # non-null `high` observations (author's note: 1000)
m = int(batch_size*0.05)  # target number of samples: 5% of the observations (author's note: 50)
j = int(batch_size/m)  # stride between sampled rows
sigma = 0.2
Mj = np.arange(j, batch_size, j)  # row positions used for the means
X_m = data.iloc[Mj].date

# Sampled prices, each divided by the maximum of its own full column.
y_m_open = (data.iloc[Mj].open)/(data.open.max())  # opening prices
y_m_high = (data.iloc[Mj].high)/(data.high.max())  # highest prices
y_m_low = (data.iloc[Mj].low)/(data.low.max())  # lowest prices
y_m_close = (data.iloc[Mj].close)/(data.close.max())  # closing prices

# Midpoint of the normalised high and low price at each sampled row.
# The original recomputed this inside a loop over growing prefixes and kept
# only the final, full-length result; one vectorised expression is equivalent.
y_m_median = (y_m_high + y_m_low)/2

def plot_dist(data, x, y1, y2, y3, title="", xLabel="" , yLabel="", dpi=100):
    """Draw three series against the same ``x`` on one figure and show it.

    Args:
        data: Not used by the function; kept so existing calls keep working.
        x: Values for the horizontal axis.
        y1: Series drawn in red.
        y2: Series drawn in blue.
        y3: Series drawn in cyan.
        title: Axes title.
        xLabel: Label of the horizontal axis.
        yLabel: Label of the vertical axis.
        dpi: Resolution passed to ``plt.figure``.
    """
    plt.figure(figsize=(20, 7), dpi=dpi)
    for series, colour in ((y1, 'tab:red'), (y2, 'tab:blue'), (y3, 'tab:cyan')):
        plt.plot(x, series, color=colour)
    # Axes.set() expects the lowercase property names; the camelCase
    # spellings are rejected by current Matplotlib releases.
    plt.gca().set(title=title, xlabel=xLabel, ylabel=yLabel)
    plt.show()

# Show the sampled normalised high, low and their midpoint on one chart.
plot_dist(y_m_median, x=X_m, y1=y_m_high, y2=y_m_low, y3=y_m_median, title='Media ponderada entre precio más alto y el más bajo para Mu (normalizados)', xLabel = 'm = '+ str(m))

# Same midpoint values as a one-column DataFrame named 'median'.
y_m_df = pd.DataFrame(y_m_median)
y_m_df.columns = ['median']

# Smoothed means (Mu): LOWESS fit (frac=0.05) of the max-normalised `high`
# column against the row position; column 1 of the result holds the fitted values.
data_soft = pd.DataFrame(lowess((data.high/data.high.max()), np.arange(len(data.high)), frac=0.05)[:, 1], index=data.date, columns=['high'])
# Smoothed values at the sampled rows. Despite the name, this is built from
# the smoothed `high` column, not from `y_m_median`.
y_m_median_soft = data_soft.iloc[Mj].high

# Regression inputs: the index labels of `y_m_median` as a column vector,
# and the smoothed sampled values as targets.
X = np.array(y_m_median.index).reshape((-1,1))
y = np.array(y_m_median_soft)

## Ordinary least squares fit of the smoothed targets on the sampled positions
# `normalize=True` is dropped: the keyword was removed from LinearRegression
# in recent scikit-learn releases, and without a penalty term it does not
# change the fitted line.
regresion = linear_model.LinearRegression(fit_intercept=True).fit(X,y)
coef_det = regresion.score(X,y)  # coefficient of determination on the training data
weight_b0 = regresion.intercept_  # scalar: fitted value at x = 0
weight_b1 = regresion.coef_  # array with the single slope coefficient
w0 = np.array(weight_b0).reshape(1)

# Fitted line evaluated at the sampled positions.
# The previous hand-rolled recurrence read `y_rl[i-1]`, which wraps to the
# last element at i=0 and then advances at half the slope, and it mixed a
# scalar with 1-element arrays in one list. `predict` returns the line itself.
y_rl = regresion.predict(X)

## Ridge regression (alpha=0.9) on the same inputs and targets
clf = Ridge(alpha=0.9)
clf.fit(X, y)
# Ridge predictions at the sampled positions.
y_C = clf.predict(X)  
# Smallest predicted value.
coef0 = y_C.min()
# First prediction minus the second one, i.e. the negative of the increment
# between the first two fitted points.
# NOTE(review): if a per-step slope was intended the sign looks inverted - confirm.
coef1 = (y_C[0]-y_C[1])

## Overlay of both fits on the smoothed series (LOWESS frac is 0.05 where
## `data_soft` is built; the plot title below says the same)
plt.figure(figsize=(20,7), dpi=100)
# Red: least-squares line; gray: Ridge predictions; blue dots: smoothed samples.
plt.plot(X_m, y_rl, color='red')
plt.plot(X_m, y_C, color='gray')
plt.plot(X_m, y_m_median_soft, 'o', color='blue')
# Full smoothed `high` series, drawn through the pandas plotting API.
data_soft['high'].plot(title="S - 0.05, m - 50 :: Regresión lineal y ajuste Ridge ") 
plt.show()

In [None]:
# Phi_ij 
def phi(data, x, y, sigma=0.2):
    """Return ``(x - y)**2 / (2 * sigma**2)`` over the first ``len(data)`` items.

    Args:
        data: Only its length is used; it sets how many leading entries of
            ``x`` and ``y`` take part.
        x: Sliceable sequence supporting elementwise arithmetic.
        y: Sliceable sequence supporting elementwise arithmetic.
        sigma: Scale of the squared difference.

    Returns:
        The elementwise result, with whatever type ``x[:n] - y[:n]`` yields.
    """
    # The original looped over every prefix length and kept only the last
    # (full-length) result; computing that one directly is equivalent.
    n = len(data)
    return (1/(2*(sigma**2)))*((x[:n] - y[:n])**2)

# Scaled squared difference between the index labels of `y_m_median` and the
# smoothed sampled values.
Phi_alpha = phi(y_m_median_soft, x=y_m_median.index, y=y_m_median_soft)

# Plot the result against the same index labels.
plot(Phi_alpha, x=y_m_median.index, y=Phi_alpha, title='y = Phi*alpha + epsilon', xLabel='n = ' + str(Phi_alpha.count()), yLabel="valores optimizados + error")

In [None]:
#LS = ||y - phi*alpha ||^2

def LS(data, y, phi_trans):
    """Return the elementwise squared residual ``|y - phi_trans|**2``.

    Args:
        data: Only its length is used; it sets how many leading entries of
            ``y`` and ``phi_trans`` take part.
        y: Sliceable sequence supporting elementwise arithmetic.
        phi_trans: Sliceable sequence supporting elementwise arithmetic.

    Returns:
        One squared residual per entry (no summation is performed).
    """
    # The original looped over every prefix length and kept only the last
    # (full-length) result; computing that one directly is equivalent.
    n = len(data)
    return (np.abs(y[:n] - phi_trans[:n]))**2

# Squared residual between the smoothed sampled values and `Phi_alpha`.
alpha_min = LS(y_m_median_soft, y=y_m_median_soft, phi_trans=Phi_alpha)
# Plot the residuals against the index labels of `y_m_median`.
plot(alpha_min, x=y_m_median.index, y=alpha_min, title='y = LS', xLabel='n = ' + str(alpha_min.count()), yLabel="minimos cuadrados")

In [None]:
# Column-vector views of the sampled dates and of the sampled index labels,
# plus the smoothed targets as a flat array.
X_date = np.array(X_m).reshape(-1,1)
X = np.array(y_m_median.index).reshape(-1,1)
y = np.array(y_m_median_soft)

## Starting point for the optimisers, derived from the Ridge predictions
# The original wrapped this assignment in a loop whose body never used the
# loop index; a single evaluation produces the same `theta`.
# NOTE(review): `theta` has one entry per sampled point, while the gradient
# functions only read theta[0] and theta[1] - confirm this is intended.
theta = y_m_median_soft-(coef1*j)


In [None]:
# Arguments of the objective function: `kappa` is read by grad_exp (and as
# `sigma` by grad_nu); X and y are the regression inputs and targets.
f_params={'kappa' : 0.05, 
          'X'     : X, 
          'y'     : y}

# Optimiser settings. Each optimiser reads its own step-size key
# (alpha / alphaADADELTA / alphaADAM / alphaNADAM); eta, eta1 and eta2 are
# the decay factors; batch_size is only read by SGD.
# NOTE(review): batch_size is hard-coded to 49 - presumably the number of
# sampled points; verify against len(X).
gd_params = {'alpha'          : 0.95, 
             'alphaADADELTA'  : 0.7,
             'alphaADAM'      : 0.95,
             'alphaNADAM'     : 0.95,
             'nIter'          : 50,
             'batch_size'     : 49,
             'eta'            : 0.9,
             'eta1'           : 0.9,
             'eta2'           : 0.999}

# Alternative parameter set built on the sampled dates and the midpoint
# series; no use of it is visible in this file.
ixhel_params = {'sigma': 0.2,
                'X': X_m,
                'y': y_m_median}


In [None]:
#Gradientes
def grad_quadratic(theta, f_params):
    """Row-wise sum of the residual terms of the line ``theta[0]*X + theta[1]``.

    Args:
        theta: Indexable; only ``theta[0]`` and ``theta[1]`` are read.
        f_params: Mapping with the 2-D arrays ``'X'`` and ``'y'``.

    Returns:
        1-D array: for each row, the sum over the concatenated columns
        ``[X * residual, residual]``.
    """
    inputs, targets = f_params['X'], f_params['y']
    residual = theta[0]*inputs + theta[1] - targets
    # NOTE(review): both the concatenation and the reduction use axis=1, so
    # the result has one entry per row rather than one per parameter - confirm.
    stacked = np.concatenate((inputs*residual, residual), axis=1)
    return np.sum(stacked, axis=1)

def grad_exp(theta, f_params):
    """Row-wise mean of the residual terms, damped by ``exp(-kappa*err**2)``.

    Args:
        theta: Indexable; only ``theta[0]`` and ``theta[1]`` are read.
        f_params: Mapping with the scalar ``'kappa'`` and the 2-D arrays
            ``'X'`` and ``'y'``.

    Returns:
        1-D array: for each row, the mean over the concatenated columns
        ``[X * w, w]`` where ``w = err * exp(-kappa * err**2)``.
    """
    kappa = f_params['kappa']
    inputs = f_params['X']
    targets = f_params['y']
    residual = theta[0]*inputs + theta[1] - targets
    # Large residuals are down-weighted by the exponential factor.
    weighted = residual*np.exp(-kappa*residual**2)
    # NOTE(review): both the concatenation and the reduction use axis=1, so
    # the result has one entry per row rather than one per parameter - confirm.
    stacked = np.concatenate((inputs*weighted, weighted), axis=1)
    return np.mean(stacked, axis=1)

def grad_nu(theta, f_params):
    """Mean of ``-(x[-1] - y[:-1])**2 / (2*sigma**2)``.

    Args:
        theta: Not used by the function.
        f_params: Mapping with ``'kappa'`` (used here as ``sigma``) and the
            sequences ``'X'`` and ``'y'``.

    Returns:
        Array with the mean over axis 0 of the scaled squared differences
        between the last entry of ``X`` and every entry of ``y`` but the last.

    Raises:
        ValueError: If ``f_params['X']`` is empty.
    """
    sigma = f_params['kappa']
    x = f_params['X']
    y = f_params['y']
    if len(x) == 0:
        # The original loop never ran in this case and failed on an unbound
        # local; fail with an explicit message instead.
        raise ValueError("grad_nu needs at least one sample in f_params['X']")
    # The original iterated over every index but kept only the last
    # iteration's value, and read the module-level `X` instead of the local
    # `x` taken from f_params. Evaluate that last iteration directly.
    last = len(x) - 1
    phi_alpha = (-1/(2*(sigma**2)))*((x[last] - y[:last])**2)
    gradient = np.array(phi_alpha).reshape(-1, 1)
    return np.mean(gradient, axis=0)

In [None]:
#Descenso de gradiente estocástico

def SGD(theta=[], grad=None, gd_params=[], f_params=[]):
    """Stochastic gradient descent with a fixed step size.

    Args:
        theta: Starting parameters.
        grad: Callable ``grad(theta, f_params=...)`` returning the step direction.
        gd_params: Mapping with ``'batch_size'``, ``'nIter'`` and ``'alpha'``.
        f_params: Mapping with ``'kappa'`` and the arrays ``'X'`` (2-D) and ``'y'``.

    Returns:
        Array stacking the parameters obtained after each iteration.
    """
    n_rows, _n_cols = f_params['X'].shape
    draw_size = gd_params['batch_size']
    n_steps = gd_params['nIter']
    step = gd_params['alpha']
    history = []
    for _ in range(n_steps):
        # Row indices drawn uniformly with replacement.
        picked = np.random.randint(low=0, high=n_rows, size=draw_size, dtype='int32')
        batch_X = f_params['X'][picked]
        batch_y = f_params['y'][picked]
        # Objective arguments restricted to the drawn rows.
        batch_params = {'kappa': f_params['kappa'], 'X': batch_X, 'y': batch_y}
        theta = theta - step*grad(theta, f_params=batch_params)
        history.append(theta)
    return np.array(history)

# Run SGD from `theta` using the exponentially damped gradient; the result
# stacks the parameters after each iteration.
ThetaSGD = SGD(theta=theta, grad=grad_exp, 
               gd_params=gd_params, f_params=f_params)



In [None]:
# Number of leading iterations to show.
Tmax=50
plt.figure(figsize=(20,7))
# Last column of the SGD trajectory, one dot per iteration.
# NOTE(review): the other optimiser plots use column 0 - confirm -1 is intended.
plt.plot(ThetaSGD[:Tmax,-1], '.')
plt.title('SGD')


In [None]:
#Descenso de gradiente estocástico acelerado de tipo Nesterov

def NAG(theta=[], grad=None, gd_params={}, f_params={}):
    """Gradient descent with momentum, evaluating the gradient at a look-ahead point.

    Args:
        theta: Starting parameters (array with a ``shape``).
        grad: Callable ``grad(theta, f_params=...)`` returning the gradient.
        gd_params: Mapping with ``'nIter'``, ``'alpha'`` and ``'eta'``.
        f_params: Passed through unchanged to ``grad``.

    Returns:
        Array stacking the parameters obtained after each iteration.
    """
    n_steps = gd_params['nIter']
    step = gd_params['alpha']
    momentum = gd_params['eta']
    velocity = np.zeros(theta.shape)
    trajectory = []

    for _ in range(n_steps):
        # Gradient is taken at the point reached by moving along the current
        # velocity with twice the step size.
        lookahead = theta - (2.0*step*velocity)
        velocity = grad(lookahead, f_params=f_params) + momentum*velocity
        theta = theta - (step*velocity)
        trajectory.append(theta)
    return np.array(trajectory)


In [None]:
# Run NAG from `theta` on the full data with the exponentially damped gradient.
ThetaNAG = NAG(theta=theta, grad=grad_exp, 
               gd_params=gd_params, f_params=f_params)
# Number of leading iterations to show.
Tmax=50
plt.figure(figsize=(20,7))
# First column of the trajectory, one dot per iteration.
plt.plot(ThetaNAG[ :Tmax,0], '.')
plt.title('NAG')

In [None]:
#AdaDelta

def ADADELTA(theta=[], grad=None, gd_params={}, f_params={}):
    """Gradient descent scaled by a running average of squared gradients.

    Args:
        theta: Starting parameters (array with a ``shape``).
        grad: Callable ``grad(theta, f_params=...)`` returning the gradient.
        gd_params: Mapping with ``'nIter'``, ``'alphaADADELTA'`` and ``'eta'``.
        f_params: Passed through unchanged to ``grad``.

    Returns:
        Array stacking the parameters obtained after each iteration.
    """
    tiny = 1e-10  # keeps the division finite when the average is zero
    n_steps = gd_params['nIter']
    step = gd_params['alphaADADELTA']
    decay = gd_params['eta']
    sq_avg = np.zeros(theta.shape)
    trajectory = []
    for _ in range(n_steps):
        current = grad(theta, f_params=f_params)
        # Weighted average: `decay` on the new squared gradient, the
        # remainder on the previous average.
        sq_avg = (decay*current**2) + ((1-decay)*sq_avg)
        direction = 1.0/(np.sqrt(sq_avg)+tiny)*current
        theta = theta - (step * direction)
        trajectory.append(theta)
    return np.array(trajectory)


In [None]:
# Run ADADELTA from `theta` on the full data with the exponentially damped gradient.
ThetaADADELTA = ADADELTA(theta=theta, grad=grad_exp, 
                         gd_params=gd_params, f_params=f_params)
# Upper bound on the iterations shown; slicing stops at the number of rows
# if the trajectory is shorter.
Tmax=100
plt.figure(figsize=(20,7))
# First column of the trajectory, one dot per iteration. The previous index
# `[1,:Tmax]` selected a single iterate across its components instead of the
# evolution over iterations that the sibling optimiser plots show.
plt.plot(ThetaADADELTA[:Tmax,0], '.')
plt.title('ADADELTA')

In [None]:
#ADAM

def ADAM(theta=[], grad=None, gd_params={}, f_params={}):
    """Adam: bias-corrected first and second gradient moments.

    Args:
        theta: Starting parameters (array with a ``shape``).
        grad: Callable ``grad(theta, f_params=...)`` returning the gradient.
        gd_params: Mapping with ``'nIter'``, ``'alphaADAM'``, ``'eta1'``
            and ``'eta2'``.
        f_params: Passed through unchanged to ``grad``.

    Returns:
        Array stacking the parameters obtained after each iteration.
    """
    epsilon = 1e-8  # keeps the division finite when the second moment is zero
    nIter = gd_params['nIter']
    alpha = gd_params['alphaADAM']
    eta1 = gd_params['eta1']
    eta2 = gd_params['eta2']
    p = np.zeros(theta.shape)  # running average of the gradient
    v = 0.0                    # running average of the squared gradient
    Theta = []
    # Running powers eta1**t and eta2**t used for the bias correction.
    eta1_t = eta1
    eta2_t = eta2

    for _ in range(nIter):
        g = grad(theta, f_params=f_params)
        p = eta1*p + (1.0-eta1)*g
        v = eta2*v + (1.0-eta2)*(g**2)
        pg = p/(1.-eta1_t)
        vg = v/(1.-eta2_t)
        # Only the step is scaled by sqrt(vg). The previous parenthesisation
        # divided the whole parameter vector, rescaling theta every iteration.
        theta = theta - alpha*pg/(np.sqrt(vg)+epsilon)
        eta1_t *= eta1
        eta2_t *= eta2
        Theta.append(theta)
    return np.array(Theta)

In [None]:
# Run ADAM from `theta` on the full data with the exponentially damped gradient.
ThetaADAM = ADAM(theta=theta, grad=grad_exp, 
                 gd_params=gd_params, f_params=f_params)
# Number of leading iterations to show.
Tmax=50
plt.figure(figsize=(20,7))
# First column of the trajectory, one dot per iteration.
plt.plot(ThetaADAM[:Tmax,0], '.')
plt.title('ADAM')

In [None]:
#NADAM

def NADAM(theta=[], grad=None, gd_params={}, f_params={}):
    """Adam-style update driven by an accumulated (momentum) gradient.

    Args:
        theta: Starting parameters (array with a ``shape``).
        grad: Callable ``grad(theta, f_params=...)`` returning the gradient.
        gd_params: Mapping with ``'nIter'``, ``'alphaNADAM'``, ``'eta1'``
            and ``'eta2'``.
        f_params: Passed through unchanged to ``grad``.

    Returns:
        Array stacking the parameters obtained after each iteration.
    """
    epsilon = 1e-8  # keeps the division finite when the second moment is zero
    nIter = gd_params['nIter']
    alpha = gd_params['alphaNADAM']
    eta1 = gd_params['eta1']
    eta2 = gd_params['eta2']
    p = np.zeros(theta.shape)  # accumulated gradient
    v = 0.0                    # running average of the squared gradient
    Theta = []
    # Running powers eta1**t and eta2**t used for the bias correction.
    eta1_t = eta1
    eta2_t = eta2

    for _ in range(nIter):
        # NOTE(review): the step size is added to every gradient component
        # here; this is not part of the usual Nadam formulation - confirm.
        g = grad(theta, f_params=f_params)+alpha
        # NOTE(review): `p` accumulates the raw gradient (no (1-eta1) factor)
        # but is still divided by (1-eta1_t) below - confirm the intended form.
        p = g + eta1*p
        v = eta2*v + (1.0-eta2)*(g**2)
        pg = p/(1.-eta1_t)
        vg = v/(1.-eta2_t)
        # Only the step is scaled by sqrt(vg). The previous parenthesisation
        # divided the whole parameter vector, rescaling theta every iteration.
        theta = theta - alpha*pg/(np.sqrt(vg)+epsilon)
        eta1_t *= eta1
        eta2_t *= eta2
        Theta.append(theta)
    return np.array(Theta)

In [None]:
# Run NADAM from `theta` on the full data with the exponentially damped gradient.
ThetaNADAM = NADAM(theta=theta, grad=grad_exp, 
                 gd_params=gd_params, f_params=f_params)
# Number of leading iterations to show.
Tmax=50
plt.figure(figsize=(20,7))
# First column of the trajectory, one dot per iteration.
plt.plot(ThetaNADAM[:Tmax,0], '.')
plt.title('NADAM')

In [None]:
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D

mpl.rcParams['legend.fontsize'] = 14

fig = plt.figure(figsize=(15,15))
# Create the 3-D axes explicitly: passing keyword arguments to Figure.gca()
# is no longer supported by current Matplotlib releases.
ax = fig.add_subplot(111, projection='3d')
# Iteration counter as a column vector, used as the vertical coordinate.
nIter=np.expand_dims(np.arange(ThetaSGD.shape[0]),-1)
# Upper bound on the iterations drawn; slicing stops at the number of rows.
Tmax=100

# One curve per optimiser.
# NOTE(review): the x and y coordinates both use column 0, although the axis
# labels below name two different parameters - confirm which columns are meant.
ax.plot(ThetaSGD[:Tmax,0], ThetaSGD[:Tmax,0], nIter[:Tmax,0], label='SGD')
ax.plot(ThetaNAG[:Tmax,0], ThetaNAG[:Tmax,0], nIter[:Tmax,0], label='NAG')
ax.plot(ThetaADADELTA[:Tmax,0], ThetaADADELTA[:Tmax,0], nIter[:Tmax,0], label='ADADELTA')
ax.plot(ThetaADAM[:Tmax,0], ThetaADAM[:Tmax,0], nIter[:Tmax,0], label='ADAM')
# The NADAM curve previously took its y coordinate from ThetaADAM (copy-paste
# slip); both coordinates now come from the NADAM trajectory.
ax.plot(ThetaNADAM[:Tmax,0], ThetaNADAM[:Tmax,0], nIter[:Tmax,0], label='NADAM')
ax.legend()
ax.set_title(r'Trayectorias los parámetros calculados con distintos algoritmos')
ax.set_xlabel(r'$\theta_1$')
ax.set_ylabel(r'$\theta_0$')
ax.set_zlabel('Iteración')
plt.show()