In [None]:
import torch
import matplotlib.pyplot as plt
import time

# I. Basic gradient descent

In [None]:
# Hyperparameters
alpha=0.5      # Exponent of the Lp ball the points are pulled towards

def Loss(q):
  """Total squared distance between the points and their rescaling to unit alpha-norm.

  Parameters
  ----------
  q : torch.Tensor, 2-D
      One point per row; the alpha-norm is taken along dim 1.

  Returns
  -------
  torch.Tensor (0-D)
      sum_i || q_i - q_i / ||q_i||_alpha ||_2^2, where
      ||x||_alpha = ( sum_j |x_j|**alpha )**(1/alpha) and `alpha` is the
      module-level hyperparameter.

  Notes
  -----
  A row whose alpha-norm is zero is divided by zero; no guard is applied here.
  """
  alpha_norms = torch.sum( torch.abs(q)**alpha, dim=1, keepdim=True )**(1/alpha)
  residual = q - q/alpha_norms
  # Explicit sum of squares: torch.norm is deprecated, and squaring a norm
  # takes a square root only to undo it.
  return torch.sum( residual**2 )

N = 200            # Number of points
T = 50             # Number of gradient-descent iterations
dt = 0.05          # Step size
seed = 0           # Seed of the initial perturbation
plot_flag = False  # Set to True to draw the points at every iteration

# Initial state: zero momenta, positions on the unit circle plus Gaussian noise.
# theta contains both 0 and 2*pi, so the first and last points start at the same angle.
p = torch.zeros( [N, 2] )
theta = 2*torch.pi*torch.linspace(0,1, N)
q = torch.vstack( [torch.cos(theta), torch.sin(theta)] ).T
# A dedicated generator makes the run reproducible without touching the global RNG.
rng = torch.Generator().manual_seed(seed)
q = q+0.1*torch.randn( [N,2], generator=rng )
#
print("Shapes of positions and momenta vectors:")
print( q.shape )
print( p.shape )

print("Iterating...")
tic = time.perf_counter()
for t in range(T):
  if plot_flag:
    # NumPy views are only needed for drawing, so build them only when plotting.
    np_q = q.detach().numpy()
    np_p = p.detach().numpy()
    plt.scatter( np_q[:,0], np_q[:,1] )
    plt.plot( np_q[:,0], np_q[:,1], 'r' )
    for i in range(N):
      plt.arrow(x=np_q[i,0], y=np_q[i,1], dx=np_p[i,0]*0.2, dy=np_p[i,1]*0.2, width=.01)
    plt.show()
  # Autograd for Loss: fresh leaf each iteration, gradient returned directly
  # (no .grad accumulation to reset).
  q = q.detach().requires_grad_(True)
  (dL,) = torch.autograd.grad( Loss(q), q )
  # Making step. detach() replaces torch.tensor(tensor), which copy-constructs
  # and warns on every call.
  q = q.detach() - dt*dL
  p = -dL   # Only used to draw the descent direction
# end for
toc = time.perf_counter()
print( "Total time: ", toc-tic)

# II. Gradient descent with line search



In [None]:
# Hyperparameters
alpha=0.5      # Exponent of the Lp ball the points are pulled towards

def Loss_vectorized(q):
  """Per-point squared distance between each point and its rescaling to unit alpha-norm.

  Parameters
  ----------
  q : torch.Tensor, 2-D
      One point per row; the alpha-norm is taken along dim 1.

  Returns
  -------
  torch.Tensor, 1-D (one entry per row of q)
      || q_i - q_i / ||q_i||_alpha ||_2^2, where
      ||x||_alpha = ( sum_j |x_j|**alpha )**(1/alpha) and `alpha` is the
      module-level hyperparameter.

  Notes
  -----
  A row whose alpha-norm is zero is divided by zero; no guard is applied here.
  """
  alpha_norms = torch.sum( torch.abs(q)**alpha, dim=1, keepdim=True )**(1/alpha)
  residual = q - q/alpha_norms
  # Explicit sum of squares: torch.norm is deprecated, and squaring a norm
  # takes a square root only to undo it.
  return torch.sum( residual**2, dim=1 )

N = 200            # Number of points
T = 50             # Number of iterations
dt = 0.05          # Base step size, scaled per point by step_damping
seed = 0           # Seed of the initial perturbation
plot_flag = False  # Set to True to draw the points at every iteration

# Initial state: zero momenta, positions on the unit circle plus Gaussian noise.
p = torch.zeros( [N, 2] )
theta = 2*torch.pi*torch.linspace(0,1, N)
q = torch.vstack( [torch.cos(theta), torch.sin(theta)] ).T
# A dedicated generator makes the run reproducible without touching the global RNG.
rng = torch.Generator().manual_seed(seed)
q = q+0.1*torch.randn( [N,2], generator=rng )
#
print("Shapes of positions and momenta vectors:")
print( q.shape )
print( p.shape )

print("Iterating...")
tic = time.perf_counter()
Losses = None                      # Per-point losses at the positions the last step started from
step_damping = torch.ones( [N] )   # Per-point multiplier of dt, halved each time a step is rejected
for t in range(T):
  if plot_flag:
    # NumPy views are only needed for drawing, so build them only when plotting.
    np_q = q.detach().numpy()
    np_p = p.detach().numpy()
    plt.scatter( np_q[:,0], np_q[:,1] )
    plt.plot( np_q[:,0], np_q[:,1], 'r' )
    for i in range(N):
      plt.arrow(x=np_q[i,0], y=np_q[i,1], dx=np_p[i,0]*0.2, dy=np_p[i,1]*0.2, width=.01)
    plt.xlim( (-1.5, 1.5) )
    plt.ylim( (-1.5, 1.5) )
    plt.show()
  # Line search: judge the step taken at the end of the previous iteration.
  if Losses is not None:
    with torch.no_grad():   # Pure evaluation, no graph needed
      trial_losses = Loss_vectorized(q)
      # A step is rejected when the loss went up. Written as "not (<=)" so that a
      # NaN trial loss also counts as rejected instead of being silently kept.
      rejected = ~( trial_losses <= Losses )
      step_damping = torch.where( rejected, 0.5*step_damping, step_damping )
      # torch.where selects rows; a 0/1 blend would turn 0*nan into nan.
      q = torch.where( rejected[:,None], q_old, q )
  # Autograd for Loss at the accepted positions
  q = q.detach().requires_grad_(True)
  Losses = Loss_vectorized(q)
  (dL,) = torch.autograd.grad( torch.sum( Losses ), q )
  Losses = Losses.detach()
  # Making step with line search. detach() replaces torch.tensor(tensor), which
  # copy-constructs and warns on every call.
  q_old = q.detach()
  q = q_old - dt*step_damping[:,None]*dL
  p = -dL   # Only used to draw the descent direction
# end for
toc = time.perf_counter()
print( "Total time: ", toc-tic)

In [None]:
# Hyperparameters
alpha=0.5      # For Lp balls
k_spring = 1.0 # Force constant in Hooke's law
tresh    = 2.0 # Treshold for gradient

def Hamiltonian(p,q):
  kinetic_energy = torch.norm(p)**2
  potential      = torch.norm( q-torch.roll(q, 1, 0) )**2
  return 0.5*kinetic_energy + 0.5*k_spring*potential

def Loss(q):
  """Total squared distance between the points and their rescaling to unit alpha-norm.

  Parameters
  ----------
  q : torch.Tensor, 2-D
      One point per row; the alpha-norm is taken along dim 1.

  Returns
  -------
  torch.Tensor (0-D)
      sum_i || q_i - q_i / ||q_i||_alpha ||_2^2, where
      ||x||_alpha = ( sum_j |x_j|**alpha )**(1/alpha) and `alpha` is the
      module-level hyperparameter.

  Notes
  -----
  A row whose alpha-norm is zero is divided by zero; no guard is applied here.
  """
  alpha_norms = torch.sum( torch.abs(q)**alpha, dim=1, keepdim=True )**(1/alpha)
  residual = q - q/alpha_norms
  # Explicit sum of squares: torch.norm is deprecated, and squaring a norm
  # takes a square root only to undo it.
  return torch.sum( residual**2 )
  
N = 200            # Number of points
T = 50             # Number of iterations
dt = 0.05          # Step size
seed = 0           # Seed of the initial perturbation
plot_flag = True   # Draw the points and momenta at every iteration
# When True, the position update gets an extra component perpendicular to the
# loss gradient, weighted by <dL, p - dL>. Off by default: plain descent on Loss.
use_hybrid_step = False

# Initial state: zero momenta, positions on the unit circle plus Gaussian noise.
p = torch.zeros( [N, 2] )
theta = 2*torch.pi*torch.linspace(0,1, N)
q = torch.vstack( [torch.cos(theta), torch.sin(theta)] ).T
# A dedicated generator makes the run reproducible without touching the global RNG.
rng = torch.Generator().manual_seed(seed)
q = q+0.1*torch.randn( [N,2], generator=rng )
#
print("Shapes of positions and momenta vectors:")
print( q.shape )
print( p.shape )

print("Iterating...")
tic = time.perf_counter()
for t in range(T):
  if plot_flag:
    # NumPy views are only needed for drawing, so build them only when plotting.
    np_q = q.detach().numpy()
    np_p = p.detach().numpy()
    plt.scatter( np_q[:,0], np_q[:,1] )
    plt.plot( np_q[:,0], np_q[:,1], 'r' )
    for i in range(N):
      plt.arrow(x=np_q[i,0], y=np_q[i,1], dx=np_p[i,0]*0.2, dy=np_p[i,1]*0.2, width=.01)
    plt.show()
  # Each gradient is taken with torch.autograd.grad so the two stay separate.
  # Calling .backward() twice on the same leaf accumulates into one q.grad
  # tensor, which would make both names below hold dH/dq + dL/dq.
  q = q.detach().requires_grad_(True)
  # Autograd for Hamiltonian
  (dH_over_dq,) = torch.autograd.grad( Hamiltonian(p,q), q )
  # Autograd for Loss
  (dL,) = torch.autograd.grad( Loss(q), q )
  # Hamiltonian step
  dp_Hamiltonian = -dH_over_dq
  # Position step
  if use_hybrid_step:
    dq_Hamiltonian = p - dL
    scalar_products = torch.sum( dL * dq_Hamiltonian, dim=1)
    dL_perp  = torch.vstack( [-dL[:,1], dL[:,0]] ).T   # dL rotated by 90 degrees
    dq_total = -dL + 0.01*dL_perp*scalar_products[:,None]
  else:
    dq_total = -dL
  # Another variant the original cell kept commented out: zero the step of
  # every point whose gradient norm is not below `tresh`.
  # Making step. detach() replaces torch.tensor(tensor), which copy-constructs
  # and warns on every call.
  q = q.detach() + dt*dq_total
  p = p + dt*dp_Hamiltonian
# end for
toc = time.perf_counter()
print( "Total time: ", toc-tic)