# Reference Links



*   https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module
*   https://pytorch.org/docs/stable/generated/torch.nn.BatchNorm1d.html#torch.nn.BatchNorm1d
* https://pytorch.org/docs/stable/generated/torch.nn.Linear.html#torch.nn.Linear
* https://pytorch.org/docs/stable/generated/torch.nn.ReLU.html#torch.nn.ReLU
* https://pytorch.org/docs/stable/generated/torch.nn.Identity.html#torch.nn.Identity
*  https://pytorch.org/docs/stable/generated/torch.nn.Sequential.html#torch.nn.Sequential
* https://pytorch.org/docs/stable/generated/torch.cat.html#torch.cat


In [None]:
class FFN(nn.Module):
    """Fully connected feed-forward network assembled as an ``nn.Sequential``.

    Every ``Linear`` and ``BatchNorm1d`` layer is created with
    ``dtype=torch.float64``.

    Args:
        sizes: layer widths; ``sizes[0]`` is the input width and each
            following entry is the output width of one ``nn.Linear``.
        normalize_input: prepend a ``BatchNorm1d`` over the input features.
        normalize_hidden: add a ``BatchNorm1d`` after each hidden ``Linear``.
        normalize_output: add a ``BatchNorm1d`` after the final ``Linear``.
        activation: zero-argument factory for the hidden activation. If
            calling it raises ``TypeError`` (e.g. ``None`` was passed), no
            activation is added and a message is printed.
        output_activation: zero-argument factory for the module appended
            after the final layer.
    """

    def __init__(self, sizes, normalize_input=False, normalize_hidden=False,
                 normalize_output=False, activation=nn.ReLU,
                 output_activation=nn.Identity):
        super().__init__()

        final_idx = len(sizes) - 1
        modules = []

        if normalize_input:
            modules.append(nn.BatchNorm1d(sizes[0], dtype=torch.float64))

        width_pairs = zip(sizes[:-1], sizes[1:])
        for idx, (fan_in, fan_out) in enumerate(width_pairs, start=1):
            modules.append(nn.Linear(fan_in, fan_out, dtype=torch.float64))

            if idx == final_idx:
                # Output layer: optional normalisation, then output activation.
                if normalize_output:
                    modules.append(
                        nn.BatchNorm1d(fan_out, dtype=torch.float64))
                modules.append(output_activation())
                continue

            # Hidden layer: optional normalisation, then hidden activation.
            if normalize_hidden:
                modules.append(nn.BatchNorm1d(fan_out, dtype=torch.float64))

            try:
                hidden_activation = activation()
            except TypeError:
                # Non-callable activation: leave the layer purely linear.
                print('No layer added')
            else:
                modules.append(hidden_activation)

        self.net = nn.Sequential(*modules)

    def forward(self, *args):
        """Concatenate all inputs along the last dim and run the network."""
        return self.net(torch.cat(args, -1))
        
def toggle(net: nn.Module,  to: bool):
    """Freeze (``to=False``) or unfreeze (``to=True``) all parameters of ``net``.

    Sets ``requires_grad`` in place on every parameter yielded by
    ``net.parameters()``.
    """
    for param in net.parameters():
        param.requires_grad_(to)

In [None]:
def init_weights(m, var='', verbose=False):
    """Initialise the weights (and zero the bias) of an ``nn.Linear`` module.

    Modules that are not ``nn.Linear`` are left untouched, so the function
    can be handed to ``nn.Module.apply``.

    Args:
        m: the module to initialise.
        var: initialisation scheme. One of ``'ones'``, ``'zeros'``,
            ``'uniform'``, ``'normal'`` (mean 0.0, std 0.04) or ``'Sigmoid'``
            (Xavier uniform with sigmoid gain). Any other value selects
            Xavier uniform with ReLU gain.
        verbose: print which scheme was applied.
    """
    if not isinstance(m, nn.Linear):
        return

    weight = m.weight.data
    if var == 'ones':
        if verbose:
            print('Init weights with ones')
        nn.init.ones_(weight)
    elif var == 'zeros':
        if verbose:
            print('Init weights with zeros')
        nn.init.zeros_(weight)
    elif var == 'uniform':
        if verbose:
            print('Init weights with uniform')
        nn.init.uniform_(weight)
    elif var == 'normal':
        if verbose:
            print('Init weights with normal')
        nn.init.normal_(weight, 0.0, 0.04)
    elif var == 'Sigmoid':
        if verbose:
            print('Init weights with xavier uniform sigmoid')
        nn.init.xavier_uniform_(weight,
                                gain=nn.init.calculate_gain('sigmoid'))
    else:
        if verbose:
            print('Init weights with xavier uniform relu')
        nn.init.xavier_uniform_(weight, gain=nn.init.calculate_gain('relu'))

    # A Linear layer built with bias=False has bias set to None; skip it
    # instead of crashing inside nn.init.zeros_.
    if m.bias is not None:
        nn.init.zeros_(m.bias)

In [None]:
def U(x, profit, kwargs):
    """Evaluate the utility selected by ``kwargs['kind']``.

    Args:
        x: 2-D state tensor; columns 3, 4 and 5 are read (as ``n``, ``g``
            and ``m`` respectively).
        profit: profit tensor the profit-based utilities are computed from.
        kwargs: dict with key ``'kind'`` and, for the penalty and
            exponential utilities, ``'penalty_factor'``.

    Returns:
        The utility tensor. For an unrecognised ``kind`` a message is
        printed and ``profit`` is returned unchanged.
    """
    n, g, m = x[:, 3], x[:, 4], x[:, 5]
    kind = kwargs['kind']

    if kind == 'Linear':
        return profit

    if kind == 'Linear Wealth':
        return g + m

    if kind == 'Linear Wealth Inventory Penalty':
        return g + m - kwargs['penalty_factor'] * n**2.

    if kind == 'Linear Profit Inventory Penalty':
        return profit - kwargs['penalty_factor'] * n**2.

    if kind == 'Exponential':
        utility = torch.zeros_like(profit)
        a = kwargs['penalty_factor']
        losses = profit.lt(0)
        gains = profit.ge(0)
        # Losses pass through linearly; gains saturate as 1 - exp(-a * p).
        utility[losses] = profit[losses]
        utility[gains] = 1 - torch.exp(-a * profit[gains])
        return utility

    print('No utility specified')
    return profit

In [None]:
def GBM_step(sigma: float, tau: float, mu: float, s: torch.Tensor, **kwargs):
    """
    Advance ``s`` by one geometric-Brownian-motion step of length ``tau``.

    Computes ``s * exp((mu - sigma^2 / 2) * tau + sigma * sqrt(tau) * Z)``
    with ``Z`` drawn from a standard normal of the same shape as ``s``.
    Extra keyword arguments are accepted and ignored.
    """
    drift_term = (mu - 0.5 * sigma**2) * tau
    shock_scale = sigma * np.sqrt(tau)
    log_return = drift_term + shock_scale * torch.randn_like(s)
    return s * torch.exp(log_return)

# Reference links

*   https://pytorch.org/docs/stable/generated/torch.randn.html#torch.randn
*   https://pytorch.org/docs/stable/generated/torch.clamp.html#torch.clamp



In [None]:
def sample_x0(batch_size, dim, sigma, mu, initial_price,
              reference_variation, reference_min, ga_var, ga_mean, ma_var,
              M_i, T, x0_kwargs, device='cpu' ):
    """Sample a random batch of initial states as a (batch_size, dim) float64 tensor.

    Column layout of the returned tensor, as written by this function:
      0: reference price: ratio of the two raw pool columns plus Gaussian
         noise scaled by ``reference_variation``, floored at ``reference_min``
      1: pool price (ratio of the two raw pool columns)
      2: pool sum (sum of the two raw pool columns)
      3: position, ``round(sqrt(T) * N(0, 1))``
      4: general account value
      5: margin, floored at 0
      6: fee, uniform on [0, ``x0_kwargs['fee_range']``) -- only if dim > 6
    Any column past index 6 keeps its initial lognormal draw.

    ``x0_kwargs`` selects how column 4 is drawn: if it contains
    ``'mean_gen_int'`` the keys ``'var_gen_int'``, ``'mean_gen_int'`` and
    ``'min_gen_int'`` are used, otherwise ``ga_var`` / ``ga_mean``.

    NOTE: columns are overwritten in place and later columns read earlier
    ones, so the statement order (and the order of random draws) matters.
    """
  
    assert dim >= 6

    # Standard-normal draws of shape (batch_size, dim)
    z = torch.randn(batch_size, dim, device=device, dtype = torch.float64)

    # Fill every column with a lognormal draw exp((mu - sigma^2/2) + sigma*z).
    # Column 2 keeps this raw value as the first pool quantity (A_S in the
    # original notes) until it is replaced by the pool sum further down.
    x0 = torch.exp((mu - 0.5 * sigma**2) + sigma * z).to(torch.float64) # lognormal

    # Second pool quantity (A_B in the original notes): its own lognormal
    # draw scaled by the initial price
    x0[:,1] = x0[:,1] * initial_price

    # Reference price: pool ratio A_B / A_S plus zero-mean Gaussian noise
    # with scale reference_variation, floored at reference_min
    x0[:,0] =  torch.clamp(x0[:,1] / x0[:,2] +\
                           reference_variation * torch.randn_like(x0[:,0]),
                           min=reference_min)

    # Random initial position: normal with standard deviation sqrt(T),
    # rounded to the nearest integer (T is presumably the simulated horizon
    # -- confirm with caller)
    x0[:,3] = torch.round(np.sqrt(T) * torch.randn_like(x0[:,0], dtype = torch.float64))

    # Both derived quantities are computed from the raw pool columns before
    # either column is overwritten
    pool_price = x0[:,1] / x0[:,2]
    pool_sum = x0[:,1] + x0[:,2]

    x0[:,1] = pool_price
    x0[:,2] = pool_sum

    # Starting general account value, floored at a multiple of the price.
    # Default branch: N(ga_mean, ga_var) noise floored at reference price * M_i.
    # Alternative branch: scaled by pool price * M_i and driven by x0_kwargs.
    if 'mean_gen_int' not in x0_kwargs.keys():
      x0[:,4] = torch.clamp(ga_var*torch.randn_like(x0[:,4]) + ga_mean, min=x0[:,0]*M_i)
    else:
      x0[:,4] = torch.clamp(x0_kwargs['var_gen_int']*torch.randn_like(x0[:,1])*x0[:,1]*M_i + x0[:,1]*M_i*x0_kwargs['mean_gen_int'], min=x0[:,1]*M_i*x0_kwargs['min_gen_int'])

    # Margin: Gaussian noise (scale ma_var) around M_i * pool price * |position|,
    # floored at 0
    x0[:,5] = torch.clamp(ma_var*torch.randn_like(x0[:,5]) +\
                          M_i * x0[:,1] * torch.abs(x0[:,3]), min=0.)

    # Optional fee column: uniform on [0, fee_range)
    if dim > 6:
      x0[:,6] = torch.rand_like(x0[:,6]) * x0_kwargs['fee_range']
      #torch.clamp(x0_kwargs['var_fees']*torch.randn_like(x0[:,6]) + x0_kwargs['mean_fees'], min=0.)

    return x0

In [None]:
class SimulationSampler():
    """Produce training states by flattening simulated random-agent paths.

    A ``RandomAgent`` is simulated for ``max_steps`` steps from states drawn
    by ``sampler``; every visited state becomes one row of the batch.

    NOTE: only the random agent is simulated. ``ideal_agent_percentage`` is
    stored but not used by this class.
    """

    def __init__(self, max_steps, utility, market_args, s_args, sampler,
                 random_agent_percentage=1.0,
                 ideal_agent_percentage=0.0, fee_range=0, device='cpu'):
        self.max_steps = max_steps
        self.sampler = sampler
        self.s_args = s_args
        self.fee_range = fee_range
        self.random_agent_percentage = random_agent_percentage
        self.ideal_agent_percentage = ideal_agent_percentage
        self.random_agent = RandomAgent(utility=utility, device=device,
                                        **market_args)

    def sample_x0(self, n_batch):
        """Return simulated states flattened to shape (-1, 6) or (-1, 7).

        ``n_batch`` must be a multiple of ``max_steps``. The number of
        simulations run is ``n_batch / max_steps`` scaled by
        ``random_agent_percentage``.
        """
        assert n_batch % self.max_steps == 0

        n_simulations = int(n_batch / self.max_steps)
        n_random = int(n_simulations * self.random_agent_percentage)
        results = simulate_strategies_basic(
            {'RandomAgent': self.random_agent},
            n_random,
            self.max_steps,
            self.sampler,
            **self.s_args)

        states = torch.stack(results[0].path, 1)
        if not self.random_agent.variable_fees:
            return torch.reshape(states, (-1, 6))

        states = torch.reshape(states, (-1, 7))
        # Re-draw the fee column so the flattened states carry varied fees:
        # the training agent sees each state in isolation, and consecutive
        # states of one path would otherwise share similar fees.
        states[:, 6] = torch.rand_like(states[:, 6]) * self.fee_range
        return states


In [None]:
def save_state(fileheader, agent, parameters, bellman_loss, bellmax_approx,
               device='cpu', results_path='./'):
    """Write a training checkpoint and its parameters to ``results_path``.

    Two files are written:
      * ``<fileheader>_state.pth.tar``: network, optimizer and scheduler
        state dicts of ``agent``, the two Bellman histories and the CPU/GPU
        RNG states (via ``torch.save``);
      * ``<fileheader>_parameters.json``: ``parameters`` dumped as JSON.

    Args:
        fileheader: common filename prefix for both files.
        agent: object exposing ``v``, ``C``, ``scheduler_C``,
            ``optimizer_C``, ``scheduler_v`` and ``optimizer_v``, each with
            a ``state_dict()`` method.
        parameters: JSON-serialisable run parameters.
        bellman_loss: stored under the key ``"bellman_loss"``.
        bellmax_approx: stored under the key ``"bellman_approx"``. The
            parameter name is kept as-is for backward compatibility.
        device: device whose CUDA RNG state is captured when CUDA is
            available.
        results_path: directory the files are written to.
    """
    results_filename = fileheader + "_state.pth.tar"
    parameters_filename = fileheader + '_parameters.json'

    cpu_rng_state = torch.get_rng_state()
    if torch.cuda.is_available():
        gpu_rng_state = torch.cuda.get_rng_state(device)
    else:
        gpu_rng_state = None

    # Debug check: draw one number right after the RNG snapshot so it can be
    # compared with the first draw after the state is restored. Note that
    # this advances the CPU generator by one draw.
    print(torch.rand(1))

    state = {
        "v": agent.v.state_dict(),
        "C": agent.C.state_dict(),
        "Scheduler_C": agent.scheduler_C.state_dict(),
        "Optimizer_C": agent.optimizer_C.state_dict(),
        "Scheduler_v": agent.scheduler_v.state_dict(),
        "Optimizer_v": agent.optimizer_v.state_dict(),
        "bellman_loss": bellman_loss,
        # Use the argument actually passed in; the body previously read an
        # undefined name `bellman_approx`.
        "bellman_approx": bellmax_approx,
        "cpu_random": cpu_rng_state,
        "gpu_random": gpu_rng_state,
    }
    torch.save(state, os.path.join(results_path, results_filename))

    with open(os.path.join(results_path, parameters_filename), 'w') as fp:
        json.dump(parameters, fp)

In [None]:
def load_agent(state_filepath, parameters_filepath, device='cpu'):
    """Rebuild an ``ActorCritic`` agent and restore it from a checkpoint.

    The agent is constructed from the JSON parameters file, then the
    checkpoint at ``state_filepath`` is loaded into it (networks,
    optimizers, schedulers and RNG states).

    Args:
        state_filepath: path of the checkpoint written by ``torch.save``.
        parameters_filepath: path of the JSON parameters file; must contain
            ``x0_args``, ``s_args``, ``market_args``, ``utility_args``,
            ``C_args``, ``v_args``, ``learning_args``, ``sim_samp_args`` and
            ``random_seed``.
        device: device the checkpoint tensors are mapped to and the agent is
            built on.

    Returns:
        ``(agent, parameters, bellman_loss, bellman_approx)``. If the
        checkpoint cannot be loaded, the freshly constructed (untrained)
        agent is returned and both Bellman entries are ``None``.
    """
    with open(parameters_filepath, 'r') as fp:
        parameters = json.load(fp)
    x0_args = parameters['x0_args']
    s_args = parameters['s_args']
    market_args = parameters['market_args']
    utility_args = parameters['utility_args']
    C_args = parameters['C_args']
    v_args = parameters['v_args']
    learning_args = parameters['learning_args']
    sim_samp_args = parameters['sim_samp_args']

    random_seed = parameters['random_seed']
    torch.manual_seed(random_seed)

    x0_sampler = partial(sample_x0, device=device, **x0_args)
    utility = partial(U, kwargs=utility_args)
    sim_samp = SimulationSampler(utility=utility, market_args=market_args,
                                 s_args=s_args, sampler=x0_sampler,
                                 device=device,
                                 **sim_samp_args)

    modified_x0_sampler = partial(sim_samp.sample_x0)
    agent = ActorCritic(utility=utility, x0_sampler=modified_x0_sampler,
                        device=device, **learning_args, **C_args, **v_args,
                        **market_args)

    # Defaults for the failure path, so the return statement below never
    # hits an unbound name when the checkpoint cannot be read.
    bellman_loss = None
    bellman_approx = None

    try:
        state = torch.load(state_filepath, map_location=device)
    except Exception as err:
        # Best effort: keep the untrained agent, but say why loading failed.
        print("I could not load a pre-trained model")
        print(err)
    else:
        print('success')
        agent.v.load_state_dict(state['v'])
        agent.C.load_state_dict(state['C'])
        agent.optimizer_C.load_state_dict(state['Optimizer_C'])
        agent.optimizer_v.load_state_dict(state['Optimizer_v'])
        agent.scheduler_C.load_state_dict(state['Scheduler_C'])
        agent.scheduler_v.load_state_dict(state['Scheduler_v'])
        bellman_loss = state['bellman_loss']
        bellman_approx = state['bellman_approx']

        # RNG states must be CPU byte tensors regardless of map_location.
        torch.set_rng_state(state['cpu_random'].cpu())

        if torch.cuda.is_available():
            if state['gpu_random'] is not None:
                torch.cuda.set_rng_state(state['gpu_random'].cpu())
            else:
                print('No GPU state saved')
        else:
            print('GPU not available')

    # Debug check: first draw after restoring the RNG state.
    print(torch.rand(1))
    return agent, parameters, bellman_loss, bellman_approx

In [None]:
def load_agents_from_file_headers(file_headers, agent_names, results_path = './numerical_results'):
    """Load several saved agents and key everything by agent name.

    For each file header, ``<header>_state.pth.tar`` and
    ``<header>_parameters.json`` under ``results_path`` are passed to
    ``load_agent``. Each loaded agent has its ``v`` and ``C`` networks put
    in eval mode and ``verbose`` set to ``False``.

    Args:
        file_headers: filename prefixes, one per agent.
        agent_names: dictionary keys, same length as ``file_headers``.
        results_path: directory containing the saved files.

    Returns:
        Four dicts keyed by agent name: agents, parameters, Bellman losses
        and Bellman approximations.
    """
    assert len(file_headers) == len(agent_names)

    agents, params, losses, approximations = {}, {}, {}, {}

    for name, header in zip(agent_names, file_headers):
        state_path = os.path.join(results_path, header + "_state.pth.tar")
        params_path = os.path.join(results_path, header + '_parameters.json')

        loaded, loaded_params, loss, approx = load_agent(state_path,
                                                         params_path)

        agents[name] = loaded
        params[name] = loaded_params
        losses[name] = loss
        approximations[name] = approx

        # Loaded agents are switched to inference mode.
        loaded.v.eval()
        loaded.C.eval()
        loaded.verbose = False

    return agents, params, losses, approximations