In [2]:
import torch
import numpy as np
from tqdm import tqdm as tqdm

In [58]:
dim = 2
num_servers = 3
batch_size = 15
avg_over = 100
eps_greedy = 0.5
alpha = 0.5
gamma = 0.8

def model(t):
    batch = t.shape[0]
    num_servers = t.shape[1] - 1
    dim = t.shape[2]
    
    return torch.rand((batch, num_servers))

def sample_req(n):
    return torch.rand((n, dim))
  
def compute_distance(reqs, servers):
    return ((reqs - servers) ** 2).sum(dim=-1) ** 0.5

def train():
    train_batch = sample_req(batch_size * (num_servers + 1))
    train_batch = train_batch.reshape(batch_size, (num_servers + 1), dim)
    
    q_value_old = model(train_batch)
    best_indices = q_value_old.argmax(dim=1)
    rand_indices = torch.randint_like(best_indices, low=0, high=num_servers)
    index_picking = torch.bernoulli( eps_greedy * torch.ones_like(best_indices).double() ).long()
    
    indices = (index_picking * rand_indices) + ((1. - index_picking) * best_indices)
    print(indices)
    
    new_locations = train_batch.clone().detach()
    location_to_move_to = new_locations[:, -1, :]
    # print(new_locations[:, indices, :])
    old_server_loc = new_locations[range(batch_size), indices, :]
    batch_distance = compute_distance(old_server_loc, location_to_move_to)
    new_locations[range(batch_size), indices, :] = location_to_move_to
    # new_locations = new_locations[:, :-1 ,:]
    
    new_req = sample_req(avg_over)
    
    locations_to_avg_over = new_locations[ [i for i in range(batch_size) for j in range(avg_over)], :, :]
    new_req_multiplied = new_req[[i for i in range(batch_size) for j in range(avg_over)], :]
    
    locations_to_avg_over[:, -1, :] = new_req_multiplied
    
    q_values_to_avg = model(locations_to_avg_over)
    max_q_value_to_avg = q_values_to_avg.max(dim=1)[0]
    max_q_values = max_q_value_to_avg.reshape(batch_size, avg_over).mean(dim=1)
    
    update_values = alpha * ( -batch_distance + gamma * max_q_values)
    q_value_new = q_value_old.clone().detach()
    q_value_new[range(batch_size), indices] = (1 - alpha) * q_value_new[range(batch_size), indices] + update_values
    
    print(q_value_old)
    print(q_value_new)
    loss = ((q_value_old - q_value_new) ** 2).sum()
    loss.backward()

In [59]:
train()

tensor([1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 2, 2, 2, 2])
tensor([[0.3764, 0.7767, 0.4091],
        [0.5342, 0.4344, 0.0927],
        [0.7355, 0.8481, 0.9093],
        [0.6222, 0.5061, 0.2923],
        [0.4681, 0.5915, 0.5471],
        [0.7296, 0.0865, 0.6821],
        [0.1558, 0.6061, 0.4681],
        [0.4176, 0.8549, 0.8407],
        [0.6358, 0.7394, 0.5939],
        [0.2773, 0.8519, 0.6902],
        [0.8763, 0.6501, 0.2752],
        [0.4381, 0.7223, 0.8781],
        [0.1106, 0.1261, 0.3758],
        [0.2459, 0.3104, 0.4005],
        [0.3211, 0.6888, 0.7006]])
tensor([[ 0.3764,  0.6428,  0.4091],
        [ 0.3435,  0.4344,  0.0927],
        [ 0.4641,  0.8481,  0.9093],
        [ 0.5424,  0.5061,  0.2923],
        [ 0.4681,  0.3605,  0.5471],
        [ 0.3621,  0.0865,  0.6821],
        [-0.0413,  0.6061,  0.4681],
        [ 0.2816,  0.8549,  0.8407],
        [ 0.1434,  0.7394,  0.5939],
        [ 0.2773,  0.2829,  0.6902],
        [ 0.5116,  0.6501,  0.2752],
        [ 0.4381,  0.7223,  

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [61]:
t = torch.arange(10)
t = t.reshape(5, 2).float()
torch.norm(t, p=2, dim=-1)

tensor([ 1.0000,  3.6056,  6.4031,  9.2195, 12.0416])