In [17]:
import numpy as np
from torch.utils.data import DataLoader
from torch import nn
import random
import math
import torch
from torch import nn
from torch import optim
from tensorboardX import SummaryWriter
from datetime import datetime
import time
import load
from models import DynamicTopologyModel
import load_topology_dataset
from load_topology_dataset import MAX_CELL_COUNT, MIN_CELL_COUNT, IMAGE_SIZE
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device="cpu"
EXPERIMENT=7
writer = SummaryWriter('/src/runs/experiment_{}/'.format(EXPERIMENT))

In [18]:
def save_parameters(writer, model, batch_idx):
    for k, v in model.state_dict().items():
        shape = v.shape
        # Don't do this for single weights or biases
        if np.any(np.array(shape) > 1):
            mean = torch.mean(v)
            std_dev = torch.std(v)
            maximum = torch.max(v)
            minimum = torch.min(v)
            writer.add_scalars("{}_{} ".format(k, shape), {"mean": mean,
                                                                    "std_dev": std_dev,
                                                                    "max": maximum,
                                                                    "min": minimum}, batch_idx)
            writer.add_histogram("{}_{}".format(k, shape), v, batch_idx)
            
        else:
            writer.add_scalar("{}_{}".format(k, shape), v.data, batch_idx)

In [19]:
def test_parameters_update(last_parameters_np ,parameters_np):
    for last_param, param in zip(last_parameters_np, parameters_np):
        diff = param - last_param
        #print(last_parameters.max())
        print(diff.max())
        #print(not np.all(diff))


In [None]:
BATCH_SIZE = 256 
INPUT_SEQ_LEN = 3
TARGET_SEQ_LEN = 1
LEARNING_RATE = 3e-2
SAVE_VARIABLE_EVERY = 100
DATASET_WORKERS = 4

# Define Data.
cell_load_dataset = load_topology_dataset.LoadCellDataset(initial_cell_counts=(2, 5), initial_load_counts=(2,5),
                                    input_seq_len=INPUT_SEQ_LEN, target_seq_len=TARGET_SEQ_LEN, network_mutate_prob=[0.33, 0.66
                                                                                                                    ])
dataloader = DataLoader(cell_load_dataset, batch_size=BATCH_SIZE,
                        shuffle=False, num_workers=DATASET_WORKERS)

# Define Model
dynamic_topology_model = DynamicTopologyModel(neighbourhood_hidden_size=32, neighbourhood_cell_count=6,
                     neighbourhood_output_size=16, lstm_hidden_size=32, lstm_layers=2, teacher_forcing_probability=0.5, device=device).to(device)

parameters = dynamic_topology_model.parameters

# Define optimizer
optimizer = optim.Adam(parameters
                       , lr=LEARNING_RATE)
criterion = nn.MSELoss()
last_parameters_np = list(np.copy(param.cpu().detach().numpy()) for param in parameters)

# Train
start = datetime.now()
for batch_idx, data in enumerate(dataloader):
    optimizer.zero_grad()
    reference_cell_input =  data[0].to(device)
    reference_cell_present_input = data[1].to(device)
    neighbourhood_cell_rel_input = data[2].to(device)
    neighbourhood_cell_load_input = data[3].to(device)
    reference_cell_target = data[4].to(device)
    reference_cell_present_target = data[5].to(device)
    neighbourhood_cell_rel_target = data[6].to(device)
    decoder_output_seq = dynamic_topology_model.forward(INPUT_SEQ_LEN, TARGET_SEQ_LEN, reference_cell_input, reference_cell_present_input,
                neighbourhood_cell_rel_input, neighbourhood_cell_load_input, reference_cell_target, reference_cell_present_target, neighbourhood_cell_rel_target)
    loss = criterion(decoder_output_seq, reference_cell_target)
    loss.backward()
    optimizer.step()
    if batch_idx == 0:
        parameters = dynamic_topology_model.parameters
        parameters_np = list(param.cpu().detach().numpy() for param in parameters)
        test_parameters_update(last_parameters_np, parameters_np)
    if batch_idx % SAVE_VARIABLE_EVERY == 0:
        end = datetime.now()
        print("{} Examples/sec".format((SAVE_VARIABLE_EVERY* BATCH_SIZE) / ((end - start).total_seconds()) ))
        start = datetime.now()
        print(loss)
        inputs = (INPUT_SEQ_LEN, TARGET_SEQ_LEN, reference_cell_input, reference_cell_present_input,
                neighbourhood_cell_rel_input, neighbourhood_cell_load_input, reference_cell_target, reference_cell_present_target, neighbourhood_cell_rel_target)
        #writer.add_graph(dynamic_topology_model, inputs, True)
        writer.add_scalar("loss", loss, batch_idx)
        save_parameters(writer, dynamic_topology_model, batch_idx)

0.029536098
0.029875934
0.029965103
0.02998668
0.027734831
-0.029980391
0.02983787
-0.029998261
0.029999375
0.0
0.029998662
0.029998668
0.029998546
0.0
0.029999735
0.029999733
0.0
0.0
0.029997766
0.029999375
0.029999666
0.029999696
0.0299998
0.029995665
0.029998153
0.02999815
0.029999748
0.029998988
0.029999875
0.029999875
0.029999956
0.030000001
54126.168945533434 Examples/sec
tensor(2.3207, device='cuda:0', grad_fn=<MseLossBackward>)
4295.626347626918 Examples/sec
tensor(1.0478, device='cuda:0', grad_fn=<MseLossBackward>)
4128.249888126041 Examples/sec
tensor(1.1725, device='cuda:0', grad_fn=<MseLossBackward>)
4277.477992292185 Examples/sec
tensor(1.2257, device='cuda:0', grad_fn=<MseLossBackward>)
4725.615271416451 Examples/sec
tensor(1.4656, device='cuda:0', grad_fn=<MseLossBackward>)
4395.203665325156 Examples/sec
tensor(1.1472, device='cuda:0', grad_fn=<MseLossBackward>)
4502.177840597227 Examples/sec
tensor(0.7937, device='cuda:0', grad_fn=<MseLossBackward>)
4806.5323779098535 E