In [256]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%env CUDA_VISIBLE_DEVICES=0

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
env: CUDA_VISIBLE_DEVICES=0


In [257]:
import os, sys
from IPython.display import clear_output
import matplotlib.pyplot as plt
from tqdm import tqdm, trange
from pandas import DataFrame
import torch, torch.nn as nn
import numpy as np
import random
import networkx as nx
from utils_mcts import ReplayBuffer, PathsBuffer, get_states_emb, convert_to_walk
from MCTS import MCTS
from problem_mcts import GraphProblem, generate_erdos_renyi_problems, generate_regular_problems
from network_mcts import Agent
import time
import nn_utils

In [258]:
sys.path.insert(0, '..')
moving_average = lambda x, **kw: DataFrame({'x':np.asarray(x)}).x.ewm(**kw).mean().values

In [259]:
def replace(P, source, target):
    '''Replace last occurrence of source with source-target-source.'''
    assert source in P
    ix = len(P) - P[::-1].index(source)
    return P[:ix] + [target, P[ix - 1]] + P[ix:]

In [260]:
def covering_walk(graph, source):
    P = [0]  # supporting walk
    S = [0]  # stack of nodes to check
    node2anon = {source: 0}
    anon2node = {0: source}
    checked = dict()  # nodes that has been checked for edge
    degrees = graph.degree()
    while len(S) > 0:  # grow supporting walk in DFS manner
        curr = S[-1]
        x = max(P) + 1  # next node to check

        # check if there is a node in the neighborhood that has not been explored yet
        Ncurr = list(nx.neighbors(graph, anon2node[curr]))
        if random.uniform(0, 1) < 0.99:
            random.shuffle(Ncurr)  # option 1: random order
        else:
            Ncurr = sorted(Ncurr, key=lambda v: degrees[v], reverse=True)  # option 2: top-degree
            # Ncurr = sorted(Ncurr, key=lambda v: degrees[v], reverse=False)  # option 3: low-degree
        # print(anon2node[curr], Ncurr)
        for neighbor in Ncurr:
            if neighbor in node2anon:
                continue  # already visited
            else:
                node2anon[neighbor] = x
                anon2node[x] = neighbor
                S.append(x)
                checked.setdefault(curr, set()).add(x)
                P = replace(P, curr, x)  # move to it
                break
        else:
            S.pop()  # move back in the stack

        for u in range(x-1, curr, -1):  # u is already in the supporting walk
            # check if there is connection to already discovered nodes
            if u not in checked[curr]:  # see if we already checked this edge
                if anon2node[u] in graph[anon2node[curr]]:
                    P = replace(P, curr, u)
                checked.setdefault(curr, set()).add(u)

    cover = [anon2node[v] for v in P]
    return cover, P

In [261]:
#params
NUM_PROBLEMS = 50
NUM_EPISODES = 50
BATCH_SIZE = 32
NUM_MCSIMS = 50
NUM_UPDATES = 5
NUM_VERTICES = 15
DEGREE = 6
CPUCT = 1.0
THRESHOLD = 0.75
PATHS_BUFFER_CAPACITY = 1000
REPLAY_BUFFER_CAPACITY = 10000

In [262]:
moving_average = lambda x, **kw: DataFrame({'x':np.asarray(x)}).x.ewm(**kw).mean().values

In [263]:
#generate regular train graphs (n=15, d=6)
problem_maker = generate_erdos_renyi_problems(num_vertices=NUM_VERTICES, edge_prob=0.3)

In [518]:
#initialize agent
agent = Agent(hid_size=256, gcn_size=256, vertex_emb_size=64, num_vertices=NUM_VERTICES)

In [265]:
optimizer = torch.optim.Adam(agent.parameters(), lr=1e-4)

In [266]:
#initialize buffers
path_buffer = PathsBuffer(capacity=PATHS_BUFFER_CAPACITY, threshold=THRESHOLD)
train_buffer = ReplayBuffer(capacity=REPLAY_BUFFER_CAPACITY)

In [267]:
# loss stats
pi_losses_history = []
v_losses_history = []

In [268]:
problems = [next(problem_maker) for i in range(NUM_PROBLEMS)]

In [269]:
start = time.time()
for k in trange(len(problems)):
    
    problem = problems[k]

    for vertex in problem.get_actions():

        path_buffer.flush()
    
        PATH_LENGTH = 2*problem.num_edges + 1        
        
        i += 1
    
        for i in range(NUM_EPISODES):
            
            problem.path = [vertex]
        
            source = problem.get_state()[0]
            
            with torch.no_grad():
                graph_emb = agent.embed_graph(problem.edges)
                
            mcts = MCTS(game=problem, nnet=agent, graph_emb=graph_emb,
                        numMCTSSims=NUM_MCSIMS, cpuct=CPUCT, path_length=path_length)
                
            random_walk = [source]
            checked = ddict(list)
            stack = [source]
            visited = {source}
            ranks = {0: source} # to attempt to get maximal cover (possible to do without rank, but then no guarantees on maximality)
            revranks = {source: 0}
            
            trainExamples = []
            
            while len(stack) > 0:
                last = stack[-1]
                lastrank = revranks[last]
                maxrank = max(ranks.keys()) + 1
                
                with torch.no_grad():
                    pi = mcts.getActionProb(random_walk)
                
                Nlast = np.argsort(pi)

                # going in depth
                for neighbor in Nlast:
                    if neighbor not in visited: # found new node, then add it to the walk
                        trainExamples.append([random_walk, pi, None])
                        random_walk.append(neighbor)
                        stack.append(neighbor)
                        checked[last].append(neighbor)
                        visited.add(neighbor)
                        ranks[maxrank] = neighbor
                        revranks[neighbor] = maxrank
                        break
                else: # we didn't find any new neighbor and rollback
                    stack.pop()
                    if len(stack) > 0:
                        random_walk.append(stack[-1])
                        checked[last].append(stack[-1])

                # interconnecting nodes that are already in walk
                for r in range(maxrank-1, lastrank+1, -1):
                    node = ranks[r]
                    if node not in checked[last] and node in Nlast:
                        checked[last].append(node)
                        random_walk.extend([node, last])
        
            path_buffer.push(random_walk)
            if len(path_buffer) >= 10: 
                r = path_buffer.rank_path(random_walk)
                for x in trainExamples:
                    x[-1] = r
                train_buffer.push(trainExamples)
            
        if len(train_buffer) >= BATCH_SIZE:
            for i in range(NUM_UPDATES):
                batch = train_buffer.sample(BATCH_SIZE)
                paths, pis, vs = zip(*batch)
                embs = get_states_emb(paths, graph_emb)

                target_pis = torch.FloatTensor(np.array(pis))

                target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

                out_pi, out_v = agent(embs)
                loss_pi = -torch.sum(target_pis*out_pi)/target_pis.size()[0]
                loss_v = torch.sum((target_vs-out_v.view(-1))**2)/target_vs.size()[0]
                total_loss = loss_pi + loss_v

                pi_losses_history.append(loss_pi.item())
                v_losses_history.append(loss_v.item())

                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                if iteration % 5 == 0:
                    clear_output(True)
                    plt.figure(figsize=[12, 6])
                    plt.subplot(1,2,1)
                    plt.title('Policy error'); plt.grid()
                    plt.scatter(np.arange(len(pi_losses_history)), pi_losses_history, alpha=0.1)
                    plt.plot(moving_average(pi_losses_history, span=100, min_periods=100))

                    plt.subplot(1,2,2)
                    plt.title('Value error'); plt.grid()
                    plt.scatter(np.arange(len(v_losses_history)), v_losses_history, alpha=0.1)
                    plt.plot(moving_average(v_losses_history, span=10, min_periods=10))
                    plt.show()
end = time.time()

  0%|          | 0/50 [00:00<?, ?it/s]


NameError: name 'path_length' is not defined

In [None]:
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)
print("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

In [9]:
p = next(problem_maker)

In [None]:
p.path = [random.sample(list(p.edges.keys()), 1)[0]]

In [None]:
graph_emb = agent.embed_graph(p.edges)
path_length = 2*p.num_edges+1
mcts = MCTS(game=p, nnet=agent, graph_emb=graph_emb,
                    numMCTSSims=NUM_MCSIMS, cpuct=CPUCT, path_length=path_length)
path = p.get_state()
while len(path) != path_length:
    with torch.no_grad():
        pi = mcts.getActionProb(path)
    vertex = np.random.choice(len(pi), p=pi)
    path = p.get_next_state(path, vertex)
print(path)

In [4]:
np.argsort([1, 5, 3])

array([0, 2, 1])

In [326]:
graph_emb = agent.embed_graph(p.edges)

In [420]:
lstm = nn.LSTM(256, hidden_size= 10)

In [402]:
print(lstm)

LSTM(256, 10, num_layers=10, batch_first=True)


In [403]:
x = [3, 5, 6, 7, 8]

In [404]:
l = []
for i in x:
    l.append(graph_emb[i])

In [405]:
l = torch.stack(l).unsqueeze(0)

In [421]:
l

tensor([[[-0.9286,  0.1171,  0.5416,  ...,  0.6903,  0.7174,  0.1541],
         [-1.1723,  0.0236,  1.1856,  ...,  1.2111,  1.4870,  0.0660],
         [-0.8155, -0.0119,  0.6863,  ...,  0.7252,  0.9753,  0.2304],
         [-0.9070,  0.1316,  0.5729,  ...,  0.4311,  0.5688,  0.2329],
         [-1.0753,  0.0830,  0.9038,  ...,  0.7697,  0.9993,  0.1767]]],
       grad_fn=<UnsqueezeBackward0>)

In [411]:
K = l.view(len(x), 1 , -1)

In [422]:
K

tensor([[[-0.9286,  0.1171,  0.5416,  ...,  0.6903,  0.7174,  0.1541]],

        [[-1.1723,  0.0236,  1.1856,  ...,  1.2111,  1.4870,  0.0660]],

        [[-0.8155, -0.0119,  0.6863,  ...,  0.7252,  0.9753,  0.2304]],

        [[-0.9070,  0.1316,  0.5729,  ...,  0.4311,  0.5688,  0.2329]],

        [[-1.0753,  0.0830,  0.9038,  ...,  0.7697,  0.9993,  0.1767]]],
       grad_fn=<ViewBackward>)

In [423]:
out, hidden = lstm(K)

In [428]:
hidden[-1]

tensor([[[ 1.0192,  1.5607,  0.0075,  0.1521, -1.5922, -0.2616, -0.7007,
          -1.3618, -0.1222, -1.4695]]], grad_fn=<StackBackward>)

In [407]:
walk_embs

tensor([[[ 0.0451,  0.1707, -0.1887, -0.0803, -0.3338,  0.0849,  0.0994,
          -0.0092,  0.0209, -0.2359],
         [ 0.0701,  0.2317, -0.1775, -0.0161, -0.2372,  0.0775,  0.0865,
          -0.0051,  0.0095, -0.3846],
         [ 0.1011,  0.2673, -0.2765, -0.0446, -0.2541,  0.1182,  0.1001,
          -0.0210,  0.0391, -0.4430],
         [ 0.0816,  0.3171, -0.3153, -0.0760, -0.3497,  0.1926,  0.0474,
          -0.0311,  0.0793, -0.5933],
         [ 0.0661,  0.3829, -0.2475, -0.0384, -0.2970,  0.2038,  0.0891,
          -0.0173,  0.0254, -0.7366]]], grad_fn=<TransposeBackward0>)

In [408]:
walk_embs, hidden = lstm(l)

In [409]:
hidden

(tensor([[[ 0.0695,  0.4406, -0.2393,  0.4299, -0.3749,  0.7223, -0.1267,
            0.1923, -0.2109,  0.0517]],
 
         [[-0.1012,  0.2199,  0.1219, -0.0128, -0.2094, -0.0616,  0.0540,
            0.1520,  0.0629,  0.0184]],
 
         [[ 0.0740,  0.0130, -0.0857, -0.1324, -0.0572,  0.0302,  0.2230,
            0.0934, -0.0153,  0.1301]],
 
         [[ 0.0095, -0.0506,  0.0270, -0.0427,  0.1398,  0.1236,  0.0012,
           -0.1071,  0.0447,  0.0012]],
 
         [[ 0.0429,  0.1184,  0.2333, -0.0728, -0.0811,  0.2270,  0.0814,
           -0.0912,  0.0040,  0.1718]],
 
         [[ 0.1744,  0.0111, -0.1330, -0.1384, -0.0017,  0.0188,  0.0360,
            0.1133, -0.0173, -0.1226]],
 
         [[ 0.0173,  0.0233,  0.0372,  0.0795, -0.2491, -0.0192, -0.1484,
           -0.1563,  0.2679, -0.0179]],
 
         [[ 0.0739, -0.0769, -0.1860,  0.0529, -0.1925, -0.0917,  0.0945,
           -0.1328,  0.1562,  0.1146]],
 
         [[-0.1203, -0.1043,  0.2104, -0.0267,  0.0551, -0.1584,  0.1248

In [390]:
hn[:, -1, :][-1]

tensor([-0.1622,  0.0380,  0.0270, -0.0840, -0.0621, -0.0490,  0.0329,  0.0838,
         0.0326,  0.1116], grad_fn=<SelectBackward>)

In [343]:
f = torch.tensor([[2., 5., 6.]])

In [345]:
torch.cat((walk_embs[:, -1, :], f), 1)

tensor([[-0.1622,  0.0380,  0.0270, -0.0840, -0.0621, -0.0490,  0.0329,  0.0838,
          0.0326,  0.1116,  2.0000,  5.0000,  6.0000]], grad_fn=<CatBackward>)

In [367]:
hn.view(-1, -1, -1)

RuntimeError: only one dimension can be inferred

In [113]:
linear = nn.Linear(10, 1)

In [202]:
y = linear(walk_embs[:, -1, :])

In [203]:
y

tensor([[0.1932]], grad_fn=<AddmmBackward>)

In [71]:
embs = y[0]

In [72]:
embs

tensor([[ 0.2824,  0.3015, -0.2668,  ...,  0.2538,  0.1729, -0.2005],
        [ 0.3229,  0.2920, -0.2846,  ...,  0.2517,  0.1716, -0.1769],
        [ 0.3446,  0.2820, -0.2939,  ...,  0.2483,  0.1650, -0.1690],
        [ 0.3562,  0.2752, -0.2993,  ...,  0.2455,  0.1587, -0.1662],
        [ 0.3624,  0.2713, -0.3025,  ...,  0.2434,  0.1541, -0.1650]],
       grad_fn=<SelectBackward>)

In [73]:
embs[len([3, 5, 6, 7, 8])]

IndexError: index 5 is out of bounds for dimension 0 with size 5

In [425]:
inputs = [torch.randn(1, 3) for _ in range(5)]

In [426]:
inputs

[tensor([[ 2.0531,  0.2745, -0.0366]]),
 tensor([[ 0.0588,  0.5562, -1.3053]]),
 tensor([[0.4513, 0.2132, 1.7349]]),
 tensor([[-0.0582,  1.3860,  0.5462]]),
 tensor([[-0.2682, -0.8007, -0.0899]])]

In [427]:
torch.cat(inputs).view(len(inputs), 1, -1)

tensor([[[ 2.0531,  0.2745, -0.0366]],

        [[ 0.0588,  0.5562, -1.3053]],

        [[ 0.4513,  0.2132,  1.7349]],

        [[-0.0582,  1.3860,  0.5462]],

        [[-0.2682, -0.8007, -0.0899]]])

In [535]:
p.edges

defaultdict(set,
            {0: {2, 5, 8, 11, 14},
             2: {0, 1, 7, 8, 9, 14},
             5: {0, 3, 7, 9, 12, 13},
             8: {0, 2, 9, 13},
             11: {0, 1, 10, 12, 13},
             14: {0, 2, 3, 10, 12, 13},
             1: {2, 3, 11, 13},
             3: {1, 5, 6, 14},
             13: {1, 4, 5, 6, 8, 10, 11, 12, 14},
             7: {2, 4, 5},
             9: {2, 5, 8},
             6: {3, 12, 13},
             4: {7, 12, 13},
             12: {4, 5, 6, 11, 13, 14},
             10: {11, 13, 14}})

In [514]:
paths = [[2, 7, 5, 9], [2, 7, 5, 9, 8]]

In [515]:
graph_emb = agent.embed_graph(p.edges)

In [502]:
paths_emb = get_states_emb(paths, graph_emb)

In [504]:
paths_emb.shape

torch.Size([2, 512])

In [511]:
next_emb = graph_emb[8]

In [477]:
next_emb = next_emb.unsqueeze(0)

In [467]:
q_network_inputs = torch.stack([
torch.cat([get_states_emb([path], graph_emb), graph_emb[next_vertex].unsqueeze(0)] for next_vertex in {2, 5, 8}]
            )])

TypeError: expected Tensor as element 0 in argument 0, but got list

In [488]:
[torch.cat([get_states_emb([path], graph_emb), graph_emb[next_vertex].unsqueeze(0)]) for next_vertex in {2, 5, 8}]

RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 0. Got 512 and 256 in dimension 1 at /Users/administrator/nightlies/pytorch-1.0.0/wheel_build_dirs/conda_3.6/conda/conda-bld/pytorch_1544137972173/work/aten/src/TH/generic/THTensorMoreMath.cpp:1333

In [492]:
for next_vertex in {2, 5, 8}:
    print(torch.cat([get_states_emb([path], graph_emb), graph_emb[next_vertex].unsqueeze(0)], 1))

tensor([[-9.7202e-01,  7.7923e-02,  7.6080e-01, -5.0376e-01, -5.6276e-01,
         -2.8771e-01,  1.6087e-01,  7.3920e-01, -5.1050e-01,  7.5375e-01,
         -5.5776e-01, -1.2383e+00, -2.9269e-01,  4.7613e-01, -9.8817e-01,
          3.4015e-01, -4.5546e-01,  6.2430e-01, -6.5620e-02,  5.9016e-02,
          1.0267e+00, -9.5899e-01,  6.1672e-01,  8.1769e-01, -7.9431e-02,
         -3.2172e-01,  4.2354e-01,  1.6413e+00, -2.1666e-01, -4.8842e-01,
          1.0008e+00,  9.2398e-01, -8.2213e-01, -7.6344e-01,  1.0147e+00,
          9.6081e-01,  9.8737e-01,  4.3086e-01,  6.9872e-01,  3.2411e-01,
         -4.8209e-01,  1.2577e-01, -1.0857e-01, -1.1443e+00,  8.4729e-01,
          1.1149e-01,  2.5156e-01, -4.5330e-01,  1.2396e-01,  1.6943e-01,
          1.2370e+00,  8.1178e-01, -4.2973e-01, -9.9228e-01, -1.7468e-01,
         -9.6961e-02, -5.9296e-01, -2.4168e-01,  5.2235e-02, -4.4475e-01,
          1.1245e+00, -1.8958e-02,  5.4813e-01,  1.2968e-01,  8.9272e-01,
         -9.2226e-01, -1.6239e-01,  7.

In [506]:
paths_emb

tensor([[-0.9720,  0.0779,  0.7608,  ...,  0.4971,  0.4958,  0.1762],
        [-0.9927,  0.0789,  0.7894,  ...,  0.7697,  0.9993,  0.1767]])

In [505]:
[torch.cat([get_states_emb(paths, graph_emb), graph_emb[next_vertex].unsqueeze(0)]) for next_vertex in {2, 5, 8}]

RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 1. Got 2 and 1 in dimension 0 at /Users/administrator/nightlies/pytorch-1.0.0/wheel_build_dirs/conda_3.6/conda/conda-bld/pytorch_1544137972173/work/aten/src/TH/generic/THTensorMoreMath.cpp:1333

In [509]:
x = get_states_emb([[1,2,3]], graph_emb)

In [513]:
for i in x:
    print(torch.cat([i, next_emb]).shape)

torch.Size([768])


In [542]:
paths_embs = get_states_emb(paths, graph_emb)

In [523]:
values = agent.value(paths_emb)

In [612]:
values

tensor([[-0.1743],
        [-0.2147]], grad_fn=<AddmmBackward>)

In [615]:
states = []
for i, path in enumerate(paths):
    next_embs = []
    for next_vertex in p.edges[path[-1]]:
        next_vertex_emb = graph_emb[next_vertex]
        next_embs.append(torch.cat([paths_embs[i], next_vertex_emb]))
    states.append(torch.stack(next_embs))
print(states)
predicts = []
for i in states:
    predicts.append(sm(log_reg(i)).view(-1))

[tensor([[-0.9720,  0.0779,  0.7608,  ...,  0.7697,  0.9993,  0.1767],
        [-0.9720,  0.0779,  0.7608,  ...,  1.0486,  0.9713, -0.0328],
        [-0.9720,  0.0779,  0.7608,  ...,  1.2111,  1.4870,  0.0660]],
       grad_fn=<StackBackward>), tensor([[-0.9927,  0.0789,  0.7894,  ...,  0.9108,  0.9007,  0.0928],
        [-0.9927,  0.0789,  0.7894,  ...,  0.4971,  0.4958,  0.1762],
        [-0.9927,  0.0789,  0.7894,  ...,  1.0486,  0.9713, -0.0328],
        [-0.9927,  0.0789,  0.7894,  ...,  1.2688,  1.7698,  0.0705]],
       grad_fn=<StackBackward>)]


In [617]:
predicts

[tensor([0.3215, 0.3476, 0.3309], grad_fn=<ViewBackward>),
 tensor([0.2441, 0.2059, 0.2333, 0.3167], grad_fn=<ViewBackward>)]

In [573]:
log_reg = nn.Linear(768, 1)

In [581]:
sm = nn.Softmax(dim=0)

In [553]:
torch.states.unsqueeze(0)

AttributeError: module 'torch' has no attribute 'states'

In [534]:
p.edges

defaultdict(set,
            {0: {2, 5, 8, 11, 14},
             2: {0, 1, 7, 8, 9, 14},
             5: {0, 3, 7, 9, 12, 13},
             8: {0, 2, 9, 13},
             11: {0, 1, 10, 12, 13},
             14: {0, 2, 3, 10, 12, 13},
             1: {2, 3, 11, 13},
             3: {1, 5, 6, 14},
             13: {1, 4, 5, 6, 8, 10, 11, 12, 14},
             7: {2, 4, 5},
             9: {2, 5, 8},
             6: {3, 12, 13},
             4: {7, 12, 13},
             12: {4, 5, 6, 11, 13, 14},
             10: {11, 13, 14}})

In [549]:
len(states[0][0])

768