In [7]:
import sys
sys.path.append('../')
from deep_rl.reacher.env import MultiGoalReacherEnv, DiscretizeActionEnv
from deep_rl.network import *
from deep_rl.utils import *
from sklearn.decomposition import NMF
from random import shuffle
import matplotlib.pyplot as plt
import numpy as np
import torch
import random
import torch.nn.functional as F

def set_seed(t, r=None, p=None, c=None):
    """Seed the torch, `random` and numpy generators in one call.

    t: seed passed to torch.manual_seed.
    r: seed passed to random.seed; falls back to `t` when None.
    p: seed passed to np.random.seed; falls back to the (resolved) `r` when None.
    c: seed passed to torch.cuda.manual_seed; when None the CUDA generator
       is not touched by this function.
    """
    python_seed = t if r is None else r
    numpy_seed = python_seed if p is None else p

    torch.manual_seed(t)
    random.seed(python_seed)
    np.random.seed(numpy_seed)

    if c is None:
        return
    torch.cuda.manual_seed(c)

class GridDrawer:
    """Turns a grid of color indices into an upscaled 8-bit color image."""

    def __init__(self, color_list):
        # Kept as an ndarray so draw() can look colors up with fancy indexing.
        # assumes color_list is (n_colors, n_channels) with values in [0, 1] -- TODO confirm
        self.color_list = np.asarray(color_list)

    # input: a 2-d index matrix
    # output: a 2-d rgb matrix
    def draw(self, indices, repeat=16):
        """Map every index to its color and blow each cell up to repeat x repeat pixels.

        indices: integer index array selecting rows of `color_list`.
        repeat: how many times each cell is repeated along axis 0 and axis 1.
        Returns a uint8 array holding 255 * color for every pixel.
        """
        cells = np.array(self.color_list[indices, :])
        tall = cells.repeat(repeat, 0)
        upscaled = tall.repeat(repeat, 1)
        return np.uint8(255 * upscaled)
    
# this is my color list
# Keys are '<hue>-<v>' for v = 1..19; each value is the matching matplotlib
# colormap sampled at 0.05 * v. Hues are inserted in the order listed below.
# The grey family is currently disabled:
#   ('grey-{}'.format(v), plt.cm.Greys(0.1 * v)) for v in range(1, 20)
color_map = {
    '{}-{}'.format(hue, v): cmap(0.05 * v)
    for hue, cmap in (
        ('purple', plt.cm.Purples),
        ('blue', plt.cm.Blues),
        ('green', plt.cm.Greens),
        ('orange', plt.cm.Oranges),
        ('red', plt.cm.Reds),
    )
    for v in range(1, 20)
}

def imshow(img):
    """Show `img` inline in the notebook.

    `img` is converted with np.asarray, wrapped via Image.fromarray and then
    passed to display().

    NOTE(review): neither `Image` nor `display` appears in the visible import
    lines; presumably `Image` is PIL.Image arriving through one of the star
    imports and `display` is the IPython built-in -- confirm.
    """
    pixels = np.asarray(img)
    picture = Image.fromarray(pixels)
    display(picture)

# Palette handed to the drawer: two fixed greys occupy indices 0 and 1,
# followed by every color_map entry in shuffled order.
color_list = [*color_map.values()]
shuffle(color_list)  # draws from the global `random` state, so the order depends on its seed
color_list = [plt.cm.Greys(0.9), plt.cm.Greys(0.5)] + color_list
drawer = GridDrawer(color_list)

# multitask NMF from: https://ieeexplore.ieee.org/document/6939673
class MTNMF:
    """Multi-task NMF: approximates every X_k by A_k @ S with one S shared by all tasks.

    Both factors are fitted with multiplicative updates that start from
    np.random.rand values, so results depend on numpy's global random state.
    """

    # Replacement for denominators that are exactly zero, so that the
    # multiplicative updates never evaluate 0/0 (NaN) or x/0 (inf).
    _EPS = np.finfo(np.float64).eps

    def __init__(self, n_components, l1_ratio=0.0, max_iter=200, tol=0.0001):
        """Store the hyper-parameters.

        n_components: inner dimension of the factorization (rows of S).
        l1_ratio: weight of the L1 term on S used in loss() and in the S update.
        max_iter: maximum number of update sweeps performed by fit().
        tol: fit() stops once the loss changes by less than this between sweeps.
        """
        self.n_components = n_components
        self.l1_ratio = l1_ratio
        self.max_iter = max_iter
        self.tol = tol

    @staticmethod
    def _nonzero(denom):
        """Return `denom` with exact zeros replaced by a tiny positive constant."""
        return np.where(denom == 0, MTNMF._EPS, denom)

    def loss(self, X, A, S):
        """Half the squared reconstruction error over all tasks plus l1_ratio * S.sum()."""
        return 0.5 * ((X - np.matmul(A, S)) ** 2).sum() + self.l1_ratio * S.sum()

    # input: a stack of observed data X_1, ..., X_K
    # output: S, A_1, ..., A_K
    def fit(self, X):
        """Factorize a stack of non-negative matrices.

        X: array-like of shape (K, N, M), one N x M matrix per task.
        Returns (A, S, stats) where A has shape (K, N, n_components), S has
        shape (n_components, M) and stats is {'loss': final loss, 'iter':
        zero-based index of the last sweep}. When max_iter <= 0 no sweep runs
        and stats is {'loss': None, 'iter': -1}.
        """
        X = np.asarray(X)
        K, N, M = X.shape
        A = np.random.rand(K, N, self.n_components)
        S = np.random.rand(self.n_components, M)
        prev_loss = np.inf
        cur_loss = None
        i = -1  # keeps the returned dict well defined when the loop body never runs
        for i in range(self.max_iter):
            A_T = A.transpose(0, 2, 1)
            # Shared factor: numerator and denominator are summed over the K tasks.
            # NOTE(review): the penalty here is K * l1_ratio while loss() uses
            # l1_ratio * S.sum(); check against the paper which one is intended.
            s_denom = np.matmul(np.matmul(A_T, A), S).sum(0) + K * self.l1_ratio
            S = S * np.matmul(A_T, X).sum(0) / self._nonzero(s_denom)
            # Task-specific factors. A row of A that has reached zero makes this
            # denominator zero as well; without the guard that is 0/0 -> NaN.
            a_denom = np.matmul(np.matmul(A, S), S.T)
            A = A * np.matmul(X, S.T) / self._nonzero(a_denom)
            cur_loss = self.loss(X, A, S)
            if i % 100 == 0: print('NMF loss:', cur_loss)
            if abs(cur_loss - prev_loss) < self.tol: break
            prev_loss = cur_loss # update loss
        return A, S, {'loss': cur_loss, 'iter': i}
    
def rollout(env, policy, horizon, task_id=0):
    """Run `policy` in `env` for exactly `horizon` steps and return the visited states.

    env: object providing reset() -> state and
        step(action) -> (state, reward, done, info).
    policy: callable invoked as policy([state], info); its result is indexed
        with 'a' and the first action of that batch is sent to the env.
    horizon: number of steps taken, which is also the number of states returned.
    task_id: task index placed in the info dict handed to the policy.
        Defaults to 0, the value that used to be hard-coded here.

    Returns the list of states observed before each step; the state produced
    by the final step is not included. The `done` flag is ignored and the env
    is never reset mid-rollout.
    """
    info = dict(task_id=[task_id])
    states = []
    state = env.reset()
    for _ in range(horizon):
        states.append(state)
        action = policy([state], info)['a'][0].cpu().detach().numpy()
        # Only the next state is kept; reward, done and the env's info are dropped.
        state, _, _, _ = env.step(action)
    return states

# MultiTask NMF (Discrete)

In [8]:
# --- settings for the discrete-action experiment ---
n_abs = 12                     # number of components given to MTNMF
# L1 weight given to MTNMF. The earlier note here read "this is currently not
# working... since alpha is not set"; MTNMF in this notebook has no alpha
# parameter, so that note looks stale -- TODO confirm.
l1_ratio = 0.0
state_dim = 16                 # input size of the expert network
action_dim = np.array([5, 5])  # per-dimension action sizes (the envs use n_bins=[5, 5])
horizon = 100                  # steps per rollout
n_trajs = 10                   # rollouts collected per expert

def get_expert(weight_path, state_dim, action_dim):
    """Build a CategoricalActorCriticNet and fill it with weights from `weight_path`.

    weight_path: file readable by torch.load whose 'network' entry maps
        parameter names to tensors.
    state_dim: passed to the network and to its FCBody.
    action_dim: array of per-dimension action sizes; its product is passed to
        the network and its sum is the output width of the MultiLinear head.

    Only checkpoint entries whose key exists in the freshly built network are
    copied; other checkpoint entries are ignored, and network entries missing
    from the checkpoint keep their initial values.
    """
    body = FCBody(
        state_dim,
        hidden_units=(16,)
    )
    head = SplitBody(
        MultiLinear(16, action_dim.sum(), 4, key='task_id', w_scale=1e-3),
        2,
    )
    expert = CategoricalActorCriticNet(4, state_dim, action_dim.prod(), body, head)

    # load weight
    weights = expert.state_dict()
    # The identity map_location leaves every tensor on the storage it was deserialized into.
    checkpoint = torch.load(weight_path, map_location=lambda storage, loc: storage)
    for key, value in checkpoint['network'].items():
        if key in weights:
            weights[key] = value
    expert.load_state_dict(weights)
    return expert

# Seed everything before the networks are built and the rollouts are sampled.
set_seed(0)

# Checkpoint of the trained expert for each goal, keyed by 1-based goal index.
expert_dict = {
    1: '../log/reacher.1_corner/fc_discrete.baseline/split/0.190315-202731/models/step-704000-mean--6.36',
    2: '../log/reacher.2_corner/fc_discrete.baseline/split/0.190315-203310/models/step-704000-mean--17.16',
    3: '../log/reacher.3_corner/fc_discrete.baseline/split/0.190315-203532/models/step-704000-mean--11.25',
    #1: '../log/reacher.ng.1_corner/fc_discrete.baseline/ng/0.190316-161504/models/step-704000-mean--6.08',
    #2: '../log/reacher.ng.2_corner/fc_discrete.baseline/ng/0.190316-161535/models/step-704000-mean--26.97',
    #3: '../log/reacher.ng.3_corner/fc_discrete.baseline/ng/0.190316-162113/models/step-704000-mean--16.12',
}

# One environment per expert (envs[goal_idx - 1] is used for expert goal_idx).
# presumably sample_indices=[i] restricts environment i to goal i -- confirm
envs = [DiscretizeActionEnv(
    MultiGoalReacherEnv(
        [
            [0.15, 0.0],
            [-0.15, 0.0],
            [0.0, 0.15],
            [0.0, -0.15],
        ],
        sample_indices=[i],
        with_goal_pos=True,
    ),
    n_bins=[5, 5],
) for i in range(len(expert_dict))]
# Single decomposer used for the fit below. It used to be created without
# l1_ratio and then left unused while a second MTNMF was built inline.
decomposer = MTNMF(n_abs, l1_ratio=l1_ratio, max_iter=5000, tol=0.0001)

states = []
experts = dict()

# Collect n_trajs rollouts per expert. Each rollout is a list of `horizon`
# states; concatenating the rollouts stacks all states into one 2-d array.
# NOTE(review): rollout() is called without a task id and therefore feeds
# task_id 0 to every expert, whereas the logits below are queried with
# task_id = goal_idx - 1 -- confirm this difference is intended.
for goal_idx, weight_path in expert_dict.items():
    experts[goal_idx] = get_expert(weight_path, state_dim, action_dim)
    for _ in range(n_trajs):
        states.append(rollout(envs[goal_idx-1], experts[goal_idx], horizon=horizon))
states = np.concatenate(states)
print('states shape:', states.shape)

pvs = []

# Evaluate every expert on the pooled states of all experts.
for goal_idx in expert_dict:
    infos = {'task_id': [goal_idx-1] * len(states)}
    pv = F.softmax(experts[goal_idx].get_logits(states, infos), dim=-1).cpu().detach().numpy()
    print(pv.shape)
    pv = pv.reshape(pv.shape[0], -1)  # one flat probability vector per state
    pvs.append(pv)

# pvs: (tasks, states, flattened action probabilities). It is transposed so the
# shared factor S comes out as (n_abs, states) and S.T as (states, n_abs).
pvs = np.stack(pvs, 0)
A, S, info = decomposer.fit(pvs.transpose(0, 2, 1))
print(pvs.shape)

fsave(
    dict(
        abs=S.T,
        policies=list(pvs.reshape(pvs.shape[0], pvs.shape[1], 2, -1)),
        states=[states for _ in range(len(pvs))],
        # one info dict per state and task; the task count follows pvs instead of a literal 3
        infos=list([[{'task_id': i} for _ in range(len(states))] for i in range(len(pvs))]),
    ),
    '../data/nmf_sample/reacher/split.{}'.format(n_abs),
    'pkl',
)

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
states shape: (3000, 16)
(3000, 2, 5)
(3000, 2, 5)
(3000, 2, 5)
NMF loss: 4027.374665543058
NMF loss: 250.77011882057573
NMF loss: 204.80058417975405
NMF loss: 198.09517701729047
NMF loss: 185.9808764199375
NMF loss: 171.61627925410673
NMF loss: 168.6125006130484
NMF loss: 166.5595851654577
NMF loss: 164.76836103546626
NMF loss: 163.4715893340555
NMF loss: 162.809205205354
NMF loss: 162.49668904753793
NMF loss: 162.14074261081205
NMF loss: 161.9176799854498
NMF loss: 161.72103180513267
NMF loss: 161.6446155086667
NMF loss: 161.52996433691663
NMF loss: 161.4593122674847
NMF loss: 161.4019961253998
NMF loss: 161.3430382182821
NMF loss: 161.30910854466694
NMF loss: 161.2

# MultiTask NMF (Continuous)

In [11]:
# --- settings for the continuous-action experiment ---
n_abs = 6        # number of components given to MTNMF
# L1 weight given to MTNMF. The earlier note here read "this is currently not
# working... since alpha is not set"; MTNMF in this notebook has no alpha
# parameter, so that note looks stale -- TODO confirm.
l1_ratio = 0.0
state_dim = 8    # input size of the expert network
action_dim = 2   # action size given to the Gaussian expert network
horizon = 100    # steps per rollout
n_trajs = 10     # rollouts collected per expert
n_bins = 10      # points per axis of the action grid built with np.linspace in this cell

# Re-seed torch / random / numpy before anything stochastic in this cell runs.
set_seed(0)

def get_expert(weight_path, state_dim, action_dim):
    """Build a GaussianActorCriticNet and fill it with weights from `weight_path`.

    weight_path: file readable by torch.load whose 'network' entry maps
        parameter names to tensors.
    state_dim: passed to the network and to its FCBody.
    action_dim: passed to the network unchanged.

    Only checkpoint entries whose key exists in the freshly built network are
    copied; other checkpoint entries are ignored, and network entries missing
    from the checkpoint keep their initial values.

    Note: this shares its name with the categorical loader defined in the
    earlier cell, so running this cell replaces that definition.
    """
    body = FCBody(
        state_dim,
        hidden_units=(32,)
    )
    expert = GaussianActorCriticNet(4, state_dim, action_dim, body)

    # load weight
    weights = expert.state_dict()
    # The identity map_location leaves every tensor on the storage it was deserialized into.
    checkpoint = torch.load(weight_path, map_location=lambda storage, loc: storage)
    for key, value in checkpoint['network'].items():
        if key in weights:
            weights[key] = value
    expert.load_state_dict(weights)
    return expert

# Checkpoint of the trained Gaussian expert for each goal, keyed by 1-based goal index.
expert_dict = {
    1: '../log/reacher.ng.1_corner/fc_discrete.gaussian/cont/0.190316-212125/models/step-128000-mean--18.15',
    2: '../log/reacher.ng.2_corner/fc_discrete.gaussian/cont/0.190316-212209/models/step-128000-mean--11.82',
    3: '../log/reacher.ng.3_corner/fc_discrete.gaussian/cont/0.190316-214339/models/step-128000-mean--5.18',
}

# One environment per expert (envs[goal_idx - 1] is used for expert goal_idx).
# presumably sample_indices=[i] restricts environment i to goal i -- confirm
envs = [MultiGoalReacherEnv(
        [
            [0.15, 0.0],
            [-0.15, 0.0],
            [0.0, 0.15],
            [0.0, -0.15],
        ],
        sample_indices=[i],
        with_goal_pos=False,
) for i in range(len(expert_dict))]
# Single decomposer used for the fit below. It used to be created without
# l1_ratio and then left unused while a second MTNMF was built inline.
decomposer = MTNMF(n_abs, l1_ratio=l1_ratio, max_iter=5000, tol=0.0001)

states = []
# Regular n_bins x n_bins grid over [-1, 1]^2; actions[i, j] is one 2-d action.
# NOTE(review): the grid is not consumed anywhere in this cell yet; presumably
# it is meant for discretizing the Gaussian policies below -- confirm.
ax = np.linspace(-1, 1, n_bins)
ay = np.linspace(-1, 1, n_bins)
ax, ay = np.meshgrid(ax, ay)
actions = np.stack([ax, ay], -1)

experts = dict()

# Collect n_trajs rollouts per expert. Each rollout is a list of `horizon`
# states; concatenating the rollouts stacks all states into one 2-d array.
for goal_idx, weight_path in expert_dict.items():
    experts[goal_idx] = get_expert(weight_path, state_dim, action_dim)
    for _ in range(n_trajs):
        states.append(rollout(envs[goal_idx-1], experts[goal_idx], horizon=horizon))
states = np.concatenate(states)
print('states shape:', states.shape)

pvs = []

for goal_idx in expert_dict:
    infos = {'task_id': [goal_idx-1] * len(states)}
    # NOTE(review): the recorded run of this cell stopped with
    # "TypeError: forward() missing 1 required positional argument: 'info'".
    # The network's forward() signature is not visible from this notebook, so
    # the call is left as written -- check it against GaussianActorCriticNet.
    mean = experts[goal_idx](states, infos)['mean'].cpu().detach().numpy()
    # The shape must come from the parameter itself: reading `std.shape` on the
    # right-hand side before `std` is assigned raises NameError.
    std_param = experts[goal_idx].std
    std = std_param.expand(len(states), *std_param.shape)
    print(mean.shape, std.shape)
    #pv = F.softmax(experts[goal_idx].get_logits(states, infos), dim=-1).cpu().detach().numpy()
    # TODO: turn (mean, std) into a per-state probability table. Until that is
    # written, stop with an explicit error. `pv` was never assigned in this
    # cell, so the reshape below either raised NameError or silently reused a
    # `pv` left in the kernel by the discrete cell.
    pv = None
    if pv is None:
        raise NotImplementedError(
            'discretization of the Gaussian policy into pv is not implemented yet')
    pv = pv.reshape(pv.shape[0], -1)  # one flat probability vector per state
    pvs.append(pv)

# pvs: (tasks, states, flattened action probabilities). It is transposed so the
# shared factor S comes out as (n_abs, states) and S.T as (states, n_abs).
pvs = np.stack(pvs, 0)
A, S, info = decomposer.fit(pvs.transpose(0, 2, 1))

fsave(
    dict(
        abs=S.T,
        policies=list(pvs.reshape(pvs.shape[0], pvs.shape[1], 2, -1)),
        states=[states for _ in range(len(pvs))],
        # one info dict per state and task; the task count follows pvs instead of a literal 3
        infos=list([[{'task_id': i} for _ in range(len(states))] for i in range(len(pvs))]),
    ),
    '../data/nmf_sample/reacher/cont.ng.{}'.format(n_abs),
    'pkl',
)


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
states shape: (3000, 8)


TypeError: forward() missing 1 required positional argument: 'info'

In [7]:
import torch
import numpy as np

# Scratch check: report the installed torch version, then show how expand()
# tiles a 1-d tensor into a 3 x 3 view.
print(torch.__version__)

a = torch.Tensor([1, 2, 3])
tiled = a.expand(3, 3)
print(tiled)

# (1, 2, 3)
#probs = torch.Tensor([[[0.2, 0.3, 0.5], [0.1, 0.3, 0.6]]])
#print(torch.nn.functional.softmax(probs, dim=2))
# probs = torch.Tensor([[0.2, 0.3, 0.5]])
# print(probs.shape[0])
# log_probs = torch.log(probs)
# print(log_probs)
# dist = torch.distributions.Categorical(logits=log_probs)
# action = dist.sample()
# print(action)
# log_prob = dist.log_prob(action)
# print(log_prob)

0.4.0
tensor([[ 1.,  2.,  3.],
        [ 1.,  2.,  3.],
        [ 1.,  2.,  3.]])
