In [2]:
import numpy as np
import networkx as nx

#%matplotlib notebook
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.axes_grid1 import make_axes_locatable
%config InlineBackend.figure_format = 'retina'

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset
from torch.utils.data.dataset import TensorDataset
from torch.utils.data import DataLoader # (testset, batch_size=4,shuffle=False, num_workers=4)
from torch.optim.lr_scheduler import ReduceLROnPlateau as RLRP
from torch.nn.parallel import DistributedDataParallel, DataParallel
from torch.nn.init import xavier_normal
from torch.nn.parameter import Parameter
import torchvision.datasets as datasets
import torchvision.transforms as transforms

import sys
from datetime import datetime
from functools import reduce
import os
import os.path
import pandas as pd
import pickle
import importlib
from collections import Counter
from copy import deepcopy
from collections import OrderedDict

import nkmodel as nk
import ppo.core as core
from ppo.ppo import PPOBuffer
from utils.utils import max_mean_clustering_network
import envs
import json
from itertools import product
from functools import reduce  

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import matplotlib.gridspec as gridspec

In [3]:
E = 32
M = 100
N = 15
K = 3
NN = 3
exp = 8
trj_len = 200
graph_type = 'complete'
reward_type = 'indv_raw_full'
action_type = 'total'
extra_type = 'SI'
env_name = 'SL_NK_' + action_type

nx_dict = {'complete': nx.complete_graph, 'ba': nx.barabasi_albert_graph, 'er': nx.erdos_renyi_graph, 'maxmc':max_mean_clustering_network} 
nx_arg_dict = {'complete': {'n': M}, 'ba': {'n': M, 'm': 19}, 'er': {'n': M, 'p': 0.3}, 'maxmc': {'n': M}}

env_kwargs = {
        'E': E,
        'M': M,
        'N': N,
        'K': K,
        'neighbor_num': NN,
        'exp': exp,
        'graph': nx_dict[graph_type],
        'graph_dict': nx_arg_dict[graph_type],
        'reward_type': reward_type,
        'action_type': action_type,
        'extra_type': extra_type,
    'corr_type': 'TT'
    }

In [None]:
# Phase transition을 찾는 NN
# 다양한 example에서 공통된 미방의 꼴을 찾는 (공통의 정의?)
# 수열의 embedding (f(x), -5 < x < 5 범위의 수열을 함수의 symbolic한 form과 매칭시키는 함수)
# dynamics를 embedding하는 것 (CA를 embedding하면 비슷한 것끼리 뭉치나?
# NN의 classification 분포를 바탕으로 판단하는 다른 NN (이미 있을 것)
# GA의 Neural Net version

# 내재적인 rule이 있는지, 얼마나 쉬운지의 여부를 측정하는 neural measure?
# test_loss / train_loss를 minimize 할 수 있는가?
# neural network 1 의 output을 reward로 받는 neural network 2

## Model Loading

In [4]:
def load_model(exp_name, epoch):

    #rel_path = f'data/runs/ds_complete_indv_raw_random_SIR_N10K3NN3_new_rand/{exp_name}/{exp_name}_s42/'
    rel_path = f'data/runs/{exp_name}/{exp_name}_s42/'

    with open(rel_path + "config.json") as json_file:
        json_data = json.load(json_file)
    env_kwargs = json_data['env_kwargs']
    env_name = json_data['env_name']
    env_kwargs['graph'] = nx.complete_graph
    ac_kwargs = json_data['ac_kwargs']
    ac_kwargs['activation'] = nn.Tanh()
    arch = json_data['arch']
    trj_len = json_data['trj_len']
    gamma = json_data['gamma']
    lam = json_data['lam']
    epochs = json_data['epochs']
    seed = json_data['seed']
    ensemble_num = env_kwargs['E']
    agent_num = env_kwargs['M']
    env_scheduler_kwargs = {
            'local_rank': 0,
            'exp_name': exp_name,
            'E': env_kwargs['E'],
            'N': env_kwargs['N'],
            'K': env_kwargs['K'],
            'exp': env_kwargs['exp'],
            'NGPU': 1, #'data_dir': 'D:\\OneDrive\\연구\\ML\\MyProject\\SocialNet\\SocialNet\\data\\runs\\ds_complete_indv_raw_random_SIR_N10K3NN3_new_rand'
        'data_dir': 'D:\\OneDrive\\연구\\ML\\MyProject\\SocialNet\\SocialNet\\data\\runs'
    }
    env_kwargs['env_scheduler'] = envs.__dict__['random_env_scheduler'](**env_scheduler_kwargs)
    json_data['corr_type'] = 'TT'
    env_kwargs['corr_type'] = 'TT'
    if len(env_kwargs['reward_type']) < 9:
        print('modify')
        env_kwargs['reward_type'] = env_kwargs['reward_type'] + '_full'
    torch.manual_seed(seed)
    np.random.seed(seed)
    env = envs.__dict__[env_name](**env_kwargs)
    action_type = env_kwargs['action_type']
    extra_type = env_kwargs['extra_type']
    extra_num = len(extra_type)
    # Instantiate environment
    if action_type == 'total':
        obs_dim = (env.neighbor_num + 1, env.N + extra_num)  # (3+1, 15+2)
        act_dim = env.action_space.n
        dim_len = env.N
    elif action_type == 'split':
        obs_dim = (env.neighbor_num + 1, 1 + extra_num)
        act_dim = (2,)
        dim_len = env.N
        
    checkpoint = torch.load(rel_path+f'pyt_save/model{epoch}.pth')
    ac = core.ActorCritic(obs_dim, act_dim, arch, **ac_kwargs)
    ac.pi.load_state_dict(checkpoint['pi'])
    ac.v.load_state_dict(checkpoint['v'])

    Parallel = DataParallel
    parallel_args = {
        'device_ids': list(range(1)),
        'output_device': 0
    } 

    ac.pi = Parallel(ac.pi, **parallel_args)
    ac.v = Parallel(ac.v, **parallel_args)
    ac.eval()
    return ac, obs_dim, act_dim, dim_len, gamma, lam, env_kwargs

In [None]:
from utils.sampler import BatchSampler
ensemble_num = 10
agent_num = 100
trj_len = 200
batch_size = 1000

train_sampler = BatchSampler(ensemble_num, agent_num, trj_len, batch_size, device=device)

## Model Test

In [5]:
# complete_L200 2025
# complete_L200_2 2269
# complete_L300 2593
# maxmc_L100 4483
# maxmc_L200 3761
# 79, 177
exp_name = 'test_test'
epoch = 5000 # 5738
ac, obs_dim, act_dim, dim_len, gamma, lam, env_kwargs = load_model(exp_name, epoch)
reward_supply_type = 'full'
env_kwargs['rescale'] = False
terminate = True

In [6]:
env_kwargs

{'E': 10,
 'K': 11,
 'M': 100,
 'N': 15,
 'action_type': 'total',
 'corr_type': 'TT',
 'env_scheduler': <envs.env_scheduler.random_env_scheduler at 0x16333a41708>,
 'exp': 8,
 'extra_type': 'SIRF',
 'graph': <function decorator.complete_graph(n, create_using=None)>,
 'graph_dict': {'n': 100},
 'graph_type': 'complete',
 'neighbor_num': 3,
 'rescale': False,
 'reward_type': 'indv_raw_full'}

In [7]:
# normal test, without unique/prob
buf_list = []
final_score_list = []

Ret_list = []
env_num = 50
env_list = [envs.__dict__[env_name](**env_kwargs) for i in range(env_num)]

for i in range(env_num):
    print(i)
    test_ensemble_num = 100
    buf = PPOBuffer(
        obs_dim, 
        act_dim, 
        test_ensemble_num, 
        env_kwargs['M'], 
        dim_len, 
        trj_len, 
        gamma, 
        lam, 
        split=True if env_kwargs['action_type'] == 'split' else False)

    
    env = env_list[i]
    o, _ = env.reset(test_ensemble_num, base=True) 
    ep_ret, ep_len = 0, 0
    best_ep_ret = -np.inf

    for t in range(trj_len):
        epoch_ended = t == trj_len - 1
        a, v, logp, pi = ac.step(torch.as_tensor(o, dtype=torch.float32, device='cuda'), return_pi=True)

        next_o, r, s = env.step(a)
        ep_ret += r
        ep_len += 1
        
        if reward_supply_type == 'full':
            buf.store(o, a, r, v, s, logp)
        else:
            if epoch_ended:
                if reward_supply_type == 'final':
                    buf.store(o, a, r * trj_len, v, s, logp)
                elif reward_supply_type == 'finalmean':
                    buf.store(o, a, ep_ret, v, s, logp)
                else:
                    raise NotImplementedError
            else:
                buf.store(o, a, 0, v, s, logp)

        # Update obs (critical!)
        o = next_o
        
        if epoch_ended:
            a, v, logp, pi = ac.step(
                torch.as_tensor(o, dtype=torch.float32, device='cuda' ),
                return_pi=True
            )
            _, _, s = env.step(a)
            if terminate:
                buf.finish_path(np.zeros_like(v))
            else:
                buf.finish_path(v)
    
    Ret=ep_ret / ep_len
    Ret_list.append(Ret)
    EpLen=ep_len
    FinalScore=np.mean(s)
    buf_list.append(buf)
    final_score_list.append(FinalScore)
    ep_ret, ep_len = 0, 0

Ret_list = np.array(Ret_list)
final_score_list = np.array(final_score_list)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22


MemoryError: Unable to allocate 4.35 MiB for an array with shape (100, 100, 3, 19) and data type float64

In [None]:
Ret_list = np.array(Ret_list)
final_score_list = np.array(final_score_list)

In [None]:
np.mean(Ret_list), np.mean(final_score_list)

In [None]:
scr_buf_list = []
for buf in buf_list:
    scr_buf_list.append(buf.scr_buf)
scr_buf_list = np.array(scr_buf_list)

In [None]:
inspection_dict = {}
inspection_dict['scr_buf_list'] = scr_buf_list

In [None]:
with open(f'./result/inspection_dict/inspection_dict_{exp_name}_E{epoch}.pkl', 'wb') as f:
    pickle.dump(inspection_dict, f, pickle.HIGHEST_PROTOCOL)

In [None]:
# original landscape test

buf_list = []
final_score_list = []
unq_buf_list = []
prob_buf_list = []
Ret_list = []
env_num = 5
env_list = [envs.__dict__[env_name](**env_kwargs) for i in range(env_num)]

for i in range(env_num):
    print(i)
    test_ensemble_num = 1
    buf = PPOBuffer(
        obs_dim, 
        act_dim, 
        test_ensemble_num, 
        env_kwargs['M'], 
        dim_len, 
        trj_len, 
        gamma, 
        lam, 
        split=True if env_kwargs['action_type'] == 'split' else False)
    unq_buf = np.zeros((test_ensemble_num, trj_len), dtype=np.float32)
    prob_buf = []
    
    env = env_list[i]
    env_list[i].env_scheduler.local_rank = i
    o, _ = env.reset(test_ensemble_num)
    print(env.landscape.get_global_max()[1])
    ep_ret, ep_len = 0, 0
    best_ep_ret = -np.inf

    for t in range(trj_len):
        epoch_ended = t == trj_len - 1
        a, v, logp, pi = ac.step(torch.as_tensor(o, dtype=torch.float32, device='cuda'), return_pi=True)
        prob_buf.append(pi.probs[..., 1].detach().cpu().numpy())
        
        next_o, r, s = env.step(a)
        ep_ret += r
        ep_len += 1
        #print(np.mean(r), np.mean(s))
        # save and log
        
        if reward_supply_type == 'full':
            buf.store(o, a, r, v, s, logp)
        else:
            if epoch_ended:
                if reward_supply_type == 'final':
                    buf.store(o, a, r * trj_len, v, s, logp)
                elif reward_supply_type == 'finalmean':
                    buf.store(o, a, ep_ret, v, s, logp)
                else:
                    raise NotImplementedError
            else:
                buf.store(o, a, 0, v, s, logp)

        # Update obs (critical!)
        o = next_o
        for e in range(test_ensemble_num):
            freq = np.unique(a[e], axis=0)
            unq_buf[e][t] = freq.shape[0]
        
        if epoch_ended:
            a, v, logp, pi = ac.step(
                torch.as_tensor(o, dtype=torch.float32, device='cuda' ),
                return_pi=True
            )
            _, _, s = env.step(a)
            if terminate:
                buf.finish_path(np.zeros_like(v))
            else:
                buf.finish_path(v)
            prob_buf.append(pi.probs[..., 1].detach().cpu().numpy())
            for e in range(test_ensemble_num):
                freq = np.unique(a[e], axis=0)
                unq_buf[e][t] = freq.shape[0]
    
    unq_buf_list.append(unq_buf)
    prob_buf_list.append(prob_buf)
    Ret=ep_ret / ep_len
    Ret_list.append(Ret)
    EpLen=ep_len
    FinalScore=np.mean(s)
    buf_list.append(buf)
    final_score_list.append(FinalScore)
    ep_ret, ep_len = 0, 0

unq_buf_list = np.array(unq_buf_list)
prob_buf_list = np.array(prob_buf_list)
Ret_list = np.array(Ret_list)
final_score_list = np.array(final_score_list)

inspection_dict = {}
inspection_dict['buf_list'] = buf_list
inspection_dict['unq_buf_list'] = unq_buf_list
inspection_dict['prob_buf_list'] = prob_buf_list

#with open('./result/inspection_dict/inspection_dict_complete_N15K7NN3L200_disc_g99_I100_L200_RST_TMT.pkl', 'wb') as f:
#    pickle.dump(inspection_dict, f, pickle.HIGHEST_PROTOCOL)

In [None]:
buf_list[4].rew_buf[0][0]

In [None]:
rel_path = f'data/runs/{exp_name}/'

with open(rel_path + f"{exp_name}_landscape_list.pkl", 'rb') as f:
    landscape_list = pickle.load(f)

In [None]:
for i in range(env_num):
    print(landscape_list[i].get_global_max()[1])

In [None]:

example = a[0][6]
land.get_value(example)/land.get_global_max()[1]

In [None]:
env.reward_group_type

In [None]:
with open('./result/inspection_dict/inspection_dict_complete_N15K7NN3L200_disc_g99_I100_L200_RST_TMT.pkl', 'wb') as f:
    pickle.dump(inspection_dict, f, pickle.HIGHEST_PROTOCOL)

In [None]:
env = env_list[10]
o, _ = env.reset(test_ensemble_num)

In [None]:
# single env batch test
buf_list = []
final_score_list = []
unq_buf_list = []
prob_buf_list = []
Ret_list = []
env_num = 10
test_env = envs.__dict__[env_name](**env_kwargs)
env_list = [test_env for i in range(env_num)]

for i in range(env_num):
    print(i)
    test_ensemble_num = 100
    buf = PPOBuffer(
        obs_dim, 
        act_dim, 
        test_ensemble_num, 
        env_kwargs['M'], 
        dim_len, 
        trj_len, 
        gamma, 
        lam, 
        split=True if env_kwargs['action_type'] == 'split' else False)
    unq_buf = np.zeros((test_ensemble_num, trj_len), dtype=np.float32)
    prob_buf = []
    
    env = env_list[i]
    o, _ = env.reset(test_ensemble_num) 
    ep_ret, ep_len = 0, 0
    best_ep_ret = -np.inf

    for t in range(trj_len):
        epoch_ended = t == trj_len - 1
        a, v, logp, pi = ac.step(torch.as_tensor(o, dtype=torch.float32, device='cuda'), return_pi=True)
        prob_buf.append(pi.probs[..., 1].detach().cpu().numpy())
        
        next_o, r, s = env.step(a)
        ep_ret += r
        ep_len += 1
        #print(np.mean(r), np.mean(s))
        # save and log
        
        if reward_supply_type == 'full':
            buf.store(o, a, r, v, s, logp)
        else:
            if epoch_ended:
                if reward_supply_type == 'final':
                    buf.store(o, a, r * trj_len, v, s, logp)
                elif reward_supply_type == 'finalmean':
                    buf.store(o, a, ep_ret, v, s, logp)
                else:
                    raise NotImplementedError
            else:
                buf.store(o, a, 0, v, s, logp)

        # Update obs (critical!)
        o = next_o
        for e in range(test_ensemble_num):
            freq = np.unique(a[e], axis=0)
            unq_buf[e][t] = freq.shape[0]
        
        if epoch_ended:
            a, v, logp, pi = ac.step(
                torch.as_tensor(o, dtype=torch.float32, device='cuda' ),
                return_pi=True
            )
            _, _, s = env.step(a)
            if terminate:
                buf.finish_path(np.zeros_like(v))
            else:
                buf.finish_path(v)
           
            prob_buf.append(pi.probs[..., 1].detach().cpu().numpy())
            for e in range(test_ensemble_num):
                freq = np.unique(a[e], axis=0)
                unq_buf[e][t] = freq.shape[0]
    
    unq_buf_list.append(unq_buf)
    prob_buf_list.append(prob_buf)
    Ret=ep_ret / ep_len
    Ret_list.append(Ret)
    EpLen=ep_len
    FinalScore=np.mean(s)
    buf_list.append(buf)
    final_score_list.append(FinalScore)
    ep_ret, ep_len = 0, 0

unq_buf_list = np.array(unq_buf_list)
prob_buf_list = np.array(prob_buf_list)
Ret_list = np.array(Ret_list)
final_score_list = np.array(final_score_list)

In [None]:
# multiple epochs test

epoch_list = [1600, 1800, 2000, 2025, 2200, 2400, 2600, 2800, 3000, 3200]
result = {}

for epoch in epoch_list :
    print(epoch)
    exp_name = 'st_complete_indv_raw_full_total_random_SI_TT_N15K7NN3_new_rand200'
    ac, obs_dim, act_dim, dim_len, gamma, lam, env_kwargs = load_model(exp_name, epoch)
    score_list = []
    env_num = 10
    test_ensemble_num = 100
    env_list = [envs.__dict__[env_name](**env_kwargs) for i in range(env_num)]

    for i in range(env_num):
        print(i)
        buf = PPOBuffer(
            obs_dim, 
            act_dim, 
            test_ensemble_num, 
            env_kwargs['M'], 
            dim_len, 
            trj_len, 
            gamma, 
            lam, 
            split=True if env_kwargs['action_type'] == 'split' else False)

        env = env_list[i]
        o, _ = env.reset(test_ensemble_num) 
        ep_ret, ep_len = 0, 0
        best_ep_ret = -np.inf
        env.scores.flatten().max()

        for t in range(trj_len):
            a, v, logp, pi = ac.step(torch.as_tensor(o, dtype=torch.float32, device='cuda'), return_pi=True)

            next_o, r, s = env.step(a)
            ep_ret += r
            ep_len += 1
            #print(np.mean(r), np.mean(s))
            # save and log
            buf.store(o, a, r, v, s, logp)

            # Update obs (critical!)
            o = next_o
            epoch_ended = t == trj_len - 1

            if epoch_ended:
                a, v, logp, pi = ac.step(
                    torch.as_tensor(o, dtype=torch.float32, device='cuda' ),
                    return_pi=True
                )
                _, _, s = env.step(a)
                buf.finish_path(v)

        score_list.append(buf.scr_buf)
        ep_ret, ep_len = 0, 0
        
    result[epoch] = np.array(score_list)

In [None]:
inspection_dict = {}
inspection_dict['buf_list'] = buf_list
inspection_dict['unq_buf_list'] = unq_buf_list
inspection_dict['prob_buf_list'] = prob_buf_list

with open('./result/inspection_dict/inspection_dict_complete_N15K7NN3L200_newppo_g95_1.pkl', 'wb') as f:
    pickle.dump(inspection_dict, f, pickle.HIGHEST_PROTOCOL)

In [None]:
def explained_variance(y_pred: np.ndarray, y_true: np.ndarray) -> np.ndarray:
    """
    Computes fraction of variance that ypred explains about y.
    Returns 1 - Var[y-ypred] / Var[y]
    interpretation:
        ev=0  =>  might as well have predicted zero
        ev=1  =>  perfect prediction
        ev<0  =>  worse than just predicting zero
    :param y_pred: the prediction
    :param y_true: the expected value
    :return: explained variance of ypred and y
    """
    y_pred, y_true = y_pred.flatten(), y_true.flatten()
    var_y = np.var(y_true)
    return np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y

In [None]:
Ret_list = np.array(Ret_list)
final_score_list = np.array(final_score_list)
np.mean(Ret_list), np.mean(final_score_list)

In [None]:
(0.7796671434669661, 87.42018489279255)

In [None]:
plt.plot(buf_list[0].ret_buf[5][14])
plt.plot(buf_list[0].val_buf[5][14])

In [None]:
buf_list[0].rew_buf

In [None]:
plt.plot(buf_list[0].rew_buf[1][11])

In [None]:
np.mean((buf_list[0].val_buf[1].flatten() - buf_list[0].ret_buf[1].flatten())**2)

In [None]:
explained_variance(buf_list[0].val_buf, buf_list[0].ret_buf)

In [None]:
with open('./result/inspection_dict/inspection_dict_complete_N15K7NN3L200_disc_g99_I100_L200_RST_TMT.pkl', 'rb') as f:
    inspection_dict = pickle.load(f)

In [None]:
buf_list = inspection_dict['buf_list']
unq_buf_list = inspection_dict['unq_buf_list']
prob_buf_list = inspection_dict['prob_buf_list']

In [None]:
plt.plot(buf_list[0].adv_buf[i][j])
plt.plot(buf_list2[0].adv_buf[i][j])

In [None]:
plt.plot(buf_list[0].val_buf[i][j])
plt.plot(buf_list2[0].val_buf[i][j])

In [None]:
plt.plot(buf_list[0].rew_buf[i][j])
plt.plot(buf_list2[0].rew_buf[i][j])

In [None]:
buf_scr_list = []
for i in range(len(buf_list)):
    buf_scr_list.append(buf_list[i].scr_buf)
buf_scr_list = np.array(buf_scr_list) 

In [None]:
#x = baseline_data_dict['FollowBest_indv']['scr_buf']
x = buf_scr_list
for i in range(10):
    print(np.mean(x[i]), np.std(x[i]))
    
print(np.mean(x), np.std(x))

In [None]:
np.std(np.mean(x, axis=1))

In [None]:
#x = baseline_data_dict['FollowBest_indv']['scr_buf']
x = buf_scr_list
for i in range(10):
    print(np.mean(x[i]), np.std(x[i]))
    
print(np.mean(x), np.std(x))

In [None]:
buf_ret_list = []
for i in range(len(buf_list)):
    buf_ret_list.append(buf_list[i].ret_buf)
buf_ret_list = np.array(buf_ret_list)

In [None]:
buf_scr_list2 = []
for i in range(len(buf_list)):
    buf_scr_list2.append(buf_list[i].scr_buf)
buf_scr_list2 = np.array(buf_scr_list)

In [None]:
# Figure drawing
fig = plt.figure(figsize=(4,4), dpi=150)
ax = fig.add_subplot(111)
color_list = ['green', 'blue','orangered', 'gold', 'purple', 'cyan', 'black']
counter=0

x = buf_scr_list
avg_pf = np.mean(x, axis=tuple(range(0, len(x.shape) - 1)))
std_pf = np.std(x, axis=tuple(range(0, len(x.shape) - 1)))
ax.plot(np.arange(x.shape[-1]), avg_pf, c=color_list[counter], label='RL_TT')
#ax.fill_between(np.arange(x.shape[-1]), avg_pf-std_pf, avg_pf+std_pf, facecolor=color_list[counter], alpha=0.2)
ax.set_xlabel('Time')
ax.set_ylabel('Average Performance')
ax.legend()

In [None]:
with open(f'./data/baseline_data/baseline_complete_N15K7NN3.pkl', 'rb') as f:
    baseline_data_dict = pickle.load(f)

In [None]:
with open(f'./result/baseline_complete_N15K7NN3.pkl', 'rb') as f:
    baseline_data_dict = pickle.load(f)

In [None]:
baseline_data_dict.keys()

In [None]:
np.mean(baseline_data_dict['FollowBest_random']['scr_buf'][:, :, :, :100])

In [None]:
baselines = ['FollowBest', 'FollowBest_indv', 'FollowBest_random', 'FollowBest_prob',
             'FollowMajor', 'FollowMajor_indv', 'FollowMajor_random', 'FollowMajor_prob',
            'IndvLearning', 'IndvRandom', 'IndvProb', 'RandomCopy']

In [None]:
# Figure drawing
fig = plt.figure(figsize=(4,4), dpi=200)
ax = fig.add_subplot(111)

color_list = ['red', 'blue', 'orange', 'yellow', 'black', 'gray', 'limegreen', 'darkgreen','deepskyblue', 'royalblue', 'purple', 'gold']
marker_list = ['o', 'x', 's', 'p', '*', '<', '>', 'd']
label_dict = {'FollowBest':'BI', 'FollowBest_indv':'BI-I', 'FollowBest_random':'BI-R', 'FollowBest_prob':'BI-P',
              'FollowMajor':'CF', 'FollowMajor_indv':'CF-I', 'FollowMajor_random':'CF-R', 'FollowMajor_prob':'CF-P',
             'IndvLearning':'PI', 'IndvRandom':'PI-R', 'IndvProb':'PI-P', 'RandomCopy':'RI'}

counter=0
if baselines:
    for baseline_name in baselines:
        x = baseline_data_dict[baseline_name]['scr_buf']
        avg_pf = np.mean(x, axis=tuple(range(0, len(x.shape) - 1)))
        std_pf = np.std(x, axis=tuple(range(0, len(x.shape) - 1)))
        ax.plot(np.arange(x.shape[-1]), avg_pf, c=color_list[counter], ls=(0, (3, 2)), label=label_dict[baseline_name])
        #ax.fill_between(np.arange(x.shape[-1]), avg_pf-std_pf, avg_pf+std_pf, facecolor=color_list[counter], alpha=0.2)
        counter+=1

x = buf_scr_list
avg_pf = np.mean(x, axis=tuple(range(0, len(x.shape) - 1)))
std_pf = np.std(x, axis=tuple(range(0, len(x.shape) - 1)))
ax.plot(np.arange(x.shape[-1]), avg_pf, c='r', label='RL')

#ax.fill_between(np.arange(x.shape[-1]), avg_pf-std_pf, avg_pf+std_pf, facecolor=color_list[counter], alpha=0.2)
ax.set_xlabel('Time')
ax.set_ylabel('Average Payoff')
ax.legend(fontsize=8, loc=4)
fig_name = 'st_complete_indv_raw_full_total_random_SI_TT_N15K7NN3_disc_g99_I100_L200_RST_TMT'
#plt.savefig(f'./result/figure/{fig_name}.png')

In [None]:
for key in baseline_data_dict2.keys():
    if key != 'keys':
        print(key)
        for key2 in baseline_data_dict2['keys']:
            print(baseline_data_dict2[key][key2])

In [None]:
complete_dict = {'CF-I':[64.77, 78.64], 'BI-I': [55.65, 55.53], 'BI':[31.31, 31.51], 'PI':[32.76, 35.95], 'RI':[31.01, 31.53], 'CF':[4.33, 4.39]}

In [None]:
length = len(complete_dict)
if length%2:
    # odd
    start = -(length//2)*width
else:
    # even
    start = -((length//2)-0.5)*width
offset = [start + i*width for i in range(length)]


In [None]:
offset

In [None]:
%matplotlib inline
fig = plt.figure(figsize=(8,3), dpi=200)
ax = fig.add_subplot(111)

labels = ['Average', 'Final']
complete_dict = {'RL':[70, 80], 'CF-I':[64.77, 78.64], 'BI-I': [55.65, 55.53], 'BI':[31.31, 31.51], 'PI':[32.76, 35.95], 'RI':[31.01, 31.53], 'CF':[4.33, 4.39]}
color_list = ['red', 'limegreen', 'darkgreen','deepskyblue', 'royalblue', 'purple', 'gold']
center  = np.array([0., 3.])
width = 0.2
length = len(complete_dict)
if length%2: 
    start = -(length//2)*width
else: 
    start = -((length//2)-0.5)*width
offset = [start + i * width for i in range(length)]

for i, key in enumerate(complete_dict.keys()):
    ax.bar(center+offset[i], complete_dict[key], width, label=key, color = color_list[i])
        
#BI_rect = ax.bar(x - width/2, con, width, label='CN', color = 'salmon')
#BI_rect = ax.bar(x + width/2, siam, width, label='SNN', color = 'skyblue')

ax.set_ylabel('Payoff', fontsize=12)
ax.set_xticks(center)
ax.set_xticklabels(labels, fontsize=12)
ax.legend(loc=8, ncol=2, fontsize = 8)

fig.tight_layout()

In [None]:
# Figure drawing
fig = plt.figure(figsize=(4,4), dpi=150)
ax = fig.add_subplot(111)
color_list = ['green', 'blue','orangered', 'gold', 'purple', 'cyan', 'black']
counter=0

x = unq_buf_list
avg_pf = np.mean(x, axis=tuple(range(0, len(x.shape) - 1)))
std_pf = np.std(x, axis=tuple(range(0, len(x.shape) - 1)))
ax.plot(np.arange(x.shape[-1]), avg_pf, c='black', lw=3, label='RL_TT')

x = baseline_data['FollowMajor_indv']['unq_buf']
avg_pf = np.mean(x, axis=tuple(range(0, len(x.shape) - 1)))
std_pf = np.std(x, axis=tuple(range(0, len(x.shape) - 1)))
ax.plot(np.arange(x.shape[-1]), avg_pf, c=color_list[counter], label='FollowMajor_indv')
#ax.fill_between(np.arange(x.shape[-1]), avg_pf-std_pf, avg_pf+std_pf, facecolor=color_list[counter], alpha=0.2)
counter+=1


#ax.fill_between(np.arange(x.shape[-1]), avg_pf-std_pf, avg_pf+std_pf, facecolor=color_list[counter], alpha=0.2)
ax.set_xlabel('Time')
ax.set_ylabel('Unique states')
ax.legend()

In [None]:
# Figure drawing
fig = plt.figure(figsize=(4,4), dpi=150)
ax = fig.add_subplot(111)
color_list = ['green', 'blue','orangered', 'gold', 'purple', 'cyan', 'black']
counter=0

#x = buf_scr_list
#avg_pf = np.mean(x, axis=tuple(range(0, len(x.shape) - 1)))
#std_pf = np.std(x, axis=tuple(range(0, len(x.shape) - 1)))
#ax.plot(np.arange(x.shape[-1]), avg_pf, c='black', lw=3, label='RL_TT')
#ax.fill_between(np.arange(x.shape[-1]), avg_pf-std_pf, avg_pf+std_pf, facecolor=color_list[counter], alpha=0.2)


if baselines:
    for baseline_name in baselines:
        x = baseline_data_dict[baseline_name]['scr_buf']
        avg_pf = np.mean(x, axis=tuple(range(0, len(x.shape) - 1)))
        std_pf = np.std(x, axis=tuple(range(0, len(x.shape) - 1)))
        ax.plot(np.arange(x.shape[-1]), avg_pf, c=color_list[counter], label=baseline_name)
        #ax.fill_between(np.arange(x.shape[-1]), avg_pf-std_pf, avg_pf+std_pf, facecolor=color_list[counter], alpha=0.2)
        counter+=1


ax.set_xlabel('Time')
ax.set_ylabel('Average Performance')
ax.legend(fontsize=6)


In [None]:
@interact
def test1(s1=widgets.IntSlider(min=0, max=100, step=1, value=40, description='Score_1:'), 
            s2=widgets.IntSlider(min=0, max=100, step=1, value=10, description='Score_2:'),
            s3=widgets.IntSlider(min=0, max=100, step=1, value=20, description='Score_3:'),
            s4=widgets.IntSlider(min=0, max=100, step=1, value=60, description='Score_4:')
             ):
    %matplotlib inline
    norm_const = 1
    fig = plt.figure(figsize = (12, 4), dpi=200, constrained_layout=True)
    widths = [0.5, 0.2, 0.2, 0.2]
    heights = [1]
    gs = fig.add_gridspec(nrows=1, ncols=4, width_ratios=widths, height_ratios=heights)

    ax10 = fig.add_subplot(gs[0])
    s1 /= norm_const
    s2 /= norm_const
    s3 /= norm_const
    s4 /= norm_const
    
    #'''
    indv_obs_data = np.array([
                  [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, s1, 1],
                [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, s2, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, s3, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, s4, 0]])
    '''
    indv_obs_data = np.array([[1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, s1, 1], 
              [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, s2, 0],
                [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, s3, 0],
              [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, s4, 0]])
    '''
    indv_obs_score = [s1, s2, s3, s4]
    ax10.imshow(indv_obs_data[..., :-2], cmap=cm.binary)
    ax10.set_title(f'Observation')
    #ax10.set_xticks([])
    #ax10.set_yticks([])

    ax10.annotate("score", (15, -0.5), fontsize=10, annotation_clip=False)
    ax10.annotate("%0.2f" % indv_obs_score[0], (15, 0.5), fontsize=10, annotation_clip=False)
    ax10.annotate("%0.2f" % indv_obs_score[1], (15, 1.5), fontsize=10, annotation_clip=False)
    ax10.annotate("%0.2f" % indv_obs_score[2], (15, 2.5), fontsize=10, annotation_clip=False)
    ax10.annotate("%0.2f" % indv_obs_score[3], (15, 3.5), fontsize=10, annotation_clip=False)

    ax11 = fig.add_subplot(gs[1])
    a, v, logp, pi = ac.step(torch.as_tensor(indv_obs_data, dtype=torch.float32, device='cuda').unsqueeze(0), return_pi=True)
    prob_data = pi.probs[..., 1].detach().cpu().numpy()
    ax11.imshow(prob_data, cmap=cm.binary, vmin=0, vmax=1)
    ax11.set_title('Prob.')
    ax11.set_xticks([])
    ax11.set_yticks([])

    ax12 = fig.add_subplot(gs[2])
    act_data = a
    ax12.imshow(act_data, cmap=cm.binary)
    #action_score = env_list[0].get_score(act_data.reshape(1, 1, -1)).item()
    ax12.set_title('Action')
    ax12.set_xticks([])
    ax12.set_yticks([])

# 1등 점수, 2등과 3등 점수의 합 (대충?), 2등과 1등의 점수 차가 영향을 미침
# 내 점수가 20 이상인데 다른 개체들과의 차이가 심하면 (최저 -10?) 메이저 베낌
# 자기보다 다들 높으면 확실히 best로 가려고 함 (2등과 1등 점수차가 상대적으로 작아도)
# 반대로 2, 3등이 고만고만하면 1등과의 점수차가 압도적이어야 바꿈
# 자기 점수가 높을 수록 2등, 3등 점수와 1등 점수차도 더 벌어짐 (+상수보다는 선형?)
# 자기가 꼴찌가 아니면 전략 안 바꿈! (하지만 극단적으로 높은 점수는 가끔 받음)

In [None]:
interact(inspection, buf_list=fixed(buf_list), )

In [None]:
# Figure drawing
fig = plt.figure(figsize=(4,4), dpi=150)
ax = fig.add_subplot(111)
color_list = ['green', 'blue','orangered', 'gold', 'purple', 'cyan', 'black']
counter=0
if baselines:
    baseline_name = 'FollowMajor_indv'
    x = baseline_data[baseline_name]['scr_buf'][:20, :20]
    avg_pf = np.mean(x, axis=tuple(range(0, len(x.shape) - 1)))
    std_pf = np.std(x, axis=tuple(range(0, len(x.shape) - 1)))
    ax.plot(np.arange(x.shape[-1]), avg_pf, c=color_list[counter], label=baseline_name)
    #ax.fill_between(np.arange(x.shape[-1]), avg_pf-std_pf, avg_pf+std_pf, facecolor=color_list[counter], alpha=0.2)
    counter+=1


#x = buf_scr_list
#avg_pf = np.mean(x, axis=tuple(range(0, len(x.shape) - 1)))
#std_pf = np.std(x, axis=tuple(range(0, len(x.shape) - 1)))
#ax.plot(np.arange(x.shape[-1]), avg_pf, c=color_list[counter], label='RL_TT')
#ax.fill_between(np.arange(x.shape[-1]), avg_pf-std_pf, avg_pf+std_pf, facecolor=color_list[counter], alpha=0.2)
ax.set_xlabel('Time')
ax.set_ylabel('Average Performance')
ax.legend()

In [None]:
# action
baselines = ['FollowBest', 'FollowBest_indv', 'FollowMajor', 'FollowMajor_indv', 'IndvLearning', 'RandomCopy']
insp_dict = {}
insp_dict['RL'] = [[] for i in range(env_num)]
for baseline_name in baselines:
    insp_dict[baseline_name] = [[] for i in range(env_num)]

for i in range(env_num):
    print(i)
    env_base = env_list[i]
    for j in range(trj_len):
        o = buf_list[i].obs_buf[:, :, j]
        insp_dict['RL'][i].append((ac.pi.module._distribution(torch.as_tensor(o, dtype=torch.float32, device='cuda')).probs[..., 1]).detach().cpu().numpy())
        for baseline_name in baselines:
            insp_dict[baseline_name][i].append(core.__dict__[baseline_name](env_base, action_type, extra_type).step(o))

In [None]:
fig = plt.figure(figsize=(4,4), dpi=150)
ax = fig.add_subplot(111)
ax.imshow(buf_list[0].obs_buf[0][:, 50, 0, :-2])

In [None]:
fig = plt.figure(figsize=(4,4), dpi=150)
ax = fig.add_subplot(111)
plt.imshow(buf_list[0].act_buf[0][70, :, :-2])

In [None]:
insp_dict['RL'] = np.array(insp_dict['RL'])
insp_dict['RL_flatten'] = (insp_dict['RL']>=0.5).astype(np.long)
for baseline_name in baselines:
    insp_dict[baseline_name] = np.array(insp_dict[baseline_name])

In [None]:
# dist

rl2b_dist = np.sqrt(np.sum((insp_dict['RL_flatten'] - insp_dict['FollowBest_indv'])**2, axis=-1))
rl2m_dist = np.sqrt(np.sum((insp_dict['RL_flatten'] - insp_dict['FollowMajor_indv'])**2, axis=-1))
rl2i_dist = np.sqrt(np.sum((insp_dict['RL_flatten'] - insp_dict['IndvLearning'])**2, axis=-1))
rl2r_dist = np.sqrt(np.sum((insp_dict['RL_flatten'] - insp_dict['RandomCopy'])**2, axis=-1))
m2b_dist = np.sqrt(np.sum((insp_dict['FollowBest_indv'] - insp_dict['FollowMajor_indv'])**2, axis=-1))

In [None]:
fig = plt.figure(figsize=(4,4), dpi=150)
ax = fig.add_subplot(111)
i=4
ax.plot(np.mean(rl2b_dist[i], axis=(1, 2)), label = 'rl2b')
ax.plot(np.mean(rl2m_dist[i], axis=(1, 2)), label = 'rl2m')
ax.plot(np.mean(rl2i_dist[i], axis=(1, 2)), label = 'rl2i')
ax.plot(np.mean(rl2r_dist[i], axis=(1, 2)), label = 'rl2r')
ax.plot(np.mean(m2b_dist[i], axis=(1, 2)), label = 'm2b')
ax.legend()

In [None]:
def matrix_cosine(x, y):
    return np.einsum('ij,ij->i', x, y) / (
              np.linalg.norm(x, axis=1) * np.linalg.norm(y, axis=1)
    )

In [None]:
rl2b_cos = matrix_cosine(insp_dict['RL'].reshape(-1, 15), insp_dict['FollowBest_indv'].reshape(-1, 15)).reshape(5, 100, 5, 100)
rl2m_cos = matrix_cosine(insp_dict['RL'].reshape(-1, 15), insp_dict['FollowMajor_indv'].reshape(-1, 15)).reshape(5, 100, 5, 100)
rl2i_cos = matrix_cosine(insp_dict['RL'].reshape(-1, 15), insp_dict['IndvLearning'].reshape(-1, 15)).reshape(5, 100, 5, 100)
rl2r_cos = matrix_cosine(insp_dict['RL'].reshape(-1, 15), insp_dict['RandomCopy'].reshape(-1, 15)).reshape(5, 100, 5, 100)
m2b_cos = matrix_cosine(insp_dict['FollowMajor_indv'].reshape(-1, 15), insp_dict['FollowBest_indv'].reshape(-1, 15)).reshape(5, 100, 5, 100)

In [None]:
fig = plt.figure(figsize=(4,4), dpi=150)
ax = fig.add_subplot(111)
i=4
ax.plot(1 - np.mean(rl2b_cos[i], axis=(1, 2)), label = 'rl2b')
ax.plot(1 - np.mean(rl2m_cos[i], axis=(1, 2)), label = 'rl2m')
ax.plot(1 - np.mean(rl2i_cos[i], axis=(1, 2)), label = 'rl2i')
ax.plot(1 - np.mean(rl2r_cos[i], axis=(1, 2)), label = 'rl2r')
ax.plot(1 - np.mean(m2b_cos[i], axis=(1, 2)), label = 'm2b')
ax.legend()

In [None]:
# result
baselines = ['FollowBest', 'FollowBest_indv', 'FollowMajor', 'FollowMajor_indv', 'IndvLearning', 'RandomCopy']
insp_dict = {}
insp_dict['RL'] = [[] for i in range(env_num)]
for baseline_name in baselines:
    insp_dict[baseline_name] = [[] for i in range(env_num)]

for i in range(env_num):
    print(i)
    env_base = env_list[i]
    for j in range(trj_len):
        o = buf_list[i].obs_buf[:, :, j]
        a, v, logp = ac.step(torch.as_tensor(o, dtype=torch.float32, device='cuda'))
        next_o, r, s = env_base.step(a)
        insp_dict['RL'][i].append(next_o[:, :, 0, :-2])
        for baseline_name in baselines:
            insp_dict[baseline_name][i].append(core.__dict__[baseline_name](env_base, action_type, extra_type).step(o))

In [None]:
insp_dict['RL'] = np.array(insp_dict['RL'])
insp_dict['RL_flatten'] = (insp_dict['RL']>=0.5).astype(np.long)
for baseline_name in baselines:
    insp_dict[baseline_name] = np.array(insp_dict[baseline_name])
# dist

rl2b_dist = np.sqrt(np.sum((insp_dict['RL_flatten'] - insp_dict['FollowBest_indv'])**2, axis=-1))
rl2m_dist = np.sqrt(np.sum((insp_dict['RL_flatten'] - insp_dict['FollowMajor_indv'])**2, axis=-1))
rl2i_dist = np.sqrt(np.sum((insp_dict['RL_flatten'] - insp_dict['IndvLearning'])**2, axis=-1))
rl2r_dist = np.sqrt(np.sum((insp_dict['RL_flatten'] - insp_dict['RandomCopy'])**2, axis=-1))
m2b_dist = np.sqrt(np.sum((insp_dict['FollowBest_indv'] - insp_dict['FollowMajor_indv'])**2, axis=-1))

In [None]:
fig = plt.figure(figsize=(4,4), dpi=150)
ax = fig.add_subplot(111)
i=4
ax.plot(np.mean(rl2b_dist[i], axis=(1, 2)), label = 'rl2b')
ax.plot(np.mean(rl2m_dist[i], axis=(1, 2)), label = 'rl2m')
ax.plot(np.mean(rl2i_dist[i], axis=(1, 2)), label = 'rl2i')
ax.plot(np.mean(rl2r_dist[i], axis=(1, 2)), label = 'rl2r')
ax.plot(np.mean(m2b_dist[i], axis=(1, 2)), label = 'm2b')
ax.legend()

In [None]:
rl2b_cos = matrix_cosine(insp_dict['RL'].reshape(-1, 15), insp_dict['FollowBest_indv'].reshape(-1, 15)).reshape(5, 100, 5, 100)
rl2m_cos = matrix_cosine(insp_dict['RL'].reshape(-1, 15), insp_dict['FollowMajor_indv'].reshape(-1, 15)).reshape(5, 100, 5, 100)
rl2i_cos = matrix_cosine(insp_dict['RL'].reshape(-1, 15), insp_dict['IndvLearning'].reshape(-1, 15)).reshape(5, 100, 5, 100)
rl2r_cos = matrix_cosine(insp_dict['RL'].reshape(-1, 15), insp_dict['RandomCopy'].reshape(-1, 15)).reshape(5, 100, 5, 100)
m2b_cos = matrix_cosine(insp_dict['FollowMajor_indv'].reshape(-1, 15), insp_dict['FollowBest_indv'].reshape(-1, 15)).reshape(5, 100, 5, 100)

In [None]:
fig = plt.figure(figsize=(4,4), dpi=150)
ax = fig.add_subplot(111)
i=4
ax.plot(1 - np.mean(rl2b_cos[i], axis=(1, 2)), label = 'rl2b')
ax.plot(1 - np.mean(rl2m_cos[i], axis=(1, 2)), label = 'rl2m')
ax.plot(1 - np.mean(rl2i_cos[i], axis=(1, 2)), label = 'rl2i')
ax.plot(1 - np.mean(rl2r_cos[i], axis=(1, 2)), label = 'rl2r')
ax.plot(1 - np.mean(m2b_cos[i], axis=(1, 2)), label = 'm2b')
ax.legend()

## Strategy

In [None]:
import timeit

def coord_triplet(s):
    x = []  # np.zeros((int(s*(s+1)*(s+2)/6), 3))

    for i in range(s):
        for j in range(i, s):
            for k in range(j, s):
                x.append([i, j, k])
                
                
    return np.array(x)

def fixed_point(ac):
    data = np.array([[[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99, 1], 
                  [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 99, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 99, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 99, 0]]])

    a, v, logp, pi = ac.step(torch.as_tensor(data, dtype=torch.float32, device='cuda'), return_pi=True)
    x = pi.probs[..., 1].detach().cpu().numpy()
    return np.round(x)[0].astype(np.int)

def explode(data):
    shape_arr = np.array(data.shape)
    size = shape_arr[:3]*2 - 1
    exploded = np.zeros(np.concatenate([size, shape_arr[3:]]), dtype=data.dtype)
    exploded[::2, ::2, ::2] = data
    return exploded

def expand_coordinates(indices):
    x, y, z = indices
    x[1::2, :, :] += 1
    y[:, 1::2, :] += 1
    z[:, :, 1::2] += 1
    return x, y, z

def make_cube(s, fp_dist, sp_dist, hp_dist):
    x = np.ones((s, s, s, 3))
    c = 0
    for i in range(s):
        for j in range(i, s):  # i+1
            for k in range(j, s):  # j+1
                x[i, j, k] = [fp_dist[c], sp_dist[c], hp_dist[c]]
                c+=1
    return x

def assign_facecolors(pi_list, fp, sp, hp, fp_show, sp_show, hp_show, max_s):
    fp_dist = (np.sum((pi_list-fp)**2, axis=-1)**0.5)/np.sqrt(N)
    sp_dist = (np.sum((pi_list-sp)**2, axis=-1)**0.5)/np.sqrt(N)
    hp_dist = (np.sum((pi_list-hp)**2, axis=-1)**0.5)/np.sqrt(N)
    
    facecolors = np.zeros((max_s, max_s, max_s, 4)) # R, G, B, alpha
    mc = make_cube(max_s, fp_dist, sp_dist, hp_dist)
    if fp_show:
        facecolors[..., 0] = 1 - mc[:, :, :, 0]   # Red : fp_dist
    if sp_show:
        facecolors[..., 1] = 1 - mc[:, :, :, 1]   # Red : fp_dist
    if hp_show:
        facecolors[..., 2] = 1 - mc[:, :, :, 2]   # Blue : hp_dist
    #facecolors[..., -1] = (np.maximum((1 - np.min(mc, axis=-1)), 0.5)-0.5)*2
    facecolors[..., -1] = (1 - np.min(mc[..., [fp_show, sp_show, hp_show]], axis=-1))**2 * 0.8 # maximum opacity : 0.8 * 0.8 
    return facecolors, fp_dist, sp_dist, hp_dist

def plot_cube(facecolors, stride, angle=320, name = '', save=True):
    IMG_DIM = len(facecolors)
    facecolors = explode(facecolors)
    
    filled = facecolors[:,:,:,-1] != 0
    #print(filled.shape)
    #print(np.indices(np.array(filled.shape) + 1).shape)
    x, y, z = expand_coordinates(np.indices(np.array(filled.shape) + 1))

    fig = plt.figure(figsize=(4, 4), dpi=200)
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(30, angle)
    ax.set_xlim(right=IMG_DIM*stride)
    ax.set_ylim(top=IMG_DIM*stride)
    ax.set_zlim(top=IMG_DIM*stride)
    
    ax.set_xlabel(r'$n_1$')
    ax.set_ylabel(r'$n_2$')
    ax.set_zlabel(r'$n_3 (highest)$')
    
    ax.voxels(x/2*stride, y/2*stride, z/2*stride, filled, facecolors=facecolors, shade=False)
    if save:
        plt.savefig(f'cube_{name}.png', dpi=200)

In [None]:
#%matplotlib notebook
%matplotlib inline
for s in range(5, 101, 5):  # 5, 10, ..., 95, 100
    print(s)
    max_s = 100
    '''
    data = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
                  [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, -1, 0],
                    [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, -1, 0],
                  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 0]])
    '''
    template = np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
                  [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, -1, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, -1, 0]])

    tc = coord_triplet(max_s)
    data = np.repeat(template.reshape(1, 4, 17), len(tc), axis=0)
    tc = np.c_[np.ones(tc.shape[0])*s, tc] 
    data[:, :, -2] = tc
    pi_list = []
    for i in range((data.shape[0]//10000)+1):
        a, v, logp, pi = ac.step(torch.as_tensor(data[i*10000:(i+1)*10000], dtype=torch.float32, device='cuda'), return_pi=True)
        x = pi.probs[..., 1].detach().cpu().numpy()
        pi_list.append(x)
    pi_list = np.concatenate(pi_list, axis=0)
    fp = fixed_point(ac)
    sp = template[0][:-2]
    hp = template[-1][:-2]
    facecolors, fp_dist, sp_dist, hp_dist = assign_facecolors(pi_list, fp, sp, hp, True, False, True, max_s)
    stride = 5
    assert max_s%stride == 0
    facecolors = facecolors[::stride, ::stride, ::stride]
    plot_cube(facecolors, stride = 5, angle=-75, name = f'complete_L200_E500_S{s}')

In [None]:
%matplotlib notebook
s = 40
max_s = 100
'''
data = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
              [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, -1, 0],
                [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, -1, 0],
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 0]])
'''
template = np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
              [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0],
                [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, -1, 0],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, -1, 0]])

tc = coord_triplet(max_s)
data = np.repeat(template.reshape(1, 4, 17), len(tc), axis=0)
tc = np.c_[np.ones(tc.shape[0])*s, tc] 
data[:, :, -2] = tc
pi_list = []
for i in range((data.shape[0]//10000)+1):
    a, v, logp, pi = ac.step(torch.as_tensor(data[i*10000:(i+1)*10000], dtype=torch.float32, device='cuda'), return_pi=True)
    x = pi.probs[..., 1].detach().cpu().numpy()
    pi_list.append(x)
pi_list = np.concatenate(pi_list, axis=0)
fp = fixed_point(ac)
sp = template[0][:-2]
hp = template[-1][:-2]
facecolors, fp_dist, sp_dist, hp_dist = assign_facecolors(pi_list, fp, sp, hp, True, False, True, max_s)
stride = 5
assert max_s%stride == 0
facecolors = facecolors[::stride, ::stride, ::stride]
plot_cube(facecolors, stride = 5, angle=-75, name = str(s), save=True)

## Strategy

In [None]:
import timeit

def coord_triplet(s):
    x = []  # np.zeros((int(s*(s+1)*(s+2)/6), 3))

    for i in range(s):
        for j in range(i, s):
            for k in range(j, s):
                x.append([i, j, k])
                
                
    return np.array(x)

def fixed_point(ac):
    data = np.array([[[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99, 1], 
                  [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 99, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 99, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 99, 0]]])

    a, v, logp, pi = ac.step(torch.as_tensor(data, dtype=torch.float32, device='cuda'), return_pi=True)
    x = pi.probs[..., 1].detach().cpu().numpy()
    return np.round(x)[0].astype(np.int)

def explode(data):
    shape_arr = np.array(data.shape)
    size = shape_arr[:3]*2 - 1
    exploded = np.zeros(np.concatenate([size, shape_arr[3:]]), dtype=data.dtype)
    exploded[::2, ::2, ::2] = data
    return exploded

def expand_coordinates(indices):
    x, y, z = indices
    x[1::2, :, :] += 1
    y[:, 1::2, :] += 1
    z[:, :, 1::2] += 1
    return x, y, z

def make_cube(s, fp_dist, sp_dist, hp_dist):
    x = np.ones((s, s, s, 3))
    c = 0
    for i in range(s):
        for j in range(i, s):  # i+1
            for k in range(j, s):  # j+1
                x[i, j, k] = [fp_dist[c], sp_dist[c], hp_dist[c]]
                c+=1
    return x

def assign_facecolors(pi_list, fp, sp, hp, fp_show, sp_show, hp_show, max_s):
    fp_dist = (np.sum((pi_list-fp)**2, axis=-1)**0.5)/np.sqrt(N)
    sp_dist = (np.sum((pi_list-sp)**2, axis=-1)**0.5)/np.sqrt(N)
    hp_dist = (np.sum((pi_list-hp)**2, axis=-1)**0.5)/np.sqrt(N)
    
    facecolors = np.zeros((max_s, max_s, max_s, 4)) # R, G, B, alpha
    mc = make_cube(max_s, fp_dist, sp_dist, hp_dist)
    if fp_show:
        facecolors[..., 0] = 1 - mc[:, :, :, 0]   # Red : fp_dist
    if sp_show:
        facecolors[..., 1] = 1 - mc[:, :, :, 1]   # Red : fp_dist
    if hp_show:
        facecolors[..., 2] = 1 - mc[:, :, :, 2]   # Blue : hp_dist
    #facecolors[..., -1] = (np.maximum((1 - np.min(mc, axis=-1)), 0.5)-0.5)*2
    facecolors[..., -1] = (1 - np.min(mc[..., [fp_show, sp_show, hp_show]], axis=-1))**2 * 0.8 # maximum opacity : 0.8 * 0.8 
    return facecolors, fp_dist, sp_dist, hp_dist

def plot_cube(facecolors, stride, angle=320, name = '', save=True):
    IMG_DIM = len(facecolors)
    facecolors = explode(facecolors)
    
    filled = facecolors[:,:,:,-1] != 0
    #print(filled.shape)
    #print(np.indices(np.array(filled.shape) + 1).shape)
    x, y, z = expand_coordinates(np.indices(np.array(filled.shape) + 1))

    fig = plt.figure(figsize=(4, 4), dpi=200)
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(30, angle)
    ax.set_xlim(right=IMG_DIM*stride)
    ax.set_ylim(top=IMG_DIM*stride)
    ax.set_zlim(top=IMG_DIM*stride)
    
    ax.set_xlabel(r'$n_1$')
    ax.set_ylabel(r'$n_2$')
    ax.set_zlabel(r'$n_3 (highest)$')
    
    ax.voxels(x/2*stride, y/2*stride, z/2*stride, filled, facecolors=facecolors, shade=False)
    if save:
        plt.savefig(f'./result/cube_figure/cube_{name}.png', dpi=200)

In [None]:
#%matplotlib notebook
%matplotlib inline
for s in range(5, 101, 5):  # 5, 10, ..., 95, 100
    print(s)
    max_s = 100
    '''
    data = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
                  [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, -1, 0],
                    [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, -1, 0],
                  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 0]])
    '''
    template = np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
                  [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, -1, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, -1, 0]])

    tc = coord_triplet(max_s)
    data = np.repeat(template.reshape(1, 4, 17), len(tc), axis=0)
    tc = np.c_[np.ones(tc.shape[0])*s, tc] 
    data[:, :, -2] = tc
    pi_list = []
    for i in range((data.shape[0]//10000)+1):
        a, v, logp, pi = ac.step(torch.as_tensor(data[i*10000:(i+1)*10000], dtype=torch.float32, device='cuda'), return_pi=True)
        x = pi.probs[..., 1].detach().cpu().numpy()
        pi_list.append(x)
    pi_list = np.concatenate(pi_list, axis=0)
    fp = fixed_point(ac)
    hp = template[-1][:-2]
    facecolors, fp_dist, hp_dist = assign_facecolors(pi_list, fp, hp, max_s)
    stride = 5
    assert max_s%stride == 0
    facecolors = facecolors[::stride, ::stride, ::stride]
    plot_cube(facecolors, stride = 5, angle=-75, name = str(s))

In [None]:
# complete_L200 2820
# complete_L300 2593
# maxmc_L100 4483
# maxmc_L200 3761
exp_name = 'st_complete_indv_raw_full_total_random_SI_TT_N15K7NN3_pretrainFB'
epoch = 0
ac, obs_dim, act_dim, dim_len, gamma, lam, env_kwargs = load_model(exp_name, epoch)

In [None]:
exp_name = 'st_complete_total_FollowBest_SI_N15K7NN3_CE_sptest'
checkpoint = torch.load(f'./data/runs/{exp_name}/{exp_name}_s42/pyt_save/model.pth')
checkpoint = OrderedDict((f'logits_net.{key}', value) for (key, value) in checkpoint.items())
ac.pi.module.load_state_dict(checkpoint)

In [None]:
%matplotlib notebook
s = 40
max_s = 100
'''
data = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
              [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, -1, 0],
                [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, -1, 0],
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 0]])
'''
template = np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
              [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0],
                [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, -1, 0],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, -1, 0]])

tc = coord_triplet(max_s)
data = np.repeat(template.reshape(1, 4, 17), len(tc), axis=0)
tc = np.c_[np.ones(tc.shape[0])*s, tc] 
data[:, :, -2] = tc
pi_list = []
for i in range((data.shape[0]//10000)+1):
    a, v, logp, pi = ac.step(torch.as_tensor(data[i*10000:(i+1)*10000], dtype=torch.float32, device='cuda'), return_pi=True)
    x = pi.probs[..., 1].detach().cpu().numpy()
    pi_list.append(x)
pi_list = np.concatenate(pi_list, axis=0)
fp = fixed_point(ac)
sp = template[0][:-2]
hp = template[-1][:-2]
facecolors, fp_dist, sp_dist, hp_dist = assign_facecolors(pi_list, fp, sp, hp, True, True, True, max_s)
stride = 5
assert max_s%stride == 0
facecolors = facecolors[::stride, ::stride, ::stride]
plot_cube(facecolors, stride = 5, angle=-75, name = str(s), save=False)

In [None]:
s = 70
max_s = 100
'''
data = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
              [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, -1, 0],
                [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, -1, 0],
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 0]])
'''
template = np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
              [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0],
                [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, -1, 0],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, -1, 0]])

tc = coord_triplet(max_s)
data = np.repeat(template.reshape(1, 4, 17), len(tc), axis=0)
tc = np.c_[np.ones(tc.shape[0])*s, tc] 
data[:, :, -2] = tc
pi_list = []
for i in range((data.shape[0]//10000)+1):
    a, v, logp, pi = ac.step(torch.as_tensor(data[i*10000:(i+1)*10000], dtype=torch.float32, device='cuda'), return_pi=True)
    x = pi.probs[..., 1].detach().cpu().numpy()
    pi_list.append(x)
pi_list = np.concatenate(pi_list, axis=0)

In [None]:
# baseline
mod_name = '70SF'
baseline_name = 'RL_Inspired_SLSs'
ac_base = core.__dict__[baseline_name](env_base, action_type, extra_type, corr_type='TT', mod_type=mod_name)

%matplotlib notebook
s = 60
max_s = 100
'''
data = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
              [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, -1, 0],
                [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, -1, 0],
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 0]])
'''
template = np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
              [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0],
                [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, -1, 0],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, -1, 0]])

tc = coord_triplet(max_s)
data = np.repeat(template.reshape(1, 4, 17), len(tc), axis=0)
tc = np.c_[np.ones(tc.shape[0])*s, tc] 
data[:, :, -2] = tc
pi_list = []
for i in range((data.shape[0]//10000)+1):
    a = ac_base.step(data[i*10000:(i+1)*10000].reshape(2, -1, 4, 17))
    x = a.reshape(-1, 15)
    pi_list.append(x)
pi_list = np.concatenate(pi_list, axis=0)
fp = np.zeros(15)
sp = template[0][:-2]
hp = template[-1][:-2]
facecolors, fp_dist, sp_dist, hp_dist = assign_facecolors(pi_list, fp, sp, hp, False, True, True, max_s)
stride = 5
assert max_s%stride == 0
facecolors = facecolors[::stride, ::stride, ::stride]
plot_cube(facecolors, stride = 5, angle=-75, name = str(s), save=False)

In [None]:
# baseline
baseline_name = 'FollowBest'
env_base = envs.__dict__[env_name](**env_kwargs)
ac_base = core.__dict__[baseline_name](env_base, action_type, extra_type, corr_type='TT')

%matplotlib notebook
s = 60
max_s = 100
'''
data = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
              [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, -1, 0],
                [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, -1, 0],
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 0]])
'''
template = np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
              [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0],
                [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, -1, 0],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, -1, 0]])

tc = coord_triplet(max_s)
data = np.repeat(template.reshape(1, 4, 17), len(tc), axis=0)
tc = np.c_[np.ones(tc.shape[0])*s, tc] 
data[:, :, -2] = tc
pi_list = []
for i in range((data.shape[0]//10000)+1):
    a = ac_base.step(data[i*10000:(i+1)*10000].reshape(2, -1, 4, 17))
    x = a.reshape(-1, 15)
    pi_list.append(x)
pi_list = np.concatenate(pi_list, axis=0)
fp = np.zeros(15)
sp = template[0][:-2]
hp = template[-1][:-2]
facecolors, fp_dist, sp_dist, hp_dist = assign_facecolors(pi_list, fp, sp, hp, False, True, True, max_s)
stride = 5
assert max_s%stride == 0
facecolors = facecolors[::stride, ::stride, ::stride]
plot_cube(facecolors, stride = 5, angle=-75, name = str(s), save=False)

In [None]:
a.shape

In [None]:
pi_list[1].shape

In [None]:
plt.imshow(pi_list, aspect='auto')
plt.colorbar()

In [None]:
plt.plot(sp_dist)

In [None]:
x = np.random.randn(10, 3)

In [None]:
x

In [None]:
m.argsort()[:, -2]

## Strategy

In [None]:
import timeit

def coord_triplet(s):
    x = []  # np.zeros((int(s*(s+1)*(s+2)/6), 3))

    for i in range(s):
        for j in range(i, s):
            for k in range(j, s):
                x.append([i, j, k])
                
                
    return np.array(x)

def fixed_point(ac):
    data = np.array([[[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99, 1], 
                  [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 99, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 99, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 99, 0]]])

    a, v, logp, pi = ac.step(torch.as_tensor(data, dtype=torch.float32, device='cuda'), return_pi=True)
    x = pi.probs[..., 1].detach().cpu().numpy()
    return np.round(x)[0].astype(np.int)

def explode(data):
    shape_arr = np.array(data.shape)
    size = shape_arr[:3]*2 - 1
    exploded = np.zeros(np.concatenate([size, shape_arr[3:]]), dtype=data.dtype)
    exploded[::2, ::2, ::2] = data
    return exploded

def expand_coordinates(indices):
    x, y, z = indices
    x[1::2, :, :] += 1
    y[:, 1::2, :] += 1
    z[:, :, 1::2] += 1
    return x, y, z

def make_cube(s, fp_dist, sp_dist, hp_dist):
    x = np.ones((s, s, s, 3))
    c = 0
    for i in range(s):
        for j in range(i, s):  # i+1
            for k in range(j, s):  # j+1
                x[i, j, k] = [fp_dist[c], sp_dist[c], hp_dist[c]]
                c+=1
    return x

def assign_facecolors(pi_list, fp, sp, hp, fp_show, sp_show, hp_show, max_s):
    fp_dist = (np.sum((pi_list-fp)**2, axis=-1)**0.5)/np.sqrt(N)
    sp_dist = (np.sum((pi_list-sp)**2, axis=-1)**0.5)/np.sqrt(N)
    hp_dist = (np.sum((pi_list-hp)**2, axis=-1)**0.5)/np.sqrt(N)
    
    facecolors = np.zeros((max_s, max_s, max_s, 4)) # R, G, B, alpha
    mc = make_cube(max_s, fp_dist, sp_dist, hp_dist)
    if fp_show:
        facecolors[..., 0] = 1 - mc[:, :, :, 0]   # Red : fp_dist
    if sp_show:
        facecolors[..., 1] = 1 - mc[:, :, :, 1]   # Red : fp_dist
    if hp_show:
        facecolors[..., 2] = 1 - mc[:, :, :, 2]   # Blue : hp_dist
    #facecolors[..., -1] = (np.maximum((1 - np.min(mc, axis=-1)), 0.5)-0.5)*2
    facecolors[..., -1] = (1 - np.min(mc[..., [fp_show, sp_show, hp_show]], axis=-1))**2 * 0.8 # maximum opacity : 0.8 * 0.8 
    return facecolors, fp_dist, sp_dist, hp_dist

def plot_cube(facecolors, stride, angle=320, name = '', save=True):
    IMG_DIM = len(facecolors)
    facecolors = explode(facecolors)
    
    filled = facecolors[:,:,:,-1] != 0
    #print(filled.shape)
    #print(np.indices(np.array(filled.shape) + 1).shape)
    x, y, z = expand_coordinates(np.indices(np.array(filled.shape) + 1))

    fig = plt.figure(figsize=(4, 4), dpi=200)
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(30, angle)
    ax.set_xlim(right=IMG_DIM*stride)
    ax.set_ylim(top=IMG_DIM*stride)
    ax.set_zlim(top=IMG_DIM*stride)
    
    ax.set_xlabel(r'$n_1$')
    ax.set_ylabel(r'$n_2$')
    ax.set_zlabel(r'$n_3 (highest)$')
    
    ax.voxels(x/2*stride, y/2*stride, z/2*stride, filled, facecolors=facecolors, shade=False)
    if save:
        plt.savefig(f'./result/cube_figure/cube_{name}.png', dpi=200)

In [None]:
#%matplotlib notebook
%matplotlib inline
for s in range(5, 101, 5):  # 5, 10, ..., 95, 100
    print(s)
    max_s = 100
    '''
    data = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
                  [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, -1, 0],
                    [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, -1, 0],
                  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 0]])
    '''
    template = np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
                  [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, -1, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, -1, 0]])

    tc = coord_triplet(max_s)
    data = np.repeat(template.reshape(1, 4, 17), len(tc), axis=0)
    tc = np.c_[np.ones(tc.shape[0])*s, tc] 
    data[:, :, -2] = tc
    pi_list = []
    for i in range((data.shape[0]//10000)+1):
        a, v, logp, pi = ac.step(torch.as_tensor(data[i*10000:(i+1)*10000], dtype=torch.float32, device='cuda'), return_pi=True)
        x = pi.probs[..., 1].detach().cpu().numpy()
        pi_list.append(x)
    pi_list = np.concatenate(pi_list, axis=0)
    fp = fixed_point(ac)
    hp = template[-1][:-2]
    facecolors, fp_dist, hp_dist = assign_facecolors(pi_list, fp, hp, max_s)
    stride = 5
    assert max_s%stride == 0
    facecolors = facecolors[::stride, ::stride, ::stride]
    plot_cube(facecolors, stride = 5, angle=-75, name = str(s))

In [None]:
# complete_L200 2820
# complete_L300 2593
# maxmc_L100 4483
# maxmc_L200 3761
exp_name = 'st_complete_indv_raw_full_total_random_SI_TT_N15K7NN3_new_rand200'
epoch = 2820
ac, obs_dim, act_dim, dim_len, gamma, lam, env_kwargs = load_model(exp_name, epoch)

In [None]:
%matplotlib notebook
s = 10
max_s = 100
'''
data = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
              [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, -1, 0],
                [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, -1, 0],
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 0]])
'''
template = np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
              [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0],
                [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, -1, 0],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, -1, 0]])

tc = coord_triplet(max_s)
data = np.repeat(template.reshape(1, 4, 17), len(tc), axis=0)
tc = np.c_[np.ones(tc.shape[0])*s, tc] 
data[:, :, -2] = tc
pi_list = []
for i in range((data.shape[0]//10000)+1):
    a, v, logp, pi = ac.step(torch.as_tensor(data[i*10000:(i+1)*10000], dtype=torch.float32, device='cuda'), return_pi=True)
    x = pi.probs[..., 1].detach().cpu().numpy()
    pi_list.append(x)
pi_list = np.concatenate(pi_list, axis=0)
fp = fixed_point(ac)
sp = template[0][:-2]
hp = template[-1][:-2]
facecolors, fp_dist, sp_dist, hp_dist = assign_facecolors(pi_list, fp, sp, hp, True, False, True, max_s)
stride = 5
assert max_s%stride == 0
facecolors = facecolors[::stride, ::stride, ::stride]
plot_cube(facecolors, stride = 5, angle=-75, name = str(s), save=False)

In [None]:
s = 40
max_s = 100
'''
data = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
              [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, -1, 0],
                [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, -1, 0],
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 0]])
'''
template = np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
              [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0],
                [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, -1, 0],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, -1, 0]])

tc = coord_triplet(max_s)
data = np.repeat(template.reshape(1, 4, 17), len(tc), axis=0)
tc = np.c_[np.ones(tc.shape[0])*s, tc] 
data[:, :, -2] = tc
pi_list = []
for i in range((data.shape[0]//10000)+1):
    a, v, logp, pi = ac.step(torch.as_tensor(data[i*10000:(i+1)*10000], dtype=torch.float32, device='cuda'), return_pi=True)
    x = pi.probs[..., 1].detach().cpu().numpy()
    pi_list.append(x)
pi_list = np.concatenate(pi_list, axis=0)

In [None]:
plt.imshow(pi_list, aspect='auto')
plt.colorbar()

In [None]:
s = 70
max_s = 100
'''
data = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
              [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, -1, 0],
                [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, -1, 0],
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 0]])
'''
template = np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1], 
              [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0],
                [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, -1, 0],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, -1, 0]])

tc = coord_triplet(max_s)
data = np.repeat(template.reshape(1, 4, 17), len(tc), axis=0)
tc = np.c_[np.ones(tc.shape[0])*s, tc] 
data[:, :, -2] = tc
pi_list = []
for i in range((data.shape[0]//10000)+1):
    a, v, logp, pi = ac.step(torch.as_tensor(data[i*10000:(i+1)*10000], dtype=torch.float32, device='cuda'), return_pi=True)
    x = pi.probs[..., 1].detach().cpu().numpy()
    pi_list.append(x)
pi_list = np.concatenate(pi_list, axis=0)

In [None]:
plt.imshow(pi_list, aspect='auto')
plt.colorbar()

## Score distribution

In [None]:
env_num = 1
test_ensemble_num = 10
env_list = [envs.__dict__[env_name](**env_kwargs) for i in range(env_num)]
state_list = []
for i in range(env_num):
    _, fixed_state = env_list[i].reset(E=test_ensemble_num, base=True)
    state_list.append(deepcopy(fixed_state))
print("Baseline construction initiated")
# score histogram
env = env_list[0]
plt.hist(np.array(list(env.landscape.get_all_state_values().values()))/env.landscape.get_global_max()[1], bins=100)

## Data distribution

In [None]:
obs_buf = buf_list[0].obs_buf
score_pairs = np.round(obs_buf[:, :, :, :, -2]).reshape(-1, obs_buf.shape[-2]).astype(np.int)
self_scores = score_pairs[:, 0].reshape(-1, 1)
neighbor_scores = np.sort(score_pairs[:, 1:], axis=-1)
score_pairs = np.concatenate((self_scores, neighbor_scores), axis=-1)
score_pairs = score_pairs[np.lexsort(list(score_pairs[:, -(i+1)] for i in range(score_pairs.shape[-1])))]

In [None]:
2**15

In [None]:
cube = make_cube_dd(score_pairs, s, max_s)

In [None]:
np.max(cube)

In [None]:
h_list[81]

In [None]:
%matplotlib inline
h_list = np.zeros(101)
for i in range(100):
    h_list[i] = len(score_pairs[score_pairs[:, 0]==i])
plt.plot(h_list)

In [None]:
def make_cube_dd(score_pairs, s, max_s):
    states_unique, freq_states = np.unique(score_pairs, axis=0, return_counts=True)
    s_filter = states_unique[:, 0]==s
    states_unique = states_unique[s_filter][:, 1:]
    freq_states = freq_states[s_filter]
    
    cube = np.zeros((max_s+1, max_s+1, max_s+1)).astype(np.int)
    for a, b in zip(states_unique, freq_states):
        cube[a[0], a[1], a[2]] = b
    return cube

def assign_facecolors_dd(score_pairs, s, max_s):
    
    facecolors = np.zeros((max_s+1, max_s+1, max_s+1, 4)) # R, G, B, alpha
    mc = make_cube_dd(score_pairs, s, max_s)
    facecolors[..., -1] = mc / np.max(mc)
    print(np.max(mc))
    return facecolors, np.max(mc)

def explode(data):
    shape_arr = np.array(data.shape)
    size = shape_arr[:3]*2 - 1
    exploded = np.zeros(np.concatenate([size, shape_arr[3:]]), dtype=data.dtype)
    exploded[::2, ::2, ::2] = data
    return exploded

def expand_coordinates(indices):
    x, y, z = indices
    x[1::2, :, :] += 1
    y[:, 1::2, :] += 1
    z[:, :, 1::2] += 1
    return x, y, z


def plot_cube_dd(facecolors, max_mc, s, stride, angle=320, name = '', save=True):
    IMG_DIM = len(facecolors)
    facecolors = explode(facecolors)
    
    filled = facecolors[:,:,:,-1] != 0
    #print(filled.shape)
    #print(np.indices(np.array(filled.shape) + 1).shape)
    x, y, z = expand_coordinates(np.indices(np.array(filled.shape) + 1))

    fig = plt.figure(figsize=(4, 4), dpi=200)
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(30, angle)
    ax.set_xlim(right=IMG_DIM*stride)
    ax.set_ylim(top=IMG_DIM*stride)
    ax.set_zlim(top=IMG_DIM*stride)
    
    ax.set_xlabel(r'$n_1$')
    ax.set_ylabel(r'$n_2$')
    ax.set_zlabel(r'$n_3 (highest)$')
    
    ax.voxels(x/2*stride, y/2*stride, z/2*stride, filled, facecolors=facecolors, shade=False)
    
    xx, yy = np.meshgrid(range(IMG_DIM*stride), range(IMG_DIM*stride))
    z = xx * 0 + s
    # plot the plane
    ax.plot_surface(xx, yy, z, color = 'r', alpha=0.3)

    if save:
        plt.savefig(f'./result/cube_figure/data_{name}.png', dpi=200)

In [None]:
%matplotlib notebook
s = 80
max_s = 100

facecolors, max_mc= assign_facecolors_dd(score_pairs, s, max_s)
stride = 1
assert max_s%stride == 0
facecolors = facecolors[::stride, ::stride, ::stride]
plot_cube_dd(facecolors, max_mc, s, stride=stride, angle=-75, name = str(s), save=True)

## Supervised

In [None]:
import envs
import ppo.core as core
from utils.utils import DataGen
from torch.utils.data import DataLoader
from torch.utils.data.dataset import TensorDataset
import ppo.net as net

In [None]:
baseline_type = 'FollowBest'
exp_name = 'test'

batch_size = 16
batch_num = 2
total_size = batch_num * batch_size
train_ratio = 0.8

generator = DataGen(env, baseline_type, batch_size, batch_num)
if not os.path.isfile('./data/' + exp_name + '_train.pkl'):
    generator.run(exp_name, total_size, batch_size, train_ratio)

In [None]:
with open('./data/' + exp_name + '_train.pkl', 'rb') as f:
    train_data = pickle.load(f)
with open('./data/' + exp_name + '_test.pkl', 'rb') as f:
    test_data = pickle.load(f)

In [None]:
exp_name = 'complete_total_FollowBest_SIR_N10K3NN3'
with open('./data/supervised/' + exp_name + '_train.pkl', 'rb') as f:
    train_data = pickle.load(f)
with open('./data/supervised/' + exp_name + '_test.pkl', 'rb') as f:
    test_data = pickle.load(f)

In [None]:
train_data_image = np.concatenate(train_data['Image'], axis=0)
train_data_image = train_data_image.reshape(-1, *train_data_image.shape[-2:])
train_data_label = np.concatenate(train_data['Label'], axis=0)
train_data_label = train_data_label.reshape(-1, *train_data_label.shape[-1:])
test_data_image = np.concatenate(test_data['Image'], axis=0)
test_data_image = test_data_image.reshape(-1, *test_data_image.shape[-2:])
test_data_label = np.concatenate(test_data['Label'], axis=0)
test_data_label = test_data_label.reshape(-1, *test_data_label.shape[-1:])

train_data = TensorDataset(torch.FloatTensor(train_data_image), torch.FloatTensor(train_data_label))
test_data = TensorDataset(torch.FloatTensor(test_data_image), torch.FloatTensor(test_data_label))

train_loader = DataLoader(
            train_data,
            batch_size=batch_size,
            shuffle=True,
            pin_memory=True,
            num_workers=0
        )
test_loader = DataLoader(
            test_data,
            batch_size=batch_size,
            shuffle=False,
            pin_memory=True,
            num_workers=0
        )

In [None]:
model = net.__dict__['ds']((4, 13), 20).cuda()
exp_name = 'complete_total_FollowBest_SIR_N10K3NN3_CE_sptest'
checkpoint = torch.load(f'./data/runs/{exp_name}/{exp_name}_s42/pyt_save/model.pth')
model.load_state_dict(checkpoint)

# ETC

In [None]:
def sample_gumbel(shape, eps=1e-20):
    U = torch.rand(shape).cuda()
    return -Variable(torch.log(-torch.log(U + eps) + eps))

def gumbel_softmax_sample(logits, temperature):
    y = logits + sample_gumbel(logits.size())
    return F.softmax(y / temperature, dim=-1)

def gumbel_softmax(logits, temperature):
    """
    input: [*, n_class]
    return: [*, n_class] an one-hot vector
    """
    y = gumbel_softmax_sample(logits, temperature)
    shape = y.size()
    _, ind = y.max(dim=-1)
    y_hard = torch.zeros_like(y).view(-1, shape[-1])
    y_hard.scatter_(1, ind.view(-1, 1), 1)
    y_hard = y_hard.view(*shape)
    return (y_hard - y).detach() + y

import math
print(gumbel_softmax(torch.cuda.FloatTensor([[math.log(0.1), math.log(0.4), math.log(0.3), math.log(0.2)]] * 20000), 1).sum(dim=0))

In [None]:
def get_degree_preserving_randomization(edges):
    '''
    Randomizes a network provided by an edge list 
    producing neither self links nor duplicate links.
    The degree sequence will stay the same.
    INPUT:
    --- edges: list or set containing node pairs (tuples or lists of two nodes)
         
    OUTPUT:
    --- new_edges: new list containing new node pairs (tuples of two nodes)
    '''
    
    # make new set copy from edgelist
    edges = set( [tuple(e) for e in edges ]) 

    # get list of stubs
    stubs = [ ]
    [ stubs.extend(e) for e in edges ]

    # get a Counter object that counts the stubs for every node
    stub_counter = Counter(stubs)

    # initialize the new edge list
    new_edges = set()

    # get available nodes (nodes that have nonzero stub count)
    nodes = np.array([ stub for stub,count in stub_counter.items() if count!=0 ])

    # loop till the number of available nodes is zero
    while len(nodes)>0:

        # initialize dummy values for new edge
        first,second = -1,-1

        # choose edges that are not self-links (only possible if len(nodes)>1)
        while first == second and len(nodes)>1:
            first,second = np.random.choice(nodes,size=(2,),replace=False)

        # if the chosen (source,target) is are not the same
        # and not yet connected 
        # and there is more than one node with available stubs
        if first!=second and \
           (first,second) not in new_edges and \
           (second,first) not in new_edges and \
           len(nodes)>1:
            new_edges.add((first,second))
            stub_counter[first] -= 1
            stub_counter[second] -= 1
        else:
            # if not, pop a random edge and put its nodes 
            # back in the stub pool
            edge = random.sample(new_edges,1)[0]
            new_edges.remove(edge)
            stub_counter[edge[0]] += 1
            stub_counter[edge[1]] += 1

        # get available nodes (nodes that have nonzero stub count)
        nodes = np.array([ stub for stub,count in stub_counter.items() if count!=0 ])

        
    return list(new_edges)

In [None]:
E = 32
M = 100
N = 15
K = 7
NN = 9
exp = 8
trj_len = 100
graph_type = 'complete'
reward_type = 'indv_raw'
action_type = 'total'
extra_type = 'SI'
env_name = 'SL_NK_' + action_type

nx_dict = {'complete': nx.complete_graph, 'ba': nx.barabasi_albert_graph, 'er': nx.erdos_renyi_graph} 
nx_arg_dict = {'complete': {'n': M}, 'ba': {'n': M, 'm': 19}, 'er': {'n': M, 'p': 0.3}}

env_kwargs = {
        'E': E,
        'M': M,
        'N': N,
        'K': K,
        'neighbor_num': NN,
        'exp': exp,
        'graph': nx_dict[graph_type],
        'graph_dict': nx_arg_dict[graph_type],
        'reward_type': reward_type,
        'action_type': action_type,
        'extra_type': extra_type,
    'corr_type': 'TT'
    }

In [None]:
env_num = 1
test_ensemble_num = 1000
env_list = [envs.__dict__[env_name](**env_kwargs) for i in range(env_num)]
state_list = []
for i in range(env_num):
    _, fixed_state = env_list[i].reset(E=test_ensemble_num, base=True)
    state_list.append(deepcopy(fixed_state))
print("Baseline construction initiated")

In [None]:
from ppo.core import Baseline
class FollowMajor_indv_test(Baseline):
    def __init__(self, env, action_type, extra_type, corr_type):
        super().__init__(env, action_type, extra_type, corr_type)
        print('test')
        self.landscape = env.landscape
        self.score_max = env.score_max

    def step(self, obs):
        with torch.no_grad():
            states_input = obs
            if self.action_type == 'total':
                E = obs.shape[0]
                M = obs.shape[1]
                N = obs.shape[3] - self.extra_num
                states = states_input[:, :, :, :N]
                states_neighbor = states[:, :, 1:, :]
                states = states[:, :, 0, :]
                scores = np.expand_dims(states_input[:, :, 0, N], axis=-1)
                scores_neighbor = states_input[:, :, 1:, N]
            elif self.action_type == 'split':
                E = obs.shape[0]
                M = obs.shape[1]
                N = obs.shape[2]
                states = states_input[:, :, :, :, 0]
                states_neighbor = states[:, :, :, 1:].transpose(0, 1, 3, 2)
                states = states[:, :, :, 0]
                scores = np.expand_dims(states_input[:, :, 0, 0, 1], axis=-1)
                scores_neighbor = states_input[:, :, 0, 1:, 1]
            else:
                raise NotImplementedError

            #states_social = np.copy(states)
            states_social = np.zeros_like(states)
            for i in range(E):
                for j in range(M):
                    states_unique, freq_states = np.unique(states_neighbor[i][j], axis=0, return_counts=True)
                    if len(freq_states) < self.env.neighbor_num:  # At least one 'most requent' state
                        #print(freq_states, freq_states.max(), freq_states==freq_states.max())
                        states_most_frequent = states_unique[freq_states == freq_states.max()]
                        
                        if len(states_most_frequent) == 1 :  # Single 'most frequent' state
                            states_social[i][j] = states_most_frequent[0]
                        else:  # Multiple 'most frequent' solutions
                            states_social[i][j] = states_most_frequent[np.random.randint(len(states_most_frequent))]
                    else:  # No frequent state
                        states_social[i][j] = states[i][j]
                        #print(states[i][j])
                        #indv_index = np.random.randint(N)
                        #states_social[i][j] = states[i][j]
                        #states_social[i][j][indv_index] = (states_social[i][j][indv_index] + 1) % 2
            '''
            scores_social = self.env.get_score(states=states_social)
            better_social = (scores_social > scores).astype(np.long)
            print(states_social, scores_social, better_social)
            
            '''
            print(states_social.shape)
            for e in range(test_ensemble_num):
                print(state_social[e])
                freq = np.unique(states_social[e], axis=0)
                print(freq)

            scores_social = self.env.get_score(states=states_social)
            better_social = (scores_social > scores).astype(np.long)
            
            index_indv = np.zeros_like(states)
            np.put_along_axis(index_indv, np.random.randint(0, N, (E, M, 1)), 1, axis=-1)
            states_indv = (states + index_indv) % 2
            scores_indv = self.env.get_score(states=states_indv)
            
            better_indv = (scores_indv > scores).astype(np.long) * (1 - better_social)  # not better social but better indv
            
            stay = (1 - better_social) * (1 - better_indv)
            if self.state_correction:
                states = (better_social * states_social) + (better_indv * states_indv) + stay * states
            else:
                states = states_social

            if self.reward_correction:
                scores = (scores_social * better_social) + (scores_indv * better_indv) + stay * scores
            else:
                scores = scores_social
                
            
            #states = better_social * states_social + (1 - better_social) * states
            #scores = better_social * scores_social + (1 - better_social) * scores
            #print(states, scores)

        return states

In [None]:
baseline_data['act_buf'][0][0][:, :, 0][1]

In [None]:
baseline_data['unq_buf'][0][0]

In [None]:
a.shape

In [None]:
baseline_data = {}
baseline_data['Ret'] = []
baseline_data['FinalScore'] = []
baseline_data['scr_buf'] = []
baseline_data['unq_buf'] = []
baseline_data['act_buf'] = []

for i in range(env_num):
    print(i)
    env_base = env_list[i]
    ac_base = FollowMajor_indv_test(env_base, action_type, extra_type, corr_type='TT')
    scr_buf = np.zeros((test_ensemble_num, M, trj_len), dtype=np.float32)
    unq_buf = np.zeros((test_ensemble_num, trj_len), dtype=np.float32)
    act_buf = np.zeros((test_ensemble_num, M, N, trj_len), dtype=np.float32)

    o, _ = env_base.reset(states=state_list[i], state_only=True, base=True)
    ep_ret, ep_len = 0, 0
    for t in range(2):
        print(t)
        a = ac_base.step(o)
        next_o, r, s = env_base.step(a)
        ep_ret += r
        ep_len += 1
        scr_buf[..., t] = s
        act_buf[..., t] = a
        for e in range(test_ensemble_num):
            freq = np.unique(a[e], axis=0)
            unq_buf[e][t] = freq.shape[0]
        o = next_o

    baseline_data['Ret'].append(np.mean(ep_ret / ep_len))
    baseline_data['FinalScore'].append(np.mean(s))
    baseline_data['scr_buf'].append(scr_buf)
    baseline_data['unq_buf'].append(unq_buf)
    baseline_data['act_buf'].append(act_buf)
baseline_data['Ret'] = np.mean(baseline_data['Ret'])
baseline_data['FinalScore'] = np.mean(baseline_data['FinalScore'])
baseline_data['scr_buf'] = np.array(baseline_data['scr_buf'])
baseline_data['unq_buf'] = np.array(baseline_data['unq_buf'])
baseline_data['act_buf'] = np.array(baseline_data['act_buf'])
print("Baseline finished")

In [None]:
# Figure drawing
fig = plt.figure(figsize=(4,4), dpi=150)
ax = fig.add_subplot(111)
color_list = ['green', 'blue','orangered', 'gold', 'purple', 'cyan', 'black']
counter=0

x = baseline_data['unq_buf']
avg_pf = np.mean(x, axis=tuple(range(0, len(x.shape) - 1)))
std_pf = np.std(x, axis=tuple(range(0, len(x.shape) - 1)))
ax.plot(np.arange(x.shape[-1]), avg_pf, c=color_list[counter])
#ax.fill_between(np.arange(x.shape[-1]), avg_pf-std_pf, avg_pf+std_pf, facecolor=color_list[counter], alpha=0.2)
counter+=1

#x = buf_scr_list
#avg_pf = np.mean(x, axis=tuple(range(0, len(x.shape) - 1)))
#std_pf = np.std(x, axis=tuple(range(0, len(x.shape) - 1)))
#ax.plot(np.arange(x.shape[-1]), avg_pf, c=color_list[counter], label='RL_TT')
#ax.fill_between(np.arange(x.shape[-1]), avg_pf-std_pf, avg_pf+std_pf, facecolor=color_list[counter], alpha=0.2)
ax.set_xlabel('Time')
ax.set_ylabel('Unique states')
ax.legend()

In [None]:
env_base = env_list[0]
ac_base = FollowMajor_indv_test(env_base, action_type, extra_type, corr_type='TT')

o = np.array([[[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3.83, 1], 
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 9.48, 0], 
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 9.48, 0],
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 9.48, 0],
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 9.48, 0],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.21, 0],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.21, 0],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.21, 0], 
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.21, 0], 
              [1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 6.29, 0]]]])

'''
o = np.array([[[[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 3.83, 1], 
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5.13, 0],
                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5.13, 0],
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 10.15, 0]]]])

'''

for i in range(10):
    print(ac_base.step(o))

In [None]:
from networkx.utils import nodes_or_number

In [None]:
def maxmeanclustering(n):
    assert n%5==0
    s = int(n/5)
    A = 1 - np.eye(s)
    B = np.zeros((s, s))
    C = np.block([[A, B, B, B, B],
                 [B, A, B, B, B],
                 [B, B, A, B, B],
                 [B, B, B, A, B],
                 [B, B, B, B, A]])
    for i in range(5):
        j = s * i
        C[j][j+s-1] = 0
        C[j+s-1][j] = 0
        C[j][j-1] = 1
        C[j-1][j] = 1

    G = nx.from_numpy_matrix(C)
    return G