In [1]:
import numpy as np
import networkx as nx

#%matplotlib notebook
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.axes_grid1 import make_axes_locatable
%config InlineBackend.figure_format = 'retina'

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset
from torch.utils.data.dataset import TensorDataset
from torch.utils.data import DataLoader # (testset, batch_size=4,shuffle=False, num_workers=4)
from torch.optim.lr_scheduler import ReduceLROnPlateau as RLRP
from torch.nn.parallel import DistributedDataParallel, DataParallel
from torch.nn.init import xavier_normal
from torch.nn.parameter import Parameter
import torchvision.datasets as datasets
import torchvision.transforms as transforms

import sys
from datetime import datetime
from functools import reduce
import os
import os.path
import pandas as pd
import pickle
import importlib
import time
from collections import Counter
from copy import deepcopy
from collections import OrderedDict

import nkmodel as nk
import ppo.core as core
from ppo.ppo import PPOBuffer
from utils.utils import max_mean_clustering_network
import envs
import json
from itertools import product
from functools import reduce  

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import matplotlib.gridspec as gridspec

In [2]:
# Load the pickled network table (indexed below by 'network_index', 'edges_id',
# 'networkDomain' and 'number_nodes').
# NOTE: pickle.load executes arbitrary code from the file -- only use trusted data.
with open('./result/OLP_updated.pickle', 'rb') as f:
    real_network = pickle.load(f)

max_node_threshold = 1200
candidate = {}

# A network becomes a candidate when its 3-core is non-empty, connected, and
# keeps more than 95% of the nodes of the original graph. The value stored is
# the node count of that 3-core.
for net_id in real_network.network_index:
    edge_rows = real_network[real_network['network_index'] == net_id]['edges_id'].values[0]
    graph = nx.Graph()
    graph.add_edges_from(tuple(edge) for edge in edge_rows)  # plain edges, no weights
    graph2 = nx.k_core(graph, k=3)
    core_size = graph2.number_of_nodes()
    if core_size == 0:
        continue
    if nx.is_connected(graph2) and core_size / graph.number_of_nodes() > 0.95:
        candidate[net_id] = core_size

# Keep candidate rows only, then restrict to 'Social' networks below the size cap.
is_candidate = np.isin(real_network['network_index'], list(candidate.keys()))
network_data = real_network[is_candidate]
is_social = network_data['networkDomain'] == 'Social'
is_small_enough = network_data['number_nodes'].values < max_node_threshold
network_filter = np.logical_and(is_social, is_small_enough)
network_data = network_data[network_filter]

# Parallel sequences: selected network ids and the node count of each 3-core.
network_index = network_data.network_index.values
network_nodes = [candidate[i] for i in network_index]

In [None]:
# Write the network table to OLP_selected.pickle in the working directory.
# NOTE(review): this dumps `real_network` (the table exactly as loaded), not the
# filtered `network_data`; given the file name, confirm which one is intended.
with open('OLP_selected.pickle', 'wb') as f:
    pickle.dump(real_network, f)

In [None]:
def load_model(exp_name, epoch, data_dir=None):
    """Rebuild a saved actor-critic from a run directory and return it with its metadata.

    Reads ``data/runs/{exp_name}/{exp_name}_s42/config.json``, instantiates the
    environment named there (used here only to derive observation/action
    dimensions), then loads the policy and value weights from
    ``pyt_save/model{epoch}.pth`` inside the same directory.

    Args:
        exp_name: Name of the run; used for both the directory and the ``_s42``
            sub-directory (the seed suffix is hard-coded).
        epoch: Number appended to ``model`` in the checkpoint file name.
        data_dir: Directory passed to the environment scheduler as ``data_dir``.
            When omitted, the original hard-coded location is used.

    Returns:
        Tuple ``(ac, obs_dim, act_dim, dim_len, gamma, lam, env_kwargs)`` where
        ``ac`` has its ``pi`` and ``v`` members wrapped in ``DataParallel`` on
        device 0 and ``env_kwargs`` is the (modified) dict read from the config.

    Raises:
        ValueError: If the configured ``action_type`` is neither ``'total'`` nor
            ``'split'`` (previously this surfaced as an UnboundLocalError).
    """
    if data_dir is None:
        # Historical default kept for backward compatibility; machine specific.
        data_dir = 'D:\\OneDrive\\연구\\ML\\MyProject\\SocialNet\\SocialNet\\data\\runs'

    #rel_path = f'data/runs/ds_complete_indv_raw_random_SIR_N10K3NN3_new_rand/{exp_name}/{exp_name}_s42/'
    rel_path = f'data/runs/{exp_name}/{exp_name}_s42/'

    with open(rel_path + "config.json") as json_file:
        json_data = json.load(json_file)

    env_kwargs = json_data['env_kwargs']
    env_name = json_data['env_name']
    ac_kwargs = json_data['ac_kwargs']
    arch = json_data['arch']
    gamma = json_data['gamma']
    lam = json_data['lam']
    seed = json_data['seed']

    # Values that are set here rather than taken from the JSON config.
    env_kwargs['graph'] = nx.complete_graph
    ac_kwargs['activation'] = nn.Tanh()

    env_scheduler_kwargs = {
        'local_rank': 0,
        'exp_name': exp_name,
        'E': env_kwargs['E'],
        'N': env_kwargs['N'],
        'K': env_kwargs['K'],
        'exp': env_kwargs['exp'],
        'NGPU': 1,
        'data_dir': data_dir,
    }
    env_kwargs['env_scheduler'] = envs.__dict__['random_env_scheduler'](**env_scheduler_kwargs)
    env_kwargs['corr_type'] = 'TT'

    # Reward-type names shorter than 9 characters get the '_full' suffix appended.
    if len(env_kwargs['reward_type']) < 9:
        print('modify')
        env_kwargs['reward_type'] = env_kwargs['reward_type'] + '_full'

    # Seed before building the environment, as in the original ordering.
    torch.manual_seed(seed)
    np.random.seed(seed)
    env = envs.__dict__[env_name](**env_kwargs)

    action_type = env_kwargs['action_type']
    extra_type = env_kwargs['extra_type']
    extra_num = len(extra_type)

    # Derive network input/output dimensions from the instantiated environment.
    if action_type == 'total':
        obs_dim = (env.neighbor_num + 1, env.N + extra_num)  # (3+1, 15+2)
        act_dim = env.action_space.n
        dim_len = env.N
    elif action_type == 'split':
        obs_dim = (env.neighbor_num + 1, 1 + extra_num)
        act_dim = (2,)
        dim_len = env.N
    else:
        raise ValueError(
            f"Unsupported action_type {action_type!r}; expected 'total' or 'split'"
        )

    checkpoint = torch.load(rel_path+f'pyt_save/model{epoch}.pth')
    ac = core.ActorCritic(obs_dim, act_dim, arch, **ac_kwargs)
    ac.pi.load_state_dict(checkpoint['pi'])
    ac.v.load_state_dict(checkpoint['v'])

    # Wrap both heads for single-device (device 0) data-parallel execution.
    Parallel = DataParallel
    parallel_args = {
        'device_ids': list(range(1)),
        'output_device': 0
    }

    ac.pi = Parallel(ac.pi, **parallel_args)
    ac.v = Parallel(ac.v, **parallel_args)
    ac.eval()
    return ac, obs_dim, act_dim, dim_len, gamma, lam, env_kwargs

In [None]:
# Experiment bookkeeping notes. The meaning of the numbers is not recorded
# here -- presumably run name followed by an epoch/checkpoint id; TODO confirm.
# complete_L200 2025
# complete_L200_2 2269
# complete_L300 2593
# maxmc_L100 4483
# maxmc_L200 3761
# 79, 177inspection_dict_SIRF_TT_gene_ent_EC0.003_N15K7NN3RSFTMT_Z_adam_cr-1_lr1e-5_g98_cp_E5400_E550
# Run to evaluate: `exp_name` selects the run directory read by load_model and
# `epoch` the number in the checkpoint file name.
exp_name = 'SIRF_TT_gene_ent_EC0.003_N15K7NN3RSFTMT_Z_adam_cr-1_lr1e-5_g98_cp_E5400'
epoch = 550
ac, obs_dim, act_dim, dim_len, gamma, lam, env_kwargs = load_model(exp_name, epoch)
# Selects how rewards are written to the PPO buffer in the evaluation loop
# ('full' stores the reward of every step as-is).
reward_supply_type = 'full'
# NOTE(review): the evaluation loop further down rebuilds `env_kwargs` from
# scratch, so this flag appears to be discarded before any use -- confirm.
env_kwargs['rescale'] = False
# When True the evaluation loop calls buf.finish_path with zeros instead of the
# critic's last value.
terminate = True

In [None]:
# Settings for the RL evaluation run; they are forwarded into `env_kwargs`
# by the evaluation loop.
# NOTE(review): M and graph_type are not read by the evaluation loops in this
# notebook (M is taken from network_nodes instead) -- confirm whether still needed.
E, M = 32, 100
N, K, NN = 15, 7, 3
exp = 8
trj_len = 200  # number of environment steps per rollout

graph_type = 'complete'
reward_type = 'indv_raw_full'
extra_type = 'SIRF'
action_type = 'total'

# Environment classes are looked up in `envs` by this name.
env_name = f'SL_NK_{action_type}'

In [None]:
# Start the list of positions (into network_index / network_nodes) to evaluate:
# eight entries spaced 11 apart, beginning at offset `copy_num`.
copy_num = 1
index_list = list(range(copy_num, copy_num + 8 * 11, 11))

In [None]:
# Append eight more positions (stride 11) starting at offset 6.
# NOTE: extends the existing list in place, so re-running this cell adds duplicates.
copy_num = 6
index_list.extend(11 * i + copy_num for i in range(8))

In [None]:
# Append eight more positions (stride 11) starting at offset 7.
# NOTE: extends the existing list in place, so re-running this cell adds duplicates.
copy_num = 7
index_list.extend(11 * i + copy_num for i in range(8))

In [None]:
# Display the accumulated positions that the evaluation loop will iterate over.
index_list

In [None]:
# normal test, without unique/prob
# Roll out the loaded actor-critic `ac` on every selected real network and
# pickle the collected score buffers, one file per network.
# Depends on names from earlier cells: real_network, network_index,
# network_nodes, index_list, the config constants (E, N, K, NN, exp, trj_len,
# reward_type, action_type, extra_type, env_name) and the load_model outputs
# (ac, obs_dim, act_dim, dim_len, gamma, lam, reward_supply_type, terminate).

for index in index_list:

    # Per-network accumulators, one entry per evaluated environment.
    scr_buf_list = []
    final_score_list = []
    Ret_list = []
    
    # Rebuild the graph from its stored edge list, reduce it to its 3-core and
    # relabel the remaining nodes as consecutive integers.
    graph = nx.Graph()
    graph.add_edges_from([tuple(x) for x in real_network[real_network['network_index']==network_index[index]]['edges_id'].values[0]]) # plain edges; no weights are attached here
    graph2 = nx.k_core(graph, k=3)
    graph3 = nx.convert_node_labels_to_integers(graph2)

    # NOTE: this replaces the env_kwargs returned by load_model.
    # 'M' is the 3-core node count recorded during candidate selection.
    env_kwargs = {
            'E': E,
            'M': network_nodes[index],
            'N': N,
            'K': K,
            'neighbor_num': NN,
            'exp': exp,
            'graph': nx.from_edgelist,
            'graph_dict': {'edgelist': graph3.edges},
            'reward_type': reward_type,
            'action_type': action_type,
            'extra_type': extra_type,
        'corr_type': 'TT'
        }
    
    # Build env_num separate environment instances for this network.
    env_num = 5
    env_list = [envs.__dict__[env_name](**env_kwargs) for i in range(env_num)]
    start_time = time.time()
    for i in range(env_num):
        print(i)
        test_ensemble_num = 20
        # Fresh buffer per environment; its `scr_buf` attribute is what gets saved.
        buf = PPOBuffer(
            obs_dim, 
            act_dim, 
            test_ensemble_num, 
            env_kwargs['M'], 
            dim_len, 
            trj_len, 
            gamma, 
            lam, 
            split=True if env_kwargs['action_type'] == 'split' else False)


        env = env_list[i]
        o, _ = env.reset(test_ensemble_num, base=True) 
        ep_ret, ep_len = 0, 0
        best_ep_ret = -np.inf  # NOTE(review): never read afterwards

        for t in range(trj_len):
            epoch_ended = t == trj_len - 1
            # Requires a CUDA device: observations are moved to 'cuda' explicitly.
            a, v, logp, pi = ac.step(torch.as_tensor(o, dtype=torch.float32, device='cuda'), return_pi=True)

            next_o, r, s = env.step(a)
            ep_ret += r
            ep_len += 1

            # 'full' stores each step's reward; the other modes store 0 until the
            # last step, then either r * trj_len ('final') or the summed return
            # ('finalmean').
            if reward_supply_type == 'full':
                buf.store(o, a, r, v, s, logp)
            else:
                if epoch_ended:
                    if reward_supply_type == 'final':
                        buf.store(o, a, r * trj_len, v, s, logp)
                    elif reward_supply_type == 'finalmean':
                        buf.store(o, a, ep_ret, v, s, logp)
                    else:
                        raise NotImplementedError
                else:
                    buf.store(o, a, 0, v, s, logp)

            # Update obs (critical!)
            o = next_o

            if epoch_ended:
                # One additional policy/environment step after the last stored
                # transition; the `s` returned here is the one used for
                # FinalScore below.
                a, v, logp, pi = ac.step(
                    torch.as_tensor(o, dtype=torch.float32, device='cuda' ),
                    return_pi=True
                )
                _, _, s = env.step(a)
                if terminate:
                    buf.finish_path(np.zeros_like(v))
                else:
                    buf.finish_path(v)

        # Per-environment summaries: mean per-step return and mean final score.
        Ret=ep_ret / ep_len
        Ret_list.append(Ret)
        EpLen=ep_len  # NOTE(review): never read afterwards
        FinalScore=np.mean(s)
        scr_buf_list.append(buf.scr_buf)
        final_score_list.append(FinalScore)
        ep_ret, ep_len = 0, 0

    Ret_list = np.array(Ret_list)
    final_score_list = np.array(final_score_list)
    scr_buf_list = np.array(scr_buf_list)
    elapsed_time = time.time() - start_time
    print(f'[Network {index}, ({network_index[index]}, M={network_nodes[index]}), (Time : {elapsed_time})]: {np.mean(Ret_list)}, {np.mean(final_score_list)}', )
    # Only the stacked score buffers are persisted; Ret/FinalScore are printed only.
    inspection_dict = {}
    inspection_dict['scr_buf_list'] = scr_buf_list
    
    with open(f'./result/real_network_{index}_RL.pkl', 'wb') as f:
        pickle.dump(inspection_dict, f, pickle.HIGHEST_PROTOCOL)

## Baseline

In [3]:
# Result container for the baselines; 'keys' names the scalar metrics stored
# per baseline.
baseline_data_dict = {'keys': ['Ret', 'FinalScore']}

In [4]:
# Positions (into network_index / network_nodes) for the baseline runs:
# 22 entries spaced 4 apart, beginning at offset `copy_num`.
copy_num = 1
index_list = list(range(copy_num, copy_num + 22 * 4, 4))

In [5]:
# Drop the first position of the list built in the previous cell.
# NOTE: not idempotent -- every re-run of this cell removes one more entry.
index_list = index_list[1:]

In [6]:
# Settings for the baseline runs; they are forwarded into `env_kwargs`
# by the baseline loop.
# NOTE(review): M and graph_type are not read by the baseline loop in this
# notebook (M is taken from network_nodes instead) -- confirm whether still needed.
E, M = 20, 100
N, K, NN = 15, 7, 3
exp = 8
trj_len = 200  # number of environment steps per rollout

graph_type = 'complete'
reward_type = 'indv_raw_full'
extra_type = 'SIRF'
action_type = 'total'

# Environment classes are looked up in `envs` by this name.
env_name = f'SL_NK_{action_type}'

In [None]:
# Run every baseline policy on each selected real network and pickle the
# results, one file per network.
# Depends on names from earlier cells: real_network, network_index,
# network_nodes, index_list and the config constants (E, N, K, NN, exp,
# trj_len, reward_type, action_type, extra_type, env_name).

# Earlier naming scheme, kept for reference:
#baselines = ['FollowBest', 'FollowBest_indv', 'FollowMajor', 'FollowMajor_indv', 'IndvLearning', 'RandomCopy']
# Each name is looked up as an attribute of `core`.
baselines = ['FollowBest_random', 'FollowBest_prob', 'FollowMajor_random', 'FollowMajor_prob', 'IndvRandom', 'IndvProb',]

# Loop-invariant evaluation settings.
env_num = 5             # environment instances built per network
test_ensemble_num = 20  # passed to env.reset as E

for index in index_list:

    # Rebuild the graph from its stored edge list, reduce it to its 3-core and
    # relabel the remaining nodes as consecutive integers.
    edge_rows = real_network[real_network['network_index']==network_index[index]]['edges_id'].values[0]
    graph = nx.Graph()
    graph.add_edges_from([tuple(x) for x in edge_rows])  # plain edges, no weights
    graph2 = nx.k_core(graph, k=3)
    graph3 = nx.convert_node_labels_to_integers(graph2)

    # 'M' is the 3-core node count recorded during candidate selection.
    env_kwargs = {
        'E': E,
        'M': network_nodes[index],
        'N': N,
        'K': K,
        'neighbor_num': NN,
        'exp': exp,
        'graph': nx.from_edgelist,
        'graph_dict': {'edgelist': graph3.edges},
        'reward_type': reward_type,
        'action_type': action_type,
        'extra_type': extra_type,
        'corr_type': 'TT'
    }

    # Build the environments once and remember a copy of each initial state so
    # every baseline can be reset to it.
    env_list = [envs.__dict__[env_name](**env_kwargs) for i in range(env_num)]
    state_list = []
    for i in range(env_num):
        _, fixed_state = env_list[i].reset(E=test_ensemble_num, base=True)
        state_list.append(deepcopy(fixed_state))
    print("Baseline construction initiated")

    # Fresh result container per network; 'keys' names the scalar metrics.
    baseline_data_dict = {}
    baseline_data_dict['keys'] = ['Ret', 'FinalScore']

    for baseline_name in baselines:
        # Only reachable when `baselines` lists the same name twice.
        if baseline_name in baseline_data_dict:
            continue

        print(f"Baseline : {baseline_name}")
        start_time = time.time()
        baseline_data = {}
        baseline_data['Ret'] = []
        baseline_data['FinalScore'] = []
        baseline_data['scr_buf'] = []
        baseline_data['unq_buf'] = []

        for i in range(env_num):
            print(i)
            env_base = env_list[i]
            ac_base = core.__dict__[baseline_name](env_base, action_type, extra_type, corr_type='TT')
            # Per-step scores and per-step count of distinct action rows.
            scr_buf = np.zeros((test_ensemble_num, network_nodes[index], trj_len), dtype=np.float32)
            unq_buf = np.zeros((test_ensemble_num, trj_len), dtype=np.float32)

            # NOTE(review): the stored state is passed without copying; if the
            # environment mutates it in place, later baselines would not start
            # from the same state -- confirm against env.reset.
            o, _ = env_base.reset(states=state_list[i], state_only=True, base=True)
            ep_ret, ep_len = 0, 0
            for t in range(trj_len):
                a = ac_base.step(o)
                next_o, r, s = env_base.step(a)
                ep_ret += r
                ep_len += 1
                scr_buf[..., t] = s
                for e in range(test_ensemble_num):
                    freq = np.unique(a[e], axis=0)
                    unq_buf[e][t] = freq.shape[0]
                o = next_o

            baseline_data['Ret'].append(np.mean(ep_ret / ep_len))
            baseline_data['FinalScore'].append(np.mean(s))
            baseline_data['scr_buf'].append(scr_buf)
            baseline_data['unq_buf'].append(unq_buf)

        # Collapse the per-environment lists: scalars are averaged, buffers stacked.
        baseline_data['Ret'] = np.mean(baseline_data['Ret'])
        baseline_data['FinalScore'] = np.mean(baseline_data['FinalScore'])
        baseline_data['scr_buf'] = np.array(baseline_data['scr_buf'])
        baseline_data['unq_buf'] = np.array(baseline_data['unq_buf'])
        baseline_data_dict[baseline_name] = baseline_data

        elapsed_time = time.time() - start_time
        # Same layout as the RL evaluation log line (parenthesis after M=... closed).
        print(f'[Network {index}, {baseline_name}, ({network_index[index]}, M={network_nodes[index]}), (Time : {elapsed_time})]: {baseline_data["Ret"]}, {baseline_data["FinalScore"]}')

    # NOTE(review): written to the working directory with a '_base2' suffix,
    # whereas the reader cell at the end of the notebook opens
    # './result/real_network_{index}.pkl' -- confirm the intended location.
    with open(f'real_network_{index}_base2.pkl', 'wb') as f:
        pickle.dump(baseline_data_dict, f, pickle.HIGHEST_PROTOCOL)

Baseline construction initiated
Baseline : FollowBest_random
0
1
2
3
4
[Network 5, FollowBest_random, (144.0, M=144, (Time : 1428.2655494213104)]: 0.8142353047335102, 0.9303285158700604
Baseline : FollowBest_prob
0
1
2
3
4
[Network 5, FollowBest_prob, (144.0, M=144, (Time : 1382.9322581291199)]: 0.7389032101635046, 0.7707767619957956
Baseline : FollowMajor_random
0
1
2
3
4
[Network 5, FollowMajor_random, (144.0, M=144, (Time : 1762.7056846618652)]: 0.35930918767385467, 0.5005711314424304
Baseline : FollowMajor_prob
0
1
2
3
4
[Network 5, FollowMajor_prob, (144.0, M=144, (Time : 1790.7534461021423)]: 0.46498423147288437, 0.6502179704803522
Baseline : IndvRandom
0
1
2
3
4
[Network 5, IndvRandom, (144.0, M=144, (Time : 1231.2550649642944)]: 0.3211179227970874, 0.41091935370795196
Baseline : IndvProb
0
1
2
3
4
[Network 5, IndvProb, (144.0, M=144, (Time : 1265.7010498046875)]: 0.3608788245922219, 0.45824798703416814
Baseline construction initiated
Baseline : FollowBest_random
0
1
2
3
4
[Netw

1
2
3
4
[Network 37, IndvProb, (176.0, M=440, (Time : 5684.019747018814)]: 0.3720059560008976, 0.4677467427790686
Baseline construction initiated
Baseline : FollowBest_random
0
1
2
3
4
[Network 41, FollowBest_random, (180.0, M=449, (Time : 6151.4093861579895)]: 0.8901455217049445, 0.9883649262330056
Baseline : FollowBest_prob
0
1
2
3
4
[Network 41, FollowBest_prob, (180.0, M=449, (Time : 6236.048170089722)]: 0.8384777354256909, 0.8744528327562943
Baseline : FollowMajor_random
0
1
2
3
4
[Network 41, FollowMajor_random, (180.0, M=449, (Time : 7922.125212669373)]: 0.36094868461459256, 0.4720553896522667
Baseline : FollowMajor_prob
0
1
2
3
4
[Network 41, FollowMajor_prob, (180.0, M=449, (Time : 8221.729197740555)]: 0.44024651675778703, 0.609132476601978
Baseline : IndvRandom
0
1
2
3
4
[Network 41, IndvRandom, (180.0, M=449, (Time : 5547.921191930771)]: 0.34392180924788185, 0.43453607538037875
Baseline : IndvProb
0
1
2
3
4
[Network 41, IndvProb, (180.0, M=449, (Time : 5790.473452806473)]: 0

In [None]:
# Display the results of the last network processed by the baseline loop
# (the dict is rebuilt for every network).
baseline_data_dict

In [None]:
# Load a saved result file and print, for every entry other than 'keys', the
# entry name followed by each metric listed under 'keys'.
# `index` is whatever value an earlier loop left behind.
# NOTE: pickle.load executes arbitrary code from the file -- only use trusted data.
with open(f'./result/real_network_{index}.pkl', 'rb') as f:
    data = pickle.load(f)

for name in data.keys():
    if name == 'keys':
        continue
    print(name)
    entry = data[name]
    for metric in data['keys']:
        print(entry[metric])