# The Echo Chamber Simulator - Simulation

In [1]:
import sys
sys.path.append('../src')

import os
import csv
import time
import torch
import random

import pandas as pd
import networkx as nx

from utils import SimLogger, bidict
from data import DataPipeline
from evaluate import Evaluator
from simulation import SimulationPipeline
from model import KGEModel, TrainPipeline
from main import parse_args
from graph import homophily

In [2]:
# Start from the project's default arguments; the empty string is handed to
# `parse_args` unchanged (presumably meaning "no command-line overrides").
args = parse_args("")

''' Set up paths & directories '''
args.dataset = 'ukraine'
# Narrow the default data directory down to <data_dir>/processed/<dataset>.
processed_dataset_dir = os.path.join(args.data_dir, 'processed', args.dataset)
args.data_dir = processed_dataset_dir
# The parent of the current working directory is stored as `parent_dir`;
# later cells resolve the data and log directories relative to it.
working_dir_parent = os.path.join(os.getcwd(), os.pardir)
args.parent_dir = os.path.abspath(working_dir_parent)

''' Set Simulation Params '''
vars(args).update(
    user_sample_size=None,
    user_sampling_strategy='all',
    simulation_epochs=30,
)

''' Set User Behavior '''
num_ideological_communities = 15
vars(args).update(
    condition='epistemic',
    # Community ids 0 .. num_ideological_communities - 1.
    ideological_communities=list(range(num_ideological_communities)),
    confirmation_bias=0.5,
    ideological_bias=0.5,
    do_disconnect=True,
    user_disconnect_ratio=1.0,
    disconnect_ratio=0.3,
)

''' Set Recommender System Behavior '''
vars(args).update(
    recommendation_mode='random',
    rank_users=False,
)

''' Set Technical Parameters '''
vars(args).update(
    max_steps=10000,
    do_test=False,
    recalc_communities=True,
    free_simulation=True,
    cpu_num=16,
    cuda=True,
    log_reference_user_graph=False,
)

# Display the final configuration as the cell output.
args

Namespace(adversarial_temperature=1.0, batch_size=512, candidate_mode='random', condition='epistemic', confirmation_bias=0.5, cpu_num=16, cuda=True, data_dir='dataset/processed/ukraine', dataset='ukraine', disconnect_ratio=0.3, do_disconnect=True, do_test=False, evaluate_train=False, free_simulation=True, gamma=20.0, hidden_dim=20, ideological_bias=0.5, ideological_communities=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], latitude_of_acceptance=0.5, learning_rate=0.01, log_reference_user_graph=False, log_steps=5000, max_steps=10000, model='DistMult', negative_adversarial_sampling=False, negative_sample_size=10, nentity=0, nrelation=0, num_tweet_recs=20, num_user_recs=5, parent_dir='/home/tim/git-projects/2021-Twitter-KGE', rank_items=False, rank_users=False, recalc_communities=True, recommendation_mode='random', regularization=2e-06, save_checkpoint_steps=1000, save_path=None, sharpness=5, simulation_epochs=30, test_batch_size=1, test_negative_sample_size=500, uni_weight=False, u

### Logging Setup

In [3]:
# Derive a timestamped log directory unless a save path was configured already.
if not args.save_path:
    log_dir_parts = [
        args.parent_dir, 'log', args.dataset, args.condition,
        args.recommendation_mode, args.model, str(time.time()),
    ]
    args.save_path = os.path.join(*log_dir_parts)

sim_logger = SimLogger(args.save_path)
# Hand the run configuration to the logger.
sim_logger.log_info(args)

/home/tim/git-projects/2021-Twitter-KGE/log/ukraine/epistemic/random/DistMult/1646319309.534264/simulation.log


### Log Statistics for Reference User Graph

In [4]:
# Load the reference user graph (node ids parsed as ints) and index its
# per-node 'block' attribute in a bidict.
reference_graph_path = os.path.join(args.parent_dir, args.data_dir, "reduced_user_graph.gexf")
reference_user_graph = nx.read_gexf(reference_graph_path, node_type=int)
block_by_node = nx.get_node_attributes(reference_user_graph, 'block')
reference_community_mapper = bidict(block_by_node)

# Community label of every node, ordered by the mapper's sorted (key, value) pairs.
initial_communities = [
    community for _node, community in sorted(reference_community_mapper.items())
]

# Optionally log metrics for the untouched reference graph under epoch -1.
if args.log_reference_user_graph:
    sim_logger.calculate_metrics(reference_user_graph, reference_community_mapper)
    sim_logger.log_simulation(epoch=-1)
    

In [5]:
# Load the target user graph with integer node ids.
# NOTE(review): `target_user_graph` is not referenced by any other cell visible
# in this notebook, and the saved output shows this cell was interrupted —
# confirm the load is still needed.
target_user_graph = nx.read_gexf(
    os.path.join(args.parent_dir, args.data_dir, 'target_user_graph.gexf'),
    node_type=int,
)

KeyboardInterrupt: 

### Data Pipeline Setup

In [6]:
# All input files live in the processed dataset directory under the project root.
dataset_dir = os.path.join(args.parent_dir, args.data_dir)
edge_file = os.path.join(dataset_dir, 'edges.csv')
node_types_file = os.path.join(dataset_dir, 'num_entities.csv')
edge_types_file = os.path.join(dataset_dir, 'edge_types.csv')

# Positional arguments are passed in the order DataPipeline expects them.
data_pipeline = DataPipeline(
    edge_file,
    node_types_file,
    edge_types_file,
    args.simulation_epochs,
    args.free_simulation,
    args.recommendation_mode,
    args.recalc_communities,
    reference_user_graph,
    ideological_communities=args.ideological_communities,
)

# Entity and relation counts are taken from the loaded data; the model cell
# reads them from `args`.
args.nentity = data_pipeline.get_num_nodes()
edge_chain_counts = data_pipeline.get_num_edge_chains(by_chain_type=True)
args.nrelation = edge_chain_counts['1-chain']

### Training Pipeline Setup

In [7]:
# Build the embedding model from the sizes and hyper-parameters stored on `args`.
kge_model_config = dict(
    model_name=args.model,
    nentity=args.nentity,
    nrelation=args.nrelation,
    hidden_dim=args.hidden_dim,
    gamma=args.gamma,
)
model = KGEModel(**kge_model_config)

# Move the model to the GPU when CUDA is requested.
if args.cuda:
    model.cuda()

# Only parameters that require gradients are handed to the optimizer.
trainable_parameters = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_parameters, lr=args.learning_rate)

evaluator = Evaluator(name=args.dataset, eval_metric='mrr')

# Positional arguments are passed in the order TrainPipeline expects them.
train_pipeline = TrainPipeline(
    model,
    optimizer,
    evaluator,
    args.warm_up_steps,
    args.max_steps,
    args.batch_size,
    args.test_batch_size,
    args.negative_sample_size,
    args.negative_adversarial_sampling,
    args.adversarial_temperature,
    args.test_negative_sample_size,
    args.regularization,
    args.uni_weight,
    args.cpu_num,
    args.cuda,
)

### Simulation Pipeline Setup

In [8]:
# Positional arguments are passed in the order SimulationPipeline expects them;
# `args.ideological_bias` is forwarded under the keyword `ideological_strength`.
simulation_pipeline = SimulationPipeline(
    args.recommendation_mode,
    args.user_sampling_strategy,
    args.user_sample_size,
    args.rank_users,
    args.rank_items,
    args.condition,
    args.num_user_recs,
    args.num_tweet_recs,
    args.confirmation_bias,
    args.sharpness,
    args.do_disconnect,
    args.user_disconnect_ratio,
    args.disconnect_ratio,
    args.cuda,
    ideological_communities=args.ideological_communities,
    ideological_strength=args.ideological_bias,
)

### Run Training & Agent-based Modeling

In [9]:
# Main simulation loop. Each iteration:
#   1. advances the data pipeline to the next epoch,
#   2. trains the embedding model on that epoch's data,
#   3. runs the agent-based simulation with gradients disabled,
#   4. applies the simulated edge additions/removals to the data pipeline,
#   5. recomputes the community mapper, logs metrics and writes the user graph.

# Every epoch after the first trains for this fraction of `args.max_steps`.
FOLLOW_UP_STEP_RATIO = 0.5

for _ in range(args.simulation_epochs):
    ''' Enter Next Epoch '''
    data_pipeline.next_epoch()
    epoch = data_pipeline.epoch

    # TODO: Make nicer
    simulation_pipeline.ideological_communities = data_pipeline.ideological_communities

    ''' Log Epoch Info '''
    sim_logger.log_epoch_info(epoch, data_pipeline.epoch_edges, len(data_pipeline.epoch_nodes),
        args.nentity, args.nrelation)

    ''' Prepare Training Data '''
    data = data_pipeline.get_epoch_train_data()
    if epoch == 0:
        # Node ranges are only passed to the training pipeline on the first epoch.
        node_ranges = data_pipeline.get_node_ranges()
        data = train_pipeline.next_epoch(args.learning_rate, data, node_ranges)
    else:
        data = train_pipeline.next_epoch(args.learning_rate, data)

    ''' Training '''
    # The first epoch trains for the full step budget, later epochs for a fraction of it.
    num_steps = args.max_steps if epoch == 0 else int(args.max_steps * FOLLOW_UP_STEP_RATIO)
    training_logs = []
    pbar = sim_logger.init_pbar(num_steps)
    for step in pbar:
        training_logs += train_pipeline.train(data)

        if step >= train_pipeline.warm_up_steps:
            train_pipeline.adjust_optimizer(model, step)

        if step % args.log_steps == 0:
            sim_logger.update_pbar(pbar, epoch, step, training_logs)
            sim_logger.log_training(epoch, step, training_logs)
            training_logs = []

            if args.do_test:
                test_logs = train_pipeline.test(data, data_pipeline.edge_chains)
                sim_logger.log_test(epoch, step, test_logs, 'valid')

    # Flush whatever accumulated after the last logging step. Without this,
    # every step past the last multiple of `args.log_steps` is silently dropped;
    # when an epoch has no more steps than `args.log_steps`, only step 0 would
    # ever be recorded. `step` still holds the last executed step here, and the
    # guard skips the flush when the loop body never ran.
    if training_logs:
        sim_logger.log_training(epoch, step, training_logs)
        training_logs = []

    ''' Agent-based Modeling '''
    twitter_graph = data_pipeline.get_epoch_twitter_graph()
    user_graph = data_pipeline.get_epoch_user_graph()
    with torch.no_grad():
        # NOTE(review): nothing in this cell switches the model back to training
        # mode afterwards — presumably `train_pipeline.train` does; confirm.
        model.eval()
        epoch_sim_edges, epoch_remove_edges, sim_logs = simulation_pipeline.simulate(epoch, model, twitter_graph,
            user_graph, data_pipeline.community_mapper,
            data_pipeline.get_edge_chains(flatten=True))
    # Apply the simulation's edge changes before the graph is measured below.
    data_pipeline.add_epoch_sim_edges(epoch_sim_edges)
    data_pipeline.remove_edges(epoch_remove_edges)

    ''' Check Community Integrity '''
    # Re-fetch the user graph so it reflects the edges added/removed above.
    user_graph = data_pipeline.get_epoch_user_graph()
    community_mapper = data_pipeline.init_community_mapper(user_graph)
    node_ranges = data_pipeline.get_node_ranges()

    ''' Log Simulation Statistics'''
    sim_logger.calculate_metrics(user_graph, community_mapper, node_ranges)
    sim_logger.log_simulation(epoch, sim_logs)
    # Write this epoch's user graph to the run's log directory.
    nx.write_gexf(user_graph, os.path.join(args.save_path, "user_graph_{}.gexf".format(epoch)))

[epoch 0 | loss: 0.16650 | last save: 5000]: 100%|████████████████████████████████████████████████████████████████████| 10000/10000 [01:13<00:00, 136.33it/s]
[epoch 0 | ab-modeling]: 100%|████████████████████████████████████████████████████████████████████████████████████████| 22354/22354 [01:49<00:00, 204.55it/s]


/home/tim/git-projects/2021-Twitter-KGE/log/ukraine/epistemic/random/DistMult/1645790647.525172/simulation.csv


[epoch 1 | loss: 0.48230 | last save: 0]: 100%|█████████████████████████████████████████████████████████████████████████| 5000/5000 [00:37<00:00, 131.62it/s]
[epoch 1 | ab-modeling]: 100%|████████████████████████████████████████████████████████████████████████████████████████| 22211/22211 [01:58<00:00, 187.95it/s]


/home/tim/git-projects/2021-Twitter-KGE/log/ukraine/epistemic/random/DistMult/1645790647.525172/simulation.csv


[epoch 2 | loss: 0.55419 | last save: 0]: 100%|█████████████████████████████████████████████████████████████████████████| 5000/5000 [00:37<00:00, 132.67it/s]
[epoch 2 | ab-modeling]: 100%|████████████████████████████████████████████████████████████████████████████████████████| 22211/22211 [02:13<00:00, 165.85it/s]


/home/tim/git-projects/2021-Twitter-KGE/log/ukraine/epistemic/random/DistMult/1645790647.525172/simulation.csv


[epoch 3 | loss: 0.62560 | last save: 0]: 100%|█████████████████████████████████████████████████████████████████████████| 5000/5000 [00:39<00:00, 127.73it/s]
[epoch 3 | ab-modeling]: 100%|████████████████████████████████████████████████████████████████████████████████████████| 22211/22211 [02:33<00:00, 144.81it/s]


/home/tim/git-projects/2021-Twitter-KGE/log/ukraine/epistemic/random/DistMult/1645790647.525172/simulation.csv


[epoch 4 | loss: 0.62117 | last save: 0]: 100%|█████████████████████████████████████████████████████████████████████████| 5000/5000 [00:41<00:00, 120.77it/s]
[epoch 4 | ab-modeling]: 100%|████████████████████████████████████████████████████████████████████████████████████████| 22211/22211 [02:41<00:00, 137.32it/s]


/home/tim/git-projects/2021-Twitter-KGE/log/ukraine/epistemic/random/DistMult/1645790647.525172/simulation.csv


[epoch 5 | loss: 0.76434 | last save: 0]: 100%|█████████████████████████████████████████████████████████████████████████| 5000/5000 [00:40<00:00, 123.53it/s]
[epoch 5 | ab-modeling]: 100%|█████████████████████████████████████████████████████████████████████████████████████████| 22211/22211 [03:42<00:00, 99.76it/s]


/home/tim/git-projects/2021-Twitter-KGE/log/ukraine/epistemic/random/DistMult/1645790647.525172/simulation.csv


[epoch 6 | loss: 0.82640 | last save: 0]: 100%|█████████████████████████████████████████████████████████████████████████| 5000/5000 [00:40<00:00, 124.87it/s]
[epoch 6 | ab-modeling]: 100%|████████████████████████████████████████████████████████████████████████████████████████| 22211/22211 [02:49<00:00, 131.20it/s]


KeyboardInterrupt: 