# Random graph experiments: Color and Node Fitness

## Graph and data generation module ##

In [1]:
import os
# Pin the numerical back-ends to a single thread each. This is set ahead of
# the NumPy import below; presumably the intent is that every worker process
# of the multiprocessing pool stays single-threaded -- TODO confirm.
os.environ.update({
    "OMP_NUM_THREADS": "1",
    "OPENBLAS_NUM_THREADS": "1",
    "VECLIB_MAXIMUM_THREADS": "1",
})

In [2]:
import sys, time, string, random
import numpy as np
from collections import Counter
from datetime import datetime

In [3]:
class SimpleDirectedColorGraph(object):
  """Directed graph without repeated edges over nodes with a color and a fitness.

  Every node carries a binary color (stored as 0.0 / 1.0) and a fitness value
  drawn from a standard normal distribution. Edges are recorded in insertion
  order in ``history`` so that a run can be replayed or saved.

  Attributes:
    num_nodes: number of nodes; nodes are identified by ``0 .. num_nodes - 1``.
    node_colors: float array with ``ceil(p * num_nodes)`` ones, rest zeros,
      in random order.
    node_fitness: float array of standard-normal fitness values.
    adjacency_to: per-node ``Counter`` of out-neighbors.
    adjacency_from: per-node ``Counter`` of in-neighbors.
    in_degs / out_degs: float arrays of current in- and out-degrees.
    history: list of ``(actor, target)`` tuples in the order they were added.
  """

  def __init__(self, num_nodes, p=0.5):
    """Create an edgeless graph.

    Args:
      num_nodes: number of nodes (must be an integer).
      p: fraction of nodes that receive color 1 (rounded up to a whole node).
    """
    self.num_nodes = num_nodes

    # Exactly ceil(p * num_nodes) nodes get color 1; the shuffle decides which.
    self.node_colors = np.zeros(num_nodes)
    self.node_colors[:int(np.ceil(p * num_nodes))] = 1
    np.random.shuffle(self.node_colors)

    self.node_fitness = np.random.randn(num_nodes)

    self.adjacency_to = [Counter() for _ in range(num_nodes)]
    self.adjacency_from = [Counter() for _ in range(num_nodes)]

    self.in_degs = np.zeros(num_nodes)
    self.out_degs = np.zeros(num_nodes)
    self.history = []

  def add_edge(self, actor, target, warn=True):
    """Add the directed edge ``actor -> target`` unless it already exists.

    Args:
      actor: source node index.
      target: destination node index.
      warn: when True, print a message if the edge is already present.

    Returns:
      1 if the edge was added, 0 if it already existed.
    """
    # Reading a missing Counter key yields 0 without inserting the key.
    if self.adjacency_to[actor][target] != 0:
      if warn:
        print("Error: Should not add edge as it exists")
      return 0

    self.out_degs[actor] += 1
    self.in_degs[target] += 1
    self.adjacency_to[actor][target] += 1
    self.adjacency_from[target][actor] += 1
    self.history.append((actor, target))
    return 1

  def grow_erdos_renyi(self, num_edges, simple=True):
    """Add ``num_edges`` new edges chosen uniformly at random.

    Candidate pairs are drawn in batches; pairs that already exist are
    rejected and redrawn. Self-loops are never proposed.

    Args:
      num_edges: number of new edges to add (integral floats are accepted).
      simple: currently unused; kept for backward compatibility.

    Raises:
      ValueError: if fewer than ``num_edges`` distinct non-loop edges are
        still available, which would otherwise make the rejection loop
        run forever.
    """
    num_edges = int(num_edges)
    if num_edges <= 0:
      return

    existing = sum(1 for actor, target in self.history if actor != target)
    capacity = self.num_nodes * (self.num_nodes - 1) - existing
    if num_edges > capacity:
      raise ValueError(
          "Cannot add {} edges: only {} distinct edges are still available"
          .format(num_edges, capacity))

    num_edges_done = 0
    while num_edges_done < num_edges:
      remaining = num_edges - num_edges_done
      actors = np.random.randint(0, self.num_nodes, size=remaining)
      # A non-zero offset modulo num_nodes guarantees target != actor.
      offsets = np.random.randint(1, self.num_nodes, size=remaining)
      targets = (actors + offsets) % self.num_nodes
      for actor, target in zip(actors, targets):
        num_edges_done += self.add_edge(actor, target, warn=False)

  def grow_with_utility(self, **kwargs):
    """Let a random actor add one edge to its highest-utility eligible node.

    The utility of a candidate is the dot product of its feature row with
    ``weights`` plus i.i.d. Gumbel noise. The actor itself and its current
    out-neighbors are never eligible.

    Args:
      **kwargs: must contain ``weights``, a vector with one entry per
        feature column (color, fitness).

    Returns:
      1 if an edge was added, 0 if the actor had no eligible target left.
    """
    weights = kwargs['weights']
    # Randomly pick an actor
    actor = np.random.randint(0, self.num_nodes)

    #######################################################
    ## Create feature matrix : MODIFY FEATURE HERE
    #######################################################
    # Other candidate features that could be appended as columns:
    #   log in-degree: np.log(self.in_degs + (self.in_degs < 0.5).astype(int))
    #   same color:    (self.node_colors == self.node_colors[actor]).astype(int)
    X = np.array([self.node_colors, self.node_fitness]).T
    #######################################################
    ## Create feature matrix : END
    #######################################################

    # Random utility with Gumbel noise; taking the argmax of these is
    # equivalent to sampling a target from the softmax of X.dot(weights).
    U = np.dot(X, weights) + np.random.gumbel(size=self.num_nodes)

    # Hard-exclude the actor and nodes it already points to. A finite
    # penalty could be outweighed by large utilities and let a self-loop or
    # a duplicate edge through.
    excluded = np.zeros(self.num_nodes, dtype=bool)
    excluded[actor] = True
    neighbors = np.fromiter(self.adjacency_to[actor], dtype=np.intp)
    excluded[neighbors] = True
    if excluded.all():
      # The actor already points to every other node: nothing to add.
      return 0
    U[excluded] = -np.inf
    target = np.argmax(U)

    # Add edge
    return self.add_edge(actor, target)

In [6]:
def generate(kwargs):
    """Simulate one random graph and save checkpoints of its edge history.

    The graph is seeded with Erdos-Renyi edges and then grown one
    utility-driven edge at a time. Progress lines are appended to the file
    ``log`` and checkpoints are written as ``.npz`` files.

    Args:
        kwargs: dict of run settings.
            Required keys: ``seed`` (NumPy seed), ``num_nodes``,
            ``num_er_edges``, ``num_choice_edges`` (each converted with
            ``int``) and ``colorratio`` (fraction of nodes with color 1).
            Optional keys: ``checkpoint_interval`` (defaults to
            ``num_choice_edges``, i.e. one checkpoint at the end),
            ``logging_interval`` (defaults to ``checkpoint_interval``) and
            ``output_dir`` (defaults to ``"graphs-fitness-big"``).
    """
    np.random.seed(kwargs['seed'])
    num_nodes = int(kwargs['num_nodes'])
    num_er_edges = int(kwargs['num_er_edges'])
    num_choice_edges = int(kwargs['num_choice_edges'])
    p = kwargs['colorratio']

    checkpoint_interval = kwargs.get('checkpoint_interval', num_choice_edges)
    logging_interval = kwargs.get('logging_interval', checkpoint_interval)
    output_dir = kwargs.get('output_dir', "graphs-fitness-big")

    # Create the destination up front so that a missing directory fails (or
    # is fixed) before the simulation runs rather than at the first save.
    os.makedirs(output_dir, exist_ok=True)

    # Utility weights for the (color, fitness) feature columns.
    w0 = np.array([np.log(7), 1])

    G = SimpleDirectedColorGraph(num_nodes, p=p)
    G.grow_erdos_renyi(num_er_edges)

    # Common path prefix shared by the log message and the checkpoint files.
    run_tag = "{}/nodes={:.1e}&er_edges={:.1e}&choice_edges={:.1e}&color_ratio={:.1e}"\
              .format(output_dir, num_nodes, num_er_edges, num_choice_edges, p)

    # The clock starts after the Erdos-Renyi phase, so elapsed time and the
    # finish estimate cover the choice-edge phase only.
    t0 = time.time()
    for i in range(num_choice_edges):
        G.grow_with_utility(weights=w0)
        done = i + 1

        # NOTE(review): the `i > 0` guard only has an effect when an interval
        # is 1 (it then skips the very first edge) -- confirm this is intended.
        if done % logging_interval == 0 and i > 0:
            t = time.time() - t0
            # Linear extrapolation of the elapsed time to the full edge count.
            t_done = datetime.fromtimestamp(t0 + t * num_choice_edges / done)\
                             .strftime('%Y-%m-%dT%H:%M:%S')
            msg = "Time = {}; Progress = {:.2f}; Time since beginning = {:.1f}s; Est finish = {}; Tentative Destination = {}\n"\
                  .format(datetime.now().strftime('%Y-%m-%dT%H:%M:%S'), done / num_choice_edges, t, t_done, run_tag)
            with open("log", "a") as log_file:
                log_file.write(msg)

        if done % checkpoint_interval == 0 and i > 0:
            filename = run_tag + "&checkpoint={:.2f}&time={}&seed={}.npz"\
                       .format(done / num_choice_edges,
                               datetime.now().strftime('%Y-%m-%dT%H:%M:%S'),
                               np.base_repr(kwargs['seed'], 36))
            # The first num_er_edges history entries are the Erdos-Renyi
            # edges; everything after them was added by utility choice.
            np.savez(filename,
                     er_edges=np.array(G.history[:num_er_edges]),
                     choice_edges=np.array(G.history[num_er_edges:]),
                     node_fitness=G.node_fitness,
                     node_colors=G.node_colors)

In [None]:
from multiprocessing import Pool
from itertools import product

# 20 replicates of a 1e5-node graph for each of the 12 color ratios below,
# i.e. 240 independent runs in total.
nums_nodes = [1e5] * 20
colorratios = [1e-4, 2e-4, 5e-4, 1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2, 1e-1, 2e-1, 5e-1]

# One settings dict per run; each run draws its own seed so replicates differ.
# The float counts are converted with int() inside generate().
args = [{'num_nodes': num_nodes,
          'num_er_edges': num_nodes*5,
          'num_choice_edges':2e5,
          'colorratio': colorratio,
          'logging_interval':2e2,
          'seed': random.randint(0,2**32-1)} for num_nodes, colorratio in product(nums_nodes, colorratios)]

# Guard the pool start-up: with the "spawn" start method every worker
# re-imports the main module, and an unguarded Pool would be created again
# in each worker. The guard also keeps a plain import from launching jobs.
if __name__ == "__main__":
  with Pool(49) as pool:
    pool.map(generate, args)