In [1]:
import csv
import math
import random
import sys
import time
from collections import Counter

import numpy as np

In [2]:
# e-mails can have multiple timestamps
def orderTimestamps(filename):
    """Read a tab-separated edge list and return its distinct edges in time order.

    The first line of the file is treated as a header and skipped. Every
    other non-empty row must hold integers in column 0 (source node ID),
    column 1 (destination node ID) and column 3 (timestamp); column 2 is
    not read. Rows that share the same (source, destination, timestamp)
    are collapsed into a single entry carrying a multiplicity count.

    Args:
        filename: path of the tab-separated file to read.

    Returns:
        A list of (timestamp, srcID, dstID, count) tuples sorted ascending
        (by timestamp first, then srcID, dstID, count). An empty or
        header-only file yields an empty list.
    """
    edge_counts = Counter()

    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3, so pick the mode from the interpreter.
    if sys.version_info[0] >= 3:
        tsv_file = open(filename, 'r', newline='')
    else:
        tsv_file = open(filename, 'rb')

    with tsv_file:
        reader = csv.reader(tsv_file, delimiter='\t')
        next(reader, None)  # skip the header; the default avoids StopIteration on an empty file

        for row in reader:
            if not row:
                # csv yields [] for blank lines (e.g. a trailing newline); nothing to count
                continue
            srcID = int(row[0])
            dstID = int(row[1])
            timestamp = int(row[3])
            edge_counts[(srcID, dstID, timestamp)] += 1

    # Put the timestamp first so that plain tuple ordering sorts chronologically.
    return sorted(
        (timestamp, srcID, dstID, count)
        for (srcID, dstID, timestamp), count in edge_counts.items()
    )

In [30]:
# creates historic record of deterministic infections, live edges
# given how many runs, p_initial, p_infect
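# returns two parallel lists with one entry per run: a list of lists (each sublist contains the nodes infected in that outbreak, in chronological order) and a list of dicts (each mapping an infected node to the step at which it was infected)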
# this function is used to create live edges for CELF in maximizing outbreak detection and minimizing population affected

def createLiveEdges_combined(runs, p_initial_infect, p_infect, n=2029, seed=0, debug=False,
                             filename='out.dnc-temporalGraph'):
    """Simulate `runs` independent outbreaks over the time-ordered e-mail edges.

    For every run a set of "patient zero" nodes is drawn uniformly from the
    IDs 1..n, then the edges returned by orderTimestamps(filename) are
    replayed in order. Whenever the source of an edge is infected and its
    destination is not, each e-mail on that edge independently infects the
    destination with probability p_infect; the first success stops further
    attempts on that edge.

    Args:
        runs: number of outbreaks to simulate.
        p_initial_infect: fraction of the n nodes infected at the start;
            floor(n * p_initial_infect) patient zeros are drawn per run.
        p_infect: per-e-mail probability of transmitting the infection.
        n: number of nodes; node IDs are drawn from 1..n inclusive.
        seed: value passed to random.seed() before simulating (this reseeds
            the module-level generator).
        debug: accepted for interface compatibility; currently unused.
        filename: tab-separated edge file handed to orderTimestamps.

    Returns:
        A pair (lists_infected_nodes, lists_infected_dicts), each with one
        entry per run:
          * lists_infected_nodes[i] lists the infected node IDs in the order
            they became infected (patient zeros first).
          * lists_infected_dicts[i] maps each infected node ID to the step at
            which it was infected. Patient zeros have step 0; otherwise the
            step is the number of e-mails replayed before the infecting edge
            plus the index of the e-mail that transmitted.

    Raises:
        ValueError: if more patient zeros are requested than there are nodes,
            which would otherwise make the sampling loop spin forever.
    """
    random.seed(seed)
    timestampsOrdered = orderTimestamps(filename)

    num_nodes_infect = int(math.floor(n * p_initial_infect))
    if num_nodes_infect > n:
        raise ValueError(
            "cannot infect %d distinct nodes out of %d" % (num_nodes_infect, n)
        )

    lists_infected_nodes = []  # per run: infected node IDs in infection order
    lists_infected_dicts = []  # per run: node ID -> step of infection

    for run in range(runs):
        if run % 25000 == 0:
            # Single-argument call form prints identically on Python 2 and 3.
            print("Creating Live Edge #" + str(run))

        cur_infected_ids = []
        cur_infected_dict = {}

        # Mark patient zeros. The dict always holds exactly the IDs in the
        # list, so it is used for O(1) membership tests throughout.
        while len(cur_infected_ids) < num_nodes_infect:
            nID = random.randint(1, n)
            if nID not in cur_infected_dict:
                cur_infected_ids.append(nID)
                cur_infected_dict[nID] = 0

        step = 0

        # Each entry is (timestamp, source, destination, number of e-mails).
        for _, sourceNode, destNode, num_emails in timestampsOrdered:
            # Only an infected sender can infect a not-yet-infected recipient.
            if sourceNode in cur_infected_dict and destNode not in cur_infected_dict:
                # The same edge can occur several times at one timestamp;
                # every occurrence is an independent chance to transmit.
                for email in range(num_emails):
                    # random.random() draws the same value as random.uniform(0, 1)
                    if random.random() < p_infect:
                        cur_infected_ids.append(destNode)
                        cur_infected_dict[destNode] = step + email  # step of infection
                        break  # already infected, no need to keep trying
            step += num_emails  # advance by the e-mails on this edge, infected or not

        lists_infected_nodes.append(cur_infected_ids)
        lists_infected_dicts.append(cur_infected_dict)

    return lists_infected_nodes, lists_infected_dicts

In [31]:
# Four simulation batches of 200,000 runs each: initial infection fraction
# 0.001 or 0.005 crossed with per-e-mail infection probability 0.3 or 0.5.
(live_edges_200k_point001_point3,
 live_edges_200k_point001_point3_steps) = createLiveEdges_combined(
    runs=200000, p_initial_infect=0.001, p_infect=0.3)

(live_edges_200k_point005_point3,
 live_edges_200k_point005_point3_steps) = createLiveEdges_combined(
    runs=200000, p_initial_infect=0.005, p_infect=0.3)

(live_edges_200k_point001_point5,
 live_edges_200k_point001_point5_steps) = createLiveEdges_combined(
    runs=200000, p_initial_infect=0.001, p_infect=0.5)

(live_edges_200k_point005_point5,
 live_edges_200k_point005_point5_steps) = createLiveEdges_combined(
    runs=200000, p_initial_infect=0.005, p_infect=0.5)


Creating Live Edge #0
Creating Live Edge #25000
Creating Live Edge #50000
Creating Live Edge #75000
Creating Live Edge #100000
Creating Live Edge #125000
Creating Live Edge #150000
Creating Live Edge #175000
Creating Live Edge #0
Creating Live Edge #25000
Creating Live Edge #50000
Creating Live Edge #75000
Creating Live Edge #100000
Creating Live Edge #125000
Creating Live Edge #150000
Creating Live Edge #175000
Creating Live Edge #0
Creating Live Edge #25000
Creating Live Edge #50000
Creating Live Edge #75000
Creating Live Edge #100000
Creating Live Edge #125000
Creating Live Edge #150000
Creating Live Edge #175000
Creating Live Edge #0
Creating Live Edge #25000
Creating Live Edge #50000
Creating Live Edge #75000
Creating Live Edge #100000
Creating Live Edge #125000
Creating Live Edge #150000
Creating Live Edge #175000


In [4]:
import pickle

In [33]:
# Persist each simulation result to a file named after its variable.
# `with` makes sure every file is flushed and closed, even if pickling fails.
for _pickle_path, _payload in [
    ("live_edges_200k_point001_point3", live_edges_200k_point001_point3),
    ("live_edges_200k_point001_point3_steps", live_edges_200k_point001_point3_steps),
    ("live_edges_200k_point005_point3", live_edges_200k_point005_point3),
    ("live_edges_200k_point005_point3_steps", live_edges_200k_point005_point3_steps),
    ("live_edges_200k_point001_point5", live_edges_200k_point001_point5),
    ("live_edges_200k_point001_point5_steps", live_edges_200k_point001_point5_steps),
    ("live_edges_200k_point005_point5", live_edges_200k_point005_point5),
]:
    with open(_pickle_path, "wb") as _pickle_file:
        pickle.dump(_payload, _pickle_file)

In [35]:
# Persist the per-node infection steps of the last batch; `with` closes the file deterministically.
with open("live_edges_200k_point005_point5_steps", "wb") as _pickle_file:
    pickle.dump(live_edges_200k_point005_point5_steps, _pickle_file)