In [None]:
### modelling virality

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import altair as alt
import numpy as np
import pandas as pd
from nltk.corpus import movie_reviews
from sklearn.feature_extraction import DictVectorizer
from functools import reduce 

import networkx as nx

In [None]:
# Load the friendship edge list: one tab-separated "from, to, timestamp" record per line.
# NOTE(review): the file is assumed to have no header line (the columns are named
# here by hand) -- confirm against the data file. header=None keeps the first
# line as data; without it pandas would treat the first edge as column labels
# and that edge would be silently dropped.
facebook_edges = pd.read_csv(
    "facebook-links.txt",
    sep='\t',
    header=None,
    names=["from", "to", "timestamp"],
)
facebook_edges.head()

In [None]:
# Build the graph from the edge list so this cell works on a fresh kernel
# (facebook_graph was previously used without being defined anywhere above).
# from_pandas_edgelist creates an undirected nx.Graph by default.
facebook_graph = nx.from_pandas_edgelist(facebook_edges, source="from", target="to")

adjacency_matrix = nx.adjacency_matrix(facebook_graph)

# One row sum per node: the number of links recorded for that node.
degrees = np.sum(adjacency_matrix, axis=1)

1. pick some virality coefficient $\xi\in (0,1)$
2. select one person (node) at random to be initially "infected"
3. each currently infected person loses interest with probability $\alpha$ and becomes _permanently_ un-infected. By default we'll use $\alpha=0.01$.
4. for each infected person, each neighbour in the graph who isn't already infected or immune becomes infected with probability $\xi$. Note: if multiple neighbours of an un-infected node are infected, repeat this step multiple times. For example, if Mercutio and Paris like _Gangnam Style_, then Escalus has two chances of being infected at the current time step. Mathematically, the probability of infection is $1-(1-\xi)^2 = 2\xi-\xi^2$, but you don't need to calculate this in your code because you can just repeatedly try to infect the person.
5. repeat steps 3-4 some number of times, by default $100$. 

The model should return the proportion of people infected and plot how that proportion changes over time.

In [None]:
def virality_sim(G, ξ, α=0.01, iters=100, plot=True):
    """
    Monte Carlo simulation of virality using an infection model.

    One node is picked uniformly at random as the initial infection. Each
    iteration then
      1. lets every currently infected node lose interest with probability α,
         which makes it permanently immune (it can never be infected again), and
      2. gives every node that is still infected one independent chance, with
         probability ξ, to infect each neighbour that is neither infected nor
         immune.

    Returns the proportion of nodes that are infected after the final
    iteration (a float between 0 and 1).

    Arguments:
    G -- (networkx.classes.graph.Graph) the graph
    ξ -- (float) the probability of infection by a neighbour, at each iteration
    α -- (float) the probability that an infected node becomes immune, at each iteration (default: 0.01)
    iters -- (int) the number of iterations to run the simulation (default: 100)
    plot -- (bool) whether to plot the infected proportion per iteration (default: True)

    Raises:
    ValueError -- if G has no nodes
    """
    nodes = list(G.nodes())
    if not nodes:
        raise ValueError("cannot run the simulation on a graph with no nodes")
    n_nodes = len(nodes)

    # Draw an index rather than sampling the node labels directly, so the
    # chosen node keeps its original type (np.random.choice would convert the
    # labels to a numpy array and fails on non-scalar labels such as tuples).
    infected = {nodes[np.random.randint(n_nodes)]}
    immune = set()

    percent_infected = []

    for _ in range(iters):

        # immunity: only currently infected nodes can lose interest; they stop
        # being infected and are remembered as immune so they are never re-infected
        recovered = {node for node in infected if np.random.rand() < α}
        infected -= recovered
        immune |= recovered

        # infection: collect new infections separately so that nodes infected
        # in this iteration do not start spreading until the next one
        about_to_be_infected = set()
        for infected_node in infected:
            for neighbour in G[infected_node]:
                if neighbour not in immune and neighbour not in infected:
                    # one independent trial per infected neighbour
                    if np.random.rand() < ξ:
                        about_to_be_infected.add(neighbour)
        infected |= about_to_be_infected
        percent_infected.append(len(infected) / n_nodes)

    if plot:
        plt.scatter(range(len(percent_infected)), percent_infected)
        plt.xlabel("iteration")
        plt.ylabel("percent infected")
        plt.show()
        plt.clf()

    return len(infected) / n_nodes