In [91]:
import numpy as np
import math
import pprint as pp

## Génération de graphes

In [92]:
# Hand-built directed graph, stored as {parent: {child: infection probability}}.
handmadeGraph = {
    1: {2: 0.5, 3: 0.5},
    2: {4: 0.6},
    3: {4: 0.4, 5: 0.6},
    4: {},  # no outgoing edge
    5: {},  # no outgoing edge
}
# Node identifiers 1..5 as a NumPy integer array.
nodes = np.arange(1, 6)
def randomGraph(nodes):
    '''Build a complete directed graph with random edge weights.

        Args
            nodes : re-iterable collection of node identifiers
        Returns
            {node: {other: weight}} with one entry for every ordered pair of
            distinct nodes, each weight drawn with np.random.rand()
    '''
    return {
        src: {dst: np.random.rand() for dst in nodes if dst != src}
        for src in nodes
    }

## Inférence
Génération de cascades

In [93]:
def genCascade(graph,startNode,startTime=0):
    '''Simulate one diffusion cascade starting from a single infected node.

        At every time step, each node infected at the previous step gets one
        chance to infect each of its children that is not infected yet; the
        attempt succeeds when np.random.rand() is below the edge probability.
        The simulation stops at the first step with no new infection.

        Args
            graph : {parent: {child: infection probability}}
            startNode : node infected at startTime
            startTime : time stamp given to startNode (default 0)
        Returns
            cascade : {node: infection time}
    '''
    cascade = {startNode: startTime}
    frontier = [startNode]
    step = startTime + 1
    while frontier:
        newly_infected = {}
        for parent in frontier:
            for child, proba in graph[parent].items():
                # A node is infected at most once, earlier infections win.
                if child in cascade or child in newly_infected:
                    continue
                if np.random.rand() < proba:
                    newly_infected[child] = step
        cascade.update(newly_infected)
        frontier = list(newly_infected)
        step += 1

    return cascade

In [94]:
# Number of simulated diffusion episodes.
nbCascades = 200
# Every episode starts from node 1 of the hand-built graph. No random seed is
# set in this cell, so the sample can differ from one run to the next.
cascades = [genCascade(handmadeGraph, 1) for _ in range(nbCascades)]
pp.pprint(cascades[:3])

[{1: 0}, {1: 0, 3: 1, 4: 2}, {1: 0, 2: 1}]


## Independent Cascade : apprentissage


### 2008 Saito
Les temps d'infection sont contigus <br>
$D(i)$  all newly infected at time $i$  <br>
$D = D(0) \cup D(1) \cup ... \cup D(n)$
#### Estimation par maximum de vraisemblance (algorithme EM)
Étapes :
1. On donne une première estimation des arêtes du graphe : $k_{u,v}$
2. On calcule $P_{w}$ en fonction de $k_{u,v}$
3. On calcule par maximum de vraisemblance $k_{u,v}$ en fonction de $P_{w}$
4. Retour à l'étape 2 jusqu'à convergence


In [104]:
def cascade_repr(cascade):
    '''Turn a cascade {node: time} into a list indexed by infection time.

        Entry t of the result lists the nodes infected at time t, in the
        iteration order of the input dict; steps without infection stay empty.
    '''
    last_t = max(cascade.values())
    buckets = [list() for _ in range(last_t + 1)]
    for node in cascade:
        buckets[cascade[node]].append(node)
    return buckets
# Time-indexed view of every simulated cascade (one entry per episode).
D = list(map(cascade_repr, cascades))
print(cascades[:2],'\n',D[:2])

[{1: 0}, {1: 0, 3: 1, 4: 2}] 
 [[[1]], [[1], [3], [4]]]


In [105]:
def nodes_in_Ds(Ds):
    '''Flatten a time-indexed episode into a single list of nodes.

        Nodes come out in time order and are not de-duplicated.
    '''
    return [node for step_nodes in Ds for node in step_nodes]

In [106]:
def P_sw(g,Ds,w):
    '''Probability that node w gets infected in episode Ds under graph g.

        Args
            g : graph as {parent: {child: infection probability}}; an edge
                that is absent from g[parent] counts as probability 0
            Ds : episode as a list indexed by time step, each entry listing
                 the nodes infected at that step
            w : node whose infection probability is wanted
        Returns
            1 when w is infected at step 0, otherwise
            1 - prod(1 - g[parent][w]) over the nodes infected one step
            before w
        Raises
            ValueError when w does not appear in Ds
    '''
    t = None
    for step, infected in enumerate(Ds):
        if w in infected:
            t = step  # keep the last step in which w appears
    if t is None:  # w is not part of this diffusion episode
        raise ValueError(f"node {w} is not in diff episode : {Ds} ")
    if t == 0:  # w is a starting node of the episode
        return 1
    # .get() lets sparse graphs work: a missing edge cannot transmit.
    return 1 - np.prod([1 - g[parent].get(w, 0.0) for parent in Ds[t - 1]])

In [107]:
def Expectation(g,D):
    '''E step: infection probability of every node of every episode.

        Returns a list aligned with D; entry i maps each node of episode i
        to P_sw(g, D[i], node), i.e. p_sw[episode index][node] -> probability.
    '''
    p_sw = []
    for Ds in D:
        p_sw.append({n: P_sw(g, Ds, n) for n in nodes_in_Ds(Ds)})
    return p_sw

In [108]:
def D_plus_uv_id(D,u,v):
    '''Indices of the episodes in which v is infected one step after u.

        Args
            D : list of time-indexed episodes
            u, v : source and target node of the edge
        Returns
            sorted list of episode indices, each listed once
    '''
    def u_then_v(Ds):
        # True when some pair of consecutive steps holds u, then v.
        return any(u in prev and v in nxt for prev, nxt in zip(Ds, Ds[1:]))

    return [idx for idx, Ds in enumerate(D) if u_then_v(Ds)]

def D_minus_uv(D,u,v):
    '''Indices of the episodes in which u is infected but v does not follow.

        An episode is kept when u appears at some step and v is absent from
        the next step, or when u appears in the last step of the episode.

        NOTE(review): only the step right after u is inspected, so an episode
        where v was infected before (or together with) u is counted as well.
        Confirm this is the intended definition of a failed attempt.

        Args
            D : list of time-indexed episodes
            u, v : source and target node of the edge
        Returns
            set of episode indices
    '''
    failures = set()
    for idx, Ds in enumerate(D):
        missed = any(u in prev and v not in nxt for prev, nxt in zip(Ds, Ds[1:]))
        if missed or u in Ds[-1]:
            failures.add(idx)
    return failures

In [109]:
def Maximisation_uv(g,D_plus_id,Dminus_len,p_sw,u,v):
    '''M step for a single edge: new infection probability of u -> v.

        Args
            g : current graph {parent: {child: probability}}
            D_plus_id : D_plus_id[u][v] lists the indices of the episodes in
                        which v is infected right after u
            Dminus_len : Dminus_len[u][v] counts the episodes in which u is
                         infected and v does not follow
            p_sw : p_sw[i][v] is the infection probability of v in episode i
            u, v : source and target node of the edge
        Returns
            sum over i in D_plus_id[u][v] of g[u][v] / p_sw[i][v], divided by
            the number of episodes in which the edge was observed; when the
            edge was never observed, the current value g[u][v]
    '''
    successes = D_plus_id[u][v]
    n_obs = len(successes) + Dminus_len[u][v]
    if n_obs == 0:
        # No episode says anything about this edge: keep the current estimate
        # instead of dividing by zero.
        return g[u][v]
    return (1 / n_obs) * sum(g[u][v] / p_sw[i][v] for i in successes)

def Maximisation(g,D_plus_id,Dminus_len,p_sw):
    '''M step for the whole graph.

        Returns a new graph with the same nodes as g, holding for every
        ordered pair of distinct nodes (u, v) the value computed by
        Maximisation_uv; g itself is left untouched.
    '''
    return {
        u: {
            v: Maximisation_uv(g, D_plus_id, Dminus_len, p_sw, u, v)
            for v in g
            if v != u
        }
        for u in g
    }

In [110]:
def EM_IC(D,nodes,nbIter=100):
    '''Estimate the edge probabilities of an Independent Cascade graph by EM.

        Args
            D : list of time-indexed episodes (see cascade_repr)
            nodes : re-iterable collection of the node identifiers
            nbIter : number of EM iterations to run (default 100, the value
                     that used to be hard-coded)
        Returns
            graph {u: {v: estimated probability}} over every ordered pair of
            distinct nodes
    '''
    # Random starting point produced by randomGraph.
    g = randomGraph(nodes)
    # The success / failure sets depend only on D, so they are built once.
    D_plus_id = {src: {dst: D_plus_uv_id(D, src, dst) for dst in nodes} for src in nodes}
    D_minus_len = {src: {dst: len(D_minus_uv(D, src, dst)) for dst in nodes} for src in nodes}
    for _ in range(nbIter):
        p_sw = Expectation(g, D)
        g = Maximisation(g, D_plus_id, D_minus_len, p_sw)
    return g

# Rebuild the time-indexed episodes, then fit the edge probabilities with EM.
D = list(map(cascade_repr, cascades))
finalGraph = EM_IC(D, nodes)
finalGraph

{1: {2: 0.395, 3: 0.51, 4: 0.0, 5: 0.0},
 2: {1: 0.0, 3: 0.0, 4: 0.6265853370101946, 5: 1.5606190365682204e-43},
 3: {1: 0.0, 2: 0.0, 4: 0.4451595604818274, 5: 0.5392156862745098},
 4: {1: 0.0, 2: 0.0, 3: 0.0, 5: 0.0},
 5: {1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}}

### Methode 2
Les temps d'infection ne sont pas contigus


## Evaluation