In [5]:
%matplotlib inline
import matplotlib.pyplot as plt
from random import uniform, seed
import numpy as np
import pandas as pd
import time
from igraph import *
import random
from collections import Counter

In [6]:
def get_RRS(G,p):   
    """
    Inputs: G:  Ex2 dataframe of directed edges. Columns: ['source','target']
            p:  Disease propagation probability
    Return: A random reverse reachable set expressed as a list of nodes
    """
    
    # Step 1. Select random source node
    source = random.choice(np.unique(G['source']))
    
    # Step 2. Get an instance of g from G by sampling edges  
    g = G.copy().loc[np.random.uniform(0,1,G.shape[0]) < G['probability'].tolist()]

    # Step 3. Construct reverse reachable set of the random source node
    new_nodes, RRS0 = [source], [source]   
    while new_nodes:
        
        # Limit to edges that flow into the source node
        temp = g.loc[g['target'].isin(new_nodes)]

        # Extract the nodes flowing into the source node
        temp = temp['source'].tolist()

        # Add new set of in-neighbors to the RRS
        RRS = list(set(RRS0 + temp))

        # Find what new nodes were added
        new_nodes = list(set(RRS) - set(RRS0))

        # Reset loop variables
        RRS0 = RRS[:]

    return(RRS)

In [7]:
def ris(G,k,p=0.5,mc=1000):    
    """
    Inputs: G:  Ex2 dataframe of directed edges. Columns: ['source','target']
            k:  Size of seed set
            p:  Disease propagation probability
            mc: Number of RRSs to generate
    Return: A seed set of nodes as an approximate solution to the IM problem
    """
    
    # Step 1. Generate the collection of random RRSs
    start_time = time.time()
    R = [get_RRS(G,p) for _ in range(mc)]

    # Step 2. Choose nodes that appear most often (maximum coverage greedy algorithm)
    SEED, timelapse = [], []
    for _ in range(k):
        
        # Find node that occurs most often in R and add to seed set
        flat_list = [item for sublist in R for item in sublist]
        seed = Counter(flat_list).most_common()[0][0]
        SEED.append(seed)
        
        # Remove RRSs containing last chosen seed 
        R = [rrs for rrs in R if seed not in rrs]
        
        # Record Time
        timelapse.append(time.time() - start_time)
    
    return(sorted(SEED),timelapse)

In [8]:
import csv
import os
import pickle
import argparse

from utils_isf import *

parser = argparse.ArgumentParser(description="ISF algorithm")
datasets = ['Data', 'facebook-twitter']
parser.add_argument("-d", "--dataset", default="Data", type=str,
                    help="one of: {}".format(", ".join(sorted(datasets))))
num_node = [600, 5000]
parser.add_argument("-nn", "--num_node", default=600, type=int,
                    help="one of: {}".format(", ".join(str(sorted(num_node)))))
num_layer = [3, 4, 5, 6, 7, 8, 9]
parser.add_argument("-nl", "--num_layer", default=3, type=int,
                    help="one of: {}".format(", ".join(str(sorted(num_layer)))))
overlaping_user = [30, 50, 70]
parser.add_argument("-ou", "--overlaping_user", default=30, type=int,
                    help="one of: {}".format(", ".join(str(sorted(overlaping_user)))))
budgets = [10, 20, 30]
parser.add_argument("-b", "--budget", default=30, type=int,
                    help="one of: {}".format(", ".join(str(sorted(budgets)))))

parser.add_argument("-m", "--mc", default=30, type=int,
                    help="the number of Monte-Carlo simulations")
args = parser.parse_args(args=[])

# Read input graph file
file_path = '../Dataset/' + args.dataset + '/graph_' + str(args.num_node) + '_node_' + str(args.num_layer) + \
            '_layer_' + str(args.overlaping_user) + '_overlaping_user.pickle'

file_name = os.path.splitext(os.path.basename(file_path))[0]
with open(file_path, 'rb') as file:
    data = pickle.load(file)
graphs = data[0]
combined_graph = data[1]

In [30]:
def get_df(G):
    source_nodes = []
    target_nodes = []
    probs = []

    for edges in G.edges():
        p = 1 - G.get_edge_data(edges[0], edges[1])['weight']
        if combined_graph.is_directed() == False:
            source_nodes.extend([edges[0], edges[1]]) 
            target_nodes.extend([edges[1], edges[0]]) 
            probs.extend([p, p])
        else:
            source_nodes.append(edges[0])
            target_nodes.append(edges[1])
            probs.append(p)

    df = pd.DataFrame({'source': source_nodes,'target': target_nodes, 'probability': probs})
    
    return df

In [25]:
# Run algorithms
ris_output  = ris(df,5,p=0.5,mc=1000)

In [26]:
ris_output

([0, 164, 369, 371, 396],
 [1.3709588050842285,
  1.3718786239624023,
  1.3718819618225098,
  1.3718841075897217,
  1.3718857765197754])

In [27]:
S, SPREAD, timelapse, LOOKUPS, data = celf(combined_graph, k=5, mc=100)

100%|██████████| 600/600 [00:05<00:00, 116.19it/s]
100%|██████████| 4/4 [00:30<00:00,  7.51s/it]


In [28]:
S, SPREAD, timelapse,

([117, 75, 30, 530, 485],
 [17.49, 28.41, 41.05, 52.15, 57.53],
 [5.165317058563232,
  7.671156644821167,
  7.844915866851807,
  13.397634029388428,
  35.19011092185974])

In [29]:
IC(combined_graph, [0, 164, 369, 371, 396], mc=100)

(37.89,
 array([[2, 0, 0, ..., 0, 0, 0],
        [2, 0, 0, ..., 0, 0, 0],
        [2, 0, 0, ..., 0, 0, 0],
        ...,
        [2, 0, 0, ..., 0, 0, 0],
        [2, 0, 0, ..., 0, 0, 0],
        [2, 0, 0, ..., 0, 0, 0]]),
 [])

In [None]:
graphs = sorted(graphs, key=lambda graph: (graph.number_of_nodes(), graph.number_of_edges()))
start_time = time.time()

df = get_df(combined_graph)
S, timelapse  = ris(df,5,p=0.5,mc=1000)
SPREAD, _, _ = IC(combined_graph, S, mc=100)

save_time = time.time() - start_time
print("Time", save_time)


In [31]:
import numpy as np

# Mảng ban đầu
array = np.array([1, 2, 3, 4, 5])

# Chuyển đổi mảng về kiểu dữ liệu float8
array_float8 = array.astype(np.float8)

print(array_float8)

AttributeError: module 'numpy' has no attribute 'float8'

In [12]:
import networkx as nx

G = nx.Graph([(1, 1)])
A = nx.from_scipy_sparse_matrix(G)

AttributeError: 'Graph' object has no attribute 'shape'

In [7]:
A

array([[1.]])