In [2]:
import networkx as nx

def load_facebook_graph(path="facebook"):
    """Load an undirected graph from a whitespace-separated edge-list file.

    The first line is treated as a header and skipped when it consists of
    exactly two tokens that are both made of digits (the file this notebook
    uses starts with "<node count> <2 * edge count>").  Any other first line
    is parsed like every remaining line: the first two fields are converted
    to ``int`` and added as an edge, and lines with fewer than two fields are
    ignored.

    Note: a header-less file whose first edge is written as two non-negative
    integers is indistinguishable from a header, so that first edge would be
    skipped.

    Args:
        path: Path of the edge-list file to read.

    Returns:
        An ``nx.Graph`` containing one edge per parsed line.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If one of the first two fields of an edge line is not an
            integer.
    """
    G = nx.Graph()
    with open(path, "r") as f:
        first = f.readline().split()
        looks_like_header = len(first) == 2 and all(tok.isdigit() for tok in first)
        # A non-header first line goes through the same rule as the rest of
        # the file, so an empty file, a blank first line or extra columns
        # (e.g. a weight) no longer crash the unpacking.
        if not looks_like_header and len(first) >= 2:
            u, v = map(int, first[:2])
            G.add_edge(u, v)

        # Remaining lines: one edge "u v" per line; extra columns are ignored.
        for line in f:
            parts = line.split()
            if len(parts) < 2:
                continue
            u, v = map(int, parts[:2])
            G.add_edge(u, v)
    return G

# Quick demo: load the graph right away and report its size.
G = load_facebook_graph(path="facebook")
print(
    f"Số nút: {G.number_of_nodes():,}  |  Số cạnh: {G.number_of_edges():,}"
)



Số nút: 4,039  |  Số cạnh: 88,234


In [33]:
# Install the libraries through the interpreter that runs this kernel
# (`sys.executable -m pip`), so the packages are installed for that
# interpreter rather than for whichever `pip` is first on PATH.
# NOTE(review): torchvision and torchaudio are not imported by any cell
# visible here - confirm they are actually needed.
import sys
!{sys.executable} -m pip install networkx torch torchvision torchaudio


Collecting torch
  Downloading torch-2.7.0-cp312-cp312-win_amd64.whl.metadata (29 kB)
Collecting torchvision
  Downloading torchvision-0.22.0-cp312-cp312-win_amd64.whl.metadata (6.3 kB)
Collecting torchaudio
  Downloading torchaudio-2.7.0-cp312-cp312-win_amd64.whl.metadata (6.7 kB)
Collecting sympy>=1.13.3 (from torch)
  Using cached sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Downloading torch-2.7.0-cp312-cp312-win_amd64.whl (212.5 MB)
   ---------------------------------------- 0.0/212.5 MB ? eta -:--:--
   ---------------------------------------- 0.8/212.5 MB 8.3 MB/s eta 0:00:26
   ---------------------------------------- 1.8/212.5 MB 8.4 MB/s eta 0:00:26
    --------------------------------------- 3.7/212.5 MB 7.0 MB/s eta 0:00:30
   - -------------------------------------- 5.5/212.5 MB 7.8 MB/s eta 0:00:27
   - -------------------------------------- 7.3/212.5 MB 8.1 MB/s eta 0:00:26
   - -------------------------------------- 9.2/212.5 MB 8.3 MB/s eta 0:00:25
   -- -----------

In [None]:
import networkx as nx
import random
from typing import Dict, List, Set, Tuple
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# --- Multi-Topic Independent Cascade Simulation ---
class MultiTopicIC:
    """Monte-Carlo simulator running one cascade per topic on a shared graph.

    The ``k`` topics spread independently of each other: each topic has its
    own seed set, its own set of activated nodes and its own edge
    probabilities.
    """

    def __init__(self, G: nx.DiGraph, k: int,
                 prob: Dict[int, Dict[Tuple[int, int], float]]):
        """Store the graph, the number of topics and the edge probabilities.

        Args:
            G: Directed graph; influence travels from a node to its successors.
            k: Number of topics.
            prob: ``prob[i][(u, v)]`` is the probability that ``u`` activates
                ``v`` for topic ``i``.  Edges missing from ``prob[i]`` are
                treated as probability 0.
        """
        self.G = G
        self.k = k
        self.prob = prob

    def simulate(self, seeds: List[Set[int]]) -> List[Set[int]]:
        """Run one random cascade per topic and return the activated nodes.

        Starting from the seeds, every node that became active in the previous
        round gets a single chance to activate each of its not-yet-active
        successors; the process stops when no topic activated a new node.

        Args:
            seeds: ``seeds[i]`` is the seed set of topic ``i``.  The input sets
                are copied, not modified.

        Returns:
            A list whose ``i``-th entry is the set of nodes activated for
            topic ``i`` (seeds included).
        """
        activated = [set(S) for S in seeds]
        frontier = [set(S) for S in seeds]
        while any(frontier):
            new_frontier = [set() for _ in range(self.k)]
            for i in range(self.k):
                for u in frontier[i]:
                    for v in self.G.successors(u):
                        if v in activated[i]:
                            continue
                        # Strict '<': random.random() lies in [0.0, 1.0), so
                        # '<=' would let a probability-0 edge fire whenever
                        # the draw is exactly 0.0.
                        if random.random() < self.prob[i].get((u, v), 0.0):
                            new_frontier[i].add(v)
                            activated[i].add(v)
            frontier = new_frontier
        return activated

    def expected_spread(self, seeds: List[Set[int]], runs: int = 100) -> List[float]:
        """Estimate the mean number of activated nodes per topic.

        Args:
            seeds: ``seeds[i]`` is the seed set of topic ``i``.
            runs: Number of independent simulations to average over; must be
                positive.

        Returns:
            A list of ``k`` floats, the average activated-set size per topic.

        Raises:
            ValueError: If ``runs`` is not positive.
        """
        if runs <= 0:
            raise ValueError(f"runs must be a positive integer, got {runs}")
        total = [0.0] * self.k
        for _ in range(runs):
            act = self.simulate(seeds)
            for i in range(self.k):
                total[i] += len(act[i])
        return [t / runs for t in total]

# --- Probability Generator ---
def generate_probabilities(G: nx.DiGraph, k: int, sigma: float = 0.1) -> Dict[int, Dict[Tuple[int, int], float]]:
    """Draw a noisy activation probability for every edge and every topic.

    For an edge ``(u, v)`` the base value is ``1 / in_degree(v)`` (with the
    in-degree floored at 1); zero-mean Gaussian noise with standard deviation
    ``sigma`` is added and the result is clipped to ``[0, 1]``.  Each topic
    gets its own independent noise draw per edge.

    Args:
        G: Directed graph supplying the nodes, edges and in-degrees.
        k: Number of topics.
        sigma: Standard deviation of the Gaussian noise.

    Returns:
        A dict mapping each topic index ``0..k-1`` to a dict of
        ``(u, v) -> probability``.
    """
    # In-degree of every node, never below 1 so the division is always defined.
    safe_indegree = {}
    for node in G.nodes():
        degree = G.in_degree(node)
        safe_indegree[node] = degree if degree > 0 else 1

    topic_probs: Dict[int, Dict[Tuple[int, int], float]] = {}
    for topic in range(k):
        topic_probs[topic] = {
            (src, dst): min(max(1.0 / safe_indegree[dst] + random.gauss(0, sigma), 0.0), 1.0)
            for src, dst in G.edges()
        }
    return topic_probs

# --- MLP Model for Influence Prediction ---
class InfluenceMLP(nn.Module):
    """Fully connected regressor: Linear+ReLU per hidden width, then one output unit."""

    def __init__(self, input_dim: int, hidden_dims: List[int] = [64, 32]):
        """Build the layer stack.

        Args:
            input_dim: Number of input features.
            hidden_dims: Width of each hidden layer, in order.  The default
                list is only read, never modified.
        """
        super().__init__()
        widths = [input_dim, *hidden_dims]
        stack = []
        # Consecutive width pairs give the (in, out) size of each hidden layer.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        stack.append(nn.Linear(widths[-1], 1))
        self.net = nn.Sequential(*stack)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the network and drop the trailing size-1 output dimension."""
        out = self.net(x)
        return out.squeeze(-1)

# --- Feature Extraction ---
def extract_features(seed_set: Set[int], degree_centrality: Dict[int, float], topic: int) -> torch.Tensor:
    """Encode a (seed set, topic) pair as a 3-element float32 feature vector.

    Args:
        seed_set: Nodes in the seed set.
        degree_centrality: Centrality score per node; nodes missing from the
            mapping count as 0.0.
        topic: Topic index, stored as a float in the last feature.

    Returns:
        Tensor ``[sum of centralities, seed-set size, topic]``.
    """
    centrality_total = 0
    for node in seed_set:
        centrality_total += degree_centrality.get(node, 0.0)
    features = [centrality_total, len(seed_set), float(topic)]
    return torch.tensor(features, dtype=torch.float32)

# --- Greedy Search for Optimal Seed Sets ---
def search_optimal_seeds(model: InfluenceMLP,
                         k: int,
                         G: nx.DiGraph,
                         degree_centrality: Dict[int, float],
                         budget: List[int]) -> List[Set[int]]:
    """Greedily pick seed nodes per topic using the model's predicted spread.

    For each topic, up to ``budget[i]`` nodes are added one at a time; each
    step adds the node whose inclusion yields the largest increase in the
    model's prediction over the current seed set.  Topics are handled
    independently of each other.

    Args:
        model: Trained predictor applied to the output of ``extract_features``.
        k: Number of topics.
        G: Graph whose nodes are the candidates.
        degree_centrality: Centrality score per node, forwarded to
            ``extract_features``.
        budget: ``budget[i]`` is the maximum number of seeds for topic ``i``.

    Returns:
        A list of ``k`` sets, the chosen seeds per topic.
    """
    seeds = [set() for _ in range(k)]
    nodes = set(G.nodes())
    # Scoring is pure inference: disable autograd so no computation graph is
    # built for each of the many forward passes below.
    with torch.no_grad():
        for i in range(k):
            for _ in range(budget[i]):
                best_node, best_gain = None, -float('inf')
                current_feat = extract_features(seeds[i], degree_centrality, i)
                current_pred = model(current_feat.unsqueeze(0))[0].item()
                for v in nodes - seeds[i]:
                    feat = extract_features(seeds[i] | {v}, degree_centrality, i)
                    pred = model(feat.unsqueeze(0))[0].item()
                    gain = pred - current_pred
                    if gain > best_gain:
                        best_gain, best_node = gain, v
                # No candidate left (every node is already a seed).
                if best_node is None:
                    break
                seeds[i].add(best_node)
    return seeds

# --- Main Workflow ---
if __name__ == "__main__":
    # Seed every RNG used below so a fresh run reproduces the same numbers:
    # `random` drives the seed-set sampling, the probability noise and the
    # cascade simulation; torch drives weight init and batch shuffling.
    SEED = 42
    random.seed(SEED)
    torch.manual_seed(SEED)

    # Load graph.  The file starts with a "<nodes> <2*edges>" header line, which
    # nx.read_edgelist would parse as an edge; load_facebook_graph skips it.
    path = "facebook"
    G_undirected = load_facebook_graph(path)
    G = G_undirected.to_directed()

    # Generate topic-specific probabilities
    k = 2
    sigma_noise = 0.1
    prob = generate_probabilities(G, k, sigma_noise)
    ic_model = MultiTopicIC(G, k, prob)

    # Precompute degree centrality
    degree_centrality = nx.degree_centrality(G)

    # Prepare training data by sampling random seed sets
    budget = [2] * k  # number of seeds per topic
    num_samples = 500
    all_nodes = list(G.nodes())  # built once instead of once per sample
    X_list, y_list = [], []
    for topic in range(k):
        for _ in range(num_samples):
            S = set(random.sample(all_nodes, budget[topic]))
            feat = extract_features(S, degree_centrality, topic)
            # Only the sampled topic gets seeds; the other topics start empty.
            spread = ic_model.expected_spread([S if t == topic else set() for t in range(k)], runs=50)[topic]
            X_list.append(feat)
            y_list.append(spread)

    # Build DataLoader
    X = torch.stack(X_list)
    y = torch.tensor(y_list, dtype=torch.float32)
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=64, shuffle=True)

    # Initialize and train MLP
    model = InfluenceMLP(input_dim=3)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()

    epochs = 20
    for epoch in range(epochs):
        total_loss = 0.0
        for xb, yb in loader:
            optimizer.zero_grad()
            preds = model(xb)
            loss = criterion(preds, yb)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch figure is a per-sample mean.
            total_loss += loss.item() * xb.size(0)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(dataset):.4f}")

    # Find optimal seed sets using trained MLP
    optimal_seeds = search_optimal_seeds(model, k, G, degree_centrality, budget)
    print("Optimal seed sets:", optimal_seeds)

    # Compute predicted spread (inference only, so autograd is switched off)
    with torch.no_grad():
        spreads = [model(extract_features(optimal_seeds[i], degree_centrality, i).unsqueeze(0))[0].item() for i in range(k)]
    total_spread = sum(spreads)
    print("Predicted spread per topic:", spreads)
    print("Total predicted spread:", total_spread)
