In [1]:
import networkx as nx
import numpy as np
import pathlib
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
from sklearn import metrics
from sklearn.model_selection import train_test_split


In [2]:
base_dir = pathlib.Path().cwd().parent
data_dir = base_dir / 'data' / 'raw'
processed_data_dir = base_dir / 'data' / 'processed'

RETWEET_GRAPH_FILENAME = 'lcc_retweet.gml'
USER_LABELS_FILENAME = 'lcc_retweet_labels.npy'


In [3]:
retweet_network_lcc = nx.read_graphml(processed_data_dir / RETWEET_GRAPH_FILENAME)
user_labels = np.load(processed_data_dir / USER_LABELS_FILENAME)


# Split dataset in training-validation-test set

In [4]:
tr_perc, val_perc, test_perc = .6, .1, .3
user_idxs, num_nodes = np.array(range(len(user_labels))), len(user_labels)
tr_node_ids, test_node_ids = train_test_split(user_idxs, train_size=0.6, test_size=0.3, stratify=user_labels)
val_node_ids = np.delete(range(user_labels.shape[0]), np.concatenate([tr_node_ids, test_node_ids]))

assert (tr_node_ids.shape[0] + val_node_ids.shape[0] + test_node_ids.shape[0]) == num_nodes
assert np.intersect1d(tr_node_ids, np.concatenate([val_node_ids, test_node_ids])).shape[0] == 0


In [5]:
# Unify training and validation for unsupervised baselines
val_for_unsupervised_node_ids = np.concatenate([tr_node_ids, val_node_ids])


# Model evaluation

## Baseline method 1: Edge filtering

In [6]:
node_ids_list = list(retweet_network_lcc.nodes())
# Relabel each node from 0 to N-1
node_remapping = {node_ids_list[i]: i for i in range(len(node_ids_list))}
retweet_network_lcc = nx.relabel_nodes(retweet_network_lcc, node_remapping)

# Make edge weights of float type
for u,v,d in retweet_network_lcc.edges(data=True):
    d['weight'] = float(d['weight'])

weights_list = [attrs["weight"] for _, _, attrs in retweet_network_lcc.edges(data=True)]
unique_weights = sorted(list(set(weights_list)))

predicted_labels_list = []
# Perform different predictions based on different thresholds
for weight_percentile in tqdm(np.arange(0, 100, 0.5)):
    weight_threshold = np.percentile(weights_list, weight_percentile)
    G = retweet_network_lcc.copy()
    predicted_labels = np.full(shape=G.number_of_nodes(), fill_value=1)
    G.remove_edges_from([(a,b) for a, b, attrs in G.edges(data=True) if float(attrs["weight"]) <= weight_threshold])
    legitimate_users = [int(i) for i in nx.isolates(G)]
    if len(legitimate_users) > 0:
        predicted_labels[legitimate_users] = 0
    predicted_labels_list.append(np.copy(predicted_labels))
    

  0%|          | 0/200 [00:00<?, ?it/s]

### Select best threshold

In [7]:
metrics_dict = {'f1_macro': [], 
                'f1_micro': [], 
                'accuracy': [],
                'precision': []}
for i in tqdm(range(len(predicted_labels_list))):
    metrics_dict['f1_macro'].append(metrics.f1_score(user_labels[val_for_unsupervised_node_ids], 
                                                     predicted_labels_list[i][val_for_unsupervised_node_ids], average='macro'))
    metrics_dict['f1_micro'].append(metrics.f1_score(user_labels[val_for_unsupervised_node_ids], 
                                                     predicted_labels_list[i][val_for_unsupervised_node_ids], average='micro'))
    metrics_dict['accuracy'].append(metrics.accuracy_score(user_labels[val_for_unsupervised_node_ids], 
                                                           predicted_labels_list[i][val_for_unsupervised_node_ids]))
    metrics_dict['precision'].append(metrics.precision_score(user_labels[val_for_unsupervised_node_ids], 
                                                             predicted_labels_list[i][val_for_unsupervised_node_ids]))

VAL_METRIC = 'f1_macro' # f1_macro f1_micro accuracy precision
best_val_threshold = np.argmax(metrics_dict[VAL_METRIC])


  0%|          | 0/200 [00:00<?, ?it/s]

### Evaluate on the test set

In [8]:
test_metrics_edge_filtering_dict = {'f1_macro': None, 
                                    'f1_micro': None, 
                                    'accuracy': None,
                                    'precision': None}

test_metrics_edge_filtering_dict['f1_macro'] = metrics.f1_score(user_labels[test_node_ids], 
                                                     predicted_labels_list[best_val_threshold][test_node_ids], average='macro')
test_metrics_edge_filtering_dict['f1_micro'] = metrics.f1_score(user_labels[test_node_ids], 
                                                     predicted_labels_list[best_val_threshold][test_node_ids], average='micro')
test_metrics_edge_filtering_dict['accuracy'] = metrics.accuracy_score(user_labels[test_node_ids], 
                                                     predicted_labels_list[best_val_threshold][test_node_ids])
test_metrics_edge_filtering_dict['precision'] = metrics.precision_score(user_labels[test_node_ids], 
                                                     predicted_labels_list[best_val_threshold][test_node_ids])

print('Edge filtering method result on test set:')
print(test_metrics_edge_filtering_dict)


Edge filtering method result on test set:
{'f1_macro': 0.7554690820229495, 'f1_micro': 0.7559009786989063, 'accuracy': 0.7559009786989062, 'precision': 0.8076923076923077}


## Baseline method 2: Node pruning

In [9]:
centrality_values = nx.eigenvector_centrality(retweet_network_lcc)
# Transform into a list
centrality_val_list = [-1]*retweet_network_lcc.number_of_nodes()
for node_id in tqdm(centrality_values):
    centrality_val_list[node_id] = centrality_values[node_id]
centrality_val_list = np.array(centrality_val_list)


  0%|          | 0/5788 [00:00<?, ?it/s]

In [10]:
predicted_labels_list = []
for percentile in tqdm(np.arange(0, 100, 0.5)):
    centrality_threshold = np.percentile(centrality_val_list, percentile)
    predicted_labels = np.full(shape=retweet_network_lcc.number_of_nodes(), fill_value=1)
    coordinated_users = np.where(centrality_val_list <= centrality_threshold)[0]
    predicted_labels[coordinated_users] = 0
    predicted_labels_list.append(np.copy(predicted_labels))
    

  0%|          | 0/200 [00:00<?, ?it/s]

### Select best threshold

In [11]:
metrics_dict = {'f1_macro': [], 
                'f1_micro': [], 
                'accuracy': [],
                'precision': []}
for i in tqdm(range(len(predicted_labels_list))):
    metrics_dict['f1_macro'].append(metrics.f1_score(user_labels[val_for_unsupervised_node_ids], 
                                                     predicted_labels_list[i][val_for_unsupervised_node_ids], average='macro'))
    metrics_dict['f1_micro'].append(metrics.f1_score(user_labels[val_for_unsupervised_node_ids], 
                                                     predicted_labels_list[i][val_for_unsupervised_node_ids], average='micro'))
    metrics_dict['accuracy'].append(metrics.accuracy_score(user_labels[val_for_unsupervised_node_ids], 
                                                           predicted_labels_list[i][val_for_unsupervised_node_ids]))
    metrics_dict['precision'].append(metrics.precision_score(user_labels[val_for_unsupervised_node_ids], 
                                                             predicted_labels_list[i][val_for_unsupervised_node_ids]))
VAL_METRIC = 'f1_macro' # f1_macro f1_micro accuracy precision
best_val_threshold = np.argmax(metrics_dict[VAL_METRIC])


  0%|          | 0/200 [00:00<?, ?it/s]

### Evaluate on the test set

In [12]:
test_metrics_node_pruning_dict = {'f1_macro': None, 
                                    'f1_micro': None, 
                                    'accuracy': None,
                                    'precision': None}

test_metrics_node_pruning_dict['f1_macro'] = metrics.f1_score(user_labels[test_node_ids], 
                                                     predicted_labels_list[best_val_threshold][test_node_ids], average='macro')
test_metrics_node_pruning_dict['f1_micro'] = metrics.f1_score(user_labels[test_node_ids], 
                                                     predicted_labels_list[best_val_threshold][test_node_ids], average='micro')
test_metrics_node_pruning_dict['accuracy'] = metrics.accuracy_score(user_labels[test_node_ids], 
                                                     predicted_labels_list[best_val_threshold][test_node_ids])
test_metrics_node_pruning_dict['precision'] = metrics.precision_score(user_labels[test_node_ids], 
                                                     predicted_labels_list[best_val_threshold][test_node_ids])

print('Node pruning method result on test set:')
print(test_metrics_node_pruning_dict)


Node pruning method result on test set:
{'f1_macro': 0.7910356789973291, 'f1_micro': 0.7921704087507196, 'accuracy': 0.7921704087507196, 'precision': 0.823658269441402}


## Supervised method 1: Node2Vec

In [13]:
from node2vec import Node2Vec


In [14]:
hidden_dim = 32
# Precompute probabilities and generate walks - **ON WINDOWS ONLY WORKS WITH workers=1**
node2vec = Node2Vec(retweet_network_lcc, 
                    dimensions=hidden_dim, 
                    walk_length=5, 
                    num_walks=10, 
                    workers=8) 

# Embed nodes
model = node2vec.fit(window=8, min_count=1, batch_words=4)  # Any keywords acceptable by gensim.Word2Vec can be passed, `dimensions` and `workers` are automatically passed (from the Node2Vec constructor)
node_embeddings_node2vec = np.full(shape=(retweet_network_lcc.number_of_nodes(), hidden_dim),fill_value=None)
for node_id in retweet_network_lcc.nodes():
    node_embeddings_node2vec[int(node_id)] = model.wv[node_id]



Computing transition probabilities:   0%|          | 0/5788 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 2/2 [00:01<00:00,  1.66it/s]
Generating walks (CPU: 2): 100%|██████████| 2/2 [00:01<00:00,  1.67it/s]
Generating walks (CPU: 3): 100%|██████████| 1/1 [00:00<00:00,  1.58it/s]
Generating walks (CPU: 4): 100%|██████████| 1/1 [00:00<00:00,  1.23it/s]
Generating walks (CPU: 5): 100%|██████████| 1/1 [00:00<00:00,  1.37it/s]
Generating walks (CPU: 6): 100%|██████████| 1/1 [00:00<00:00,  1.54it/s]
Generating walks (CPU: 7): 100%|██████████| 1/1 [00:00<00:00,  1.56it/s]
Generating walks (CPU: 8): 100%|██████████| 1/1 [00:00<00:00,  1.57it/s]


### Evaluate on the test set

In [16]:
from sklearn.linear_model import LogisticRegression


In [18]:
node_classifier = LogisticRegression()
node_classifier.fit(node_embeddings_node2vec[tr_node_ids], user_labels[tr_node_ids])
test_logits = node_classifier.predict_proba(node_embeddings_node2vec[test_node_ids])[:, 1]
test_pred = node_classifier.predict(node_embeddings_node2vec[test_node_ids])

test_metrics_node2vec_dict = {'f1_macro': None, 
                              'f1_micro': None, 
                              'accuracy': None,
                              'precision': None,
                              'roc_auc': None}

test_metrics_node2vec_dict['f1_macro'] = metrics.f1_score(user_labels[test_node_ids], 
                                                     test_pred, average='macro')
test_metrics_node2vec_dict['f1_micro'] = metrics.f1_score(user_labels[test_node_ids], 
                                                     test_pred, average='micro')
test_metrics_node2vec_dict['accuracy'] = metrics.accuracy_score(user_labels[test_node_ids], 
                                                     test_pred)
test_metrics_node2vec_dict['precision'] = metrics.precision_score(user_labels[test_node_ids], 
                                                     test_pred)
test_metrics_node2vec_dict['roc_auc'] = metrics.roc_auc_score(user_labels[test_node_ids], 
                                                                              test_logits)

print('Node2Vec method result on test set:')
print(test_metrics_node2vec_dict)


Node2Vec method result on test set:
{'f1_macro': 0.909159541714494, 'f1_micro': 0.9101899827288429, 'accuracy': 0.9101899827288429, 'precision': 0.9103092783505154, 'roc_auc': 0.9739134507306108}


## Supervised method 2: GCN

In [None]:
##

In [None]:
##