# Networks and Time: Latent Space Model Implementation

## Preliminaries

In [53]:
!pip install node2vec

Collecting node2vec
  Downloading node2vec-0.4.6-py3-none-any.whl.metadata (743 bytes)
Collecting networkx<3.0,>=2.5 (from node2vec)
  Downloading networkx-2.8.8-py3-none-any.whl.metadata (5.1 kB)
Collecting FuzzyTM>=0.4.0 (from gensim<5.0.0,>=4.1.2->node2vec)
  Downloading FuzzyTM-2.0.6-py3-none-any.whl.metadata (7.9 kB)
Collecting pyfume (from FuzzyTM>=0.4.0->gensim<5.0.0,>=4.1.2->node2vec)
  Downloading pyFUME-0.3.1-py3-none-any.whl.metadata (9.7 kB)
Collecting simpful (from pyfume->FuzzyTM>=0.4.0->gensim<5.0.0,>=4.1.2->node2vec)
  Downloading simpful-2.12.0-py3-none-any.whl.metadata (4.8 kB)
Collecting fst-pso (from pyfume->FuzzyTM>=0.4.0->gensim<5.0.0,>=4.1.2->node2vec)
  Downloading fst-pso-1.8.1.tar.gz (18 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting miniful (from fst-pso->pyfume->FuzzyTM>=0.4.0->gensim<5.0.0,>=4.1.2->node2vec)
  Downloading miniful-0.0.6.tar.gz (2.8 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hDownloading node2vec-0.4.6-py3-none-any

In [54]:
## libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx
import zipfile
import json
import pickle
import pycountry
import seaborn as sns
import math

from scipy.sparse.csgraph import laplacian
from scipy.linalg import eigh
from scipy.integrate import quad
from statsmodels.tsa.ar_model import AutoReg
from scipy.stats import ttest_ind
from sklearn.preprocessing import MinMaxScaler

### Trade 

In [None]:
## loading trade data in
zf = zipfile.ZipFile('/Users/teddyyankov/Library/CloudStorage/OneDrive-Nexus365/Data-Driven Network Science/conference/trade_network_data.csv.zip')
trade_df = pd.read_csv (zf.open('trade_network_data.csv'), index_col=0)
trade_df.drop(trade_df.columns[trade_df.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True)

In [None]:
# Function to convert country name to ISO3 code
def get_iso3_code(country_name):
    try:
        country = pycountry.countries.get(name=country_name)
        return country.alpha_3
    except:
        return None

# Apply the function to convert location_name_1 to ISO3 code
trade_df['location_name_1'] = trade_df['location_name_1'].apply(get_iso3_code)

# Apply the function to convert location_name_2 to ISO3 code
trade_df['location_name_2'] = trade_df['location_name_2'].apply(get_iso3_code)

In [None]:
trade_df = trade_df.dropna(subset=['location_name_1', 'location_name_2'])

In [None]:
# Create a MinMaxScaler object
scaler = MinMaxScaler()

# Apply min-max normalization to the "edge_value" variable
trade_df['edge_value'] = scaler.fit_transform(trade_df[['edge_value']])

In [None]:
## creating graphs
trade_graphs = []

for i in sorted(trade_df['year'].unique()):

    edges = pd.DataFrame(
        {
            "source": list(trade_df['location_name_1']),
            "target": list(trade_df['location_name_2']),
            "weight": list(trade_df['edge_value'])
        }
    )
    
    G = nx.from_pandas_edgelist (edges, edge_attr = True)
    G.graph['year'] = int(i)

    # target_node = 'PAK'

    # # Get the one-hop neighbors of the target node
    # one_hop_neighbors = list(G.neighbors(target_node))
    # one_hop_neighbors.append(target_node)  # Adding the target node itself

    # # Create a subgraph with only the specified node and its one-hop neighbors
    # subgraph = G.subgraph(one_hop_neighbors)

    trade_graphs.append(G)

In [None]:
## drawing an example network
G = trade_graphs[2]
pos = nx.spring_layout(G, seed=42)
nx.draw_networkx (G, pos, with_labels=True, node_size=5, node_color='lightblue', font_size=7, font_color='black', edge_color='gray', width=0.5)

In [None]:
# filtered_trade_graphs = []

# for graph in trade_graphs:
    
#     star_graph = nx.Graph()
#     star_graph.add_nodes_from (graph.nodes())
    
#     for neighbor in graph.neighbors("PAK"):
#         star_graph.add_edge("PAK", neighbor)
    
#     filtered_trade_graphs.append(star_graph)

In [None]:
# ## drawing an example network
# G = filtered_trade_graphs[2]
# pos = nx.spring_layout(G, seed=42)
# nx.draw_networkx (G, pos, with_labels=True, node_size=5, node_color='lightblue', font_size=7, font_color='black', edge_color='gray', width=0.5)

In [None]:
# print (filtered_trade_graphs[2].nodes() == trade_graphs[2].nodes())

### Terrorism

In [None]:
# Load the ISO_pairs_terrorism2.csv file into a DataFrame
iso_pairs_df = pd.read_csv("/Users/teddyyankov/Library/CloudStorage/OneDrive-Nexus365/Data-Driven Network Science/conference/ISO_pairs_terrorism3.csv")

# Filter the years in ISO_pairs_terrorism2 to contain the same years as trade_df
filtered_iso_pairs_df = iso_pairs_df[iso_pairs_df['iyear'].isin(trade_df['year'])]
filtered_iso_pairs_df = filtered_iso_pairs_df [(filtered_iso_pairs_df['source'] != "IRN") | (filtered_iso_pairs_df['target'] != "IRN")]
# filtered_iso_pairs_df = filtered_iso_pairs_df [(filtered_iso_pairs_df['source'] == "PAK") | (filtered_iso_pairs_df['target'] == "PAK")]

In [None]:
filtered_iso_pairs_df.dropna(subset=['source'], inplace=True)

In [None]:
filtered_iso_pairs_df['count'] = scaler.fit_transform(filtered_iso_pairs_df[['count']])

In [None]:
# Create an empty graph
terrorism_graphs = []

# Iterate over each year in trade_df
for i in sorted(trade_df['year'].unique()):
    
    # Empty graph
    graph = nx.Graph()
    
    # index
    index = int(i)
    index = i - 2000
    index = int (index)
    
    # Get the graph corresponding to the year from trade_graphs
    year_graph = trade_graphs[index]
    
    # Add all the nodes from the year_graph to the graph
    graph.add_nodes_from (year_graph.nodes())
    
    # Get the pairs of nodes for the current year from filtered_iso_pairs_df
    year_pairs = filtered_iso_pairs_df [filtered_iso_pairs_df['iyear'] == i][['source', 'target', 'count']]
    
    # Add edges to the graph based on the pairs of nodes in year_pairs
    for _, row in year_pairs.iterrows():
        source = row['source']
        target = row['target']
        weight = row['count']
        if source in graph.nodes() and target in graph.nodes():
            graph.add_edge(source, target, weight = weight)
    
    # Remove nodes from graph that are not in year_graph
    # graph.remove_nodes_from(node for node in graph.nodes() if node not in year_graph.nodes())
    
    # adding graph 
    terrorism_graphs.append(graph)

In [None]:
G = terrorism_graphs[14]
pos = nx.spring_layout(G, seed=41, k = .8)
nx.draw_networkx (G, pos, with_labels=True, node_size=5, node_color='lightblue', font_size=7, font_color='black', edge_color='gray', width=0.5)

In [None]:
print (trade_graphs[14].nodes() == terrorism_graphs[14].nodes())

### Migration

In [None]:
mig_df = pd.read_csv ("/Users/teddyyankov/Library/CloudStorage/OneDrive-Nexus365/Applied Analytical Statitics/Summative/dat_imputed.csv")
df_subset = pd.DataFrame (mig_df[['orig', 'dest', 'year', 'forced_mig']])
df_subset_filtered = df_subset[df_subset['year'].isin(trade_df['year'])]
df_subset_filtered['forced_mig'] = scaler.fit_transform(df_subset_filtered[['forced_mig']])
display (df_subset_filtered)

In [None]:
# Create an empty graph
mig_graphs = []

# Iterate over each year in trade_df
for i in sorted(trade_df['year'].unique()):
    
    # Empty graph
    graph = nx.Graph()
    
    # index
    index = int(i)
    index = i - 2000
    index = int (index)
    
    # Get the graph corresponding to the year from trade_graphs
    year_graph = trade_graphs[index]
    
    # Add all the nodes from the year_graph to the graph
    graph.add_nodes_from (year_graph.nodes())
    
    # Get the pairs of nodes for the current year from filtered_iso_pairs_df
    year_pairs = df_subset_filtered [df_subset_filtered['year'] == i][['orig', 'dest', 'forced_mig']]
    
    # Add edges to the graph based on the pairs of nodes in year_pairs
    for _, row in year_pairs.iterrows():
        source = row['orig']
        target = row['dest']
        weight = row['forced_mig']
        if source in graph.nodes() and target in graph.nodes():
            graph.add_edge(source, target, weight=weight)

    # Remove nodes from graph that are not in year_graph
    # graph.remove_nodes_from(node for node in graph.nodes() if node not in year_graph.nodes())

    # adding graph 
    mig_graphs.append(graph)

In [None]:
G = mig_graphs[14]
pos = nx.spring_layout(G, seed=41, k = .8)
nx.draw_networkx (G, pos, with_labels=True, node_size=5, node_color='lightblue', font_size=7, font_color='black', edge_color='gray', width=0.5)

In [None]:
print (mig_graphs[14].nodes() == terrorism_graphs[14].nodes())
print (mig_graphs[14].nodes() == trade_graphs[14].nodes())

## Distances

In [None]:
import pickle

# Save mig_graphs
with open('mig_graphs.pkl', 'wb') as f:
    pickle.dump(mig_graphs, f)

# Save trade_graphs
with open('trade_graphs.pkl', 'wb') as f:
    pickle.dump(trade_graphs, f)

# Save terrorism_graphs
with open('terrorism_graphs.pkl', 'wb') as f:
    pickle.dump(terrorism_graphs, f)

In [None]:
def spanning_tree_similarity(G1, G2):
    '''
    ## Parameters
    G1: Graph object at time t
    G2: Graph object at time t+n

    ## Return 
    Spanning tree similarity metric
    '''

    # create empty list of eigenvalues 
    eigens = []

    for G in [G1, G2]:
        L = nx.normalized_laplacian_matrix(G) # generate the Laplacian

        L_eigens = np.linalg.eigvals(L.toarray()) # obtain the eigenvalues of L

        L_eigens = L_eigens[L_eigens>0] # get eigenvalues that are larger than 0 

        eigens.append(sorted(L_eigens, reverse=False)) # sort from small to big 

    # calculate the product of eigen values
        
    ST_vals = []
    
    for eigen in eigens:
        prod = np.prod(eigen)

        ST = prod / len(eigen)

        ST_vals.append(ST)
    
    # calculate the difference between the two values 
    distance = np.abs(np.log(ST_vals[1]) - np.log(ST_vals[0]))

    return distance

In [None]:
## distance functions 
## creating weighted versions of my functions
# Jaccard
def weighted_jaccard_distance(G1, G2):
    
    '''
    Function to compute the weighted version of the Jaccard distance between two graphs
    using their adjacency matrices
    
    - Input: a pair of network graph objects
    - Output: Jaccard distance coefficient
    '''
    
    # getting adjacency matrices of the graphs
    A1 = nx.linalg.graphmatrix.adjacency_matrix (G1, weight='weight').todense()
    A2 = nx.linalg.graphmatrix.adjacency_matrix (G2, weight='weight').todense()

    # computing numerator and denominator of the weighted Jaccard distance
    numerator = np.sum (np.abs (A1 - A2))
    denominator = np.sum (np.maximum (A1, A2))

    # computing the weighted Jaccard distance
    jaccard_distance = numerator / denominator
    return jaccard_distance

# IM
def weighted_ipsenMikhailov_distance (G1, G2, gamma = 0.1, limit = 100): 
    
    '''
    ADD DESCRIPTION
    '''
    from scipy.integrate import simps
    
    # weighted adjacency matrices
    A1 = nx.linalg.graphmatrix.adjacency_matrix (G1, weight = 'weight').todense()
    A2 = nx.linalg.graphmatrix.adjacency_matrix (G2, weight = 'weight').todense()
    
    # baseline function for IM distances
    def IMdistance (A1, A2, gamma): 
            
        # number of nodes 
        n = len(A1)
            
        # Laplacians
        L1 = laplacian (A1, normed = False)
        L2 = laplacian (A2, normed = False)
            
        # ω: vibrational frequencies
        w1 = np.sqrt (np.abs (eigh (L1)[0][1:]))
        w2 = np.sqrt (np.abs (eigh (L2)[0][1:]))
            
        # normalisation constants K (l2 norm)
        norm1 = (n - 1) * np.pi / 2 - np.sum (np.arctan (-w1 / gamma))
        norm2 = (n - 1) * np.pi / 2 - np.sum (np.arctan (-w2 / gamma))
            
        # spectral densitites ρ(ω, γ)
        density1 = lambda w: np.sum (gamma / ((w - w1) ** 2 + gamma**2)) / norm1
        density2 = lambda w: np.sum (gamma / ((w - w2) ** 2 + gamma**2)) / norm2
            
        # IM distance
        func = lambda w: (density1(w) - density2(w)) ** 2
        return np.sqrt (quad (func, 0, np.inf, limit = limit)[0])

    # computing distance
    distance = IMdistance (A1, A2, gamma)
    return distance

# poly 
def weighted_polynomial_distance (G1, G2, k = 5, alpha = 1): 
    
    '''
    Function to compute the polynomial spectral distance between two graphs
    using their polynomial transformation of the eigenvalues of the
    of the adjacency matrix in combination with the eigenvectors of the
    adjacency matrix.
    
    - Input(s): 
            G1, G2 -> a pair of network graph objects
            k -> maximum degree of the polynomial used in 
                 the polynomial dissimilarity distance calculation
            alpha -> parameter controlling the influence of the 
                 polynomial transformation on the similarity score calculation
    - Output: Polynomial distance coefficient
    '''
    
    # getting adjacency matrices of the graphs
    A1 = nx.linalg.graphmatrix.adjacency_matrix (G1, weight = 'weight').todense()
    A2 = nx.linalg.graphmatrix.adjacency_matrix (G2, weight = 'weight').todense()
    
    # similarity 
    def similarity(A, k, alpha): 
        
        # eigen-decomposition
        eigVals, eigVec = np.linalg.eig(A)
        
        # shape of adjMatrix -> number of nodes
        n = np.shape(A)[0]
        
        # defining polynomial
        def polynomial(degree):
            
            # replicating formula
            return eigVals**degree / (n - 1) ** (alpha * (degree - 1))
        
        # diagonal matrix constructed from the sum of the polynomial transformations
        W = np.diag (sum([polynomial(k) for k in range (1, k + 1)]))
        
        # similarity score matrix 
        similarityScore = np.dot (np.dot (eigVec, W), eigVec.T)
        return similarityScore
    
    # computing similarityScore for each adjMatrix
    simi_A1 = similarity(A1, k, alpha)
    simi_A2 = similarity(A2, k, alpha)
    
    # polynomial distance
    polyDist = np.linalg.norm (simi_A1 - simi_A2, ord = "fro") / A1.shape[0] ** 2
    
    return polyDist

In [None]:
## Jaccard mig and terror series
mig_ter_jaccard = []

for i in range(len(mig_graphs)):
    mig_graph = mig_graphs[i]
    terrorism_graph = terrorism_graphs[i]
    distance = weighted_jaccard_distance (mig_graph, terrorism_graph)
    mig_ter_jaccard.append(distance)

mig_ter_jaccard = pd.Series (mig_ter_jaccard)
print (mig_ter_jaccard)

In [None]:
## Jaccard trade and terror series
tra_ter_jaccard = []

for i in range(len(trade_graphs)):
    trade_graph = trade_graphs[i]
    terrorism_graph = terrorism_graphs[i]
    distance = weighted_jaccard_distance (trade_graph, terrorism_graph)
    tra_ter_jaccard.append(distance)

tra_ter_jaccard = pd.Series (tra_ter_jaccard)
print (tra_ter_jaccard)

In [None]:
## Jaccard mig and terror series
mig_ter_poly = []

for i in range(len(mig_graphs)):
    mig_graph = mig_graphs[i]
    terrorism_graph = terrorism_graphs[i]
    distance = weighted_polynomial_distance (mig_graph, terrorism_graph)
    mig_ter_poly.append(distance)

mig_ter_poly = pd.Series (mig_ter_poly)
print (mig_ter_poly)

In [None]:
## Jaccard mig and terror series
tra_ter_poly = []

for i in range(len(trade_graphs)):
    trade_graph = trade_graphs[i]
    terrorism_graph = terrorism_graphs[i]
    distance = weighted_polynomial_distance (trade_graph, terrorism_graph, k = 5, alpha = 1)
    tra_ter_poly.append(distance)

tra_ter_poly = pd.Series (tra_ter_poly)
print (tra_ter_poly)

In [None]:
# spanning mig and terror
mig_ter_span = []

for i in range(len(mig_graphs)):
    mig_graph = mig_graphs[i]
    terrorism_graph = terrorism_graphs[i]
    distance = spanning_tree_similarity (mig_graph, terrorism_graph)
    mig_ter_span.append(distance)

mig_ter_span = pd.Series (mig_ter_span)
print (mig_ter_span)

In [None]:
# spanning mig and terror
tra_ter_span = []

for i in range(len(mig_graphs)):
    trade_graph = trade_graphs[i]
    terrorism_graph = terrorism_graphs[i]
    distance = spanning_tree_similarity (trade_graph, terrorism_graph)
    tra_ter_span.append(distance)

tra_ter_span = pd.Series (tra_ter_span)
print (tra_ter_span)

## Creating average networks

### Flattening time series

#### Migration

In [None]:
## remove final element of mig_graphs
mig_graphs2 = mig_graphs.copy()
mig_graphs2.pop()
mig_graphs2.pop()

# Create a new list for the scaled graphs
scaled_mig_graphs = []

# Calculate the scaling factors
num_graphs = len(mig_graphs2)
scaling_factors = np.exp(np.linspace(-1, 0, num_graphs))

# Iterate over each graph in mig_graphs
for i, graph in enumerate(mig_graphs2):
    
    # Create a new graph
    scaled_graph = nx.Graph()

    # Iterate over each edge in the current graph
    for u, v, data in graph.edges(data=True):
        # Scale the weight and add the edge to the new graph
        scaled_weight = data['weight'] * scaling_factors[i]
        scaled_graph.add_edge(u, v, weight=scaled_weight)

    # Add the new graph to the list of scaled graphs
    scaled_mig_graphs.append(scaled_graph)

In [None]:
# Create a new graph
mig_graph_flat = nx.Graph()

# Iterate over each graph in mig_graphs
for graph in scaled_mig_graphs:

    # Iterate over each edge in the current graph
    for u, v, data in graph.edges(data=True):

        # If the edge already exists in the combined_graph, add the weight to the existing weight
        if mig_graph_flat.has_edge(u, v):
            mig_graph_flat[u][v]['weight'] += data['weight']

        # If the edge does not exist in the combined_graph, add it with the current weight
        else:
            mig_graph_flat.add_edge(u, v, weight=data['weight'])

In [None]:
mig_graph_flat.get_edge_data('SYR', 'GRC')

#### Trade

In [None]:
## remove final element of mig_graphs
trade_graphs2 = trade_graphs.copy()
trade_graphs2.pop()
trade_graphs2.pop()

# Create a new list for the scaled graphs
scaled_trade_graphs = []

# Calculate the scaling factors
num_graphs = len(trade_graphs2)
scaling_factors = np.exp(np.linspace(-1, 0, num_graphs))

# Iterate over each graph in mig_graphs
for i, graph in enumerate(trade_graphs2):
    
    # Create a new graph
    scaled_graph = nx.Graph()

    # Iterate over each edge in the current graph
    for u, v, data in graph.edges(data=True):
        # Scale the weight and add the edge to the new graph
        scaled_weight = data['weight'] * scaling_factors[i]
        scaled_graph.add_edge(u, v, weight=scaled_weight)

    # Add the new graph to the list of scaled graphs
    scaled_trade_graphs.append(scaled_graph)

In [None]:
# Create a new graph
trade_graph_flat = nx.Graph()

# Iterate over each graph in mig_graphs
for graph in scaled_trade_graphs:

    # Iterate over each edge in the current graph
    for u, v, data in graph.edges(data=True):

        # If the edge already exists in the combined_graph, add the weight to the existing weight
        if trade_graph_flat.has_edge(u, v):
            trade_graph_flat[u][v]['weight'] += data['weight']

        # If the edge does not exist in the combined_graph, add it with the current weight
        else:
            trade_graph_flat.add_edge(u, v, weight=data['weight'])

#### Terrorism

In [None]:
## getting the last graph in terrorism graphs
terrorism_graphs2 = terrorism_graphs.copy()
terrorism_graph_2020 = terrorism_graphs2[-2]

### Mig-trade average network

In [None]:
# Create a new graph
combined_graph_flat = nx.Graph()

# Iterate over each edge in mig_graph_flat
for u, v, data in mig_graph_flat.edges(data=True):

    # If the edge also exists in trade_graph_flat, add the average weight to the combined_graph
    if trade_graph_flat.has_edge(u, v):
        avg_weight = (data['weight'] + trade_graph_flat[u][v]['weight']) / 2
        combined_graph_flat.add_edge(u, v, weight=avg_weight)

# Iterate over each edge in trade_graph_flat
for u, v, data in trade_graph_flat.edges(data=True):
    
    # If the edge does not exist in the combined_graph (and therefore does not exist in mig_graph_flat), add it with the current weight
    if not combined_graph_flat.has_edge(u, v):
        combined_graph_flat.add_edge(u, v, weight=data['weight'])

In [None]:
# Get the nodes from both graphs
mig_nodes = set(mig_graph_flat.nodes)
trade_nodes = set(trade_graph_flat.nodes)

# Find the common nodes
common_nodes = mig_nodes.intersection(trade_nodes)

print(common_nodes)

## Implementing LSM

In [67]:
## libraries
from node2vec import Node2Vec
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
import random
from sklearn.model_selection import cross_val_score

In [59]:
# Get all nodes in the graph
nodes = list(terrorism_graph_2020.nodes)

# Generate non-edges
non_edges = []
while len(non_edges) < len(terrorism_graph_2020.edges):  # generate the same number of non-edges as there are edges
    # Randomly select two nodes
    i, j = random.sample(nodes, 2)

    # If there is no edge between them, add them as a non-edge
    if not terrorism_graph_2020.has_edge(i, j):
        non_edges.append((i, j))

In [66]:
# Generate Node2Vec embeddings
node2vec = Node2Vec(combined_graph_flat, dimensions=64, walk_length=30, num_walks=200, workers=4)
model = node2vec.fit(window=10, min_count=1)

# Prepare the link prediction data
positive_examples = [np.concatenate ((model.wv[str(i)], model.wv[str(j)])) for i, j in terrorism_graph_2020.edges]
negative_examples = [np.concatenate ((model.wv[str(i)], model.wv[str(j)])) for i, j in non_edges] 
X = positive_examples + negative_examples
y = [1] * len (positive_examples) + [0] * len (negative_examples)
X_train, X_test, y_train, y_test = train_test_split (X, y, test_size = 0.5, random_state = 42)

# Train a logistic regression model
clf = LogisticRegression(random_state=0).fit(X_train, y_train)

# Evaluate the model
y_pred = clf.predict_proba(X_test)[:, 1]
print('AUC-ROC:', roc_auc_score(y_test, y_pred))

Computing transition probabilities:   0%|          | 0/211 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 50/50 [00:04<00:00, 11.92it/s]
Generating walks (CPU: 2): 100%|██████████| 50/50 [00:04<00:00, 11.85it/s]
Generating walks (CPU: 3): 100%|██████████| 50/50 [00:04<00:00, 12.06it/s]
Generating walks (CPU: 4): 100%|██████████| 50/50 [00:04<00:00, 11.81it/s]


AUC-ROC: 0.75


In [68]:
# Generate Node2Vec embeddings
node2vec = Node2Vec(combined_graph_flat, dimensions=64, walk_length=30, num_walks=200, workers=4)
model = node2vec.fit(window=10, min_count=1)

# Prepare the link prediction data
positive_examples = [np.concatenate((model.wv[str(i)], model.wv[str(j)])) for i, j in terrorism_graph_2020.edges]
negative_examples = [np.concatenate((model.wv[str(i)], model.wv[str(j)])) for i, j in non_edges]
X = positive_examples + negative_examples
y = [1] * len(positive_examples) + [0] * len(negative_examples)

# Train a logistic regression model with 15-fold cross-validation
clf = LogisticRegression(random_state=0)
scores = cross_val_score(clf, X, y, cv=15, scoring='roc_auc')

# Print the AUC-ROC score for each fold
for i, score in enumerate(scores, start=1):
    print(f'AUC-ROC for fold {i}: {score}')

# Print the average AUC-ROC score
print('Average AUC-ROC:', scores.mean())

Computing transition probabilities:   0%|          | 0/211 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 50/50 [00:04<00:00, 11.75it/s]
Generating walks (CPU: 2): 100%|██████████| 50/50 [00:04<00:00, 11.11it/s]
Generating walks (CPU: 3): 100%|██████████| 50/50 [00:04<00:00, 11.40it/s]
Generating walks (CPU: 4): 100%|██████████| 50/50 [00:04<00:00, 11.30it/s]


AUC-ROC for fold 1: 0.75
AUC-ROC for fold 2: 1.0
AUC-ROC for fold 3: 0.5
AUC-ROC for fold 4: 1.0
AUC-ROC for fold 5: 0.0
AUC-ROC for fold 6: 1.0
AUC-ROC for fold 7: 1.0
AUC-ROC for fold 8: 1.0
AUC-ROC for fold 9: 1.0
AUC-ROC for fold 10: 1.0
AUC-ROC for fold 11: 1.0
AUC-ROC for fold 12: 1.0
AUC-ROC for fold 13: 1.0
AUC-ROC for fold 14: 1.0
AUC-ROC for fold 15: 1.0
Average AUC-ROC: 0.8833333333333333


In [70]:
# Generate Node2Vec embeddings
node2vec = Node2Vec(combined_graph_flat, dimensions=64, walk_length=30, num_walks=200, workers=4)
model = node2vec.fit(window=10, min_count=1)

# Prepare the link prediction data
positive_examples = [np.concatenate((model.wv[str(i)], model.wv[str(j)])) for i, j in terrorism_graph_2020.edges]
negative_examples = [np.concatenate((model.wv[str(i)], model.wv[str(j)])) for i, j in non_edges]
X = positive_examples + negative_examples
y = [1] * len(positive_examples) + [0] * len(negative_examples)

# Train a logistic regression model with 15-fold cross-validation
clf = LogisticRegression(random_state=0)

# Calculate and print the AUC-ROC score for each fold
scores_auc = cross_val_score(clf, X, y, cv=15, scoring='roc_auc')
print('Average AUC-ROC:', scores_auc.mean())

# Calculate and print the accuracy for each fold
scores_accuracy = cross_val_score(clf, X, y, cv=15, scoring='accuracy')
print('Average Accuracy:', scores_accuracy.mean())

# Calculate and print the F1 score for each fold
scores_f1 = cross_val_score(clf, X, y, cv=15, scoring='f1')
print('Average F1 Score:', scores_f1.mean())

Computing transition probabilities:   0%|          | 0/211 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 50/50 [00:04<00:00, 11.98it/s]
Generating walks (CPU: 2): 100%|██████████| 50/50 [00:04<00:00, 12.21it/s]
Generating walks (CPU: 3): 100%|██████████| 50/50 [00:04<00:00, 12.20it/s]
Generating walks (CPU: 4): 100%|██████████| 50/50 [00:04<00:00, 12.26it/s]


Average AUC-ROC: 0.8666666666666667
Average Accuracy: 0.6944444444444443
Average F1 Score: 0.6888888888888887
