### Import

In [53]:
import networkx as nx
from node2vec import Node2Vec
import pickle
import pandas as pd
import numpy as np

In [14]:
months = ['2015-01-31', '2015-02-28', '2015-03-31', '2015-04-30']
graphs = []

for month in months:
    with open(f'../data/graphs/graph_{month}.pkl', 'rb') as f:
        g = pickle.load(f)
        graphs.append(g)
        print(f'For {month}: {g.edges}')

For 2015-01-31: [('AFG', 'PAK'), ('AFG', 'USA'), ('PAK', 'CHN'), ('PAK', 'MEX'), ('PAK', 'SAU'), ('PAK', 'USA'), ('USA', 'AFR'), ('USA', 'AUS'), ('USA', 'BGD'), ('USA', 'BLR'), ('USA', 'BLZ'), ('USA', 'CAN'), ('USA', 'CHE'), ('USA', 'CHL'), ('USA', 'CHN'), ('USA', 'COD'), ('USA', 'COK'), ('USA', 'COL'), ('USA', 'CUB'), ('USA', 'DEU'), ('USA', 'EGY'), ('USA', 'ESP'), ('USA', 'EUR'), ('USA', 'FRA'), ('USA', 'GBR'), ('USA', 'GRC'), ('USA', 'HKG'), ('USA', 'IRN'), ('USA', 'ISR'), ('USA', 'JOR'), ('USA', 'JPN'), ('USA', 'KEN'), ('USA', 'KOR'), ('USA', 'LCA'), ('USA', 'MEX'), ('USA', 'MYS'), ('USA', 'NIC'), ('USA', 'NMR'), ('USA', 'PAN'), ('USA', 'PER'), ('USA', 'PHL'), ('USA', 'RUS'), ('USA', 'SAU'), ('USA', 'SEA'), ('USA', 'SYR'), ('USA', 'TUR'), ('USA', 'UGA'), ('USA', 'YEM'), ('AFR', 'BDI'), ('AFR', 'DJI'), ('AFR', 'NGA'), ('AFR', 'SDN'), ('AFR', 'SOM'), ('AFR', 'ZAF'), ('SDN', 'KOR'), ('SDN', 'LBY'), ('SOM', 'TUR'), ('ZAF', 'CUB'), ('ZAF', 'GBR'), ('ARE', 'AUS'), ('ARE', 'BHR'), ('ARM',

In [15]:
graphs

[<networkx.classes.graph.Graph at 0x21fd23cc910>,
 <networkx.classes.graph.Graph at 0x21fd2229b70>,
 <networkx.classes.graph.Graph at 0x21fd23cc610>,
 <networkx.classes.graph.Graph at 0x21ff5107e80>]

In [30]:
january_node2vec = Node2Vec(graphs[0], dimensions=64, walk_length=30, num_walks=100, workers=4)
january_model = january_node2vec.fit(window=10, min_count=1, batch_words=4)

Computing transition probabilities:   0%|          | 0/103 [00:00<?, ?it/s]

In [31]:
february_node2vec = Node2Vec(graphs[1], dimensions=64, walk_length=30, num_walks=100, workers=4)
february_model = february_node2vec.fit(window=10, min_count=1, batch_words=4)

Computing transition probabilities:   0%|          | 0/194 [00:00<?, ?it/s]

In [32]:
march_node2vec = Node2Vec(graphs[2], dimensions=64, walk_length=30, num_walks=100, workers=4)
march_model = march_node2vec.fit(window=10, min_count=1, batch_words=4)

Computing transition probabilities:   0%|          | 0/213 [00:00<?, ?it/s]

In [33]:
april_node2vec = Node2Vec(graphs[3], dimensions=64, walk_length=30, num_walks=100, workers=4)
april_model = april_node2vec.fit(window=10, min_count=1, batch_words=4)

Computing transition probabilities:   0%|          | 0/211 [00:00<?, ?it/s]

In [34]:
print(len(graphs[0].nodes) == len(graphs[1].nodes) == len(graphs[2].nodes) == len(graphs[3].nodes))

False


In [42]:
ser1 = pd.Series([n for n in graphs[0].nodes])
ser2 = pd.Series([n for n in graphs[1].nodes])
ser3 = pd.Series([n for n in graphs[2].nodes])
ser4 = pd.Series([n for n in graphs[3].nodes])
intersection_set = set(ser1).intersection(set(ser2)).intersection(set(ser3)).intersection(set(ser4))

In [43]:
intersection_set

{'AFG',
 'AFR',
 'ARE',
 'ARM',
 'AUS',
 'BDI',
 'BFA',
 'BGD',
 'BGR',
 'BHR',
 'BLR',
 'BLZ',
 'BRA',
 'CAN',
 'CHE',
 'CHL',
 'CHN',
 'CIV',
 'CMR',
 'COD',
 'COG',
 'COK',
 'COL',
 'CUB',
 'DEU',
 'DJI',
 'ECU',
 'EGY',
 'ERI',
 'ESP',
 'EST',
 'ETH',
 'EUR',
 'FIN',
 'FJI',
 'FRA',
 'GBR',
 'GHA',
 'GMB',
 'GRC',
 'HKG',
 'HUN',
 'IDN',
 'IND',
 'IRL',
 'IRN',
 'IRQ',
 'ISR',
 'ITA',
 'JOR',
 'JPN',
 'KAZ',
 'KEN',
 'KGZ',
 'KHM',
 'KOR',
 'LBN',
 'LBY',
 'LCA',
 'LKA',
 'LSO',
 'LTU',
 'LVA',
 'MDA',
 'MDV',
 'MEX',
 'MLI',
 'MMR',
 'MYS',
 'NER',
 'NGA',
 'NIC',
 'NMR',
 'NZL',
 'PAK',
 'PAN',
 'PER',
 'PHL',
 'POL',
 'PRK',
 'PSE',
 'QAT',
 'RUS',
 'SAF',
 'SAU',
 'SDN',
 'SEA',
 'SGP',
 'SOM',
 'SWE',
 'SYR',
 'THA',
 'TUR',
 'TWN',
 'TZA',
 'UGA',
 'UKR',
 'USA',
 'VEN',
 'WST',
 'YEM',
 'ZAF'}

In [49]:
avg_vectors = dict()
for node in intersection_set:
    avg_vector = []
    for i in range(64):
        avg_vector.append((january_model.wv[node][i] + february_model.wv[node][i] + march_model.wv[node][i] + april_model.wv[node][i]) / 4)
    avg_vectors[node] = avg_vector

In [46]:
graphs[0].nodes['USA']


{}

In [48]:
january_model.wv['USA']

array([ 0.00805359, -0.41069472,  0.08025012,  0.18132272, -0.07338174,
        0.0967021 ,  0.16103968, -0.3575849 , -0.09403688,  0.09662201,
        0.17541271,  0.24949054, -0.30215856, -0.21631047, -0.05898061,
       -0.15703103, -0.19601299,  0.02536008, -0.0911091 ,  0.25507605,
        0.20756319, -0.0623293 , -0.02127473, -0.08807782,  0.03118874,
        0.16681878, -0.36662927,  0.00431624, -0.05229433, -0.04134625,
        0.05770209, -0.15232214,  0.17704554, -0.03045174,  0.12073768,
       -0.29191405,  0.02460024, -0.10743121,  0.32383433,  0.07674116,
       -0.17210363,  0.09216761,  0.20085992, -0.22286697,  0.06003831,
       -0.03110959,  0.14827871, -0.18903807, -0.12933174, -0.305922  ,
        0.2306685 ,  0.1659075 ,  0.20122707,  0.04524691, -0.01962503,
        0.04220286,  0.10706147, -0.19292687,  0.15454836,  0.14200336,
        0.2355939 , -0.64727193, -0.12488485,  0.5257158 ], dtype=float32)

In [52]:
len(avg_vectors['USA'])

64

### Scalar product of two vectors for link prediction

In [58]:
dot_products = dict()
for node1 in avg_vectors:
    for node2 in avg_vectors:
        if node1 != node2:
            vector1 = np.array(avg_vectors[node1])
            vector2 = np.array(avg_vectors[node2])
            n_sorted = sorted([node1, node2])
            dot_products[f'{n_sorted[0]}-{n_sorted[1]}'] = np.dot(vector1, vector2)

In [59]:
dot_products

{'EGY-YEM': 0.4283489457309765,
 'GHA-YEM': 0.3602800481325936,
 'PRK-YEM': 0.5470879337537873,
 'LSO-YEM': 0.6195953796082045,
 'JOR-YEM': 0.49948788861681565,
 'FIN-YEM': 0.37360859497770216,
 'SAF-YEM': 0.7280947020702684,
 'PAK-YEM': 0.3685260304199317,
 'CHN-YEM': 0.4175462892190181,
 'CAN-YEM': 0.4852178556069977,
 'QAT-YEM': 0.4206700107358517,
 'LTU-YEM': 0.4255367463055428,
 'CIV-YEM': 0.825029161225032,
 'UGA-YEM': 0.37070443739760844,
 'KHM-YEM': 0.39069491885339186,
 'CMR-YEM': 0.35648378612355264,
 'AUS-YEM': 0.5763852756158736,
 'YEM-ZAF': 0.5032942219376086,
 'ARE-YEM': 0.625871943338062,
 'TZA-YEM': 0.4999238012165432,
 'LBY-YEM': 0.7365219115573337,
 'MYS-YEM': 0.5090782803518643,
 'MMR-YEM': 0.49278247306609535,
 'IDN-YEM': 0.4992166608520954,
 'FRA-YEM': 0.45818490633374687,
 'SWE-YEM': 0.4500089912781987,
 'SAU-YEM': 0.4864716580844405,
 'IRQ-YEM': 0.2956049625985246,
 'KEN-YEM': 0.4394998008334926,
 'NIC-YEM': 0.3809984635040051,
 'RUS-YEM': 0.45288854941162726,
 '

AttributeError: 'dict' object has no attribute 'min'