In [23]:
import networkx as nx
import numpy as np
import itertools
import math
import json

In [2]:
val_g1 = nx.read_edgelist('validation/validation_G1.edgelist', nodetype=int)
val_g2 = nx.read_edgelist('validation/validation_G2.edgelist', nodetype=int)

In [3]:
val_g1_A = nx.adj_matrix(val_g1).todense()
val_g2_A = nx.adj_matrix(val_g2).todense()

In [4]:
def load_mapping(file):
    mapping = {}
    for u, v in ([n.strip().split(' ') for n in open(file).readlines()]):
        mapping[int(u)] = int(v)
    return mapping

In [5]:
def knbrs(g=None, start=None, k=1):
    knbrs = list(nx.single_source_shortest_path_length(g, start, cutoff=k).keys())
    knbrs.remove(start)
    return set(knbrs)

In [6]:
val_mapping = load_mapping('validation/validation_seed_mapping.txt')

### Seed based

In [7]:
g1 = nx.read_edgelist('seedbased/G1.edgelist', nodetype=int)
g2 = nx.read_edgelist('seedbased/G2.edgelist', nodetype=int)

In [45]:
g_seed = load_mapping('seedbased/seed_node_pairs.txt')
g_seed_keys = set(g_seed.keys())

In [127]:
g1_nodes = np.array(g1.nodes)
g1_len = g1.number_of_nodes()
g2_nodes = np.array(g2.nodes)
g2_len = g2.number_of_nodes()
sim_mapping = {}
for ix, m in enumerate(g1_nodes):
    print(f'Matching node from g1: {m} [{ix}/{g1_len}]')
    
    m = 22
    g1_nbrs = knbrs(g1, m, 1)
    g1_nbrs_len = len(g1_nbrs)
    g1_nbrs_seed = set(g1_nbrs).intersection(g_seed_keys)
    
    g1_nbrs2 = knbrs(g1, m, 2)
    g1_nbrs2_len = len(g1_nbrs2)
    g1_nbrs2_seed = set(g1_nbrs2).intersection(g_seed_keys)
    
    g1_nbrs3 = knbrs(g1, m, 3)
    g1_nbrs3_len = len(g1_nbrs3)
    g1_nbrs3_seed = set(g1_nbrs3).intersection(g_seed_keys)
    
    sim = {}
    for jx, n in enumerate(g2_nodes, 0):
        print(f' -> to g2: {n} [{jx}/{g2_len}]', end='\r')
        c1, c2, c3 = 0, 0, 0
        m1, m2, m3 = 0, 0, 0
        
        g2_nbrs = knbrs(g2, n, 1)
        g2_nbrs_len = len(g2_nbrs)
        g2_nbrs_seed = set(g2_nbrs).intersection(g_seed_keys)
        
        for i, j in itertools.product(g1_nbrs_seed, g2_nbrs_seed):
            if g_seed[i]==j:
                m1 += 1
        
        c1 = m1 / (math.sqrt(g1_nbrs_len) * math.sqrt(g2_nbrs_len))
        
        if c1 > 0:
            g2_nbrs2 = knbrs(g2, n, 2)
            g2_nbrs2_len = len(g2_nbrs2)
            g2_nbrs2_seed = set(g2_nbrs2).intersection(g_seed_keys)
            
            for i, j in itertools.product(g1_nbrs2_seed, g2_nbrs2_seed):
                if g_seed[i] == j:
                    m2 += 1

            c2 =  (m1 * m2) / (math.sqrt(g1_nbrs2_len) * math.sqrt(g2_nbrs2_len))
        
        if c2 > 0:
            g2_nbrs3 = knbrs(g2, n, 3)
            g2_nbrs3_len = len(g2_nbrs3)
            g2_nbrs3_seed = set(g2_nbrs3).intersection(g_seed_keys)
            
            for i, j in itertools.product(g1_nbrs3_seed, g2_nbrs3_seed):
                if g_seed[i] == j:
                    m3 += 1

            c3 =  (m1 * m2 * m3) / (math.sqrt(g1_nbrs3_len) * math.sqrt(g2_nbrs3_len))
            c = round(c1 + c2 + c3, 6)
            c1, c2, c3 = round(c1, 6), round(c2, 6), round(c3, 6)
            sim[(m, n)] = c
            print([m, n], [m1, m2, m3], [c1, c2, c3], c)
    
    if len(sim)>0:
        top = max(sim, key=sim.get)
        sim_mapping[top] = sim[top]
        print('\n ### Matched: ', top)

Matching node from g1: 0 [0/4563]
[22, 4517] [1, 5, 47] [0.029463, 0.003945, 0.054426] 0.087833
[22, 3045] [1, 7, 47] [0.030179, 0.006341, 0.076898] 0.113418
[22, 3428] [1, 7, 47] [0.018932, 0.005071, 0.075984] 0.099986
[22, 676] [1, 7, 47] [0.022076, 0.006114, 0.076679] 0.104869
[22, 3711] [1, 3, 47] [0.021339, 0.002215, 0.032634] 0.056188
[22, 1304] [1, 7, 47] [0.043769, 0.007333, 0.077765] 0.128866
[22, 3714] [1, 5, 47] [0.024984, 0.003784, 0.054335] 0.083103
[22, 2793] [1, 5, 47] [0.029013, 0.003599, 0.054081] 0.086693
[22, 2050] [1, 7, 47] [0.030429, 0.006264, 0.077092] 0.113785
[22, 3750] [1, 7, 47] [0.024845, 0.006496, 0.077066] 0.108407
[22, 3633] [1, 7, 47] [0.033672, 0.007582, 0.077801] 0.119055
[22, 271] [1, 4, 46] [0.050252, 0.004839, 0.043527] 0.098618
[22, 2990] [1, 7, 47] [0.029696, 0.006268, 0.076767] 0.11273
[22, 76] [1, 7, 47] [0.055556, 0.009321, 0.078718] 0.143594
[22, 3256] [1, 6, 47] [0.043769, 0.005941, 0.066239] 0.115949
[22, 3303] [1, 5, 47] [0.021253, 0.003188

In [128]:
sim_mapping

{(22, 2124): 0.16167}

In [39]:
def write_mappings_t0_file(file):
    mappings = [f'{a} {b}\n' for a, b in list(sim_mapping.items())]
    f = open(file, 'w')
    f.writelines(mappings)
    f.flush()

In [122]:
g_seed

{0: 3389,
 1: 2199,
 2: 269,
 3: 3398,
 4: 4338,
 5: 1268,
 6: 2373,
 7: 2023,
 8: 1371,
 9: 2674,
 10: 3467,
 11: 2561,
 12: 4343,
 13: 971,
 14: 264,
 15: 3474,
 16: 3366,
 17: 3689,
 18: 3871,
 19: 617,
 20: 4045,
 21: 1954,
 22: 76,
 23: 2689,
 24: 3516,
 25: 3722,
 26: 3700,
 27: 1745,
 28: 211,
 29: 3833,
 30: 1918,
 31: 2764,
 32: 3614,
 33: 2178,
 34: 3206,
 35: 1244,
 36: 2730,
 37: 1289,
 38: 1597,
 39: 844,
 40: 1877,
 41: 2263,
 42: 1591,
 43: 3019,
 44: 1031,
 45: 2960,
 46: 3502,
 47: 972,
 48: 267,
 49: 1445,
 50: 2237,
 51: 4456,
 52: 3688,
 53: 3291,
 54: 2940,
 55: 1035,
 56: 1407,
 57: 1814,
 58: 1274,
 59: 2241,
 60: 498,
 61: 4163,
 62: 2522,
 63: 1611,
 64: 1726,
 65: 332,
 66: 4026,
 67: 2236,
 68: 3838,
 69: 893,
 70: 4042,
 71: 1361,
 72: 1857,
 73: 126,
 74: 1621,
 75: 3845,
 76: 4460,
 77: 1129,
 78: 2469,
 79: 240,
 80: 3651,
 81: 995,
 82: 1378,
 83: 2998,
 84: 1606,
 85: 4319,
 86: 1971,
 87: 1500,
 88: 4046,
 89: 4284,
 90: 3181,
 91: 4352,
 92: 1791,
 93