In [10]:
import os
from collections import defaultdict
import random
import sys
from munkres import Munkres
import numpy
import networkx as nx

In [11]:
from networkx.algorithms import community


In [8]:
from networkx.algorithms.community import greedy_modularity_communities

In [4]:
def loss1(usersPerCircle, usersPerCircleP):
    psize = max(len(usersPerCircle),len(usersPerCircleP)) # Pad the matrix to be square
    mm = numpy.zeros((psize,psize))
    mm2 = numpy.zeros((psize,psize))
    for i in range(psize):
        for j in range(psize):
            circleP = set() # Match to an empty circle (delete all users)
            circle = set() # Match to an empty circle (add all users)
            if (i < len(usersPerCircleP)):
                circleP = usersPerCircleP[i]
            if (j < len(usersPerCircle)):
                circle = usersPerCircle[j]
            nedits = len(circle.union(circleP)) - len(circle.intersection(circleP)) # Compute the edit distance between the two circles
            mm[i][j] = nedits
            mm2[i][j] = nedits

    if psize == 0:
        return 0 # Edge case in case there are no circles
    else:
        m = Munkres()
        #print mm2 # Print the pairwise cost matrix
        indices = m.compute(mm) # Compute the optimal alignment between predicted and groundtruth circles
        editCost = 0
        for row, column in indices:
            editCost += mm2[row][column]
    return int(editCost)


In [12]:
def read_nodeadjlist(filename):
    G = nx.Graph()
    for line in open(filename):
        lst = line.split(' ')
        el, es = lst[0], lst[1][:-1]
        G.add_edge(int(el),int(es))
    return G

In [19]:
G1 = read_nodeadjlist("twitter/78813.edges")

In [41]:
def computeKClique(inputG, k):
    kclique = list(community.k_clique_communities(inputG , k))
    kcoms = [set(x) for x in kclique]
    return kcoms

In [42]:
predCircle = computeKClique(G1, 4)

In [43]:
predCircle

[{586,
  2038,
  2419,
  3839,
  11628,
  13055,
  13405,
  15023,
  113963,
  174853,
  229523,
  428333,
  621713,
  627363,
  643443,
  655613,
  697163,
  758185,
  782329,
  789314,
  793219,
  806170,
  813491,
  813715,
  817386,
  821449,
  849131,
  949161,
  992031,
  1018211,
  1847381,
  2195241,
  2384071,
  3361871,
  3375371,
  3558801,
  3594701,
  3640341,
  4044361,
  4958131,
  5027041,
  5362182,
  5385852,
  5435752,
  5497452,
  5541662,
  5637652,
  5676102,
  5725652,
  5746402,
  5796972,
  5849202,
  5963912,
  6217542,
  6271152,
  6339822,
  6376372,
  6515122,
  6813682,
  7434252,
  7684302,
  7921352,
  7924912,
  8091052,
  8168192,
  9184682,
  9235972,
  9363302,
  9460662,
  9535182,
  10461992,
  10751252,
  10760422,
  11336782,
  11375732,
  12101862,
  12199652,
  12307282,
  13170222,
  13434092,
  13535762,
  13837292,
  13910012,
  14048987,
  14058661,
  14066988,
  14079172,
  14091119,
  14111698,
  14124059,
  14154082,
  14161531,
  142098

In [24]:
# pred_list = []
# for circle in predCircle:
#     for val in circle:
#         pred_list.append(int(val))


In [26]:
set_pred = set(pred_list)

In [27]:
set_pred

{3839,
 13607,
 428333,
 509323,
 618593,
 641433,
 755603,
 809760,
 818489,
 1057431,
 1471021,
 1501471,
 2029971,
 4519121,
 5413762,
 6598572,
 7183482,
 9406452,
 9870342,
 10266802,
 11340982,
 12600372,
 14120253,
 14667274,
 14958507,
 15456622,
 15527013,
 15780631,
 15814666,
 16107051,
 16129920,
 16303106,
 16457564,
 16530279,
 16901455,
 17129546,
 17643774,
 17853760,
 17870415,
 17870501,
 18030840,
 18681592,
 19579040,
 19996594,
 21212145,
 21523946,
 21931952,
 27242126,
 27463744,
 35702691,
 38272894,
 39782670,
 44984134,
 50888802,
 57186667,
 61819621,
 68571415,
 85416304,
 90630206,
 91718540,
 95822575,
 131884083,
 136055942,
 145753059,
 197141866,
 199032495,
 216126534,
 259110723}

In [36]:
def read_circles(filename):
    final_lst = []
    for line in open(filename):
        lst = line.split('\t')
        el, es = lst[0], lst[1:]
        circle  =set()
        for e in es:
            circle.add(int(e))
        final_lst.append(circle)
    return final_lst

In [37]:
gt_circles =read_circles("twitter/78813.circles")

In [38]:
gt_circles

[{586,
  3839,
  113963,
  174853,
  229523,
  793219,
  813491,
  2384071,
  5676102,
  11336782,
  13535762,
  15948437,
  30313925,
  50393960},
 {3839,
  174853,
  621713,
  849131,
  992031,
  2384071,
  5497452,
  5637652,
  5676102,
  9460662,
  11336782,
  11362622,
  12199652,
  14371227,
  14405111,
  14964767,
  15948437,
  27478849,
  50393960}]

In [44]:
loss1(predCircle, gt_circles)

191