In [2]:
# ! pip3 install munkres --user

# Import Libraries

In [8]:
import os
from collections import defaultdict
import random
import sys
from munkres import Munkres
import numpy
import networkx as nx

In [7]:
import glob
# Collect the per-ego data files from the twitter/ directory, one list per
# file extension. glob makes no ordering guarantee, so every list is sorted
# to keep downstream iteration (and the printed per-ego log) reproducible.
circles_files = sorted(glob.glob("twitter/*.circles"))
edges_files = sorted(glob.glob("twitter/*.edges"))
egofeat_files = sorted(glob.glob("twitter/*.egofeat"))
feat_files = sorted(glob.glob("twitter/*.feat"))
featnames_files = sorted(glob.glob("twitter/*.featnames"))

In [16]:
from networkx.algorithms import community


# Create the list of ego nodes (973 egos)

In [9]:
# Ego ids are the file stems of the .circles files ("twitter/123.circles" -> "123").
# os.path is used instead of splitting on "/" and "." so that platform-specific
# separators or nested directories do not break the tuple unpacking.
egoNodeList = [
    os.path.splitext(os.path.basename(circle_path))[0]
    for circle_path in circles_files
]

# Filter the ego list by number of edges (keep egos whose .edges file has fewer than 2300 lines)

In [10]:
# Keep only the egos whose .edges file is small enough to process.
# MAX_EDGE_LINES is an exclusive upper bound on the file's line count.
MAX_EDGE_LINES = 2300

filterNodeList = []   # ego ids (file stems) that pass the size filter
num_lines_len = []    # line count of every .edges file, in edges_files order
for item in edges_files:
    # Context manager so the handle is closed right away instead of leaking
    # one open file per ego network.
    with open(item) as edge_file:
        num_lines = sum(1 for _ in edge_file)
    num_lines_len.append(num_lines)
    if num_lines < MAX_EDGE_LINES:
        # Portable stem extraction: "twitter/123.edges" -> "123".
        filterNodeList.append(os.path.splitext(os.path.basename(item))[0])

# Define loss Function

In [11]:
def loss1(usersPerCircle, usersPerCircleP):
    """Return the minimum total edit distance between two lists of circles.

    Each circle is a set of user ids. The shorter list is padded with empty
    circles so both have the same length; the cost of pairing two circles is
    the number of users that belong to exactly one of them. The circles are
    then paired one-to-one by the Munkres solver and the costs of the chosen
    pairs are summed.

    Parameters
    ----------
    usersPerCircle : list of set
        Reference circles.
    usersPerCircleP : list of set
        Predicted circles.

    Returns
    -------
    int
        Total cost of the assignment; 0 when both lists are empty.
    """
    n_true = len(usersPerCircle)
    n_pred = len(usersPerCircleP)
    size = max(n_true, n_pred)
    if size == 0:
        return 0

    # costs[row][col]: edits needed to turn predicted circle `row` into
    # reference circle `col` (indices past the end of a list mean "empty").
    costs = numpy.zeros((size, size))
    for row in range(size):
        predicted = usersPerCircleP[row] if row < n_pred else set()
        for col in range(size):
            truth = usersPerCircle[col] if col < n_true else set()
            costs[row][col] = len(truth.symmetric_difference(predicted))

    # The solver gets its own scratch copy so the costs read back below are
    # guaranteed to be the unmodified ones.
    assignment = Munkres().compute(costs.copy())
    total = 0
    for row, col in assignment:
        total += costs[row][col]
    return int(total)


# Build Graph

In [12]:
def read_nodeadjlist(filename):
    """Build an undirected graph from a whitespace-separated edge-list file.

    Each non-blank line must start with two integer node ids; one edge is
    added per line.

    Parameters
    ----------
    filename : str
        Path of the edge-list file.

    Returns
    -------
    networkx.Graph
        Graph containing one edge per non-blank line.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than two fields.
    ValueError
        If a node id is not an integer.
    """
    G = nx.Graph()
    # Context manager closes the file even if parsing fails part-way through.
    with open(filename) as edge_file:
        for line in edge_file:
            # split() with no argument discards the newline itself, so the last
            # digit is no longer chopped off when the final line has no trailing
            # newline (the old `[:-1]` slice did that).
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines, e.g. at the end of the file
            G.add_edge(int(fields[0]), int(fields[1]))
    return G

In [19]:
# G1 = read_nodeadjlist("twitter/78813.edges")

# Define the k-clique function

In [13]:
def computeKClique(inputG, k):
    """Return the k-clique communities of ``inputG`` as a list of mutable sets.

    Parameters
    ----------
    inputG : graph
        Graph handed to ``community.k_clique_communities``.
    k : int
        Clique size passed through to ``community.k_clique_communities``.

    Returns
    -------
    list of set
        One set of nodes per community found.
    """
    return [
        set(members)
        for members in community.k_clique_communities(inputG, k)
    ]

In [42]:
# predCircle = computeKClique(G1, 4)

In [4]:
# predCircle

In [24]:
# pred_list = []
# for circle in predCircle:
#     for val in circle:
#         pred_list.append(int(val))


In [26]:
# set_pred = set(pred_list)

# Read Ground truth Circle files

In [14]:
def read_circles(filename):
    """Read a tab-separated circles file into a list of sets of user ids.

    Each non-blank line has the form ``<label>\\t<id>\\t<id>...``. The first
    field (the label) is ignored; the remaining fields are parsed as integers.
    A line that has a label but no ids yields an empty circle.

    Parameters
    ----------
    filename : str
        Path of the circles file.

    Returns
    -------
    list of set of int
        One set per non-blank line, in file order.

    Raises
    ------
    ValueError
        If a member field is not an integer.
    """
    final_lst = []
    # Context manager closes the file even if parsing fails part-way through.
    with open(filename) as circle_file:
        for line in circle_file:
            if not line.strip():
                # A blank line is not a circle; without this check it would be
                # recorded as a spurious empty circle.
                continue
            members = line.rstrip("\n").split("\t")[1:]
            # Skip empty fields (e.g. from a trailing tab) that int() would reject.
            final_lst.append({int(member) for member in members if member.strip()})
    return final_lst

In [37]:
# gt_circles =read_circles("twitter/78813.circles")

In [26]:
# gt_circles

# Calculate the total loss over all filtered ego networks

In [17]:
# Score every filtered ego network: predict circles with 4-clique communities,
# compare them with the ground-truth circles, and accumulate the loss.
totalLoss = 0
cnt = 0  # remains 0 when filterNodeList is empty
for cnt, ego in enumerate(filterNodeList, start=1):
    edges_path = "twitter/" + ego + ".edges"
    circles_path = "twitter/" + ego + ".circles"

    G = read_nodeadjlist(edges_path)
    predCircle = computeKClique(G, 4)
    gt_circles = read_circles(circles_path)

    calLoss = loss1(gt_circles, predCircle)
    totalLoss = totalLoss + calLoss

    # Progress log: running count of processed egos and this ego's loss.
    print("cnt", cnt)
    print("loss", calLoss)
print(totalLoss)

cnt 1
loss 14
cnt 2
loss 62
cnt 3
loss 54
cnt 4
loss 29
cnt 5
loss 108
cnt 6
loss 103
cnt 7
loss 154
cnt 8
loss 42
cnt 9
loss 112
cnt 10
loss 116
cnt 11
loss 122
cnt 12
loss 60
cnt 13
loss 23
cnt 14
loss 57
cnt 15
loss 59
cnt 16
loss 36
cnt 17
loss 45
cnt 18
loss 28
cnt 19
loss 83
cnt 20
loss 41
cnt 21
loss 54
cnt 22
loss 96
cnt 23
loss 47
cnt 24
loss 93
cnt 25
loss 12
cnt 26
loss 107
cnt 27
loss 63
cnt 28
loss 110
cnt 29
loss 83
cnt 30
loss 92
cnt 31
loss 89
cnt 32
loss 44
cnt 33
loss 68
cnt 34
loss 58
cnt 35
loss 36
cnt 36
loss 53
cnt 37
loss 81
cnt 38
loss 35
cnt 39
loss 22
cnt 40
loss 145
cnt 41
loss 51
cnt 42
loss 143
cnt 43
loss 161
cnt 44
loss 101
cnt 45
loss 59
cnt 46
loss 111
cnt 47
loss 39
cnt 48
loss 55
cnt 49
loss 137
cnt 50
loss 101
cnt 51
loss 7
cnt 52
loss 69
cnt 53
loss 156
cnt 54
loss 53
cnt 55
loss 66
cnt 56
loss 19
cnt 57
loss 72
cnt 58
loss 54
cnt 59
loss 50
cnt 60
loss 79
cnt 61
loss 68
cnt 62
loss 14
cnt 63
loss 12
cnt 64
loss 38
cnt 65
loss 85
cnt 66
loss 57
cnt 

# Average loss

In [20]:
# Average over the number of ego networks actually scored (cnt) instead of a
# hard-coded 583, so the value stays correct if the data set or the edge-count
# filter changes. Falls back to 0.0 when nothing was scored.
avgLoss = totalLoss / cnt if cnt else 0.0

In [21]:
avgLoss

79.12006861063465