# Import Libraries

In [1]:
import os
from collections import defaultdict
import random
import sys
from munkres import Munkres
import numpy
import networkx as nx

In [2]:
from networkx.algorithms import community
import glob

# Load Files

In [4]:
# glob.glob returns matches in arbitrary, OS-dependent order. Sorting each list
# makes the order in which ego networks are processed reproducible across runs.
circles_files = sorted(glob.glob("twitter/*.circles"))
edges_files = sorted(glob.glob("twitter/*.edges"))
egofeat_files = sorted(glob.glob("twitter/*.egofeat"))
feat_files = sorted(glob.glob("twitter/*.feat"))
featnames_files = sorted(glob.glob("twitter/*.featnames"))

# Define Loss Function

In [5]:
def loss1(usersPerCircle, usersPerCircleP):
    """Return the minimum total edit distance between two collections of circles.

    Each circle from one collection is paired with exactly one circle from the
    other. The cost of a pair is the size of the symmetric difference of the two
    member sets, i.e. how many users must be added or removed to turn one circle
    into the other. When the collections differ in length, the shorter one is
    padded with empty circles, so an unmatched circle costs its own size. The
    pairing with the smallest summed cost is found by solving a linear
    assignment problem.

    Parameters
    ----------
    usersPerCircle : sequence of iterables
        Ground-truth circles. Each circle is a set (or any iterable) of
        hashable user ids.
    usersPerCircleP : sequence of iterables
        Predicted circles, in the same format.

    Returns
    -------
    int
        Total edit cost of the optimal pairing; 0 when both inputs are empty.
    """
    # Function-scope import keeps this cell runnable regardless of where the
    # notebook's other scipy imports sit.
    from scipy.optimize import linear_sum_assignment

    # Coerce every circle to a set so that lists (e.g. sorted community lists)
    # are accepted in either argument, not only in the predicted position.
    truth = [set(members) for members in usersPerCircle]
    predicted = [set(members) for members in usersPerCircleP]

    psize = max(len(truth), len(predicted))
    if psize == 0:
        return 0

    empty = set()
    cost = numpy.zeros((psize, psize))
    for i in range(psize):
        # Rows index predicted circles; indices past the end are padding.
        circleP = predicted[i] if i < len(predicted) else empty
        for j in range(psize):
            # Columns index ground-truth circles, padded the same way.
            circle = truth[j] if j < len(truth) else empty
            # |A ^ B| == |A | B| - |A & B|
            cost[i][j] = len(circle ^ circleP)

    # linear_sum_assignment leaves `cost` untouched, so the same matrix can be
    # used to read back the cost of the chosen pairs.
    rows, cols = linear_sum_assignment(cost)
    return int(cost[rows, cols].sum())

# Construct Graph using edges

In [6]:
def read_nodeadjlist(filename):
    """Build an undirected graph from a whitespace-separated edge-list file.

    Every non-blank line is expected to hold two integer node ids; the first
    two tokens on the line become the endpoints of one edge. Blank lines are
    skipped.

    Parameters
    ----------
    filename : str
        Path of the edge-list file to read.

    Returns
    -------
    networkx.Graph
        Graph containing one edge per non-blank line.

    Raises
    ------
    IndexError
        If a non-blank line contains only one token.
    ValueError
        If a token is not a valid integer.
    """
    G = nx.Graph()
    # The context manager closes the file even if parsing fails part-way.
    with open(filename) as handle:
        for line in handle:
            # split() with no argument drops the trailing newline (if any) and
            # tolerates repeated spaces. Slicing off the last character instead
            # would truncate the final id when the file lacks a trailing newline.
            tokens = line.split()
            if not tokens:
                continue
            G.add_edge(int(tokens[0]), int(tokens[1]))
    return G

# Create List of Egos (973 ego networks)

In [7]:
# Derive each ego id from its circles file name: "twitter/<ego>.circles" -> "<ego>".
# os.path is used instead of splitting on "/" because glob returns paths with the
# platform's separator (backslashes on Windows), which made the "/" split fail.
egoNodeList = [
    os.path.splitext(os.path.basename(path))[0]
    for path in circles_files
]

# Count the Circles in Each Ground-Truth File

In [8]:
# Count the lines (one circle per line) in every ego's ground-truth file.
filterCircleList = []
for item in egoNodeList:
    circle_file = "twitter/" + item + ".circles"
    # Close each file as soon as it has been counted instead of leaving one
    # open handle per ego for the garbage collector to clean up.
    with open(circle_file) as handle:
        num_lines = sum(1 for _ in handle)
    filterCircleList.append(num_lines)

In [9]:
# Display the full list of per-ego circle counts (one entry per ego network).
filterCircleList

[4,
 4,
 43,
 4,
 5,
 3,
 4,
 2,
 2,
 5,
 2,
 1,
 3,
 42,
 2,
 4,
 5,
 4,
 8,
 4,
 2,
 6,
 4,
 1,
 6,
 2,
 3,
 3,
 10,
 2,
 3,
 5,
 3,
 3,
 2,
 21,
 3,
 2,
 23,
 2,
 6,
 2,
 2,
 3,
 3,
 2,
 14,
 2,
 3,
 2,
 3,
 1,
 4,
 5,
 13,
 1,
 10,
 2,
 2,
 3,
 2,
 2,
 13,
 2,
 4,
 8,
 7,
 4,
 1,
 0,
 1,
 1,
 3,
 2,
 7,
 2,
 1,
 2,
 2,
 4,
 2,
 2,
 6,
 1,
 2,
 3,
 4,
 3,
 3,
 11,
 2,
 4,
 3,
 6,
 6,
 4,
 3,
 0,
 8,
 9,
 1,
 3,
 8,
 17,
 3,
 2,
 13,
 1,
 1,
 3,
 17,
 20,
 2,
 2,
 3,
 5,
 7,
 2,
 2,
 2,
 3,
 2,
 6,
 5,
 1,
 1,
 3,
 2,
 2,
 2,
 2,
 2,
 4,
 3,
 5,
 2,
 1,
 3,
 0,
 2,
 3,
 8,
 0,
 3,
 3,
 5,
 7,
 3,
 2,
 3,
 1,
 5,
 8,
 3,
 3,
 1,
 3,
 2,
 3,
 3,
 7,
 2,
 3,
 2,
 2,
 3,
 2,
 3,
 2,
 3,
 2,
 5,
 3,
 1,
 3,
 4,
 3,
 18,
 1,
 9,
 0,
 2,
 5,
 2,
 12,
 1,
 3,
 12,
 3,
 2,
 3,
 3,
 32,
 2,
 3,
 2,
 6,
 9,
 4,
 3,
 3,
 2,
 2,
 2,
 3,
 1,
 7,
 3,
 2,
 0,
 3,
 1,
 5,
 2,
 2,
 2,
 1,
 4,
 3,
 2,
 0,
 2,
 2,
 2,
 6,
 0,
 4,
 3,
 4,
 3,
 2,
 2,
 2,
 3,
 2,
 2,
 3,
 4,
 2,
 11,
 2,
 2,
 2,
 2,
 3,
 

# Stats for Number of circles in each circle file

In [10]:
from scipy import stats


In [11]:
import scipy

In [12]:
# Summary statistics of the per-ego circle counts: number of observations,
# min/max, mean, variance, skewness and kurtosis.
scipy.stats.describe(filterCircleList)

DescribeResult(nobs=973, minmax=(0, 100), mean=4.177800616649537, variance=33.296543717407026, skewness=7.965901043742478, kurtosis=100.69109310613494)

In [7]:
# G1 = read_nodeadjlist("twitter/78813.edges")

# Define Girvan-Newman Community Detection

In [23]:
def comm_gen(G1, k):
    """Return the communities of the k-th Girvan-Newman partition of G1.

    At most ``k`` partitions are drawn from ``community.girvan_newman(G1)`` and
    the last one obtained is returned. If the generator is exhausted before
    ``k`` partitions have been produced, the last available partition is used.

    Parameters
    ----------
    G1 : graph
        Graph passed unchanged to ``community.girvan_newman``.
    k : int
        Maximum number of partitions to draw.

    Returns
    -------
    list of list
        One sorted list of nodes per community in the selected partition.

    Raises
    ------
    IndexError
        If no partition was drawn (for example when ``k`` is 0).
    """
    partitions = list(itertools.islice(community.girvan_newman(G1), k))
    # Only the deepest partition reached is of interest.
    final_partition = partitions[-1]
    return [sorted(members) for members in final_partition]

In [14]:
import itertools

In [8]:
# communities_generator = community.girvan_newman(G1)

In [9]:
# first_iteration_comm = tuple(sorted(c) for c in next(communities_generator))

In [12]:
# import itertools


# Read Ground Truth Files

In [15]:
def read_circles(filename):
    """Read circles from a tab-separated file.

    Each non-blank line consists of a circle label followed by the circle's
    member ids, all separated by tabs. The label is discarded. Blank lines are
    skipped, and empty fields (such as the one left by a trailing tab) are
    ignored.

    Parameters
    ----------
    filename : str
        Path of the circles file to read.

    Returns
    -------
    list of set of int
        One set of member ids per non-blank line, in file order. A line that
        holds only a label yields an empty set.

    Raises
    ------
    ValueError
        If a non-empty member field is not a valid integer.
    """
    final_lst = []
    # The context manager closes the file even if parsing fails part-way.
    with open(filename) as handle:
        for line in handle:
            if not line.strip():
                # A blank line is not a circle; do not record an empty one.
                continue
            fields = line.split('\t')
            # fields[0] is the circle label. int() tolerates the newline that
            # is still attached to the last id.
            final_lst.append({int(tok) for tok in fields[1:] if tok.strip()})
    return final_lst

# Calculate Total Loss over All Ego Networks

In [16]:
# For every ego network: build its graph, take the Girvan-Newman partition
# reached after at most 5 steps as the predicted circles, and add the edit
# distance to the ground-truth circles to the running total.
totalLoss = 0
cnt = 0  # stays 0 if there are no egos; otherwise ends as the number processed
for cnt, ego in enumerate(egoNodeList, start=1):
    G = read_nodeadjlist("twitter/" + ego + ".edges")
    predCircle = comm_gen(G, 5)
    gt_circles = read_circles("twitter/" + ego + ".circles")
    calLoss = loss1(gt_circles, predCircle)
    totalLoss = totalLoss + calLoss
    # Progress: running index followed by this ego's loss.
    print(cnt)
    print(calLoss)
print(totalLoss)

1
199
2
127
3
256
4
185
5
191
6
98
7
123
8
125
9
126
10
127
11
45
12
221
13
162
14
463
15
189
16
188
17
121
18
122
19
60
20
78
21
172
22
197
23
214
24
49
25
200
26
237
27
82
28
57
29
236
30
103
31
82
32
225
33
132
34
129
35
73
36
755
37
233
38
126
39
226
40
71
41
244
42
147
43
89
44
101
45
183
46
40
47
191
48
217
49
101
50
173
51
140
52
134
53
219
54
250
55
274
56
74
57
218
58
69
59
113
60
173
61
169
62
91
63
285
64
183
65
53
66
160
67
81
68
199
69
144
70
82
71
47
72
185
73
69
74
36
75
178
76
158
77
29
78
85
79
188
80
181
81
54
82
73
83
135
84
57
85
120
86
52
87
93
88
219
89
66
90
136
91
81
92
58
93
117
94
138
95
85
96
204
97
158
98
25
99
100
100
192
101
8
102
89
103
138
104
353
105
151
106
87
107
72
108
125
109
85
110
148
111
60
112
136
113
24
114
45
115
79
116
133
117
233
118
67
119
133
120
79
121
30
122
173
123
195
124
115
125
168
126
200
127
169
128
43
129
115
130
12
131
73
132
38
133
65
134
192
135
206
136
107
137
163
138
124
139
175
140
170
141
108
142
158
143
49
144
100
145
81
1

In [18]:
# Divide by the number of ego networks actually listed instead of a hard-coded
# 973, so the average stays correct if the dataset (or a subset of it) changes.
# With no egos the total is 0 and the average is reported as 0.0.
avgLoss = totalLoss / len(egoNodeList) if egoNodeList else 0.0

# Average Loss per Ego Network

In [19]:
# Display the average loss computed in the previous cell.
avgLoss

139.1284686536485