In [None]:
import igraph as ig
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import collections

# Reading datasets

## Karate data

In [None]:
# Load the karate-club network from its GML file into G.
# NOTE(review): later cells read the 'name' vertex attribute of G - confirm the
# GML file provides it.
G = ig.read('karate.gml')

## Wikipedia data

In [None]:
# Read the edge list (two space-separated columns) and build the graph;
# the values of each row become the vertex names.
# NOTE(review): Matrix Market (.mtx) files usually start with header/size
# lines - confirm those are not being parsed as edges.
df = pd.read_csv('soc-wiki-Vote.mtx', sep=' ', names=['a', 'b'])
tuples = list(map(tuple, df.values))
G = ig.Graph.TupleList(tuples)

## university email data

In [None]:
# Read the edge list (two space-separated columns) and build the graph;
# the values of each row become the vertex names.
df = pd.read_csv('email-univ.edges', sep=' ', names=['a', 'b'])
tuples = list(map(tuple, df.values))
G = ig.Graph.TupleList(tuples)

## hamsterster data

In [None]:
# Read the edge list (two space-separated columns) and build the graph;
# the values of each row become the vertex names.
df = pd.read_csv('soc-hamsterster.edges', sep=' ', names=['a', 'b'])
tuples = list(map(tuple, df.values))
G = ig.Graph.TupleList(tuples)

## tvshow pages data

In [None]:
# Read the edge list (two comma-separated columns) and build the graph;
# the values of each row become the vertex names.
df = pd.read_csv('fb-pages-tvshow.edges', sep=',', names=['a', 'b'])
tuples = list(map(tuple, df.values))
G = ig.Graph.TupleList(tuples)

## Facebook data

In [None]:
# Read the edge list (two space-separated columns) and build the graph;
# the values of each row become the vertex names.
df = pd.read_csv('facebook_combined.txt', sep=' ', names=['a', 'b'])
tuples = list(map(tuple, df.values))
G = ig.Graph.TupleList(tuples)

## Politician pages data

In [None]:
# Read the edge list (two comma-separated columns) and build the graph;
# the values of each row become the vertex names.
df = pd.read_csv('fb-pages-politician.edges', sep=',', names=['a', 'b'])
tuples = list(map(tuple, df.values))
G = ig.Graph.TupleList(tuples)

## last.fm data

In [None]:
# Read the comma-separated edge list (no column names are passed, so pandas
# takes the first row as the header) and build the graph from its rows.
test = pd.read_csv('lastfm_asia_edges.csv', sep=',')
tuples = list(map(tuple, test.values))
G = ig.Graph.TupleList(tuples)

## Deezer europe network

In [None]:
# Read the comma-separated edge list (no column names are passed, so pandas
# takes the first row as the header) and build the graph from its rows.
test = pd.read_csv('deezer_europe_edges.csv', sep=',')
tuples = list(map(tuple, test.values))
G = ig.Graph.TupleList(tuples)

# Pre-processing

In [None]:
# k-shell decomposition: K[v] is the shell index of the vertex with index v.
K = G.shell_index()
# Keep the shell index on the graph as the 'shell' vertex attribute as well.
G.vs['shell'] = K

In [None]:
# Vertex indices of the whole graph.
nodes = list(G.vs.indices)

# The global core is the set of vertices sitting in the highest k-shell.
# The maximum is computed once here instead of once per vertex inside the
# comprehensions below.
max_shell = max(K)
global_core = [x for x in nodes if K[x] == max_shell] # indices of the global core vertices
global_core_name = [x['name'] for x in G.vs if x['shell'] == max_shell] # 'name' attribute of the global core vertices
#[i for i in G.vs] # to understand what name of node means

In [None]:
# finding modularity of the network
# `clus` holds the community partition returned by the multilevel method;
# the last expression shows the modularity of that partition as the cell output.
clus = G.community_multilevel()
G.modularity(clus.membership)

In [None]:
# Count the communities and tag every vertex with its community number
# (1..comm_count) in the 'community' vertex attribute.
#
# The partition already stored in `clus` is reused rather than running
# community detection a second time: this avoids the extra computation and
# guarantees the numbering refers to the same partition whose modularity was
# reported (a fresh run is not guaranteed to return the same partition -
# NOTE(review): confirm for the igraph version in use).
comm_count = len(clus)
# clus.membership[v] is the 0-based cluster id of vertex v, i.e. the position
# of its cluster in `clus`; shift by one to get 1-based community numbers.
G.vs['community'] = [cluster_id + 1 for cluster_id in clus.membership]

In [None]:
# Shell distribution of every community:
#   fData[community_number] = {shell_number: number_of_nodes_in_that_shell}
# Built in one pass over the vertices instead of rescanning the whole graph
# once per community.
fData = dict()
for num in range(1, comm_count + 1):
    fData[num] = dict()
for vertex in G.vs:
    data = fData[vertex['community']]
    data[vertex['shell']] = data.get(vertex['shell'], 0) + 1

In [None]:
#function to find average shell number of a community
def avg_shell(x):
    """Return the node-weighted mean shell number.

    x is a sequence of (shell_number, node_count) pairs. Raises
    ZeroDivisionError when the node counts sum to zero (e.g. x is empty).
    """
    weighted_total = 0
    node_total = 0
    for entry in x:
        weighted_total = weighted_total + entry[0] * entry[1]
        node_total = node_total + entry[1]
    return weighted_total / node_total

In [None]:
#function to find standard deviation of shell number in a community
def std_div(x):
    """Return the node-weighted standard deviation of the shell number.

    x is a sequence of (shell_number, node_count) pairs. Raises
    ZeroDivisionError when the node counts sum to zero (e.g. x is empty).
    """
    # The weighted mean does not change inside the loop, so compute it once
    # rather than on every iteration.
    mean = avg_shell(x)
    deviations = 0
    sum_e = 0
    for entry in x:
        deviations = deviations + (((entry[0] - mean) ** 2) * entry[1])
        sum_e = sum_e + entry[1]
    variance = deviations / sum_e
    return variance ** (1/2)

In [None]:
#function to find dispersion index of each community
def dispersion(x):
    """Return the dispersion index: squared standard deviation over the mean.

    x is a sequence of (shell_number, node_count) pairs, passed unchanged to
    std_div and avg_shell.
    """
    spread = std_div(x)
    centre = avg_shell(x)
    return (spread ** 2) / centre

In [None]:
#function to find size of each community
def com_size(x):
    """Return the total node count of a community.

    x is a sequence of (shell_number, node_count) pairs; the result is the sum
    of the second items (0 for an empty sequence).
    """
    total = 0
    for entry in x:
        total = total + entry[1]
    return total

## Entropy

In [None]:
def getEntropy(i):
    """Return the Shannon entropy (base 2) of community i's shell distribution.

    Reads the module-level fData: fData[i] maps shell number -> node count.
    Each count is turned into a probability by dividing by the community's
    total, and the result is the sum of -p * log2(p) over the shells.
    """
    counts = list(fData[i].values())
    total = sum(counts)
    # The terms are negated individually (rather than negating the final sum)
    # so that a community confined to a single shell reports 0.0, not -0.0.
    return sum(-(count / total) * np.log2(count / total) for count in counts)
Entropy = [getEntropy(i) for i in range(1,comm_count+1)]

## Influence measure

In [None]:
def getInfluence(commno):
    """Return the influence measure of community commno.

    Reads the module-level fData and K. For every shell s present in the
    community, the term is
        s * (community nodes in shell s) / (all graph nodes in shell s),
    and the result is the sum of those terms.
    """
    # Size of every shell in the whole graph, counted in one pass over K
    # instead of rescanning all vertices for each shell of the community.
    shell_sizes = collections.Counter(K)
    return sum(shell * count / shell_sizes[shell]
               for shell, count in fData[commno].items())
Influence = [getInfluence(i) for i in range(1,comm_count+1)]

## Writing data into dataframe

In [None]:
# One row of summary statistics per community, in community-number order.
data = []
for i in range(1, len(fData) + 1):
    # (shell_number, node_count) pairs of community i, ordered by shell number
    od = sorted(fData[i].items())
    row = [i, com_size(od), avg_shell(od), std_div(od), dispersion(od)]
    data.append(row)
d = pd.DataFrame(data, columns=["comm_no","comm_size","mean","std_dev","dispersion"])


In [None]:
# Adding the per-community entropy and influence values to the dataframe.
# Both lists were built over community numbers 1..comm_count.
d['Entropy'] = Entropy
d['Influence'] = Influence

In [None]:
# Show the data frame sorted by community size (ascending order).
# The sorted frame is only displayed as the cell output; it is not assigned
# back, so `d` itself is left as it was.
d.sort_values('comm_size')

In [None]:
# d.to_excel('Results_dr.xlsx',sheet_name='Sheet1')

## 5.1 Comparison between most influential and largest communities

In [None]:
# Community numbers to compare (keys of fData); pick them per data set.
comm_num1 = 13 #values change for each data set
comm_num2 = 4 #values change for each data set
od_1 = collections.OrderedDict(sorted(fData[comm_num1].items())) #sorts the data by shell numbers for community 1
od_2 = collections.OrderedDict(sorted(fData[comm_num2].items())) #sorts the data by shell numbers for community 2

X1 = list(od_1.keys()) #shell numbers of first community
X2 = list(od_2.keys()) #shell numbers of second community
Y1 = list(od_1.values())#number of nodes in each shell for first community
Y2 = list(od_2.values())#number of nodes in each shell for second community

plt.figure(figsize=(10,5))
plt.xlabel('Shell Number')
plt.ylabel('Number of Nodes')
# Legend labels are derived from the chosen community numbers so they cannot
# go stale when comm_num1 / comm_num2 are changed for another data set.
plt.plot(X1,Y1,label='Community_{}'.format(comm_num1))
plt.plot(X2,Y2,label='Community_{}'.format(comm_num2))
plt.legend()
plt.savefig('NodesinEachShell_Comapre.jpg')

## 5.2 Correlation of Shell Number and Fraction of nodes present in them

In [None]:
def getPlot(fData,i):
    """Plot, for community i, the fraction of each shell's nodes that it holds.

    fData maps community number -> {shell_number: node_count}; i is the
    community number. Shell sizes for the whole network are read from the
    'shell' vertex attribute of the module-level graph G. The figure is saved
    as 'test.<i>.jpg' and the list returned by plt.plot is returned.
    """
    # (shell_number, node_count) pairs of the community, ordered by shell number
    shell_counts = sorted(fData[i].items())

    # number of nodes of the whole network in each shell
    shell_len = collections.Counter(G.vs['shell'])

    # fraction of every shell's nodes that belong to this community
    r = [[shell, count / shell_len[shell]] for shell, count in shell_counts]

    # ignore shells that lie entirely inside this community (fraction == 1)
    R = [(shell, fraction) for shell, fraction in r if fraction != 1]

    x = [shell for shell, fraction in R] # shell number
    y = [fraction for shell, fraction in R] # fraction of nodes present in shell number
    plt.figure(figsize=(15,5))
    plt.xticks(x,rotation=90)
    plt.xlabel('Shell Number')
    plt.ylabel('Fraction of nodes present')
#     plt.show()
    p = plt.plot(x,y)
    # No loop variable reuses the name `i`, so the file name carries the
    # community number that was passed in.
    plt.savefig('test.{}.jpg'.format(i))
    return p

In [None]:
# The second argument is the community number, i.e. a key of fData (1..comm_count).
getPlot(fData,4) #change the number to get plot for each community 

## 6.2 Entropy and mean shell distribution:

In [None]:
# Scatter plot of each community's entropy against its weighted mean shell number.
X = list(d['Entropy'])
Y = list(d['mean'])
fig, ax = plt.subplots(figsize=(12,7))
ax.scatter(X, Y)
ax.set_xlabel('Entropy')
ax.set_ylabel('Weighted Mean')
fig.savefig('EntropyVSMeanShell_deezer.jpg')

## 6.3 Entropy and influence measure:

In [None]:
# Bubble plot: entropy (x) against community number (y); the marker area is
# ten times the community's influence measure.
Y = list(d['comm_no'])
X = list(d['Entropy'])
A = [influence*10 for influence in d['Influence']]
# The colours only serve to tell the bubbles apart. They are drawn from a
# fixed-seed generator so the saved figure is identical on every run.
colors = np.random.default_rng(0).random(comm_count)
plt.figure(figsize=(15,7))
plt.scatter(X,Y,s=A,c=colors,alpha=0.5)
plt.xlabel('Entropy')
plt.ylabel('Community Number')
plt.savefig('EntropyVSCommno_Dezzer.jpg')

## 7 Global core and Local core properties

In [None]:
#A function to find the size of the intersection between two collections
def common_elements(a,b):
    """Return how many elements a and b have in common.

    Both inputs are walked in sorted order and equal items are paired one to
    one, so an item repeated in both inputs is counted as many times as it
    can be paired. The inputs themselves are not modified: sorted copies are
    used, so the caller's lists keep their original order.
    """
    left = sorted(a)
    right = sorted(b)
    i, j = 0, 0
    matches = 0
    while i < len(left) and j < len(right):
        if left[i] == right[j]:
            matches += 1
            i += 1
            j += 1
        elif left[i] < right[j]:
            i += 1
        else:
            j += 1
    return matches

In [None]:
# Jaccard similarity between the global core and the local core of every
# community (both compared by vertex name).
#
# The communities are taken from the partition stored in `clus` instead of
# re-running community detection on every iteration: that is far cheaper and
# every iteration is guaranteed to index into the same partition.
jaccard = []
for i in range(0,comm_count):
    G_comm = G.subgraph(clus[i]) #induced subgraph of community i
    K_local = G_comm.shell_index() #shell numbers w.r.t. the subgraph
    local_max = max(K_local) #computed once, not once per vertex
    local_core = [v['name'] for v in G_comm.vs if K_local[v.index] == local_max] #names of the local core vertices
    U = len(global_core_name)
    V = len(local_core)
    intersection = common_elements(global_core_name,local_core)
    union = U + V - intersection
    jaccard.append(intersection/union)

In [None]:
# Per-community overlap statistics between the community, its local core
# (highest shell of the community's own subgraph) and the global core.
fraction_of_community_nodes_in_local_core=[]
fraction_of_community_nodes_in_gobal_core=[]
fraction_of_global_core_in_local_core=[]
fraction_of_local_core_in_global_core=[]

global_core_set = set(global_core)

for i in range(0,comm_count):
    # Take the community from the stored partition `clus` once per iteration
    # instead of re-running community detection for every use (including once
    # per vertex inside the local-core comprehension).
    community = clus[i]
    G_comm = G.subgraph(community) #induced subgraph of community i
    K_local = G_comm.shell_index()
    G_comm.vs['shell'] = K_local #shell numbers w.r.t. the subgraph
    local_max = max(K_local)
    # Subgraph vertex x is mapped back to the graph index community[x]; this
    # relies on the subgraph keeping the vertices in the order of `community`.
    local_core = [community[x] for x in G_comm.vs.indices if K_local[x] == local_max]
    community_in_global = len(global_core_set.intersection(community))
    local_in_global = len(global_core_set.intersection(local_core))
    fraction_of_community_nodes_in_local_core.append(round(len(local_core)/len(community),3)) #fraction of community nodes in local core
    fraction_of_community_nodes_in_gobal_core.append(round(community_in_global/len(community),3)) #fraction of community nodes in global core
    fraction_of_global_core_in_local_core.append(round(local_in_global/len(global_core),3)) #fraction of global core in local core
    fraction_of_local_core_in_global_core.append(round(local_in_global/len(local_core),3)) #fraction of local core in global core

In [None]:
# Collect the per-community core statistics into one table and export it.
# Note: this rebinds `d`, replacing the per-community statistics frame that
# was built earlier in the notebook.
data = [
    [
        jaccard[i],
        fraction_of_community_nodes_in_local_core[i],
        fraction_of_community_nodes_in_gobal_core[i],
        fraction_of_global_core_in_local_core[i],
        fraction_of_local_core_in_global_core[i],
    ]
    for i in range(0, comm_count)
]
d = pd.DataFrame(data, columns=["jaccard","fract_comm_local","fract_comm_gobal","fract_global_local","fract_local_global"])
d.to_excel('task2_deezer.xlsx', sheet_name='Sheet1')

In [None]:
# Display the global/local core comparison table built in the previous cell.
d