In [36]:
import networkx as nx
import numpy as np
import pandas as pd

import networkx.algorithms.community as nx_comm

import math

def entropy(nums):
    z = np.bincount(nums)
    N = len(nums)
    assert nums.shape == (N, )
    ent = 0.0
    for e in z:
        if e != 0:
            p = float(e) / N
            ent += p*math.log(p)
    assert ent <= 0
    ent = -ent

    assert ent >=0
    return ent
def computeNMI(clusters, classes):

    assert clusters.shape == classes.shape
    A = np.c_[(clusters, classes)]
    A = np.array(A)
    N = A.shape[0]
    assert A.shape == (N, 2)

    H_clusters = entropy(A[:, 0])
    H_classes = entropy(A[:, 1])
    # print H_clusters
    # print H_classes
    # assert N == 17
    NMI = 0.0
    for k in np.unique(A[:, 0]):
        # get elements in second column that have first column equal to j
        z = A[A[:, 0] == k, 1]
        len_wk = len(z)
        t = A[:, 1]
        #for each unique class in z
        for e in np.unique(z):

            wk_cj=len(z[z==e])
            len_cj=len(t[t == e])
            assert wk_cj <= len_cj
            numerator= (float(wk_cj) / float(N)) * math.log( (N*wk_cj) / float(len_wk * len_cj)  )
            NMI += numerator
    NMI /= float((H_clusters + H_classes) * 0.5)

    assert (NMI > 0.0 or abs(NMI) < 1e-10) and (NMI < 1.0 or abs(NMI - 1.0) < 1e-10)
    return NMI



def check_res(res,Adj):
    print(res)
    for i in range(0,len(res)):
        if Adj[i,res[i]] ==0:
            return False
    return True





def community_detection(nodes,edges,population=15,generation=30,r=1.5):
    
    graph=nx.Graph() 
    graph.add_nodes_from(nodes) #adds nodes
    graph.add_edges_from(edges) #add edges
    Adj = nx.adjacency_matrix(graph) 
   
    nodes_length = len(graph.nodes())
    d = {"chrom":[generate_chrom(nodes_length,Adj) for n in range(population)]}
    dframe = pd.DataFrame(data= d)
    dframe["subsets"] = dframe["chrom"].apply(find_subsets)
    dframe["community_score"]=dframe.apply(lambda x: community_score(x["chrom"],x["subsets"],r,Adj),axis=1)
    W = 0.732
    c1 = 0.7
    c2 = 0.3
    
    V = [np.random.randint(-nodes_length/3, nodes_length/3)]*len(dframe)
    dframe["vel"] = V
    
    population_count = population
    gen = 0
    localbest= []
    globalbest=[]
    while gen < generation:
        for i in range(int(np.floor(population/10))):
            
            localbest = dframe
            if gen!=0:
                for index , particle in dframe.iterrows():
                    CC = particle['chrom']
                    LBC = localbest.loc[index]['chrom']
                    GBC = globalbest['chrom']
                    newvel=0
                    for gene in range(0,len(particle['chrom'])):
                        vel = (W*particle['vel']) + c1*(np.random.rand())*(LBC[gene]-CC[gene]) + c2*(np.random.rand())*(GBC[gene]-CC[gene])
                        newgene = CC[gene] + int(vel)
                        if newgene>=0 and newgene<Adj.shape[0] and Adj[gene,newgene] == 1:
                            CC[gene] = newgene
                        else:
                            vel = particle['vel']
                        newvel+=vel
                    newvel /= len(particle['chrom'])
                    newsub = find_subsets(CC)
                    newscore = community_score(CC,newsub,r,Adj)
                    if(newscore>localbest.loc[index]['community_score']):
                        dframe.at[index,'subsets'] = newsub
                        dframe.at[index,'chrom'] = [g for g in CC]
                        dframe.at[index,'community_score'] = newscore
                    dframe.at[index,'vel'] = newvel
            
            globalbest = dframe.sort_values("community_score",ascending=False).index[0]
            globalbest = dframe.loc[globalbest]                   
        gen +=1   
    
    sorted_df = dframe.sort_values("community_score",ascending=False).index[0]
    res = dframe.loc[sorted_df]
    
    
    nodes_subsets = res["subsets"]
    
    res = res['chrom']
    
#     istrue = check_res(res,Adj)
#     print(istrue)
    
    nodes_list = list(graph.nodes())
    result = []
    for subs in nodes_subsets:
        subset = []
        for n in subs:
            subset.append(nodes_list[n])
        result.append(subset)
        
        
        
        
    NMI = 0
    clu = dframe.loc[sorted_df]
    clu = clu['chrom']
    clu = np.array(clu)
    for index, target in dframe.iterrows():
        temp = np.array(target['chrom'])
        x = computeNMI(clu,temp)
        NMI += x
    NMI /= len(dframe)
    print('NMI')
    print(NMI)
    
    print('MODULARITY: ')
    
    modularity = nx_comm.modularity(graph, result)
    print(modularity)
    return result
































# returning the the array of column indexes where randomly chosen till value is 1 
def generate_chrom(nodes_length,Adj):
    chrom = np.array([],dtype=int)
    for x in range(nodes_length):
        rand = np.random.randint(0,nodes_length)
        while Adj[x,rand] != 1:
            rand = np.random.randint(0,nodes_length)
        chrom = np.append(chrom,rand)
    return chrom



def merge_subsets(sub):
    arr =[]
    to_skip=[]
    for s in range(len(sub)):
        if sub[s] not in to_skip:
            new = sub[s]
            for x in sub:
                if sub[s] & x:
                    new = new | x
                    to_skip.append(x)
            arr.append(new)
    return arr

def find_subsets(chrom):
    sub = [{x,chrom[x]} for x in range(len(chrom))]
    result=sub
    i=0
    while i<len(result):
        candidate = merge_subsets(result)
        if candidate != result:
            result = candidate
        else:
            break
        result=candidate
        i+=1
    return result

def community_score(chrom,subsets,r,Adj):
    matrix = Adj.toarray()
    CS=0
    for s in subsets:
        submatrix = np.zeros((len(chrom),len(chrom)),dtype=int)
        for i in s:
            for j in s:
                submatrix[i][j]=matrix[i][j]
        M=0
        v=0
        PS=0
        for row in list(s):
            ki = np.sum(matrix[row])
            kiin = np.sum(submatrix[row])
            kiout = ki - kiin
            P= kiin/ki
            PS+=P
            row_mean = kiin/len(s)
            v+=np.sum(submatrix[row])
            M+=(row_mean**r)/len(s)
        CS+=M*v
    OS= 0.5*CS/len(subsets) + 0.5*(1/PS)*len(subsets)  #Overall score is calculated by maximizing CS and min PS
    return OS

        
    
    

def roulette_selection(df_elites):
    prob = np.random.random_sample()
#     print('prob',prob)
    sum_cs=np.sum(df_elites["community_score"])
#     print(sum_cs)
    x=0
    selected = 0
    for i in df_elites.index:
        x += df_elites["community_score"][i]
            
        X=x/sum_cs
#         print('X',X)
        if prob < X:
            chosen=i
            break
    return chosen

def crossover(parent,trial,crossover_rate):
    if np.random.random_sample() >= crossover_rate:
        length = len(parent['chrom'])
        mask = np.random.randint(2, size=length)
        child = np.zeros(length,dtype=int)
        for i in range(len(mask)):
            if mask[i] == 1:
                child[i]=trial[i]
            else:
                child[i]=parent['chrom'][i]
        return child
    else:
        return trial
    
    
    

def mutation(parent , target , rand1 , rand2 ,Adj,mutation_rate):
    trial = []
   
    for i in range(0,len(parent['chrom'])):
        
        temp = target['chrom'][i] + mutation_rate*(rand1['chrom'][i] - rand2['chrom'][i])
        temp = int(temp)
#         print(temp)
        if temp < 0 or temp >= Adj.shape[0] or Adj[i,temp]==0:
            temp = target['chrom'][i]
        trial.append(temp)
    return trial
























nodes = []
edges = np.loadtxt('football.txt')
for i in edges:
    for j in i:
        if j not in nodes:
            nodes.append(int(j))
    

# print(nodes)
arr = community_detection(nodes,edges)


print(arr)



#nx.draw_networkx_nodes(graph, arr[0], node_color="tab:blue")

  Adj = nx.adjacency_matrix(graph)


NMI
0.8724506961398314
MODULARITY: 
0.2357312604286144
[[1, 42], [2, 25, 34, 91, 111, 26, 72, 90, 104, 106, 4, 15, 73, 85, 103, 50, 11, 89, 108], [83, 112, 5, 10, 17, 24, 94, 105, 65], [52, 51, 36, 19, 55, 58, 102, 32, 62, 30, 31, 45, 6, 12, 59, 92, 29, 56], [114, 66], [13, 18, 35, 39, 28, 100, 93, 63, 88, 96, 97, 71, 76, 77, 27, 67, 57, 54, 82, 49, 87, 70, 84, 98, 113], [80, 95, 37, 64, 44, 81, 60, 38, 46, 21, 110, 86, 20, 14, 43], [101, 107, 40, 3, 7, 16, 48, 61], [75, 47, 41, 53, 115, 99, 68, 74], [33, 9, 22, 23, 79, 78, 109], [8, 69]]
