In [1]:
import pandas as pd
import numpy as np
import itertools
import json

In [28]:
#load the edge list; 'data' is a placeholder for the path of the tab-delimited input file,
#which would be either "reachability_out.txt" or "web-Stanford-out.txt"
#only the 'FromNodeId' and 'ToNodeId' columns are read
data = pd.read_csv('data', delimiter = '\t', usecols = ['FromNodeId', 'ToNodeId'])

In [None]:
#initialize adjacency lists
#a node can appear only as a target, so the id range has to cover both columns;
#otherwise add_edge would hit a missing key for such a node
min_node = min(data['FromNodeId'].min(), data['ToNodeId'].min())
max_node = max(data['FromNodeId'].max(), data['ToNodeId'].max())

adj_list_away = {}    #node -> list of nodes it points to
adj_list_toward = {}  #node -> list of nodes that point to it

#every id in the range gets an (initially empty) entry in both lists
for node in range(min_node, max_node+1):
    adj_list_away[node] = []
    adj_list_toward[node] = []

In [None]:
def add_edge(adj_list, node, target):
    '''
    Record target as a neighbour of node in the given adjacency list.

    Input: adj_list (dict of node -> list), node and target (anything int() accepts)
    Output: None; the list stored under int(node) is extended in place
    '''
    #look the bucket up first so a missing node fails before target is converted
    neighbours = adj_list[int(node)]
    neighbours.append(int(target))

In [None]:
#fill both adjacency lists from the edge table
#the two id columns are walked in lockstep rather than building a Series per row with iterrows
for from_node, to_node in zip(data['FromNodeId'], data['ToNodeId']):
    add_edge(adj_list_away, from_node, to_node)     #from_node points to to_node
    add_edge(adj_list_toward, to_node, from_node)   #to_node is pointed at by from_node

In [None]:
def count_M1(adj_list_away, adj_list_toward):
    '''
    Count M1 patterns: v1 -> v2 -> v3 -> v1 with none of the three reverse edges present.

    Input: adj_list_away (node -> list of nodes it points to),
           adj_list_toward (node -> list of nodes pointing to it)
    Output: (number of distinct node triples found,
             dict mapping each sorted node pair to the number of found triples containing it)
    '''
    found = set()

    for v1, v1_out in adj_list_away.items():
        for v2 in v1_out:
            for v3 in adj_list_away[v2]:
                #fetch all four neighbour lists before testing anything
                v3_out = adj_list_away[v3]
                v3_in = adj_list_toward[v3]
                v2_in = adj_list_toward[v2]
                v1_in = adj_list_toward[v1]

                closes_cycle = v1 in v3_out
                no_reverse_edge = v1 not in v3_in and v3 not in v2_in and v2 not in v1_in

                if closes_cycle and no_reverse_edge:
                    #sorting makes every rotation of the same cycle collapse to one entry
                    found.add(tuple(sorted([v1, v2, v3])))

    pair_counts = {}
    for triple in found:
        for pair in itertools.combinations(triple, 2):
            pair_counts[pair] = pair_counts.get(pair, 0) + 1

    return len(found), pair_counts

In [None]:
def count_M2(adj_list_away, adj_list_toward):
    '''
    Count M2 patterns: v1 -> v2 -> v3 -> v1 where v2 also points back to v1 and the
    other two reverse edges are absent.

    Input: adj_list_away (node -> list of nodes it points to),
           adj_list_toward (node -> list of nodes pointing to it)
    Output: (number of distinct node triples found,
             dict mapping each sorted node pair to the number of found triples containing it)
    '''
    def is_match(a, b, c):
        #all four lists are looked up before any membership test is made
        c_out, c_in = adj_list_away[c], adj_list_toward[c]
        b_in, a_in = adj_list_toward[b], adj_list_toward[a]
        return a in c_out and a not in c_in and c not in b_in and b in a_in

    #a set of sorted tuples keeps one entry per node triple
    triangles = {
        tuple(sorted((a, b, c)))
        for a in adj_list_away
        for b in adj_list_away[a]
        for c in adj_list_away[b]
        if is_match(a, b, c)
    }

    edge_dict = {}
    for tri in triangles:
        for edge in itertools.combinations(tri, 2):
            if edge in edge_dict:
                edge_dict[edge] += 1
            else:
                edge_dict[edge] = 1

    return len(triangles), edge_dict

In [None]:
def count_M3(adj_list_away, adj_list_toward):
    '''
    Count M3 patterns: v1 -> v2 -> v3 -> v1 where v2 -> v1 and v3 -> v2 also exist
    but v1 -> v3 does not.

    Input: adj_list_away (node -> list of nodes it points to),
           adj_list_toward (node -> list of nodes pointing to it)
    Output: (number of distinct node triples found,
             dict mapping each sorted node pair to the number of found triples containing it)
    '''
    triangles = set()

    for first in adj_list_away:
        for second in adj_list_away[first]:
            for third in adj_list_away[second]:
                third_out = adj_list_away[third]
                third_in = adj_list_toward[third]
                second_in = adj_list_toward[second]
                first_in = adj_list_toward[first]

                #reject the candidate as soon as one requirement fails
                if first not in third_out:
                    continue
                if first in third_in:
                    continue
                if third not in second_in:
                    continue
                if second not in first_in:
                    continue

                triangles.add(tuple(sorted((first, second, third))))

    edge_dict = {}
    for tri in triangles:
        for edge in itertools.combinations(tri, 2):
            edge_dict.setdefault(edge, 0)
            edge_dict[edge] += 1

    return len(triangles), edge_dict

In [None]:
def count_M4(adj_list_away, adj_list_toward):
    '''
    Count M4 patterns: three distinct nodes with edges in both directions between
    every pair.

    Input: adj_list_away (node -> list of nodes it points to),
           adj_list_toward (node -> list of nodes pointing to it)
    Output: (number of distinct node triples found,
             dict mapping each sorted node pair to the number of found triples containing it)
    '''
    triangles = set()

    for v1, v1_out in adj_list_away.items():
        for v2 in v1_out:
            for v3 in adj_list_away[v2]:
                #the list is built in full, so every requirement is evaluated
                requirements = [
                    v1 in adj_list_away[v3],
                    v1 in adj_list_toward[v3],
                    v3 in adj_list_toward[v2],
                    v2 in adj_list_toward[v1],
                    v1 != v2,
                    v1 != v3,
                    v2 != v3,
                ]
                if all(requirements):
                    triangles.add(tuple(sorted([v1, v2, v3])))

    edge_dict = {}
    for tri in triangles:
        for edge in itertools.combinations(tri, 2):
            try:
                edge_dict[edge] += 1
            except KeyError:
                edge_dict[edge] = 1

    return len(triangles), edge_dict

In [None]:
def count_M5(adj_list_away, adj_list_toward):
    '''
    Count M5 patterns: parent -> child1, parent -> child2 and child1 -> child2,
    with none of the three reverse edges present.

    Input: adj_list_away (node -> list of nodes it points to),
           adj_list_toward (node -> list of nodes pointing to it)
    Output: (number of distinct node triples found,
             dict mapping each sorted node pair to the number of found triples containing it)
    '''
    triangles = set()

    for parent, children in adj_list_away.items():
        #every ordered pair of children, including a child paired with itself
        for child1, child2 in itertools.product(children, repeat=2):
            requirements = [
                child2 in adj_list_away[child1],
                child2 not in adj_list_toward[child1],
                child1 not in adj_list_toward[parent],
                child2 not in adj_list_toward[parent],
            ]
            if all(requirements):
                triangles.add(tuple(sorted((parent, child1, child2))))

    edge_dict = {}
    for tri in triangles:
        for edge in itertools.combinations(tri, 2):
            edge_dict[edge] = edge_dict.get(edge, 0) + 1

    return len(triangles), edge_dict

In [None]:
def count_M6(adj_list_away, adj_list_toward):
    '''
    Count M6 patterns: parent -> child1, parent -> child2 and child1 <-> child2,
    where neither child points back to the parent.

    Input: adj_list_away (node -> list of nodes it points to),
           adj_list_toward (node -> list of nodes pointing to it)
    Output: (number of distinct node triples found,
             dict mapping each sorted node pair to the number of found triples containing it)
    '''
    triangles = set()

    for parent in adj_list_away: #checks each parent node
        parent_in = adj_list_toward[parent]

        for child1 in adj_list_away[parent]: #checks first child node
            for child2 in adj_list_away[parent]: #checks second child node

                #a child with a self-loop would otherwise pair with itself and be
                #reported as a two-node "triangle"
                if child1 == child2:
                    continue

                if (child2 in adj_list_away[child1] and child2 in adj_list_toward[child1]
                        and child1 not in parent_in and child2 not in parent_in):
                    triangles.add(tuple(sorted((parent, child1, child2)))) #one entry per node triple

    edge_dict = {}
    for tri in triangles:
        for edge in itertools.combinations(tri, 2):
            edge_dict[edge] = edge_dict.get(edge, 0) + 1 #add to edge count

    return len(triangles), edge_dict

In [None]:
def count_M7(adj_list_away, adj_list_toward):
    '''
    Count M7 patterns: parent1 <-> parent2, parent1 -> child and parent2 -> child,
    where the child points back to neither parent.

    Input: adj_list_away (node -> list of nodes it points to),
           adj_list_toward (node -> list of nodes pointing to it)
    Output: (number of distinct node triples found,
             dict mapping each sorted node pair to the number of found triples containing it)
    '''
    triangles = set()

    for parent1, parent1_out in adj_list_away.items(): #checks each first parent
        parent1_in = adj_list_toward[parent1]

        #parent2 must be pointed at by parent1, so only parent1's out-neighbours
        #can qualify; scanning every node in the graph is unnecessary
        for parent2 in parent1_out:

            #a self-loop on parent1 must not let it act as both parents
            if parent2 == parent1:
                continue

            parent2_out = adj_list_away[parent2]
            if parent1 not in parent2_out: #the edge between the parents must be reciprocated
                continue

            parent2_in = adj_list_toward[parent2]

            for child in parent1_out: #checks each node parent1 points to
                if (child in parent2_out and child not in parent1_in
                        and child not in parent2_in):
                    triangles.add(tuple(sorted((parent1, parent2, child)))) #one entry per node triple

    edge_dict = {}
    for tri in triangles:
        for edge in itertools.combinations(tri, 2):
            edge_dict[edge] = edge_dict.get(edge, 0) + 1 #add to edge count

    return len(triangles), edge_dict

In [None]:
def count_M8(adj_list_away, adj_list_toward):
    '''
    Count M8 patterns: parent -> child1 and parent -> child2 for two different
    children, with no edge between the children and no edge back to the parent.

    Input: adj_list_away (node -> list of nodes it points to),
           adj_list_toward (node -> list of nodes pointing to it)
    Output: (number of distinct node triples found,
             dict mapping each sorted node pair to the number of found triples containing it)
    '''
    triangles = set()

    for parent, children in adj_list_away.items():
        for child1 in children:
            for child2 in children:
                child1_out = adj_list_away[child1]
                child1_in = adj_list_toward[child1]
                parent_in = adj_list_toward[parent]

                children_linked = child2 in child1_out or child2 in child1_in
                points_back = child1 in parent_in or child2 in parent_in

                if child1 != child2 and not children_linked and not points_back:
                    triangles.add(tuple(sorted([parent, child1, child2])))

    edge_dict = {}
    for tri in triangles:
        #all three node pairs are tallied, including the unconnected children pair
        for edge in itertools.combinations(tri, 2):
            if edge in edge_dict:
                edge_dict[edge] += 1
            else:
                edge_dict[edge] = 1

    return len(triangles), edge_dict

In [None]:
def count_M9(adj_list_away, adj_list_toward):
    '''
    Count M9 patterns: v1 -> v2 -> v3 with v1 and v3 different, no edge between
    v1 and v3, and neither of the two path edges reciprocated.

    Input: adj_list_away (node -> list of nodes it points to),
           adj_list_toward (node -> list of nodes pointing to it)
    Output: (number of distinct node triples found,
             dict mapping each sorted node pair to the number of found triples containing it)
    '''
    triangles = set()

    for v1 in adj_list_away:
        for v2 in adj_list_away[v1]:
            for v3 in adj_list_away[v2]:
                #any one of these extra edges disqualifies the path
                extra_edges = [
                    v1 in adj_list_away[v3],
                    v1 in adj_list_toward[v3],
                    v3 in adj_list_toward[v2],
                    v2 in adj_list_toward[v1],
                ]
                if v1 != v3 and not any(extra_edges):
                    triangles.add(tuple(sorted((v1, v2, v3))))

    edge_dict = {}
    for tri in triangles:
        for edge in itertools.combinations(tri, 2):
            edge_dict[edge] = 1 + edge_dict.get(edge, 0)

    return len(triangles), edge_dict

In [None]:
def count_M10(adj_list_away, adj_list_toward):
    '''
    Count M10 patterns: two different parents that both point to the same child,
    with no edge between the parents and no edge from the child back to either.

    Both adjacency lists are expected to describe the same graph (u is listed in
    adj_list_toward[v] exactly when v is listed in adj_list_away[u]).

    Input: adj_list_away (node -> list of nodes it points to),
           adj_list_toward (node -> list of nodes pointing to it)
    Output: (number of distinct node triples found,
             dict mapping each sorted node pair to the number of found triples containing it)
    '''
    triangles = set()

    for parent1, parent1_out in adj_list_away.items(): #checks each first parent
        parent1_in = adj_list_toward[parent1]

        for child in parent1_out: #checks each node parent1 points to
            if child in parent1_in: #the child must not point back to parent1
                continue

            #the only possible second parents are the nodes pointing at the child,
            #so there is no need to scan every node in the graph
            for parent2 in adj_list_toward[child]:
                if parent2 == parent1:
                    continue

                parent2_out = adj_list_away[parent2]

                if (child in parent2_out and parent1 not in parent2_out
                        and parent2 not in parent1_out
                        and child not in adj_list_toward[parent2]):
                    triangles.add(tuple(sorted((parent1, parent2, child)))) #one entry per node triple

    edge_dict = {}
    for tri in triangles:
        for edge in itertools.combinations(tri, 2):
            edge_dict[edge] = edge_dict.get(edge, 0) + 1 #add to edge count

    return len(triangles), edge_dict

In [None]:
def count_M11(adj_list_away, adj_list_toward):
    '''
    Count M11 patterns: parent <-> child1 and parent -> child2 for two different
    children, with no edge between the children and no edge from child2 back to
    the parent.

    Input: adj_list_away (node -> list of nodes it points to),
           adj_list_toward (node -> list of nodes pointing to it)
    Output: (number of distinct node triples found,
             dict mapping each sorted node pair to the number of found triples containing it)
    '''
    triangles = set()

    for parent, children in adj_list_away.items():
        for child1, child2 in itertools.product(children, repeat=2):
            child1_out = adj_list_away[child1]
            child1_in = adj_list_toward[child1]
            parent_in = adj_list_toward[parent]

            #drop the pair at the first requirement it fails
            if child2 in child1_out or child2 in child1_in:
                continue
            if child1 not in parent_in:
                continue
            if child2 in parent_in:
                continue
            if child1 == child2:
                continue

            triangles.add(tuple(sorted((parent, child1, child2))))

    edge_dict = {}
    for tri in triangles:
        for edge in itertools.combinations(tri, 2):
            edge_dict.setdefault(edge, 0)
            edge_dict[edge] += 1

    return len(triangles), edge_dict

In [None]:
def count_M12(adj_list_away, adj_list_toward):
    '''
    Count M12 patterns: v1 -> v2 and v2 <-> v3 with v1 and v3 different, no edge
    between v1 and v3, and no edge from v2 back to v1.

    Input: adj_list_away (node -> list of nodes it points to),
           adj_list_toward (node -> list of nodes pointing to it)
    Output: (number of distinct node triples found,
             dict mapping each sorted node pair to the number of found triples containing it)
    '''
    def is_match(a, b, c):
        #all four lists are looked up before any membership test is made
        c_out, c_in = adj_list_away[c], adj_list_toward[c]
        b_in, a_in = adj_list_toward[b], adj_list_toward[a]
        ends_unlinked = a not in c_out and a not in c_in
        return ends_unlinked and c in b_in and b not in a_in and a != c

    triangles = {
        tuple(sorted((a, b, c)))
        for a in adj_list_away
        for b in adj_list_away[a]
        for c in adj_list_away[b]
        if is_match(a, b, c)
    }

    edge_dict = {}
    for tri in triangles:
        for edge in itertools.combinations(tri, 2):
            try:
                edge_dict[edge] += 1
            except KeyError:
                edge_dict[edge] = 1

    return len(triangles), edge_dict

In [None]:
def count_M13(adj_list_away, adj_list_toward):
    '''
    Count M13 patterns: parent <-> child1 and parent <-> child2 for two different
    children, with no edge between the children.

    Input: adj_list_away (node -> list of nodes it points to),
           adj_list_toward (node -> list of nodes pointing to it)
    Output: (number of distinct node triples found,
             dict mapping each sorted node pair to the number of found triples containing it)
    '''
    found = set()

    for parent, children in adj_list_away.items():
        for child1 in children:
            for child2 in children:
                #fetch the neighbour lists before testing anything
                child1_out = adj_list_away[child1]
                child1_in = adj_list_toward[child1]
                parent_in = adj_list_toward[parent]

                children_unlinked = child2 not in child1_out and child2 not in child1_in
                both_point_back = child1 in parent_in and child2 in parent_in

                if children_unlinked and both_point_back and child1 != child2:
                    found.add(tuple(sorted([parent, child1, child2])))

    pair_counts = {}
    for triple in found:
        for pair in itertools.combinations(triple, 2):
            pair_counts[pair] = pair_counts.get(pair, 0) + 1

    return len(found), pair_counts

In [None]:
M1_count, edge_dict_M1 = count_M1(adj_list_away, adj_list_toward)

In [None]:
M2_count, edge_dict_M2 = count_M2(adj_list_away, adj_list_toward)

In [None]:
M3_count, edge_dict_M3 = count_M3(adj_list_away, adj_list_toward)

In [None]:
M4_count, edge_dict_M4 = count_M4(adj_list_away, adj_list_toward)

In [None]:
M5_count, edge_dict_M5 = count_M5(adj_list_away, adj_list_toward)

In [None]:
M6_count, edge_dict_M6 = count_M6(adj_list_away, adj_list_toward)

In [None]:
M7_count, edge_dict_M7 = count_M7(adj_list_away, adj_list_toward)

In [None]:
M8_count, edge_dict_M8 = count_M8(adj_list_away, adj_list_toward)

In [None]:
M9_count, edge_dict_M9 = count_M9(adj_list_away, adj_list_toward)

In [None]:
M10_count, edge_dict_M10 = count_M10(adj_list_away, adj_list_toward)

In [None]:
M11_count, edge_dict_M11 = count_M11(adj_list_away, adj_list_toward)

In [None]:
M12_count, edge_dict_M12 = count_M12(adj_list_away, adj_list_toward)

In [None]:
M13_count, edge_dict_M13 = count_M13(adj_list_away, adj_list_toward)

In [None]:
#save edge counts for later
#collect the per-motif edge dictionaries in motif order so that position i holds motif M(i+1)
edge_dicts = [edge_dict_M1, edge_dict_M2, edge_dict_M3, edge_dict_M4, edge_dict_M5,
              edge_dict_M6, edge_dict_M7, edge_dict_M8, edge_dict_M9, edge_dict_M10,
              edge_dict_M11, edge_dict_M12, edge_dict_M13]

for motif_number, motif_edges in enumerate(edge_dicts, start=1):

    #json cannot serialize tuple keys, so each node pair is written as its string form
    new_edge_dict = {str(key): value for key, value in motif_edges.items()}

    filename = 'edge_dict_M' + str(motif_number) + '.json'

    with open(filename, 'w') as fp:
        json.dump(new_edge_dict, fp)

In [None]:
#save motif counts for later
motif_counts = [M1_count, M2_count, M3_count, M4_count, M5_count, M6_count, M7_count, 
                M8_count, M9_count, M10_count, M11_count, M12_count, M13_count]

#column labels are passed as a list: a set is unordered and the DataFrame constructor
#in recent pandas versions refuses one
motif_df = pd.DataFrame(motif_counts, columns = ['count'])
motif_df['motif'] = ['M' + str(i+1) for i in range(len(motif_counts))]
motif_df = motif_df[['motif', 'count']] #put the label column first

motif_df.to_csv('motif_counts.csv', index = False)

In [56]:
def count_M7(adj_list_away, adj_list_toward):
    '''
    Count M7 patterns: parent1 <-> parent2, parent1 -> child and parent2 -> child,
    where the child points back to neither parent.

    NOTE: this notebook defines count_M7 in an earlier cell as well; when the cells
    are run top to bottom, this later definition is the one left in effect.

    Input: adj_list_away (node -> list of nodes it points to),
           adj_list_toward (node -> list of nodes pointing to it)
    Output: (number of distinct node triples found,
             dict mapping each sorted node pair to the number of found triples containing it)
    '''
    triangles = set()

    for parent1, parent1_out in adj_list_away.items(): #checks each first parent
        parent1_in = adj_list_toward[parent1]

        #parent2 must be pointed at by parent1, so only parent1's out-neighbours
        #can qualify; scanning every node in the graph is unnecessary
        for parent2 in parent1_out:

            #a self-loop on parent1 must not let it act as both parents
            if parent2 == parent1:
                continue

            parent2_out = adj_list_away[parent2]
            if parent1 not in parent2_out: #the edge between the parents must be reciprocated
                continue

            parent2_in = adj_list_toward[parent2]

            for child in parent1_out: #checks each node parent1 points to
                if (child in parent2_out and child not in parent1_in
                        and child not in parent2_in):
                    triangles.add(tuple(sorted((parent1, parent2, child)))) #one entry per node triple

    edge_dict = {}
    for tri in triangles:
        for edge in itertools.combinations(tri, 2):
            edge_dict[edge] = edge_dict.get(edge, 0) + 1 #add to edge count

    return len(triangles), edge_dict

In [54]:
import pandas as pd
import numpy as np
import itertools
import json

def motif_counting(fp, motif):
    
    '''
    Input: filepath to transformed data (fp), a space-delimited file with 'FromNodeId'
           and 'ToNodeId' columns; motif (type of triangular pattern to count,
           one of 'M1' through 'M13')
    Output: the edge dictionary returned by the matching count_M* function;
            the motif count is printed rather than returned
    Raises: ValueError if motif is not one of 'M1' through 'M13'
    '''
    
    #reject an unknown motif before any file is read or any list is built
    valid_motifs = ['M' + str(i) for i in range(1, 14)]
    if motif not in valid_motifs:
        raise ValueError('unknown motif ' + repr(motif) + '; expected one of '
                         + ', '.join(valid_motifs))
    
    #read in file path
    data = pd.read_csv(fp, delimiter = ' ', usecols = ['FromNodeId', 'ToNodeId'])
    
    #initialize adjacency lists over the id range covered by both columns
    min_node = min(data['FromNodeId'].min(), data['ToNodeId'].min())
    max_node = max(data['FromNodeId'].max(), data['ToNodeId'].max())

    adj_list_away = {node: [] for node in range(min_node, max_node+1)}
    adj_list_toward = {node: [] for node in range(min_node, max_node+1)}
    
    #record every edge in both directions: away[from] gets the target, toward[to] gets the source
    for from_node, to_node in zip(data['FromNodeId'], data['ToNodeId']):
        adj_list_away[int(from_node)].append(int(to_node))
        adj_list_toward[int(to_node)].append(int(from_node))
    
    #count the given pattern with the function that carries the same name
    counters = {
        'M1': count_M1, 'M2': count_M2, 'M3': count_M3, 'M4': count_M4,
        'M5': count_M5, 'M6': count_M6, 'M7': count_M7, 'M8': count_M8,
        'M9': count_M9, 'M10': count_M10, 'M11': count_M11, 'M12': count_M12,
        'M13': count_M13,
    }
    motif_count, edge_dict = counters[motif](adj_list_away, adj_list_toward)
    
    #display motif count
    print(motif_count)
    
    #return the edge dictionary
    return edge_dict

In [58]:
#read the space-delimited file 'testdata.txt' into a frame (every column is kept)
#presumably a small hand-made graph for checking the counting functions; confirm
test_network = pd.read_csv('testdata.txt', delimiter = ' ')

In [61]:
#show the loaded frame to inspect its columns and rows
test_network

Unnamed: 0,FromNodeId,ToNodeId
0,1,2
1,1,3
2,1,4
3,1,5
4,2,1
5,2,3
6,2,4
7,2,5
8,6,2
9,6,7


In [62]:
#largest value in the 'FromNodeId' column of the test frame
#NOTE(review): the recorded output (9) is larger than any FromNodeId value in the table
#displayed above; the cell may have been run against a different frame - confirm
test_network['FromNodeId'].max()

9