In [12]:
"""
Vignette 2

Add node and graph attributes to each building json file

"""

import json
import os
import networkx as nx
from networkx.readwrite import json_graph

# Root of the data folder: <parent of the current working directory>/data.
# Paths are assembled from individual components so that the separator is
# correct on every platform and no backslash escape sequences appear in the
# string literals.
data_root = os.path.join(os.path.dirname(os.getcwd()), 'data')

# list paths for all monastery .JSONs (sorted for a reproducible order)
mon_path = os.path.join(data_root, 'monastaries', 'jsons_named')
mon_files = [
    os.path.join(mon_path, name)
    for name in sorted(os.listdir(mon_path))
    if name.endswith('.json')
]

# list of paths for all mosque .JSONs, skipping the files listed in not_used
not_used = ['MS1550.json', 'MS1562.json']
mos_path = os.path.join(data_root, 'mosques', 'jsons_named')
mos_files = [
    os.path.join(mos_path, name)
    for name in sorted(os.listdir(mos_path))
    if name.endswith('.json') and name not in not_used
]

print("monasteries: ", len(mon_files))
print("mosques :", len(mos_files))

monasteries:  19
mosques : 20


In [13]:
"""
open each json
convert to networkX graph

analyze each node
add analysis results as node attributes

analyze graph
add analysis results as graph attributes

NetworkX algorithms nodes:
- betweenness centrality
https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.
algorithms.centrality.betweenness_centrality.html#networkx.algorithms.centrality.betweenness_centrality

- degree centrality
https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.
algorithms.centrality.degree_centrality.html#networkx.algorithms.centrality.degree_centrality

- eigenvector centrality
https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.
algorithms.centrality.eigenvector_centrality.html#networkx.algorithms.centrality.eigenvector_centrality

- closeness centrality
https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.
algorithms.centrality.closeness_centrality.html#networkx.algorithms.centrality.closeness_centrality

- local clustering coefficient
https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.
algorithms.cluster.clustering.html

NetworkX algorithms graph
- average local clustering coefficient
https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.
algorithms.cluster.average_clustering.html

- global clustering
https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.
algorithms.cluster.transitivity.html#networkx.algorithms.cluster.transitivity

- clique analysis
https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.
algorithms.clique.find_cliques.html#networkx.algorithms.clique.find_cliques

"""

def add_node_attributes(bldg, bldg_type, bldg_name):
    """Annotate every node of ``bldg`` in place.

    Each node receives, in this order, the attributes:
    ``building_type``, ``building_name``, ``betweenness``,
    ``degree_centrality``, ``eigenvector_centrality``,
    ``closeness_centrality``, ``degree`` and ``clustering_coef``.

    Parameters
    ----------
    bldg : networkx graph
        Graph of one building; modified in place.
    bldg_type : value stored on every node under ``building_type``.
    bldg_name : value stored on every node under ``building_name``.

    Returns
    -------
    None
    """

    # Same label on every node (a non-dict value is broadcast to all nodes).
    for label_key, label_value in (("building_type", bldg_type),
                                   ("building_name", bldg_name)):
        nx.set_node_attributes(bldg, label_value, label_key)

    # (attribute name, function returning a {node: value} mapping).
    # The order of this table fixes the order in which attributes are added.
    per_node_measures = (
        ("betweenness", nx.betweenness_centrality),
        ("degree_centrality", nx.degree_centrality),
        ("eigenvector_centrality", nx.eigenvector_centrality),
        ("closeness_centrality", nx.closeness_centrality),
        ("degree", lambda graph: dict(nx.degree(graph))),
        ("clustering_coef", nx.clustering),
    )
    for attr_name, measure in per_node_measures:
        nx.set_node_attributes(bldg, measure(bldg), attr_name)

def add_graph_attributes(bldg, bldg_type, bldg_name):
    """Store building-level descriptors in ``bldg.graph`` (in place).

    Keys written, in this order: ``building_type``, ``building_name``,
    ``num_nodes``, ``num_edges``, ``density``, ``ave_cluster_coef``,
    ``transistivity`` and ``cliques``.

    Parameters
    ----------
    bldg : networkx graph
        Graph of one building; its ``graph`` dictionary is updated.
    bldg_type : value stored under ``building_type``.
    bldg_name : value stored under ``building_name``.

    Returns
    -------
    None
    """

    summary = bldg.graph

    # identification
    summary['building_type'] = bldg_type
    summary['building_name'] = bldg_name

    # size and density
    summary['num_nodes'] = bldg.number_of_nodes()
    summary['num_edges'] = bldg.number_of_edges()
    summary['density'] = nx.density(bldg)

    # average of the local clustering coefficients
    summary['ave_cluster_coef'] = nx.average_clustering(bldg)
    # global clustering coefficient (the key spelling is kept as it is,
    # because the key is part of the data written out)
    summary['transistivity'] = nx.transitivity(bldg)

    # keep only the cliques returned by find_cliques that have 3+ nodes
    large_cliques = []
    for members in nx.find_cliques(bldg):
        if len(members) >= 3:
            large_cliques.append(members)
    summary['cliques'] = large_cliques


# Output folders, one per building type. They are built from individual path
# components (no backslashes inside string literals) and computed once, since
# they do not change between iterations.
data_root = os.path.join(os.path.dirname(os.getcwd()), 'data')
out_dirs = {
    'monastery': os.path.join(data_root, 'monastaries', 'jsons_features'),
    'mosque': os.path.join(data_root, 'mosques', 'jsons_features'),
}

# loop through all buildings: load, annotate, and save each one as a new JSON
files = mon_files + mos_files
for file in files:
    with open(file) as f:
        data = json.load(f)

    # determine building name and type from the file name;
    # os.path.basename works with either path separator
    filename = os.path.basename(file)
    bldg_name = filename[:6]
    bldg_type = 'monastery' if filename.startswith('MN') else 'mosque'

    # construct networkx graph and add node and graph attributes.
    # No key mapping is passed: the node identifier key "id" is the node-link
    # default, and the second positional parameter of node_link_graph is
    # `directed`, not a key mapping.
    G = json_graph.node_link_graph(data)
    add_node_attributes(G, bldg_type, bldg_name)
    add_graph_attributes(G, bldg_type, bldg_name)

    # save graph as new json file in the folder matching its building type
    out_path = os.path.join(out_dirs[bldg_type], filename)
    out = json_graph.node_link_data(G)
    with open(out_path, 'w', encoding='utf8') as json_file:
        json.dump(out, json_file, indent=4)

print('done')


done


In [14]:
"""
Create CSV file for building instances
Create CSV file for node instances

"""

import csv

# Root of the data folder: <parent of the current working directory>/data.
# Paths are assembled from individual components so that the separator is
# correct on every platform and no backslash escape sequences appear in the
# string literals.
data_root = os.path.join(os.path.dirname(os.getcwd()), 'data')

# list paths for all monastery .JSONs with features.
# Sorted so that the order of the rows in the CSV files is reproducible.
mon_path = os.path.join(data_root, 'monastaries', 'jsons_features')
mon_files = [
    os.path.join(mon_path, name)
    for name in sorted(os.listdir(mon_path))
    if name.endswith('.json')
]

# list of paths for all mosque .JSONs with features, skipping not_used files
not_used = ['MS1550.json', 'MS1562.json']
mos_path = os.path.join(data_root, 'mosques', 'jsons_features')
mos_files = [
    os.path.join(mos_path, name)
    for name in sorted(os.listdir(mos_path))
    if name.endswith('.json') and name not in not_used
]

print("monasteries: ", len(mon_files))
print("mosques :", len(mos_files))

monasteries:  19
mosques : 20


In [15]:
"""
Open each json file and save data to buildings and nodes csv files

"""

# Output locations. The paths are built from separate components: written as
# one backslash-separated literal, "\a", "\b" and "\n" would be interpreted as
# the bell, backspace and newline escape sequences and corrupt the path.
all_dir = os.path.join(os.path.dirname(os.getcwd()), 'data', 'all')

buildings = os.path.join(all_dir, 'buildings.csv')
bldgs_header = []
bldgs_rows = []

nodes = os.path.join(all_dir, 'nodes.csv')
nodes_header = []
nodes_rows = []

# Open each json file. Add each building and each node to individual row for csvs
files = mon_files + mos_files
for file in files:
    with open(file, encoding='utf-8') as f:
        data = json.load(f)

    # buildings rows: the columns are defined by the first building read;
    # values are looked up by key so every row lines up with the header
    graph_attrs = data['graph']
    if not bldgs_header:
        bldgs_header = list(graph_attrs.keys())
    bldgs_rows.append([graph_attrs[key] for key in bldgs_header])

    # node rows: the columns are defined by the first node read
    if not nodes_header and data['nodes']:
        nodes_header = list(data['nodes'][0].keys())
    for node in data['nodes']:
        nodes_rows.append([node[key] for key in nodes_header])

# Add normalized columns of building num_nodes and num_edges
# (each value divided by the maximum over all buildings)
nodes_col = bldgs_header.index('num_nodes')
edges_col = bldgs_header.index('num_edges')
max_nodes = max(row[nodes_col] for row in bldgs_rows)
max_edges = max(row[edges_col] for row in bldgs_rows)
bldgs_header.extend(['num_nodes_norm', 'num_edges_norm'])
for row in bldgs_rows:
    row.extend([row[nodes_col] / max_nodes, row[edges_col] / max_edges])

# Add normalized columns of node area, iso_area and degree
# (each value divided by the maximum over all nodes of all buildings)
area_col = nodes_header.index('area')
iso_area_col = nodes_header.index('iso_area')
degree_col = nodes_header.index('degree')
max_area = max(row[area_col] for row in nodes_rows)
max_iso_area = max(row[iso_area_col] for row in nodes_rows)
max_degree = max(row[degree_col] for row in nodes_rows)
nodes_header.extend(['area_norm', 'iso_area_norm', 'degree_norm'])
for row in nodes_rows:
    row.extend([
        row[area_col] / max_area,
        row[iso_area_col] / max_iso_area,
        row[degree_col] / max_degree,
    ])

# Write buildings csv
with open(buildings, 'w', newline="", encoding='utf-8') as outFile:
    writer = csv.writer(outFile)
    writer.writerow(bldgs_header)
    writer.writerows(bldgs_rows)

# Write nodes csv
with open(nodes, 'w', newline="", encoding='utf-8') as outFile:
    writer = csv.writer(outFile)
    writer.writerow(nodes_header)
    writer.writerows(nodes_rows)

print("done")

done
