In [1]:
"""
Vignette 2

Join all buildings into single graph of multiple components
"""

import json
import os

def list_json_files(folder):
    """Return the full paths of every ``.json`` file directly inside ``folder``.

    Names are sorted because ``os.listdir`` returns entries in arbitrary
    order; sorting keeps the file order stable from one run to the next.
    """
    return [
        os.path.join(folder, name)
        for name in sorted(os.listdir(folder))
        if name.endswith('.json')
    ]


# Input data lives in <parent of the working directory>/data. The path is
# built from separate components instead of a backslash-separated literal:
# in a normal string a backslash starts an escape sequence (newer Python
# versions warn about invalid ones), and backslashes are only path
# separators on Windows.
data_root = os.path.join(os.path.dirname(os.getcwd()), 'data')

# list paths for all monastery .JSONs
# NOTE: 'monastaries' is kept exactly as spelled in the original path;
# presumably it matches the directory name on disk.
mon_path = os.path.join(data_root, 'monastaries', 'jsons_features')
mon_files = list_json_files(mon_path)

# list of paths for all mosque .JSONs
mos_path = os.path.join(data_root, 'mosques', 'jsons_features')
mos_files = list_json_files(mos_path)

print("monasteries: ", len(mon_files))
print("mosques :", len(mos_files))


monasteries:  19
mosques : 20


In [14]:
"""
Combine all jsons into single json
"""

# Container for the merged result. 'graph' collects one entry per building
# file; 'nodes' and 'links' are the concatenation of every file's lists.
all_buildings = {
    'directed': False,
    'multigraph': False,
    'graph': [],
    'nodes': [],
    'links': []
}

# loop through all buildings to create single JSON
files = mon_files + mos_files
for file in files:
    # Read explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding (the output below is written as UTF-8 too).
    with open(file, encoding='utf-8') as f:
        data = json.load(f)
    all_buildings['graph'].append(data['graph'])
    all_buildings['nodes'].extend(data['nodes'])
    all_buildings['links'].extend(data['links'])

# save JSON
# The path is built from components: in the former 'data\all\...' literal
# the sequence '\a' is the BEL control character, not a path separator.
path = os.path.join(os.path.dirname(os.getcwd()), 'data', 'all', 'all_buildings_features.json')
os.makedirs(os.path.dirname(path), exist_ok=True)

with open(path, 'w', encoding='utf-8') as json_file:
    json.dump(all_buildings, json_file, indent=4)

print('buildings: ', len(all_buildings['graph']))
print('nodes: ', len(all_buildings['nodes']))
print('edges: ', len(all_buildings['links']))

# TODO: these counts differ from an earlier run of this notebook; the cause
# has not been identified yet.

buildings:  39
nodes:  1569
edges:  1957


In [25]:
"""
Save all_buildings as two csv files - buildings file and nodes file

Create CSVs of just the standard features; embeddings are added in a later step

"""

import csv


def build_normalized_rows(records, norm_columns):
    """Convert a list of dicts into a CSV header and rows with normalised columns.

    records: non-empty list of dicts; the keys of the first record define
        the columns. A record missing one of those keys raises KeyError.
    norm_columns: keys whose values are divided by the column maximum. Each
        one adds a '<key>_norm' column, appended in the order given.

    Returns (header, rows, maxima), where maxima holds the column maximum
    for each entry of norm_columns.
    """
    header = list(records[0].keys())
    # Look values up by key instead of trusting dict order, so a record whose
    # keys are ordered differently still lands in the right columns.
    rows = [[record[key] for key in header] for record in records]

    maxima = [max(record[key] for record in records) for key in norm_columns]
    for record, row in zip(records, rows):
        for key, maximum in zip(norm_columns, maxima):
            # A column whose maximum is 0 cannot be scaled; emit 0.0 rather
            # than dividing by zero.
            row.append(record[key] / maximum if maximum else 0.0)

    return header + [key + '_norm' for key in norm_columns], rows, maxima


def write_csv(path, header, rows):
    """Write ``header`` followed by ``rows`` to ``path`` as a UTF-8 CSV file."""
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w', newline="", encoding='utf-8') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(header)
        writer.writerows(rows)


# Output directory, built from components: in the former 'data\all\...'
# literals the sequence '\a' is the BEL control character, not a separator.
out_dir = os.path.join(os.path.dirname(os.getcwd()), 'data', 'all')

# node rows, with normalized columns of node area, iso_area and degree
nodes_header, nodes_rows, (max_area, max_iso_area, max_degree) = build_normalized_rows(
    all_buildings['nodes'], ['area', 'iso_area', 'degree']
)

# Write nodes csv
nodes = os.path.join(out_dir, 'all_nodes_features.csv')
write_csv(nodes, nodes_header, nodes_rows)

# building rows, with normalized columns of building num_nodes and num_edges
bldgs_header, bldgs_rows, (max_nodes, max_edges) = build_normalized_rows(
    all_buildings['graph'], ['num_nodes', 'num_edges']
)

# write buildings csv
bldgs = os.path.join(out_dir, 'all_buildings_features.csv')
write_csv(bldgs, bldgs_header, bldgs_rows)

print("done")


done
