# Preprocessing Shapefiles

This script reads in the shapefiles for all of the wards in South Africa and outputs the wards for each of the Gauteng municipalities. The resulting JSON files then need to be moved to <code>data/shapefiles/MUNICIPALITY NAME/MUNICIPALITY NAME_shp.json</code> for use by the subsequent scripts.

The output of this script is already in <code>data/shapefiles/MUNICIPALITY NAME/MUNICIPALITY NAME_shp.json</code>. Thus, this script does not have to be run.

In [1]:
import json
import os

In [2]:
# Directory layout. BASE_DIR is a relative path, so it is resolved against the
# directory this notebook is run from; the data and shapefile folders hang off it.
BASE_DIR = '../..'
DATA_DIR = os.path.join(BASE_DIR, 'data')
SHAPE_DIR = os.path.join(DATA_DIR, 'shapefiles')

In [3]:
# Reading JSON file of ward shapes
def get_all_ward_shapes(file_name):
    """Load a ward-shape JSON file and return its parsed contents.

    Parameters
    ----------
    file_name : str or path-like
        Path of the JSON file to read.

    Returns
    -------
    The object produced by ``json.load`` (a dictionary for the ward file
    used in this notebook).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    # The context manager closes the file even when parsing fails, which the
    # manual open()/close() pair did not guarantee.
    with open(file_name) as f:
        return json.load(f)

In [4]:
# reading in all of the SA wards (every province, from the 2011 municipal ward file) into a dictionary
shape_dict = get_all_ward_shapes(os.path.join(SHAPE_DIR, 'SA_municipal_wards_2011.json'))

In [5]:
# very big dictionary! Count the ward features that were loaded
len(shape_dict['features'])

8554

In [6]:
# Inspect the properties of one feature. The 'ProvinceNa' and 'LocalMun_1' fields are what
# the later cells filter on to obtain the wards of each Gauteng municipality.
shape_dict['features'][1000]['properties']

{'OBJECTID': 1001,
 'ProvinceCo': 'LIM',
 'ProvinceNa': 'Limpopo',
 'LocalMunic': 'LIM332',
 'WardNumber': '3',
 'WardID': '93302003',
 'LocalMun_1': 'Greater Letaba',
 'DistrictMu': 'DC33',
 'District_1': 'Mopani',
 'Year': 2011,
 'Shape__Are': 2975910.41699219,
 'Shape__Len': 9354.68949928675}

In [7]:
# sorting municipality's results by ward number
def sort_by_ward(sub_li):
    """Sort ward features in place by ward number and return the same list.

    Each element must provide ``element['properties']['WardNumber']`` as a
    value that ``int()`` accepts; the ordering is numeric and ascending, so
    ward '10' comes after ward '2'.
    """
    def ward_number(feature):
        # Ward numbers are stored as strings; compare them as integers.
        return int(feature['properties']['WardNumber'])

    sub_li.sort(key=ward_number)
    return sub_li

In [8]:
# removing any duplicate entries for a ward
def remove_duplicates(items):
    """Return the features of ``items`` with repeated ward numbers dropped.

    The first feature seen for each ``['properties']['WardNumber']`` value is
    kept, in its original position; later features with the same ward number
    are discarded. ``items`` itself is not modified.

    Parameters
    ----------
    items : iterable of dict
        Ward features, each with a hashable ``['properties']['WardNumber']``.

    Returns
    -------
    list
        A new list holding one feature per distinct ward number.
    """
    items_no_duplicates = []
    # A set gives constant-time membership tests, instead of rescanning a
    # list of every ward number seen so far for each feature.
    seen_ward_nums = set()
    for feat in items:
        ward_num = feat['properties']['WardNumber']
        if ward_num not in seen_ward_nums:
            seen_ward_nums.add(ward_num)
            items_no_duplicates.append(feat)
    return items_no_duplicates

In [9]:
# extracts the shapefile for a particular municipality and writes to JSON
def get_munic_shp(name):
    """Write the de-duplicated, sorted wards of one Gauteng municipality to JSON.

    The wards are taken from the module-level ``shape_dict`` by matching
    ``LocalMun_1`` against ``name`` within the province 'Gauteng'. The output
    is a shallow copy of ``shape_dict`` whose 'features' entry holds only
    those wards, written to ``<name>_shp.json`` in the current working
    directory. The ward count is printed.

    Parameters
    ----------
    name : str
        Municipality name as it appears in the ``LocalMun_1`` property.

    Returns
    -------
    None
    """
    # Rand West City is a combination of Westonaria and Randfontein. So we need to treat it separately.
    if name == 'Rand West City':
        return

    items = [el for el in shape_dict['features'] if el['properties']['LocalMun_1'] == name and el['properties']['ProvinceNa'] == 'Gauteng']
    items_no_duplicates_sorted = sort_by_ward(remove_duplicates(items))
    print("There are {} wards in {}".format(len(items_no_duplicates_sorted), name))

    # Shallow copy: only the 'features' key is rebound, so shape_dict keeps
    # its full feature list.
    shape_dict_copy = shape_dict.copy()
    shape_dict_copy['features'] = items_no_duplicates_sorted

    # The context manager closes the file even if serialisation fails.
    with open("{}_shp.json".format(name), "w") as out_file:
        json.dump(shape_dict_copy, out_file)

In [28]:
# the names of the Gauteng municipalities. Rand West City is a combination of Westonaria and Randfontein, and needs to be split accordingly.
gau_municipalities = {
    feature['properties']['LocalMun_1']
    for feature in shape_dict['features']
    if feature['properties']['ProvinceNa'] == 'Gauteng'
}
gau_municipalities

{'City of Johannesburg',
 'City of Tshwane',
 'Ekurhuleni',
 'Emfuleni',
 'Lesedi',
 'Merafong City',
 'Midvaal',
 'Mogale City',
 'Rand West City'}

In [29]:
# obtain shapefiles for each municipality (except Rand West City, which get_munic_shp skips)
# and write to json; each file is named "<municipality>_shp.json" and lands in the current directory
for munic_name in gau_municipalities:
    get_munic_shp(munic_name)

There are 101 wards in Ekurhuleni
There are 45 wards in Emfuleni
There are 28 wards in Merafong City
There are 13 wards in Lesedi
There are 14 wards in Midvaal
There are 130 wards in City of Johannesburg
There are 105 wards in City of Tshwane
There are 34 wards in Mogale City


## Accounting for issue with Rand West City

The shapefiles (JSON) that are now in your current directory should be moved to <code>data/shapefiles/MUNICIPALITY NAME/MUNICIPALITY NAME_shp.json</code> for use by the other scripts.

Rand West City needs to be broken up into Westonaria and Randfontein. This will be done now:

In [27]:
# Split the Rand West City wards into Randfontein and Westonaria and write one
# JSON file for each to the current working directory.
shape_dict_copy = shape_dict.copy()
items = [el for el in shape_dict['features'] if el['properties']['LocalMun_1'] == 'Rand West City' and el['properties']['ProvinceNa'] == 'Gauteng']

randfontein = []
westonaria = []
for el in items:
    # working out which wards are from which municipality: the fifth character
    # of WardID is '2' for Randfontein and '3' for Westonaria; wards with any
    # other value are left out of both lists
    munic_digit = el['properties']['WardID'][4]
    if munic_digit == '2':
        randfontein.append(el)
    elif munic_digit == '3':
        westonaria.append(el)

randfontein_no_duplicates = remove_duplicates(randfontein)
randfontein_no_duplicates_sorted = sort_by_ward(randfontein_no_duplicates)

westonaria_no_duplicates = remove_duplicates(westonaria)
westonaria_no_duplicates_sorted = sort_by_ward(westonaria_no_duplicates)

print("There are {} wards in {}".format(len(randfontein_no_duplicates_sorted), "Randfontein"))
print("There are {} wards in {}".format(len(westonaria_no_duplicates_sorted), "Westonaria"))

# The lists are already sorted, so they are written as they are. Each file is
# opened in a context manager so it is closed even if serialisation fails.
shape_dict_copy['features'] = randfontein_no_duplicates_sorted
with open("{}_shp.json".format('Randfontein'), "w") as out_file:
    json.dump(shape_dict_copy, out_file)

shape_dict_copy['features'] = westonaria_no_duplicates_sorted
with open("{}_shp.json".format('Westonaria'), "w") as out_file:
    json.dump(shape_dict_copy, out_file)

There are 22 wards in Randfontein
There are 16 wards in Westonaria
