# Overview of Methods

For each ward:

1. Use the shapefile to extract the coordinates for the ward.
2. Calculate a bounding box around the ward.
3. Get the lat/lng of the center of each image in the grid covering the bounding box.
4. Write the centers to `../data/image_centers/<MUNICIPALITY>_ward_image_centers.csv`.


# Libraries Used

In [1]:
import numpy as np
import json
import pandas as pd
import os

# Constants

### Conversion of lat/lng to pixels

In order to sample without overlap from the grid of images within the ward, we need to obtain the length of the sampled images in the y- and the x-direction.

In [2]:
## Radius of squares in grid, figured out by mixture of observation and calculation.
## Y_RAD is applied to latitudes and X_RAD to longitudes (same units as the shapefile coordinates);
## neighbouring image centers are spaced 2*Y_RAD / 2*X_RAD apart when the grid is built.
Y_RAD = 0.00385
X_RAD = 0.0043

Edit to change the municipality of interest. Possible values are: 
1. 'Emfuleni',
2. 'Merafong City',
3. 'Midvaal',
4. 'Lesedi',
5. 'Mogale City',
6. 'Ekurhuleni',
7. 'Randfontein',
8. 'City of Tshwane',
9. 'City of Johannesburg',
10. 'Westonaria'

In [202]:
# Municipality to process; also used as the shapefile sub-directory name and as the
# prefix of the input ('<name>_shp.json') and output ('<name>_ward_image_centers.csv') files.
MUNICIPALITY = 'Westonaria'

In [203]:
# All paths are relative to the directory the notebook is run from.
BASE_DIR = '..'                                                # parent of the working directory
DATA_DIR = os.path.join(BASE_DIR, 'data')                      # outputs are written under here
SHAPE_DIR = os.path.join(BASE_DIR, 'shapefile', MUNICIPALITY)  # folder holding this municipality's ward-shape JSON

# Get Centers for Images in each Ward

Extract the unadjusted image centers for each image in the grid for each ward.

In [204]:
# Opening JSON file of ward shapes
def get_all_ward_shapes(file_name):
    """Read the ward-shape JSON file and return its parsed contents.

    Parameters
    ----------
    file_name : str or path-like
        Path to the JSON file describing the ward shapes.

    Returns
    -------
    dict
        The decoded JSON document.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    # The context manager closes the file even when json.load raises.
    with open(file_name) as f:
        return json.load(f)

In [205]:
## Extracting the outline coordinates for a single ward
def get_ward_shape(shape_dict, ward_num):
    """Return the first coordinate list of one feature in the shape dictionary.

    Parameters
    ----------
    shape_dict : dict
        Parsed shape JSON with a 'features' list.
    ward_num : int
        Position of the ward inside 'features' (0-based list index; callers
        pass "ward number - 1").

    Returns
    -------
    list
        The first element of the feature's geometry 'coordinates'.
        NOTE(review): for a Polygon this is the outer ring; a MultiPolygon
        would yield a whole polygon instead -- confirm the geometry type.
    """
    feature = shape_dict["features"][ward_num]
    geometry = feature["geometry"]
    return geometry["coordinates"][0]

In [206]:
## Extracting max and mins for outer bounds of grid
def get_min_max(coords):
    """Compute the latitude/longitude bounding box of a list of positions.

    Parameters
    ----------
    coords : list
        Sequence of positions, each ordered (lng, lat[, ...]). Any elements
        after the first two (e.g. an altitude) are ignored.

    Returns
    -------
    tuple
        (max_lat, min_lat, max_lng, min_lng)

    Raises
    ------
    ValueError
        If `coords` is empty.
    """
    # Index each position explicitly rather than flattening and striding by 2:
    # the stride trick mixes up lng/lat as soon as a position has a third value.
    lngs = [point[0] for point in coords]
    lats = [point[1] for point in coords]
    return (max(lats), min(lats), max(lngs), min(lngs))

In [207]:
## Extracting all of the centers for the rectangles in the grid
def extract_centers(max_lat, min_lat, max_lng, min_lng):
    center_top_left = (max_lat - Y_RAD, min_lng + X_RAD)    # Center of the top left rectangle in the grid
    center = center_top_left
    center_list = []
    while(center[0] + Y_RAD > min_lat):                     # Loop for changing rows
        row_centers = [center]
        while(center[1] + X_RAD < max_lng):                   # Loop for moving along row
            center = (center[0], center[1] + 2*X_RAD)
            row_centers.append(center)
        center_list.append(row_centers)
        center = (center[0] - 2*Y_RAD, center_top_left[1])
    return center_list

In [208]:
# dictionary of coordinates for all the wards, parsed from <SHAPE_DIR>/<MUNICIPALITY>_shp.json;
# the individual ward geometries are looked up under its 'features' list
shape_file_dict = get_all_ward_shapes(os.path.join(SHAPE_DIR, "{}_shp.json".format(MUNICIPALITY)))

In [209]:
# Each entry of 'features' is treated as one ward, so its length is the ward count
N_WARDS = len(shape_file_dict['features'])
N_WARDS

16

In [210]:
# Constructing path for ward outline in the Google Static Maps Api image.
def coords_to_path(coords):
    """Build the '|'-separated point list used to draw a ward outline.

    Parameters
    ----------
    coords : list
        Sequence of positions, each ordered (lng, lat[, ...]) as stored in the
        shape file. Elements after the first two are ignored.

    Returns
    -------
    str
        One "|<lat>,<lng>" segment per position, concatenated in order
        (note the leading '|'); the empty string for empty input.
    """
    # Positions are stored lng-first, while the path lists lat first, hence the swap.
    # str.join avoids rebuilding the string on every iteration.
    return "".join("|" + str(point[1]) + "," + str(point[0]) for point in coords)

In [211]:
# Checking Google Static Maps API path length restrictions. If this fails, look to 'simplify' the shapefile using mapshaper.
# Ward numbers are 1-based, while the 'features' list is indexed from 0 (hence ward_num-1).
for ward_num in range(1, N_WARDS+1):
    coords = get_ward_shape(shape_file_dict, ward_num-1)
    path = coords_to_path(coords)
    print(len(path))
    # 8192 is the limit assumed by this notebook -- TODO confirm against the current API documentation
    assert len(path) < 8192, "path is too long: error for ward {}".format(ward_num)

2552
1564
995
637
2697
1513
632
956
1513
421
515
886
608
450
1164
1600


In [212]:
# dataframe for storing the image centers for each ward:
# one row per ward, both columns pre-filled with 0 as a placeholder until the centers are computed
df_ward_centers = pd.DataFrame(0, index=np.arange(N_WARDS), columns=["ward_no.", "img_centers"])
df_ward_centers["img_centers"] = df_ward_centers["img_centers"].astype('object') # converting to object to be able to store lists
print(df_ward_centers.dtypes)  # confirm that img_centers is now an object column
df_ward_centers.head()

ward_no.        int64
img_centers    object
dtype: object


Unnamed: 0,ward_no.,img_centers
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0


In [213]:
# for each ward extract the image centers
# (ward_num is the 1-based ward number; the dataframe row and the 'features' entry both use ward_num-1)
for ward_num in range(1, N_WARDS+1):
    coords = get_ward_shape(shape_file_dict, ward_num-1)               # coordinates for ward
    max_lat, min_lat, max_lng, min_lng = get_min_max(coords)           # for bounding box
    center_list = extract_centers(max_lat, min_lat, max_lng, min_lng)  # get all the centers for ward
    df_ward_centers.at[ward_num-1, 'img_centers'] = center_list        # add the list of centers to the df
    df_ward_centers.at[ward_num-1, 'ward_no.'] = ward_num              # add the ward number to the df

In [214]:
# Sanity check: expect N_WARDS rows and 2 columns, with every placeholder replaced
print(df_ward_centers.shape)
df_ward_centers.head()

(16, 2)


Unnamed: 0,ward_no.,img_centers
0,1,"[[(-26.324598981, 27.5499900030001), (-26.3245..."
1,2,"[[(-26.3679599957238, 27.539619995970202), (-2..."
2,3,"[[(-26.397681949, 27.591804930000002), (-26.39..."
3,4,"[[(-26.316285607999898, 27.6388254030001), (-2..."
4,5,"[[(-26.246443765, 27.5295099960001), (-26.2464..."


In [215]:
# write to csv for safe keeping
# NOTE: img_centers holds nested Python lists, which end up in the CSV as their string form.
centers_dir = os.path.join(DATA_DIR, 'image_centers')
os.makedirs(centers_dir, exist_ok=True)   # create the output directory if it doesn't exist (no separate exists() check needed)
df_ward_centers.to_csv(os.path.join(centers_dir, '{}_ward_image_centers.csv'.format(MUNICIPALITY)), index=False)