# Overview of Methods

For each ward:

1. Use the shape file to extract the coordinates for the ward
2. Calculate a bounding box around the ward.
3. Compute the lat/lng of the center of each image in a grid covering the bounding box.
4. Write the results to `../data/ward_image_centers.csv`.


# Libraries Used

In [7]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import json
import requests
import pandas as pd
import os
import ast

# Constants

### Conversion of lat/lng to pixels

To sample images from the grid within a ward without overlap, we need the extent of each sampled image in the y-direction (latitude) and the x-direction (longitude). The constants below store half of that extent in each direction.

In [8]:
# Grid geometry. Each grid cell spans 2*Y_RAD of latitude by 2*X_RAD of longitude;
# the half-extents were figured out by a mixture of observation and calculation.
N_WARDS = 130      # number of wards to process
X_RAD = 0.0043     # half-extent of a grid cell along x (applied to longitudes)
Y_RAD = 0.00385    # half-extent of a grid cell along y (applied to latitudes)

In [27]:
# All paths are resolved relative to the parent of the current working directory
BASE_DIR = '..'
DATA_DIR, SHAPE_DIR = (os.path.join(BASE_DIR, sub_dir) for sub_dir in ('data', 'shapefile'))

# Get Centers for Images in each Ward

Extract the unadjusted image centers for each image in the grid for each ward.

In [2]:
def get_all_ward_shapes(file_name):
    """Load the JSON file of ward shapes and return its parsed contents.

    Parameters
    ----------
    file_name : str or path-like
        Path to the JSON file holding the ward shapes.

    Returns
    -------
    dict
        The parsed JSON document (a dictionary for a JSON object at top level).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    # The context manager closes the file even when json.load raises
    with open(file_name) as f:
        return json.load(f)

In [3]:
def get_ward_shape(shape_dict, ward_num):
    """Return the first coordinate list of one ward's geometry.

    Parameters
    ----------
    shape_dict : dict
        Parsed shape file with a 'features' list.
    ward_num : int
        Zero-based position of the ward in shape_dict['features'].

    Returns
    -------
    list
        Element 0 of the feature's geometry "coordinates".
    """
    feature = shape_dict['features'][ward_num]
    geometry = feature["geometry"]
    return geometry["coordinates"][0]

In [4]:
def get_min_max(coords):
    """Return the outer bounds of a list of positions, for building the grid.

    Parameters
    ----------
    coords : sequence of sequences
        Positions whose first element is the longitude and whose second
        element is the latitude. Any further elements (e.g. an altitude)
        are ignored.

    Returns
    -------
    tuple
        (max_lat, min_lat, max_lng, min_lng)

    Raises
    ------
    ValueError
        If coords is empty.
    """
    # Index each position directly instead of flattening and striding, so a
    # position with more than two values cannot shift lat/lng onto the wrong axis
    lngs = [point[0] for point in coords]
    lats = [point[1] for point in coords]
    return (max(lats), min(lats), max(lngs), min(lngs))

In [21]:
def extract_centers(max_lat, min_lat, max_lng, min_lng, y_rad=None, x_rad=None):
    """Return the centers of the grid rectangles covering a bounding box.

    The grid starts at the top-left corner of the box. Rows are generated from
    max_lat downwards and, within a row, centers run from min_lng to the right.
    Neighbouring centers are 2*x_rad apart within a row and 2*y_rad apart
    between rows. Every generated row contains at least one center.

    Parameters
    ----------
    max_lat, min_lat, max_lng, min_lng : float
        Bounds of the box.
    y_rad : float, optional
        Half-extent of a rectangle along latitude. Defaults to Y_RAD.
    x_rad : float, optional
        Half-extent of a rectangle along longitude. Defaults to X_RAD.

    Returns
    -------
    list of list of tuple
        One inner list per row; each entry is a (lat, lng) center.

    Raises
    ------
    ValueError
        If y_rad or x_rad is not positive (the grid walk would never end).
    """
    # Fall back to the notebook-level grid constants when no override is given
    if y_rad is None:
        y_rad = Y_RAD
    if x_rad is None:
        x_rad = X_RAD
    if y_rad <= 0 or x_rad <= 0:
        raise ValueError("y_rad and x_rad must be positive")

    first_lng = min_lng + x_rad              # lng of the left-most center in every row
    lat = max_lat - y_rad                    # lat of the top row of centers
    center_list = []
    while lat + y_rad > min_lat:             # row's top edge still above the box bottom
        lng = first_lng
        row_centers = [(lat, lng)]
        while lng + x_rad < max_lng:         # current rectangle ends left of the box's right edge
            lng = lng + 2 * x_rad
            row_centers.append((lat, lng))
        center_list.append(row_centers)
        lat = lat - 2 * y_rad                # step down one row
    return center_list

In [22]:
# Parse the ward shape file once; shape_file_dict holds the shapes for all the wards
shape_file_dict = get_all_ward_shapes(
    os.path.join(SHAPE_DIR, "electoral wards for jhb (simple).json")
)

In [23]:
# Placeholder frame with one zero-filled row per ward. "img_centers" is cast to
# object dtype so that each cell can later hold a list of centers.
df_ward_centers = pd.DataFrame(
    0,
    index=np.arange(N_WARDS),
    columns=["ward_no.", "img_centers"],
).astype({"img_centers": "object"})
print(df_ward_centers.dtypes)
df_ward_centers.head()

ward_no.        int64
img_centers    object
dtype: object


Unnamed: 0,ward_no.,img_centers
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0


In [24]:
# Fill one row per ward. Ward numbers are 1-based, while both the DataFrame rows
# and the features in the shape file are addressed with a 0-based position.
for ward_num in range(1, N_WARDS + 1):
    row_idx = ward_num - 1
    coords = get_ward_shape(shape_file_dict, row_idx)
    # bounding box of the ward, then every grid center inside that box
    max_lat, min_lat, max_lng, min_lng = get_min_max(coords)
    center_list = extract_centers(max_lat, min_lat, max_lng, min_lng)
    df_ward_centers.at[row_idx, 'ward_no.'] = ward_num
    df_ward_centers.at[row_idx, 'img_centers'] = center_list

In [25]:
# Sanity check: print the (rows, columns) shape of the filled frame and preview its first rows
print(df_ward_centers.shape)
df_ward_centers.head()

(130, 2)


Unnamed: 0,ward_no.,img_centers
0,1,"[[(-26.483020238999927, 27.828767997000057), (..."
1,2,"[[(-26.496054001999937, 27.86212946200004), (-..."
2,3,"[[(-26.47171999699998, 27.85499519800004), (-2..."
3,4,"[[(-26.448650998999938, 27.84236260700004), (-..."
4,5,"[[(-26.424091998999927, 27.763540001000024), (..."


In [28]:
# Write the centers to csv for safe keeping. The target directory is created
# first so that a missing data directory does not make the final step fail.
# Note: the lists in "img_centers" are stored as their text representation, so
# they have to be parsed again after reading the file back.
os.makedirs(DATA_DIR, exist_ok=True)
df_ward_centers.to_csv(os.path.join(DATA_DIR, 'ward_image_centers.csv'), index=False)