In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import json
import geopandas as gpd
from random import choice
from shapely.geometry import Point, Polygon, MultiPolygon

## Geospatial data of each ward in Banglore

In [2]:
with open("data/blr_populationDistribution_by_area.json", "r") as jsonData:
    data = json.load(jsonData)
    for i in range(len(data['features'])):
        data['features'][i]['properties']['WARD_NO'] = int(data['features'][i]['properties']['WARD_NO'])
        del(data['features'][i]['properties']['ASS_CONST_']) 
        del(data['features'][i]['properties']['ASS_CONST1']) 
        del(data['features'][i]['properties']['POP_M'])
        del(data['features'][i]['properties']['POP_F'])
        del(data['features'][i]['properties']['POP_SC'])
        del(data['features'][i]['properties']['POP_ST'])
        del(data['features'][i]['properties']['POP_TOTAL'])
        del(data['features'][i]['properties']['AREA_SQ_KM'])
        del(data['features'][i]['properties']['RESERVATIO'])

with open("data/blr_ward.GeoJSON", "w") as mapData:
    json.dump(data, mapData)

## Demographic data of Banglore

The data for the city of bengaluru is collected from the [smartcities.data.gov.in/](https://smartcities.data.gov.in/catalogsv2?format=json&offset=0&limit=9&query=bengaluru&sort%5B_score%5D=desc) portal.This data was collected during the 2011 Census and could have some errors (as on current date, due to factors like migration, mortality, etc.)

In order to download the data for other cities you will need to sign-up for an account. The data are available in different formats and for the ease to work with the prospective JS simulator, we have got the data as JSON.

The data we are interested to use is the demographic data which classify population grouped by age and area. 

In [3]:
#read GeoJSON as geopandas df
blrDF = gpd.read_file("data/blr_ward.GeoJSON")
blrDF = blrDF.rename(columns={"WARD_NO": "Ward No."})

with open("data/blr_demographics_2011.json", "r") as jsonData:
    data=json.load(jsonData)

#Get the  column names
colNames= []
for field in data['fields']:
    colNames.append(field['label'])

#Create a dataframe of the values
demographics = pd.read_json(json.dumps(data['data']))

#Replace column indices with columnNames
demographics.rename(columns = dict(zip(demographics.columns, colNames)),inplace=True)

#add population density per ward
demographics['Population Density'] = demographics.apply(lambda row: row['Total Population (in thousands)']/ row['Area (in sq km)'], axis=1)

demographics = demographics.drop(columns={'City Name', 'Zone Name','Population - Male (in thousands)', 'Population - female (in thousands)', 'population - children aged 0-14 (in thousands)', 'Population - youth aged 15-24 (in thousands)', 'Population - adults aged 25-60 (in thousands)', 'Population - Senior citizens aged 60+ (in thousands)'})

## Adding Households based on area

We add the data on the number of households based on the 2011 census data.


The averge household size for [Banglore](http://censusindia.gov.in/2011census/hh-series/HH-1/DDW-HH01-2900-2011.XLS) based on the Census Data is $4.0$. In order to get fine-grained data of household size per ward, we shall use compute the mean household size based on the data as, $$\text{Mean Household Size} = \frac{\text{Total Population per Ward}}{\text{Total Households per Ward}}$$

The mean of the computed household size of $4.078576$ matches with the average household size given on the census data

In [4]:
with open("data/blr_numberOfHouseholds_2011.json", "r") as jsonData:
    data = json.load(jsonData)

#Get the  column names
colNames= []
for field in data['fields']:
    colNames.append(field['label'])

#Create a dataframe of the values
households = pd.read_json(json.dumps(data['data']))

#Replace column indices with columnNames
households.rename(columns = dict(zip(households.columns, colNames)),inplace=True)
households = households.drop(columns=['City Name', 'Zone Name', 'Ward Name'])

#set Ward No. as index to the dataframe
households.set_index("Ward No.")

#join households with demographics on Ward No.
demographics = pd.merge(demographics, households, on="Ward No.", how="left")
del households

#add average household size per ward
demographics['Mean Household Size'] = demographics.apply(lambda row: row['Total Population (in thousands)']/ row['Total no. of Households'], axis = 1)


#add bounding box for ward and join households with demographics on Ward No.
blrDF['Ward Bound'] = blrDF.apply(lambda row: MultiPolygon(row['geometry']).bounds, axis=1)

def assignCC(row):
    lon, lat = MultiPolygon(row['geometry']).centroid.coords.xy
    return (lon[0], lat[0])

blrDF['community centre'] = blrDF.apply(assignCC, axis=1)
wardBounds = blrDF[['Ward No.', 'Ward Bound', 'community centre']]
demographics = pd.merge(demographics, wardBounds, on="Ward No.", how="left")

del wardBounds, blrDF

#demographics[['Ward No.', 'Ward Name', 'Total Population (in thousands)']].to_csv('data/preetam_age_split.csv')
demographics

Unnamed: 0,Ward Name,Ward No.,Area (in sq km),Total Population (in thousands),Population Density,Total no. of Households,Mean Household Size,Ward Bound,community centre
0,Kempegowda Ward,1,10.9,34783,3191.100917,8647,4.022551,"(77.5832496732538, 13.083031076381657, 77.6151...","(77.59971349953403, 13.116188361395015)"
1,Chowdeswari Ward,2,6.5,36602,5631.076923,9506,3.850410,"(77.56782518070644, 13.097196193805827, 77.597...","(77.58042209477, 13.121709445639025)"
2,Atturu,3,8.8,58129,6605.568182,14605,3.980075,"(77.53671797501944, 13.081687750136345, 77.586...","(77.56003775980791, 13.102804868968546)"
3,Yelahanka Satellite Town,4,4.6,41986,9127.391304,10583,3.967306,"(77.56502690261269, 13.076069048962552, 77.596...","(77.58392543074483, 13.09098665917691)"
4,Jakkuru,5,23.5,52025,2213.829787,12387,4.199968,"(77.59935461217444, 13.065356532206252, 77.649...","(77.62331395504853, 13.096250293422305)"
...,...,...,...,...,...,...,...,...,...
192,Maruthi Seva Nagar,193,6.6,58355,8841.666667,15272,3.821045,"(77.5861374573618, 12.864812462931585, 77.6190...","(77.60288373226696, 12.879219913584135)"
193,Sagayara Puram,194,7.4,51911,7015.000000,13457,3.857546,"(77.56953204216082, 12.836647297144697, 77.594...","(77.58213244983698, 12.860591262874562)"
194,SK Garden,195,3.4,57335,16863.235294,14984,3.826415,"(77.56657496444872, 12.874952990408245, 77.591...","(77.57990144561522, 12.885774827440871)"
195,Ramaswamy Palya,196,11.4,45608,4000.701754,11049,4.127794,"(77.54491491307054, 12.839118856553016, 77.582...","(77.56328568813218, 12.859587676186218)"


## Household and Age Distribution for Proportionated Population per Ward

The 2011 Census data for India has age distribution state-wise, we get the data for Karnataka. In the data, the age-bins are for 5 years, but the Imperial college paper has bins of 10 year age distribution, thus we create bins of 10 years by adding the two columns of the dataset to get the % of population belonging to a age-group. This is considered to be constant from 2011 to 2020 by assumption. Ideally, we would use a decade-growth rate which will be for the next iteration. The household size distribution data for [Banglore](http://censusindia.gov.in/2011census/hh-series/HH-1/DDW-HH01CITY-2900-2011.XLS).

<table class="waffle" cellspacing="0" cellpadding="0"><tbody><tr style="height:20px;"><th id="708093901R0" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">1</div></th><td class="s0" dir="ltr">Age group</td><td class="s1" dir="ltr">2011 pop</td><td class="s1" dir="ltr">age-10 bins</td></tr><tr style="height:20px;"><th id="708093901R1" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">2</div></th><td class="s0" dir="ltr">0-4</td><td class="s1" dir="ltr">8.3</td><td class="s1" dir="ltr" rowspan="2">16.9</td></tr><tr style="height:20px;"><th id="708093901R2" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">3</div></th><td class="s0" dir="ltr">5-9</td><td class="s1" dir="ltr">8.6</td></tr><tr style="height:20px;"><th id="708093901R3" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">4</div></th><td class="s0" dir="ltr">10-14</td><td class="s1" dir="ltr">9.4</td><td class="s1" dir="ltr" rowspan="2">18.9</td></tr><tr style="height:20px;"><th id="708093901R4" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">5</div></th><td class="s0" dir="ltr">15-19</td><td class="s1" dir="ltr">9.5</td></tr><tr style="height:20px;"><th id="708093901R5" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">6</div></th><td class="s0" dir="ltr">20-24</td><td class="s1" dir="ltr">9.9</td><td class="s1" dir="ltr" rowspan="2">19.3</td></tr><tr style="height:20px;"><th id="708093901R6" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">7</div></th><td class="s0" dir="ltr">25-29</td><td class="s1" dir="ltr">9.4</td></tr><tr style="height:20px;"><th id="708093901R7" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">8</div></th><td class="s0" dir="ltr">30-34</td><td class="s1" dir="ltr">7.7</td><td class="s1" dir="ltr" rowspan="2">15.4</td></tr><tr style="height:20px;"><th id="708093901R8" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">9</div></th><td class="s0" dir="ltr">35-39</td><td class="s1" dir="ltr">7.7</td></tr><tr style="height:20px;"><th id="708093901R9" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">10</div></th><td class="s0" dir="ltr">40-44</td><td class="s1" dir="ltr">6.3</td><td class="s1" dir="ltr" rowspan="2">12.1</td></tr><tr style="height:20px;"><th id="708093901R10" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">11</div></th><td class="s0" dir="ltr">45-49</td><td class="s1" dir="ltr">5.8</td></tr><tr style="height:20px;"><th id="708093901R11" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">12</div></th><td class="s0" dir="ltr">50-54</td><td class="s1" dir="ltr">4.4</td><td class="s1" dir="ltr" rowspan="2">7.9</td></tr><tr style="height:20px;"><th id="708093901R12" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">13</div></th><td class="s0" dir="ltr">55-59</td><td class="s1" dir="ltr">3.5</td></tr><tr style="height:20px;"><th id="708093901R13" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">14</div></th><td class="s0" dir="ltr">60-64</td><td class="s1" dir="ltr">3.4</td><td class="s1" dir="ltr" rowspan="2">5.9</td></tr><tr style="height:20px;"><th id="708093901R14" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">15</div></th><td class="s0" dir="ltr">65-69</td><td class="s1" dir="ltr">2.5</td></tr><tr style="height:20px;"><th id="708093901R15" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">16</div></th><td class="s0" dir="ltr">70-74</td><td class="s1" dir="ltr">1.7</td><td class="s1" dir="ltr" rowspan="2">2.6</td></tr><tr style="height:20px;"><th id="708093901R16" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">17</div></th><td class="s0" dir="ltr">75-79</td><td class="s1" dir="ltr">0.9</td></tr><tr style="height:20px;"><th id="708093901R17" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">18</div></th><td class="s0" dir="ltr">80-84</td><td class="s1" dir="ltr">0.6</td><td class="s1" dir="ltr" rowspan="6">1</td></tr><tr style="height:20px;"><th id="708093901R18" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">19</div></th><td class="s0" dir="ltr">85-89</td><td class="s1" dir="ltr">0.2</td></tr><tr style="height:20px;"><th id="708093901R19" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">20</div></th><td class="s0" dir="ltr">90-94</td><td class="s1" dir="ltr">0.1</td></tr><tr style="height:20px;"><th id="708093901R20" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">21</div></th><td class="s0" dir="ltr">95-99</td><td class="s1" dir="ltr">0.1</td></tr><tr style="height:20px;"><th id="708093901R21" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">22</div></th><td class="s0" dir="ltr">100+</td><td class="s1" dir="ltr">0</td></tr><tr style="height:20px;"><th id="708093901R22" style="height: 20px;" class="row-headers-background"><div class="row-header-wrapper" style="line-height: 20px;">23</div></th><td class="s0" dir="ltr">not stated</td><td class="s1" dir="ltr">0.1</td></tr></tbody></table>

In [5]:
population_2011 = sum(demographics['Total Population (in thousands)'].values)
target_population = 100000

scalingFactor = target_population/population_2011

demographics['new Population'] = demographics.apply(lambda row: row['Total Population (in thousands)'] * scalingFactor, axis = 1)
demographics['age 0-9'] = demographics.apply(lambda row: (row['new Population'] * 16.9)/100, axis = 1)
demographics['age 10-19'] = demographics.apply(lambda row: (row['new Population'] * 18.9)/100, axis = 1)
demographics['age 20-29'] = demographics.apply(lambda row: (row['new Population'] * 19.3)/100, axis = 1)
demographics['age 30-39'] = demographics.apply(lambda row: (row['new Population'] * 15.4)/100, axis = 1)
demographics['age 40-49'] = demographics.apply(lambda row: (row['new Population'] * 12.1)/100, axis = 1)
demographics['age 50-59'] = demographics.apply(lambda row: (row['new Population'] * 7.9)/100, axis = 1)
demographics['age 60-69'] = demographics.apply(lambda row: (row['new Population'] * 5.9)/100, axis = 1)
demographics['age 70-79'] = demographics.apply(lambda row: (row['new Population'] * 2.6)/100, axis = 1)
demographics['age 80+'] = demographics.apply(lambda row: (row['new Population'] * 1)/100, axis = 1)

#add scaled household size per ward
demographics['Scaled Number of Households'] = demographics.apply(lambda row: row['new Population']/ demographics['Mean Household Size'].describe()['mean'], axis = 1)

number_of_houses = sum(demographics['Scaled Number of Households'].values)

scalingFactor1 = sum(demographics['Total no. of Households'].values) / sum(demographics['Scaled Number of Households'].values)
householdSizes = ["1", "2", "3", "4", "5", "6", "7-10", "11-14", "15+"]
householdDistributions = [87638, 274556, 467839, 646184, 321320, 152359, 135412, 11346, 3088]
newDistribution = [i*scalingFactor1 for i in householdDistributions]

weights = [x/number_of_houses for x in householdDistributions]


demographics

Unnamed: 0,Ward Name,Ward No.,Area (in sq km),Total Population (in thousands),Population Density,Total no. of Households,Mean Household Size,Ward Bound,community centre,new Population,age 0-9,age 10-19,age 20-29,age 30-39,age 40-49,age 50-59,age 60-69,age 70-79,age 80+,Scaled Number of Households
0,Kempegowda Ward,1,10.9,34783,3191.100917,8647,4.022551,"(77.5832496732538, 13.083031076381657, 77.6151...","(77.59971349953403, 13.116188361395015)",414.417087,70.036488,78.324829,79.982498,63.820231,50.144468,32.738950,24.450608,10.774844,4.144171,101.608283
1,Chowdeswari Ward,2,6.5,36602,5631.076923,9506,3.850410,"(77.56782518070644, 13.097196193805827, 77.597...","(77.58042209477, 13.121709445639025)",436.089303,73.699092,82.420878,84.165235,67.157753,52.766806,34.451055,25.729269,11.338322,4.360893,106.921955
2,Atturu,3,8.8,58129,6605.568182,14605,3.980075,"(77.53671797501944, 13.081687750136345, 77.586...","(77.56003775980791, 13.102804868968546)",692.569671,117.044274,130.895668,133.665946,106.655729,83.800930,54.713004,40.861611,18.006811,6.925697,169.806741
3,Yelahanka Satellite Town,4,4.6,41986,9127.391304,10583,3.967306,"(77.56502690261269, 13.076069048962552, 77.596...","(77.58392543074483, 13.09098665917691)",500.236202,84.539918,94.544642,96.545587,77.036375,60.528580,39.518660,29.513936,13.006141,5.002362,122.649724
4,Jakkuru,5,23.5,52025,2213.829787,12387,4.199968,"(77.59935461217444, 13.065356532206252, 77.649...","(77.62331395504853, 13.096250293422305)",619.844434,104.753709,117.150598,119.629976,95.456043,75.001177,48.967710,36.570822,16.115955,6.198444,151.975704
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192,Maruthi Seva Nagar,193,6.6,58355,8841.666667,15272,3.821045,"(77.5861374573618, 12.864812462931585, 77.6190...","(77.60288373226696, 12.879219913584135)",695.262315,117.499331,131.404578,134.185627,107.070397,84.126740,54.925723,41.020477,18.076820,6.952623,170.466933
193,Sagayara Puram,194,7.4,51911,7015.000000,13457,3.857546,"(77.56953204216082, 12.836647297144697, 77.594...","(77.58213244983698, 12.860591262874562)",618.486198,104.524167,116.893891,119.367836,95.246874,74.836830,48.860410,36.490686,16.080641,6.184862,151.642686
194,SK Garden,195,3.4,57335,16863.235294,14984,3.826415,"(77.56657496444872, 12.874952990408245, 77.591...","(77.57990144561522, 12.885774827440871)",683.109671,115.445534,129.107728,131.840167,105.198889,82.656270,53.965664,40.303471,17.760851,6.831097,167.487304
195,Ramaswamy Palya,196,11.4,45608,4000.701754,11049,4.127794,"(77.54491491307054, 12.839118856553016, 77.582...","(77.56328568813218, 12.859587676186218)",543.390004,91.832911,102.700711,104.874271,83.682061,65.750190,42.927810,32.060010,14.128140,5.433900,133.230330


## Instantiate houses with location in ward

In [6]:
def assignHouses(row):
    bounds = row['Ward Bound']
    households = int(row['Scaled Number of Households'])
    lon1 = bounds[0]
    lat1 = bounds[1]
    lon2 = bounds[2]
    lat2 = bounds[3]
    
    houses = []
    for house in range(households):
        houses.append({
            "house_id": house,
            "location": (choice([lon1, lon2]), choice([lat1, lat2]))
        })
    return houses

demographics['Houses'] = demographics.apply(assignHouses, axis=1)

## Instantiate schools in each ward

In [7]:
averageStudents = 300

def addSchools(row):
    bounds = row['Ward Bound']
    totalSchools = int(np.ceil(((row['age 0-9']/6) + (row['age 20-29']/4) + row['age 10-19'])/ averageStudents))

    lon1 = bounds[0]
    lat1 = bounds[1]
    lon2 = bounds[2]
    lat2 = bounds[3]
    
    schools = []
    for school in range(totalSchools):
        schools.append({
            "school_id": school,
            "location": (choice([lon1, lon2]), choice([lat1, lat2]))
        })
    return schools
        
    
demographics['Schools'] = demographics.apply(addSchools, axis=1)

## Adding Workplaces - Offices and Community Space on GeoMap

Number of Community Space = 1/ward 

In [10]:
averageWorking = 200
print(df.iloc[:, -4:-1].sum(axis=1))


# demographics['workplaces'] = demographics.apply(lambda row: np.ceil((row['age 50-59'] + (row['age 20-29']*3/4) + row['age 30-39'] + row['age 40-49'])) , axis=1)

# totalWorkforce = sum(demographics['workplaces'].values) #thailand data

IndentationError: expected an indented block (<ipython-input-10-0049e8468f85>, line 4)

## Dump of static data as json

In [9]:
pois = demographics[['Ward No.', 'Houses', 'community centre' , 'Schools']]


with open("data/dump.json", "w") as f:
      f.write(json.dumps(pois.to_json(orient="records", lines=True)))