In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Copyright [2020] [Indian Institute of Science, Bangalore]
SPDX-License-Identifier: Apache-2.0
"""
# NOTE(review): this rebinds the special module attribute `__name__` (normally
# "__main__" when the file runs as a script or notebook) to a descriptive
# title, so any `if __name__ == "__main__":` check evaluated afterwards in this
# namespace would be false. Presumably intended as a human-readable
# description of the notebook -- confirm before relying on `__name__` here.
__name__ = "Instantiate a city and dump instantiations as json"

import os, sys
import json
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import time

#Data-processing Functions
from modules.processDemographics import *
from modules.processGeoData import *

# Functions to instantiate individuals to houses, schools, workplaces and community centres
from modules.assignHouses import *
from modules.assignSchools import *
from modules.assignWorkplaces import *


# Run configuration.
# `city` selects the input folder data/base/<city>/ and the output folder
# data/<city>/ used by the cells below.
city = "bangalore"
# Passed to process_data() and assign_individuals_to_houses() below.
targetPopulation = 10000
# NOTE(review): averageStudents and averageWorkforce are not referenced anywhere
# else in the visible notebook -- presumably tuning knobs consumed elsewhere;
# confirm whether they are still needed.
averageStudents = 17
averageWorkforce = 2






In [2]:
## Processing Data files

In [3]:
# Create the per-city output directory (data/<city>/) that the final cell
# writes its JSON files into. makedirs(exist_ok=True) is a no-op when the
# directory already exists and also creates missing parents, so the cell can
# be re-run safely.
os.makedirs("data/"+city, exist_ok=True)

print("processing data ready ...")
start = time.time()

# Ward geometry for the city, parsed by the project helper.
cityGeojson = "data/base/"+city+"/city.geojson"
cityGeoDF = parse_geospatial_data(cityGeojson)

# The optional city profile provides the three distributions consumed by the
# later cells (process_data, assign_individuals_to_houses, assign_schools).
cityProfile = "data/base/"+city+"/cityProfile.json"
if os.path.isfile(cityProfile):
    ageDistribution, householdDistribution, schoolDistribution = process_city_profile(cityProfile)
else:
    # Previously this case was skipped silently, leaving the three names
    # unbound (or stale from an earlier kernel run) and failing much later.
    # Report it here; printed rather than warned because warnings are filtered.
    print("WARNING: " + cityProfile + " not found; ageDistribution, "
          "householdDistribution and schoolDistribution were not loaded "
          "from the city profile", file=sys.stderr)

# Raw per-ward input tables; they are combined by process_data() in the next cell.
demographicsData = pd.read_csv("data/base/"+city+"/demographics.csv")
housesData = pd.read_csv("data/base/"+city+"/households.csv")
employmentData = pd.read_csv("data/base/"+city+"/employment.csv")
print("processing data completed completed in ", time.time() - start)

processing data ready ...
processing data completed completed in  3.0829808712005615


In [4]:
## Computing additional parameters

In [5]:
print("getting parameters ready ...")
start = time.time()
# NOTE(review): demographicsData is rebound to the output of process_data()
# applied to itself, so re-running this cell without re-running the loading
# cell feeds already-processed data back in -- confirm that is harmless.
demographicsData = process_data(demographicsData, housesData, employmentData, targetPopulation, ageDistribution)


def _sum_columns(frame, columns):
    """Return the combined total of the given columns of `frame` as a float.

    Each column is reduced with Series.sum(), which yields a scalar, instead
    of calling float() on a one-element Series (deprecated in recent pandas).
    Columns are accumulated left to right.
    """
    total = 0.0
    for column in columns:
        total += float(frame[column].sum())
    return total


totalPopulation = demographicsData['Total Population (in thousands)'].values.sum()

people_over_60 = _sum_columns(
    demographicsData,
    ['age 60-64', 'age 65-69', 'age 70-74', 'age 75-79', 'age 80+'],
)
# Previously totalPopulation * (people_over_60 / totalPopulation), which is
# just people_over_60 with an extra rounding step.
population_over_60 = people_over_60

total_employable = _sum_columns(
    demographicsData,
    ['age 15-19', 'age 20-24', 'age 25-29', 'age 30-34', 'age 35-39',
     'age 40-44', 'age 45-49', 'age 50-54', 'age 55-59'],
)
age_15_19 = _sum_columns(demographicsData, ['age 15-19'])

# NOTE(review): formula kept exactly as it was. It adds a population *fraction*
# (age 15-19 share times 0.5) to a population *count*, which looks
# unintentional; the value is not used by any later cell visible in this
# notebook -- confirm the intended definition.
employable_population = totalPopulation * ((total_employable/totalPopulation)) + ((age_15_19/totalPopulation) * 0.5)

total_unemployed = demographicsData['unemployed'].values.sum()
unemployed_but_employable = total_unemployed - population_over_60
children_0_14 = _sum_columns(demographicsData, ['population - children aged 0-14 (in thousands)'])
# Unemployed share among people who are neither children (0-14) nor over 60.
unemployed_fraction = unemployed_but_employable / (totalPopulation - children_0_14 - population_over_60)

totalNumberOfWards = len(demographicsData['Ward No.'].values)
averageHouseholds = totalPopulation / demographicsData['totalHouseholds'].values.sum()

commonArea = commonAreaLocation(cityGeoDF)
print("getting parameters ready completed in ", time.time() - start)


getting parameters ready ...
getting parameters ready completed in  0.1620643138885498


In [6]:
## Assignment of individuals to households

In [7]:
# Step 1: build the individuals and households tables.
print("instantiating individuals to households...")
start = time.time()
individuals, households = assign_individuals_to_houses(
    targetPopulation,
    totalNumberOfWards,
    averageHouseholds,
    ageDistribution,
    householdDistribution,
    unemployed_fraction,
)
elapsed = time.time() - start
print("instantiating individuals to households completed in ", elapsed)

# Step 2: attach locations. Note the return order is (households, individuals),
# the reverse of assign_individuals_to_houses above.
print("instantiating individual location by house location...")
start = time.time()
households, individuals = houseLocation(cityGeoDF, individuals, households)
elapsed = time.time() - start
print("instantiating individual location by house location completed in ", elapsed)

instantiating individuals to households...
instantiating individuals to households completed in  11.18013072013855
instantiating individual location by house location...
instantiating individual location by house location completed in  0.3364753723144531


## Assigning individuals to workplaces and schools

In [8]:
# Build the workplaces table and rebind `individuals` to the version returned
# by assign_workplaces (return order: workplaces first, then individuals).
print("instantiating individuals to workplaces...")
start = time.time()
workplaces, individuals = assign_workplaces(
    cityGeoDF,
    individuals,
)
elapsed = time.time() - start
print("instantiating individuals to workplaces completed in ", elapsed)

instantiating individuals to workplaces...
instantiating individuals to workplaces completed in  17.985671520233154


In [9]:
# Build the schools table and rebind `individuals` to the version returned by
# assign_schools (return order: individuals first, then schools).
print("instantiating individuals to schools...")
start = time.time()
individuals, schools = assign_schools(
    individuals,
    cityGeoDF,
    schoolDistribution,
)
elapsed = time.time() - start
print("instantiating individuals to schools completed in ", elapsed)

instantiating individuals to schools...
instantiating individuals to schools completed in  2.6760940551757812


## Additional processing based on demographics data

In [10]:
print("additonal data processing...")
start = time.time()

#associate individuals to their ward's common area (by distance)
def getDistances(row, cc):
    """Return the distance between an individual and their ward's common area.

    Parameters
    ----------
    row : mapping with 'wardNo', 'lat' and 'lon' entries (one row of
        `individuals` when used with DataFrame.apply(axis=1)).
    cc : sequence indexed by int(row['wardNo']); element [0] of each entry is
        passed to distance() as the longitude and element [1] as the latitude.
        NOTE(review): this assumes ward numbers are valid positions in `cc`
        (e.g. 0-based and in the same order as commonArea) -- confirm.

    Returns
    -------
    Whatever the project's distance(lat1, lon1, lat2, lon2) helper returns.
    """
    wardCentre = cc[int(row["wardNo"])]
    return distance(row['lat'], row['lon'], wardCentre[1], wardCentre[0])

individuals['CommunityCentreDistance'] = individuals.apply(getDistances, axis=1, args=(commonArea['location'].values,))

#Combining the IDs for schools and workplaces: workplace IDs are shifted so
#they start right after the ID of the last row of `schools`.
# NOTE(review): workplaces['ID'] is overwritten in place, so running this cell
# twice shifts the IDs twice. This also assumes the last school row holds the
# largest school ID -- confirm.
schoolID = schools['ID'].values[-1]
firstWorkplaceID = schoolID+1
workplaceID = [firstWorkplaceID + index for index in workplaces['ID'].values]
workplaces['ID'] = workplaceID
workplaces = workplaces.sort_values(by=['ID'])

# Each ward's share of the total population. The total is computed once and the
# division is vectorized; previously the total was recomputed for every row
# inside a row-wise apply.
totalWardPopulation = demographicsData['Total Population (in thousands)'].values.sum()
demographicsData['fracPopulation'] = demographicsData['Total Population (in thousands)'] / totalWardPopulation
demographicsData = demographicsData.rename(columns={"Ward No.": "wardNo"})
print("additonal data processing completed in ", time.time() - start)

additonal data processing...
additonal data processing completed in  0.6415956020355225


## Dumping Instantiated static files as JSONs

In [11]:
print("saving instantiations as JSON....")
start = time.time()

# All output files are written under data/<city>/.
outputPrefix = "data/"+city+"/"

# Full individuals table first.
individuals.to_json(outputPrefix+"individuals.json", orient='records')

# Location tables: only the ID, ward and coordinate columns are exported.
# Column names differ per table, so each entry lists its own selection.
for frame, columns, fileName in (
    (households, ['id', 'wardNo', 'lat', 'lon'], "houses.json"),
    (schools, ['ID', 'ward', 'lat', 'lon'], "schools.json"),
    (workplaces, ['ID', 'ward', 'lat', 'lon'], "workplaces.json"),
    (commonArea, ['ID', 'wardNo', 'lat', 'lon'], "commonArea.json"),
):
    frame[columns].to_json(outputPrefix+fileName, orient='records')

# This project helper is given the output path and cityGeoDF directly.
computeWardCentreDistance(cityGeoDF, outputPrefix+"wardCentreDistance.json")

# Per-ward population and population share.
populationColumns = ['wardNo', 'Total Population (in thousands)', 'fracPopulation']
demographicsData[populationColumns].to_json(outputPrefix+"fractionPopulation.json", orient="records")

elapsed = time.time() - start
print("saving instantiations as JSON completed in ", elapsed)

saving instantiations as JSON....
saving instantiations as JSON completed in  0.0832223892211914
