Authors: Luís Eduardo Anunciado Silva, Mayra Dantas de Azevedo

In [0]:
import requests
import json

import pandas as pd
import numpy as np

## Schools and hospitals

Using the OpenStreetMap Overpass API, we fetch the schools and hospitals registered in Natal, RN.

In [0]:
# Query the Overpass API for hospital and school nodes, plus admin_level=10
# relations, inside the search area with id 3600301091.
overpass_url = "http://overpass-api.de/api/interpreter"
overpass_query = """
[out:json][timeout:25]; area(3600301091)->.searchArea; ( node['amenity'='hospital'](area.searchArea); node['amenity'='school'](area.searchArea);relation['admin_level'='10'](area.searchArea); ); out body; >; out center;
"""
# Client-side timeout so a stalled connection cannot hang the notebook; it is
# set above the 25 s server-side limit declared in the query itself.
response = requests.get(overpass_url,
                        params={'data': overpass_query},
                        timeout=60)
# Fail loudly on an HTTP error status instead of trying to parse an error
# page as JSON on the next line.
response.raise_for_status()
data = response.json()

In [0]:
# Tabulate the 'elements' list of the Overpass response, one row per element.
df = pd.DataFrame(data['elements'])

In [4]:
# Preview the first rows of the elements table.
df.head()

Unnamed: 0,center,id,lat,lon,members,nodes,tags,type
0,,501170977,-5.869024,-35.234268,,,{'amenity': 'school'},node
1,,501170997,-5.82361,-35.222611,,,"{'amenity': 'school', 'name': 'Centro de Atenç...",node
2,,501171016,-5.871019,-35.221614,,,"{'amenity': 'school', 'name': 'Piaget'}",node
3,,501619315,-5.816246,-35.204844,,,"{'amenity': 'hospital', 'name': 'Hospital da U...",node
4,,501784871,-5.812943,-35.210726,,,{'amenity': 'school'},node


In [5]:
# Summarize column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2770 entries, 0 to 2769
Data columns (total 8 columns):
center     407 non-null object
id         2770 non-null int64
lat        2327 non-null float64
lon        2327 non-null float64
members    36 non-null object
nodes      407 non-null object
tags       499 non-null object
type       2770 non-null object
dtypes: float64(2), int64(1), object(5)
memory usage: 173.2+ KB


To process the amenities, we create a column that keeps the category of each amenity (school or hospital) and another column that keeps its name.

In [0]:
def get_amenity(row):
  """Return the 'amenity' entry of a tags value, or NaN when it is unavailable.

  Args:
    row: one value of the `tags` column; a dict of tags, or a
      non-subscriptable placeholder (e.g. NaN) when the element has no tags.

  Returns:
    The value stored under 'amenity', or `np.nan` if the key is absent or
    `row` cannot be indexed by a string key.
  """
  try:
    return row['amenity']
  except (KeyError, TypeError):
    # KeyError: the tags carry no 'amenity' key.
    # TypeError: the value is not a mapping (e.g. NaN for missing tags).
    # Only these are caught so that unrelated errors and interrupts
    # (KeyboardInterrupt, SystemExit) are no longer silently swallowed.
    return np.nan
  
def get_name(row):
  """Return the 'name' entry of a tags value, or NaN when it is unavailable.

  Args:
    row: one value of the `tags` column; a dict of tags, or a
      non-subscriptable placeholder (e.g. NaN) when the element has no tags.

  Returns:
    The value stored under 'name', or `np.nan` if the key is absent or
    `row` cannot be indexed by a string key.
  """
  try:
    return row['name']
  except (KeyError, TypeError):
    # KeyError: the tags carry no 'name' key.
    # TypeError: the value is not a mapping (e.g. NaN for missing tags).
    # Only these are caught so that unrelated errors and interrupts
    # (KeyboardInterrupt, SystemExit) are no longer silently swallowed.
    return np.nan

In [0]:
# Derive the amenity category and the name of each element from its tags.
df['amenity'] = df['tags'].map(get_amenity)
df['name'] = df['tags'].map(get_name)

In [8]:
# List the distinct amenity values that were extracted.
df['amenity'].unique()

array(['school', 'hospital', nan, 'bench'], dtype=object)

In [9]:
# Preview the table, now including the amenity and name columns.
df.head()

Unnamed: 0,center,id,lat,lon,members,nodes,tags,type,amenity,name
0,,501170977,-5.869024,-35.234268,,,{'amenity': 'school'},node,school,
1,,501170997,-5.82361,-35.222611,,,"{'amenity': 'school', 'name': 'Centro de Atenç...",node,school,Centro de Atenção Integral a Criança e ao Adol...
2,,501171016,-5.871019,-35.221614,,,"{'amenity': 'school', 'name': 'Piaget'}",node,school,Piaget
3,,501619315,-5.816246,-35.204844,,,"{'amenity': 'hospital', 'name': 'Hospital da U...",node,hospital,Hospital da UNIMED
4,,501784871,-5.812943,-35.210726,,,{'amenity': 'school'},node,school,


## Placing the points in neighborhoods

In this section, we use the SIDRA API to retrieve the neighborhoods of Natal.

In [0]:
# Download the Natal GeoJSON file from GitHub and parse it as JSON.
geo_response = requests.get(
    'https://github.com/nymarya/data-science-one/blob/master/Lesson%2314/natal.geojson?raw=true',
    timeout=60)  # do not hang forever on a stalled connection
# Fail loudly on an HTTP error status instead of trying to parse an error
# page as JSON on the next line.
geo_response.raise_for_status()
geo_json_natal = geo_response.json()

## Recovering data about indicators from the API

In [0]:
# People aged 10 or older with income, by neighborhood in Natal (2010),
# from SIDRA table 3170 (http://api.sidra.ibge.gov.br/desctabapi.aspx?c=3170)
headers = {
    'Content-Type': 'application/json;charset=UTF-8',
    'User-Agent': 'google-colab',
    'Accept': 'application/json, text/plain, */*',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7',
    'Connection': 'keep-alive',
}

# Path segments: t = table, p = period, v = variables, N102 = neighborhood
# level restricted to municipality (n6) 2408102.
endnode = "http://api.sidra.ibge.gov.br/values/t/3170/p/2010/v/allxp/N102/in%20n6%202408102"

# Client-side timeout so a stalled connection cannot hang the notebook.
response = requests.get(endnode, headers=headers, timeout=60)
# Fail loudly on an HTTP error status instead of trying to parse an error
# page as JSON on the next line.
response.raise_for_status()

# List of dicts: the first element is the header, the others are data rows.
raw_data = response.json()

In [0]:
# Show only the header and the first two records instead of the full response.
raw_data[:3]

[{'D1C': 'Ano (Código)',
  'D1N': 'Ano',
  'D2C': 'Variável (Código)',
  'D2N': 'Variável',
  'D3C': 'Bairro (Código)',
  'D3N': 'Bairro',
  'D4C': 'Situação do domicílio (Código)',
  'D4N': 'Situação do domicílio',
  'D5C': 'Sexo (Código)',
  'D5N': 'Sexo',
  'D6C': 'Grupo de idade (Código)',
  'D6N': 'Grupo de idade',
  'MC': 'Unidade de Medida (Código)',
  'MN': 'Unidade de Medida',
  'V': 'Valor'},
 {'D1C': '2010',
  'D1N': '2010',
  'D2C': '841',
  'D2N': 'Pessoas de 10 anos ou mais de idade, com rendimento',
  'D3C': '2408102001',
  'D3N': 'Santos Reis - Natal - RN',
  'D4C': '6795',
  'D4N': 'Total',
  'D5C': '6794',
  'D5N': 'Total',
  'D6C': '95253',
  'D6N': 'Total',
  'MC': '45',
  'MN': 'Pessoas',
  'V': '2989'},
 {'D1C': '2010',
  'D1N': '2010',
  'D2C': '841',
  'D2N': 'Pessoas de 10 anos ou mais de idade, com rendimento',
  'D3C': '2408102002',
  'D3N': 'Praia do Meio - Natal - RN',
  'D4C': '6795',
  'D4N': 'Total',
  'D5C': '6794',
  'D5N': 'Total',
  'D6C': '95253',
 

In [0]:
# Build one row per neighborhood from the SIDRA response.
# The first element of raw_data is only the header, so it is skipped.
sidra_rows = raw_data[1:]

# Comprehensions keep their loop variable local, so this cell no longer
# overwrites the `data` dictionary that holds the Overpass response.
neigh_id = [int(row["D3C"]) for row in sidra_rows]
neigh_house = [float(row["V"]) for row in sidra_rows]
# Keep only the text before the first " -" (e.g. "Santos Reis - Natal - RN"
# becomes "Santos Reis").
neigh_name = [row["D3N"].split(" -")[0] for row in sidra_rows]

# NOTE(review): the recorded output labels the value column 'households',
# while this code names it 'income' -- confirm which name later cells expect.
neigh_df = pd.DataFrame.from_dict({"neighborhood_id": neigh_id,
                                  "name":neigh_name,
                                  "income": neigh_house})
neigh_df.head()

Unnamed: 0,neighborhood_id,name,households
0,2408102001,Santos Reis,2989.0
1,2408102002,Praia do Meio,2810.0
2,2408102003,Rocas,5806.0
3,2408102004,Ribeira,1453.0
4,2408102005,Petrópolis,3288.0
