In [19]:
# Libraries for GeoJSON, MySQL and data visualization
import getpass
from os.path import join
from urllib.parse import quote

import geopandas as gpd
import pandas as pd
import pymysql
from shapely.geometry import Point
from sqlalchemy import create_engine

# Brazilian states: two-digit numeric code -> two-letter abbreviation, grouped by region.
# The codes are the ones used in the GeoJSON file names and in the UBS 'UF' column.
# Insertion order matters: the municipality loader iterates over this dict.
state_codes = {
    # North
    "12": "AC", "13": "AM", "16": "AP", "15": "PA", "11": "RO", "14": "RR", "17": "TO",
    # Northeast
    "27": "AL", "29": "BA", "23": "CE", "21": "MA", "25": "PB",
    "26": "PE", "22": "PI", "24": "RN", "28": "SE",
    # Southeast
    "32": "ES", "31": "MG", "33": "RJ", "35": "SP",
    # South
    "41": "PR", "43": "RS", "42": "SC",
    # Central-West
    "53": "DF", "52": "GO", "51": "MT", "50": "MS",
}

# [abbreviation, full name] pairs, sorted by abbreviation; these are the rows
# inserted into the `states` table.
states = [
    ["AC", "Acre"],
    ["AL", "Alagoas"],
    ["AM", "Amazonas"],
    ["AP", "Amapá"],
    ["BA", "Bahia"],
    ["CE", "Ceará"],
    ["DF", "Distrito Federal"],
    ["ES", "Espírito Santo"],
    ["GO", "Goiás"],
    ["MA", "Maranhão"],
    ["MG", "Minas Gerais"],
    ["MS", "Mato Grosso do Sul"],
    ["MT", "Mato Grosso"],
    ["PA", "Pará"],
    ["PB", "Paraíba"],
    ["PE", "Pernambuco"],
    ["PI", "Piauí"],
    ["PR", "Paraná"],
    ["RJ", "Rio de Janeiro"],
    ["RN", "Rio Grande do Norte"],
    ["RO", "Rondônia"],
    ["RR", "Roraima"],
    ["RS", "Rio Grande do Sul"],
    ["SC", "Santa Catarina"],
    ["SE", "Sergipe"],
    ["SP", "São Paulo"],
    ["TO", "Tocantins"],
]

A primeira base é referente a Unidades Básicas de Saúde (UBS) do Sistema Único de Saúde.

In [4]:
def pointify(x, y):
    """Build a shapely ``Point`` from two coordinate values.

    Each value may be a string written with a decimal comma (e.g. ``'-52,22545'``),
    a string written with a decimal point, or an already-numeric value.

    Args:
        x: X coordinate (the caller in this notebook passes the longitude).
        y: Y coordinate (the caller in this notebook passes the latitude).

    Returns:
        shapely.geometry.Point: ``Point(x, y)`` with both values converted to float.

    Raises:
        ValueError: If a value cannot be parsed as a float.
    """
    def _to_float(value):
        # Go through str() so numeric inputs, which have no .replace(), are accepted too.
        return float(str(value).replace(',', '.'))

    return Point(_to_float(x), _to_float(y))

# Source headers mapped to the column names used from here on.
rename = {'NOME': 'name', 'CNES': 'cnes'}

# Read the semicolon-separated registry and discard rows without coordinates.
ubs = pd.read_csv(join('data', 'cadastro_estabelecimentos_cnes.csv'), sep=';')
ubs = ubs.dropna(subset=['LATITUDE', 'LONGITUDE'])

# One Point per row, built from the (LONGITUDE, LATITUDE) pair.
ubs['geometry'] = ubs.apply(lambda row: pointify(row['LONGITUDE'], row['LATITUDE']), axis=1)

# Drop the columns that are not needed once the geometry exists, then rename the rest.
ubs = ubs.drop(columns=['IBGE', 'LOGRADOURO', 'BAIRRO', 'LONGITUDE', 'LATITUDE'])
ubs = ubs.rename(columns=rename)

# Swap the numeric state code for its two-letter abbreviation (KeyError on unknown codes).
ubs['UF'] = ubs['UF'].map(lambda code: state_codes[str(code)])
ubs

  arr = construct_1d_object_array_from_listlike(values)


Unnamed: 0,cnes,UF,name,geometry
0,33820,GO,UNIDADE DE SAUDE DA FAMILIA PSF 307,POINT (-52.22545 -15.90682)
1,108,PE,USF ALTO DOS INDIOS,POINT (-35.0321 -8.28389)
2,116,PE,USF CHARNECA II,POINT (-35.02819 -8.28353)
3,124,PE,USF SAO FRANCISCO I,POINT (-35.035 -8.287)
4,132,PE,USF ROSARIO,POINT (-35.0321 -8.28389)
...,...,...,...,...
41817,9718907,CE,JOAO ARISTON DIAS,POINT (-40.06244 -3.12156)
41818,9773274,RJ,ESTETIC DENTE,POINT (-43.10604 -22.90744)
41819,9776737,SP,UBS RACLE BARRETO DA SILVA,POINT (-51.49013 -21.1123)
41820,9842330,RN,UNIDADE POTENGI,POINT (-36.07464 -5.97503)


In [20]:
# Source headers mapped to the column names used from here on.
rename = {'siglauf': 'UF', 'anomes': 'y_m'}

bolsa_familia = pd.read_csv(join('data', 'misocial.csv')).rename(columns=rename)

# Insert a dash after the first four characters of the period, e.g. 202001 -> '2020-01'.
bolsa_familia['y_m'] = bolsa_familia['y_m'].map(lambda period: f'{str(period)[:4]}-{str(period)[4:]}')
bolsa_familia

Unnamed: 0,ibge,UF,y_m,qtd_ben_bas,qtd_ben_var,qtd_ben_bvj,qtd_ben_bvn,qtd_ben_bvg,qtd_ben_bsp
0,110001,RO,2020-01,1173,2416,220.0,43.0,57.0,234.0
1,110001,RO,2020-02,1153,2365,234.0,35.0,47.0,239.0
2,110001,RO,2020-03,1143,2378,241.0,37.0,48.0,230.0
3,110001,RO,2020-04,1158,2396,252.0,39.0,45.0,231.0
4,110001,RO,2020-05,1163,2375,264.0,36.0,55.0,234.0
...,...,...,...,...,...,...,...,...,...
66835,530010,DF,2020-08,75819,123165,14245.0,683.0,1527.0,35417.0
66836,530010,DF,2020-09,75865,120746,16621.0,455.0,2333.0,35375.0
66837,530010,DF,2020-10,75939,120750,16620.0,81.0,2049.0,35406.0
66838,530010,DF,2020-11,76025,120760,16620.0,62.0,1820.0,35434.0


ben_bas: Benefício Básico - R$ 89,00 \
ben_var: Benefício Variável - R$ 41,00 \
ben_bvj: Benefício Variável Jovem - R$ 48,00 \
ben_bvn: Benefício Variável Nutriz (criança de até 6 meses) \
ben_bvg: Benefício Variável à Gestante \
ben_bsp: Benefício para a Superação da Extrema Pobreza

In [6]:
# Load the municipality polygons of every state and stack them into a single table.
#
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and calling
# it inside a loop re-copies the accumulated frame on every iteration. The per-state
# frames are therefore collected in a list and concatenated once.
#
# The empty seed frame stays first in the list, mirroring the original append chain
# (same leading columns, and the seed is a plain pandas DataFrame).
state_frames = [pd.DataFrame(columns=['id', 'name', 'geometry'])]

for state, uf in state_codes.items():
    data = gpd.read_file(f'data/geojson/geojs-{state}-mun.json').drop('description', axis=1)
    data['UF'] = uf  # tag every municipality with its state abbreviation
    state_frames.append(data)

brazil = pd.concat(state_frames, ignore_index=True)
brazil

Unnamed: 0,id,name,geometry,UF
0,1200013,Acrelândia,"POLYGON ((-67.13305 -9.67614, -67.05956 -9.708...",AC
1,1200054,Assis Brasil,"POLYGON ((-69.58777 -10.37119, -69.58226 -10.3...",AC
2,1200104,Brasiléia,"POLYGON ((-69.12877 -10.40171, -69.13691 -10.4...",AC
3,1200138,Bujari,"POLYGON ((-68.21979 -9.24215, -67.97663 -9.352...",AC
4,1200179,Capixaba,"POLYGON ((-67.77317 -10.24188, -67.77008 -10.2...",AC
...,...,...,...,...
5559,5007950,Tacuru,"POLYGON ((-55.08865 -23.46852, -55.04589 -23.4...",MS
5560,5007976,Taquarussu,"POLYGON ((-53.28058 -22.57759, -53.28178 -22.6...",MS
5561,5008008,Terenos,"POLYGON ((-55.13542 -20.15355, -55.13242 -20.1...",MS
5562,5008305,Três Lagoas,"POLYGON ((-52.47566 -19.61053, -52.46836 -19.6...",MS


In [7]:
def fix(polygon):
    """Return the WKT text of a geometry so it can be passed to ``ST_GeomFromText``.

    Args:
        polygon: An object exposing a ``wkt`` attribute, a string that is already
            WKT text, or ``None`` for a missing geometry.

    Returns:
        str | None: ``polygon.wkt`` for geometry objects, the string itself when the
        value was already converted, and ``None`` for a missing geometry.
    """
    if polygon is None:
        return None
    if isinstance(polygon, str):
        # Already converted (e.g. the conversion cell was run twice): keep it as is
        # instead of failing on the missing .wkt attribute.
        return polygon
    return polygon.wkt

# Replace the geometry objects of both tables with their WKT text for the SQL inserts.
for frame in (brazil, ubs):
    frame['geometry'] = frame['geometry'].map(fix)

In [8]:
# Materialize the rows first; their column order has to line up with the
# column lists of the INSERT statements below.
values = brazil.values.tolist()
values2 = ubs.values.tolist()

# Ask for the password interactively so it is never stored in the notebook.
p = getpass.getpass()
connection = pymysql.connect(
    host='localhost',
    port=3306,
    user='root',
    passwd=p,
)
cursor = connection.cursor()

# Select the schema, then load states, municipalities and health units, in that order.
# The geometry columns are sent as WKT text and parsed by MySQL's ST_GeomFromText.
cursor.execute('USE geo_analysis;')
cursor.executemany('INSERT INTO states (code, state_name) VALUES (%s,%s);', states)
cursor.executemany('INSERT INTO cities (id, name, geometry, UF) VALUES (%s,%s,ST_GeomFromText(%s),%s);', values)
cursor.executemany('INSERT INTO ubs (cnes, UF, name, geometry) VALUES (%s,%s,%s,ST_GeomFromText(%s));', values2)

39650

In [9]:
# Commit the pending INSERTs, then release the cursor and the connection.
connection.commit()
cursor.close()
connection.close()

In [25]:
# Percent-encode the password before embedding it in the connection URL: characters
# such as '@', ':', '/' or '%' would otherwise be read as URL syntax and break the
# connection. safe='' makes quote() escape '/' as well.
engine = create_engine(f"mysql+pymysql://root:{quote(p, safe='')}@localhost/geo_analysis")

# Append the Bolsa Família rows to the existing table. Re-running this cell inserts
# the same rows again, because if_exists='append' never clears the table.
bolsa_familia.to_sql(name='bolsa_familia', con=engine, if_exists='append', index=False)