In [1]:
%run -i ./startup.py
%run -i ./functions/dc_transform.py
%run -i ./functions/dc_join.py
%run -i ./functions/dc_list.py
%run -i ./functions/dc_keys.py
#%run -i ./functions/datagov_spider.py

In [2]:
import re
import nltk
import shapefile
import numpy as np
import seaborn as sns
import geopandas as gpd
import ipyleaflet as ipl

from branca.colormap import linear
from datetime import date
from unidecode import unidecode
from matplotlib import pyplot as plt

In [3]:
# Read the Brazilian state-boundary shapefile into a GeoDataFrame.
# NOTE(review): the directory is an absolute, machine-specific path; consider
# moving it to a configurable data directory.
in_dir = '/home/josegois/data/maps/brazil/estados'
in_shp = '{}/UFEBRASIL.shp'.format(in_dir)

br_estados = gpd.read_file(filename=in_shp)

In [4]:
def getl(df, i, v):
    """Return the rows of ``df`` whose column ``i`` holds one of the values in ``v``.

    The result has a fresh 0..n-1 index.
    """
    keep = df[i].isin(v)
    return df[keep].reset_index(drop=True)


def getin(df, i, v):
    """Same as ``getl`` but ``v`` is de-duplicated with ``np.unique`` first."""
    wanted = np.unique(v)
    keep = df[i].isin(wanted)
    return df[keep].reset_index(drop=True)


def getre(df, i, v):
    """Return the rows of ``df`` whose column ``i`` matches the regex ``v``.

    A value counts as matching when ``re.findall(v, value)`` is non-empty.
    The result has a fresh 0..n-1 index.
    """
    matching = [value for value in df[i] if re.findall(v, value)]
    keep = df[i].isin(matching)
    return df[keep].reset_index(drop=True)

In [5]:
def dc_data(key_pattern='^.*$'):
    """Fetch the ``'data'`` entries of ``dc_join.dict`` whose key matches a regex.

    Parameters
    ----------
    key_pattern : str
        Regular expression tested against each key with ``re.search``.
        The default matches every key.

    Returns
    -------
    The single ``'data'`` object when exactly one key matches; otherwise a
    list of the matching ``'data'`` objects (empty when nothing matches).
    """
    matches = [
        entry['data']
        for key, entry in dc_join.dict.items()
        if re.search(key_pattern, key)
    ]
    return matches[0] if len(matches) == 1 else matches

In [6]:
def dc_notin(data, fields, exvals):
    """Drop the rows of ``data`` that hold an excluded value in any of ``fields``.

    This version operates on the ``data`` argument itself (the previous body
    read an undefined global and ignored ``data``) and follows the same
    semantics as the later ``dc_notin`` definition in this notebook: fields
    are processed in order and each field is counted on the rows that
    survived the previous fields.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to filter; it is not modified in place.
    fields : sequence of str
        Column names to inspect.
    exvals : sequence
        Values to exclude. Each value is tested on its own with
        ``Series.isin([value])``.

    Returns
    -------
    (filtered, n_ex) : tuple
        ``filtered`` is the frame without the excluded rows (original index
        labels are kept). ``n_ex`` is a float array of shape
        ``(len(exvals), len(fields))`` where ``n_ex[e, k]`` is the number of
        rows matching ``exvals[e]`` in ``fields[k]`` at the time that field
        was processed.
    """
    n_ex = np.zeros((len(exvals), len(fields)))

    for k, key in enumerate(fields):
        # Rebuilt per field because ``data`` shrinks after every field.
        id_exclude = np.zeros(data.shape[0], dtype=bool)

        for e, exval in enumerate(exvals):
            id_exval = data[key].isin([exval]).values
            n_ex[e, k] = id_exval.sum()
            id_exclude |= id_exval

        data = data.iloc[~id_exclude]

    return (data, n_ex)

In [7]:
def levdist (A,B):
    """Pairwise edit distances between the items of ``A`` and the items of ``B``.

    Items are read by position (``A[a]``, ``B[b]``) and compared with
    ``nltk.edit_distance``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(A), len(B))`` whose ``[a, b]`` entry is
        the distance between ``A[a]`` and ``B[b]``.
    """
    states_distance = np.empty((len(A), len(B)))

    # Walk every (row, column) position of the matrix.
    for a, b in np.ndindex(*states_distance.shape):
        states_distance[a, b] = nltk.edit_distance(A[a], B[b])

    return states_distance

In [8]:
# Alias -> regex pairs; each pair is handed to dc_join as alias= and dc=.
# NOTE(review): the second pattern ends in 'discentes*', i.e. 'discente'
# followed by zero or more 's' -- confirm this is intended rather than
# 'discentes.*' or 'discentes$'.
dc_alias_re = dict([
    ('discentes',              '^dados_complementares_.*discentes$'),
    ('discentes_complementar', '.*socio_economicos_.*discentes*'),
])

for dc_alias in dc_alias_re:
    dc_re = dc_alias_re[dc_alias]
    dc_join(db=db, dc=dc_re, alias=dc_alias)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [9]:
# Join the frame returned for '^discentes$' with the one returned for
# '^discentes_' on the student id, then store the origin state as a categorical.
# Presumably these are the two aliases registered above -- verify against dc_join.
discentes = dc_data('^discentes$').merge(
    dc_data('^discentes_'),
    on='id_discente',
)
discentes['estado_origem'] = discentes['estado_origem'].astype('category')

In [11]:
def dc_notin(data, fields, exvals):
    """Remove the rows of ``data`` that carry an excluded value in ``fields``.

    Fields are handled one after another, so the counts for a field are
    taken on the rows left after the preceding fields were filtered.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to filter (a filtered selection is returned).
    fields : sequence of str
        Columns to check.
    exvals : sequence
        Values to exclude, each tested individually via ``isin([value])``.

    Returns
    -------
    (data, n_ex) : tuple
        The filtered frame and a float array of shape
        ``(len(exvals), len(fields))`` holding the number of matches found
        for each excluded value in each field.
    """
    n_ex = np.zeros((len(exvals), len(fields)))

    for col_pos, field in enumerate(fields):
        # Union of all excluded-value hits for this field.
        drop_mask = np.zeros(data.shape[0], dtype=bool)

        for val_pos, exval in enumerate(exvals):
            hits = data[field].isin([exval]).values
            n_ex[val_pos, col_pos] = hits.sum()
            drop_mask = drop_mask | hits

        data = data.iloc[~drop_mask]

    return (data, n_ex)

In [12]:
# Filter out students whose 'estado' or 'estado_origem' is NaN or one of the
# two placeholder labels; n_excluded receives the per-value, per-field counts.
# NOTE: this rebinds `discentes`, so re-running the cell filters the already
# filtered frame.
discentes, n_excluded = dc_notin(
    discentes,
    ['estado', 'estado_origem'],
    [np.nan, 'Ignorado/exterior', 'Não Informado'],
)

In [13]:
def estado_decoder(x):
    """Normalise a state name: lower-case, transliterate with unidecode, keep only a-z."""
    ascii_lower = unidecode(x.lower())
    return re.sub('[^a-z]', '', ascii_lower)


# Normalised name columns used as join keys between the student table and the map.
br_estados['estado_decoded'] = br_estados['NM_ESTADO'].apply(estado_decoder)
discentes['estado_decoded'] = discentes['estado'].apply(estado_decoder)
discentes['estado_origem_decoded'] = discentes['estado_origem'].apply(estado_decoder)

# First merge attaches CD_GEOCODU for 'estado'; the second attaches the code for
# 'estado_origem', whose clashing columns get the '_br' suffix (CD_GEOCODU_br).
# Both are inner joins, so rows whose normalised name has no match are dropped.
discentes = discentes.merge(
    br_estados[['CD_GEOCODU', 'estado_decoded']],
    left_on='estado_decoded',
    right_on='estado_decoded',
    suffixes=('', '_br'),
)
discentes = discentes.merge(
    br_estados[['CD_GEOCODU', 'estado_decoded']],
    left_on='estado_origem_decoded',
    right_on='estado_decoded',
    suffixes=('', '_br'),
)

In [14]:
# Build a GeoJSON FeatureCollection with one Feature per row of br_estados.
# Each feature is identified by CD_GEOCODU and named by NM_ESTADO.
br_geojson = {
    'type' : 'FeatureCollection',
    'features' : []
}

for _, estado in br_estados.iterrows():
    d = {
        'type' : 'Feature',
        'id' : estado.CD_GEOCODU,
        'properties' : {'name' : estado.NM_ESTADO},
        # Let the geometry describe itself: __geo_interface__ yields the right
        # GeoJSON 'type' (Polygon or MultiPolygon) with correctly nested
        # coordinates, including interior rings. The previous hand-built form
        # labelled multi-part geometries as 'Polygon', which makes every part
        # after the first a hole, and it iterated the multi-part geometry
        # directly, which Shapely 2.x no longer supports.
        'geometry' : estado.geometry.__geo_interface__
    }
    br_geojson['features'].append(d)

In [15]:
def dict_log(d):
    """Return a copy of ``d`` with every value replaced by its base-10 logarithm."""
    return {k: np.log10(v) for k, v in d.items()}


# Non-null id_discente count per state code: CD_GEOCODU comes from the merge on
# 'estado', CD_GEOCODU_br from the merge on 'estado_origem'.
discentes_states = discentes.groupby(['CD_GEOCODU'])['id_discente'].count()
discentes_states_origin = discentes.groupby(['CD_GEOCODU_br'])['id_discente'].count()

# Choropleth inputs: {state code: log10(count)}.
layer_1_dict = dict_log(discentes_states.to_dict())
layer_2_dict = dict_log(discentes_states_origin.to_dict())

In [16]:
# Choropleth layer coloured by layer_1_dict (log10 counts keyed by CD_GEOCODU).
br_layer_1 = ipl.Choropleth(
    geo_data=br_geojson,
    choro_data=layer_1_dict,
    colormap=linear.YlOrRd_04,
    border_color='black',
    style=dict(fillOpacity=0.8, dashArray='5, 5'),
    hover_style=dict(color='white', dashArray='0', fillOpacity=0.9),
)

In [17]:
# Map showing the first choropleth layer.
# The layer is br_layer_1; the previous `br_layer` is not defined anywhere in
# this notebook, so the cell failed on a fresh kernel.
m = ipl.Map(center = (-20,-50), zoom = 3)
m.add_layer(br_layer_1)
m.add_control(ipl.FullScreenControl())
m

Map(center=[-20, -50], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_t…

In [21]:
# Choropleth layer coloured by layer_2_dict (log10 counts keyed by CD_GEOCODU_br).
br_layer_2 = ipl.Choropleth(
    geo_data=br_geojson,
    choro_data=layer_2_dict,
    colormap=linear.YlOrRd_04,
    border_color='black',
    style=dict(fillOpacity=0.8, dashArray='5, 5'),
    hover_style=dict(color='white', dashArray='0', fillOpacity=0.9),
)

In [22]:
# Second map: same view as the first, showing the br_layer_2 choropleth.
m2 = ipl.Map(zoom=3, center=(-20, -50))
m2.add_layer(br_layer_2)
fullscreen_2 = ipl.FullScreenControl()
m2.add_control(fullscreen_2)
m2

Map(center=[-20, -50], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_t…