# <span style="color:blue">Población</span>
# <span style="color:gray">Animated Scatter Plot</span>

<b>Axes:</b>

*  <b>x:</b> Porcentaje de población abastecida sobre población total del servicio autorizado
*  <b>y:</b> Segregación por Categoría

<b>Points:</b> A point for each Epsa. Colored based on the EPSA's categories. Size fixed.

<b>Filters:</b> Epsas are split into categories, which can be selected individually for display.

<b>Frames:</b> A frame for each year with animation option supporting smooth transitions.

<b>Environment:</b> Local with python objects simulating plotly grids for online compatibility. 

<b>LIBRARY IMPORTS</b>

In [2]:
import requests
import pandas as pd
import numpy as np
import math
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
init_notebook_mode(connected=True)

In [3]:
epsas_r = requests.get('https://peridash.ml/api/epsas')

In [4]:
epsas_df = pd.read_json(epsas_r.text)

In [5]:
with open('epsas.json', 'w') as f:
    f.write(epsas_df.to_json())

<b>API REQUESTS</b>

In [3]:
reports_r = requests.get('https://peridash.ml/api/reports')
measurements_r = requests.get('https://peridash.ml/api/measurements')
epsas_r = requests.get('https://peridash.ml/api/epsas')
variables_r = requests.get('https://peridash.ml/api/variables')
indicators_r = requests.get('https://peridash.ml/api/indicators')

<b>DATAFRAMES</b>

In [4]:
reports_df = pd.read_json(reports_r.text)
measurements_df = pd.read_json(measurements_r.text)
epsas_df = pd.read_json(epsas_r.text)

complete_reports_df = pd.merge(reports_df, epsas_df, left_on='epsa', right_on='url')
complete_measurements_df = pd.merge(measurements_df, epsas_df, left_on='epsa', right_on='url')

rdf = complete_reports_df
mdf = complete_measurements_df
vdf = pd.read_json(variables_r.text)
idf = pd.read_json(indicators_r.text)

In [5]:
ind_names = [
    'Rendimiento actual de la fuente', 'Uso eficiente del recurso',
    'Cobertura de muestras de agua potable',
    'Conformidad de los análisis de agua potable realizados',
    'Dotación', 'Continuidad por racionamiento', 'Continuidad por corte',
    'Cobertura del servicio de agua potable',
    'Cobertura del servicio de alcantarillado sanitario',
    'Cobertura de micromedición',
    'Incidencia extracción de agua cruda subterránea ',
    'Índice de tratamiento de agua residual', 'Control de agua residual',
    'Capacidad instalada de planta de tratamiento de agua potable',
    'Capacidad instalada de planta de tratamiento de agua residual ',
    'Presión del servicio de agua potable',
    'Índice de agua no contabilizada en producción',
    'Índice de agua no contabilizada en la red',
    'Densidad de fallas en tuberías de agua potable',
    'Densidad de fallas en conexiones de agua potable',
    'Densidad de fallas en tuberías de agua residual',
    'Densidad de fallas en conexiones de agua residual',
    'Índice de operación eficiente', 'Prueba ácida',
    'Eficiencia de recaudación', 'Índice de endeudamiento total', 'Tarifa media',
    'Costo unitario de operación', 'Índice de ejecución de inversiones',
    'Personal calificado', 'Número de empleados por cada 1000 conexiones',
    'Atención de reclamos'
]
ind_units = ['%', '%', '%', '%', 'l/hab/día', 'hr/día', '%', '%', '%', '%',
             '%', '%', '%', '%', '%', '%', '%', '%', 'fallas/100km',
             'fallas/1000conex.', 'fallas/100km', 'fallas/1000conex.', '%', '-',
             '%', '%', '%CUO(Bs.)', '%TM(Bs.)', '%', '%', 'empleados/1000conex.', '%']

colors = [
    '#1f77b4','#ff7f0e','#2ca02c',
    '#d62728', '#9467bd', '#8c564b',
    '#e377c2', '#7f7f7f', '#bcbd22','#17becf'
]

def get_ind_name(ind):
    return ind_names[int(ind[3:])-1]

def get_ind_unit(ind):
    return ind_units[int(ind[3:])-1]

In [224]:
selected_var = 'v22'
selected_ind = 'ind10'

l1 = list(rdf[(rdf.category == 'C') & (rdf.year == 2014)][selected_var])
l2 = list(rdf[(rdf.category == 'C') & (rdf.year == 2015)][selected_var])
l3 = list(rdf[(rdf.category == 'C') & (rdf.year == 2016)][selected_var])
l4 = list(rdf[(rdf.category == 'C') & (rdf.year == 2017)][selected_var])

# l1 = list(mdf[(mdf.category == 'C') & (mdf.year == 2014)][selected_ind])
# l2 = list(mdf[(mdf.category == 'C') & (mdf.year == 2015)][selected_ind])
# l3 = list(mdf[(mdf.category == 'C') & (mdf.year == 2016)][selected_ind])
# l4 = list(mdf[(mdf.category == 'C') & (mdf.year == 2017)][selected_ind])

[f'{x1} - {x2} - {x3} - {x4}' for x1,x2,x3,x4 in zip(l1,l2,l3,l4)]

['29672.0 - 31245.0 - 32340.0 - 29061.0',
 '12635.0 - 13267.0 - 15027.0 - 32340.0',
 '12829.0 - 13429.0 - 14006.0 - nan',
 '15481.0 - 16255.0 - 16956.0 - 15716.0',
 '34400.0 - 35122.0 - 35859.56 - 13044.0',
 '43445.0 - 46138.0 - 48749.0 - 15211.0',
 '15995.0 - 20415.0 - 23584.23 - 36613.0',
 '37289.0 - 38035.0 - 38788.0 - 51750.0',
 '23880.0 - 25462.0 - 26735.0 - 23999.0',
 '10215.0 - 10573.0 - 10945.0 - 39548.0',
 '27190.0 - 27494.0 - 28109.87 - 27399.0',
 '25447.0 - 30239.0 - 32557.0 - 12052.0',
 '24800.0 - 28971.0 - 28408.0 - 26950.0',
 '14265.0 - 18402.0 - 18800.0 - 19232.0',
 '20720.0 - 21248.0 - 15766.0 - 15797.0',
 '18128.0 - 18491.0 - 19384.0 - 33886.0',
 '20710.0 - 21381.0 - 21552.0 - 11136.0',
 '37114.0 - 39080.0 - 18860.0 - 20246.0',
 '42556.0 - 43284.0 - 21436.0 - 22759.0',
 '24677.0 - 30000.0 - 43294.0 - 19588.0',
 '24065.0 - 24259.0 - 31512.0 - 24499.0',
 '45048.0 - 46489.0 - 26429.0 - 22059.0',
 '10472.0 - 10535.0 - 24455.0 - 15576.0',
 '15950.0 - 16939.0 - 47884.0 - 424

<b>SETTINGS</b>

In [18]:
years = list(rdf.year.sort_values().unique()) # [2014, 2015, 2016, 2017]
categories = list(rdf.category.sort_values().unique()) # ['A', 'B', 'C', 'D']
# years = list(mdf.year.sort_values().unique()) # [2014, 2015, 2016, 2017]
# categories = list(mdf.category.sort_values().unique()) # ['A', 'B', 'C', 'D']

plane_dims = ['v22', 'v23']
size_dim = 'v22'

# plane_dims = ['ind17', 'ind18']
# size_dim = 'ind17'

xi = int(plane_dims[0][1:]) # 17
yi = int(plane_dims[1][1:]) # 18
# xi = int(plane_dims[0][3:])
# yi = int(plane_dims[1][3:])

xname = vdf[vdf.var_id==xi].name.iloc[0] # 'Número total de conexiones de agua potable activas medidas y no medidas'
yname = vdf[vdf.var_id==yi].name.iloc[0] # 'Número total de conexiones de alcantarillado sanitario activas '
xunit = vdf[vdf.var_id==xi].unit.iloc[0] # 'conex.'
yunit = vdf[vdf.var_id==yi].unit.iloc[0] # 'conex.'
# xname = ind_names[xi - 1] # 'Índice de agua no contabilizada en producción'
# yname = ind_names[yi - 1] # 'Índice de agua no contabilizada en la red'
# xunit = ind_units[xi - 1] # '%'
# yunit = ind_units[yi - 1] # '%'

code_to_cat = {code: cat for code,cat in zip(epsas_df.code, epsas_df.category)}

<b>DATA SIMULATED GRID</b>

In [161]:
cat_to_y = {cat: len(categories) - i for i,cat in enumerate(categories)}

grid_data = {}

for year in years:
    for category in categories:
        frdf = rdf[(rdf.year == year) & (rdf.category == category)]
                       
        percentages = [x/y * 100 for x,y in zip(frdf.v23, frdf.v22)]
        
        grid_data[f'{year}_{category}_text'] = [f'{code}<br>{"%.2f"%(p)}%<br>Población Abastecida: {a}<br>Población Total: {b}' for code,a,b,p in zip(frdf.code, frdf.v23, frdf.v22,percentages)]
        grid_data[f'{year}_{category}_x'] = percentages
        grid_data[f'{year}_{category}_y'] = [cat_to_y[code_to_cat[code]] for code in frdf.code]

<b>TRACE GENERATOR and BASE DATA</b>

In [162]:
def create_trace(year, category):
    return dict(
        x=[0.0 if math.isnan(x) else x for x in grid_data[f'{year}_{category}_x']],
        y=grid_data[f'{year}_{category}_y'],
        marker= dict(
            symbol='line-ns',
            size=25,
            opacity=0.7,
            line = dict(
              color = colors[dict(A=0, B=1, C=2, D=3)[category]],
              width = 2
            )
#             size=[0.0 if math.isnan(x) else x for x in grid_data[f'{year}_{category}_{size_dim}']],
#             sizemode='area',
#             sizeref=1000,
        ),
        mode= 'markers',
        text= grid_data[f'{year}_{category}_text'],
        name= 'Categoría: ' + category,
        hoverinfo = 'text',
    )

base_data = [create_trace(years[0], category) for category in categories]

<b>CHART FRAMES</b>

In [163]:
def create_frame(year):
    frame_data = [create_trace(year, category) for category in categories]
    return dict(data=frame_data, name=str(year))

frames = [create_frame(year) for year in years]

<b>CHART LAYOUT</b>

In [164]:
animation_settings = dict(
    frame = dict(duration=1200, redraw=False),
    fromcurrent = False,
    transition = dict(duration=1200, easing='cubic-in-out'),
)

def make_step(year):
    return dict(
        method = 'animate',  
        args = [[year], animation_settings],
        label= year
    )
steps = [make_step(str(year)) for year in years] 

sliders = [dict(
    active = 1,
    currentvalue = {
        'prefix': 'Año: ',
        'font': {'size': 20},
        'visible': True,
        'xanchor': 'right'
    },
    steps = steps,
    yanchor= 'top',
    xanchor= 'left',
    pad= {'b': 10, 't': 50},
    len= 0.9,
    x= 0.1,
    y= 0,
)]

updatemenus = [dict(
    buttons= [dict(
        args= [[str(y) for y in years], animation_settings],
        label= 'Animar',
        method= 'animate',
    )],
    direction= 'left',
    pad= dict(r=10, t=87),
    showactive= False,
    type= 'buttons',
    x= 0.1,
    y= 0,
    xanchor= 'right',
    yanchor= 'top',
)]

layout = go.Layout(
    title='Población Abastecida',
    hovermode='closest',
    width=1000,
    legend=dict(x=.1, y=1.1, orientation='h'),
    xaxis=dict(
        title='Porcentaje (%): Población Abastecida / Población Total del Área de Servicio * 100',
        range=[35,100],
        autorange=False,
        tickmode='linear',
        tick0 = 35,
        dtick = 5,
    ),
    yaxis=dict(visible=False),
#     xaxis={'title':f'Variable {str(xi)}: {xname} ({xunit})'},
#     yaxis={'title':f'Variable {str(yi)}: {yname} ({yunit})'},
    plot_bgcolor='#dfe8f3',
    sliders= sliders,
    updatemenus = updatemenus,
)

figure = go.Figure(data=base_data, frames=frames, layout=layout)
iplot(figure)