In [1]:

import pandas as pd


class GeoDataCensal(object):
    """Census geodata organised along a hierarchy of territorial levels.

    Wraps a table (``data_censal``) together with ``var_hierarchy``, a dict
    that names which columns of the table hold the geometry, the territorial
    codes, the territorial names and the census variables:

    * ``'geometry'``: str, name of the geometry column.
    * ``'codes'``: list of code columns, one per level.
    * ``'names'``: list of name columns, same length as ``'codes'``.
    * ``'censo'``: list of census columns (may be empty).

    A ``level`` argument selects the last ``level`` entries of ``'codes'``
    and ``'names'``; the first of the selected codes is the column the data
    is dissolved by.

    ``data_censal`` must offer ``dissolve`` (e.g. a geopandas GeoDataFrame)
    for the level filtering methods to work.

    """

    def __init__(self, data_censal, var_hierarchy):
        self._check_inputs(data_censal, var_hierarchy)
        self.data_censal = data_censal
        self.var_hierarchy = var_hierarchy
        # Number of territorial levels described by the hierarchy.
        self.levels = len(var_hierarchy['codes'])

    def _check_inputs(self, data_censal, var_hierarchy):
        """Assert that ``var_hierarchy`` has the expected keys and types."""
        assert('geometry' in var_hierarchy)
        assert('codes' in var_hierarchy)
        assert('names' in var_hierarchy)
        assert('censo' in var_hierarchy)
        assert(isinstance(var_hierarchy['geometry'], str))
        assert(isinstance(var_hierarchy['codes'], list))
        assert(isinstance(var_hierarchy['names'], list))
        assert(isinstance(var_hierarchy['censo'], list))
        assert(len(var_hierarchy['codes']) == len(var_hierarchy['names']))

    @staticmethod
    def _unique(columns):
        """Return ``columns`` without duplicates, keeping first-seen order."""
        seen = set()
        unique = []
        for col in columns:
            if col not in seen:
                seen.add(col)
                unique.append(col)
        return unique

    def _get_columns_level(self, level, outer=False):
        """Return the column names that belong to ``level``.

        Level 0 yields only the geometry column; any other valid level adds
        the last ``level`` name and code columns. With ``outer=True`` the
        columns not described by the hierarchy are included as well.

        Raises ``IndexError`` if ``level`` is negative or greater than the
        number of levels.
        """
        outer_cols = self.outer_cols if outer else []
        geo_cols = [self.var_hierarchy['geometry']]
        if level == 0:
            return outer_cols + geo_cols
        if level > self.levels or level < 0:
            msg = "Not correct 'level' input."
            raise IndexError(msg)
        pivot = self.levels - level
        names_cols = self.var_hierarchy['names'][pivot:]
        codes_cols = self.var_hierarchy['codes'][pivot:]
        # Ordered de-duplication keeps the resulting column order stable
        # between runs (a plain set would not).
        return self._unique(outer_cols + names_cols + codes_cols + geo_cols)

    def _filter4level(self, level, outer=False):
        """Return ``data_censal`` restricted to the columns of ``level``."""
        return self.data_censal[self._get_columns_level(level, outer)]

    @classmethod
    def _data_reduction(cls, data_censo, var_hierarchy):
        """Build a new instance from already reduced data and hierarchy."""
        return cls(data_censo, var_hierarchy)

    @property
    def inner_cols(self):
        """Columns named by the hierarchy (geometry, codes, names, censo)."""
        hierarchy = self.var_hierarchy
        return self._unique([hierarchy['geometry']] + hierarchy['codes'] +
                            hierarchy['names'] + hierarchy['censo'])

    @property
    def outer_cols(self):
        """Columns of ``data_censal`` that the hierarchy does not name."""
        # Compute the inner columns once instead of once per column.
        inner = set(self.inner_cols)
        return [col for col in self.data_censal if col not in inner]

    def _filter_censo_by_level(self, level, outer=False):
        """Dissolve the data to ``level`` and attach the summed census data.

        The geometry (plus names/codes of the level) is dissolved by the
        first code column of the level. When census variables are declared
        they are summed per value of that same column and merged in.

        Raises ``IndexError`` when ``level`` is not between 1 and the number
        of levels.
        """
        data_censo = self._filter4level(level, outer)
        pivot_lvl_col = self.var_hierarchy['codes'][self.levels-level]
        data = data_censo.dissolve(by=pivot_lvl_col).reset_index(level=0)
        vars_censo = self.var_hierarchy['censo']
        if vars_censo:
            # The census columns carry no geometry, so a plain group-by sum
            # is the right aggregation here (``dissolve`` needs a geometry
            # column and the selection below does not include one).
            grouped = self.data_censal.groupby(pivot_lvl_col)[vars_censo]
            data_cns = grouped.sum().reset_index()
            data = data.merge(data_cns, on=pivot_lvl_col)
        return data

    def _filter_hierharchy_by_level(self, level):
        """Return the hierarchy dict cut down to the last ``level`` levels."""
        pivot_lvl = self.levels-level
        var_hierarchy = {}
        var_hierarchy['geometry'] = self.var_hierarchy['geometry']
        var_hierarchy['censo'] = self.var_hierarchy['censo']
        var_hierarchy['codes'] = self.var_hierarchy['codes'][pivot_lvl:]
        var_hierarchy['names'] = self.var_hierarchy['names'][pivot_lvl:]
        return var_hierarchy

    def filter_data_by_level(self, level, outer=False, raw=False):
        """Reduce data and hierarchy to ``level``.

        Returns the tuple ``(data, var_hierarchy)`` when ``raw`` is true,
        otherwise a new ``GeoDataCensal`` built from them.
        """
        data_censo = self._filter_censo_by_level(level, outer)
        var_hierarchy = self._filter_hierharchy_by_level(level)
        if raw:
            return data_censo, var_hierarchy
        return self._data_reduction(data_censo, var_hierarchy)


class Votations(object):
    """Voting results, with one column of vote counts per party.

    ``votes`` is a table with the party columns plus one or more code
    columns. ``var_info`` is a dict with the keys ``'codes'`` (list of code
    columns) and ``'parties'`` (list of party columns). ``collapse_ways``
    maps the name of a collapsing scheme to a dict ``{group: parties}``
    telling which party columns are added together into each group.

    """

    def __init__(self, votes, var_info, collapse_ways):
        self._check_inputs(votes, var_info, collapse_ways)
        self.collapses = collapse_ways
        self.var_info = var_info
        self.votes = votes

    @staticmethod
    def _as_list(members):
        """Return ``members`` as a list; a single label becomes ``[label]``."""
        if isinstance(members, (list, tuple, set)):
            return list(members)
        return [members]

    def _check_inputs(self, votes, var_info, collapse_ways):
        """Assert the structure of ``var_info`` and ``collapse_ways``."""
        assert('codes' in var_info)
        assert('parties' in var_info)
        assert(isinstance(var_info['parties'], list))
        assert(isinstance(var_info['codes'], list))
        assert(isinstance(collapse_ways, dict))
        parties = var_info['parties']
        for groups in collapse_ways.values():
            # Each scheme is consumed by get_results as {group: parties}, so
            # the members to validate are the dict values, not its keys.
            assert(isinstance(groups, dict))
            for members in groups.values():
                assert(all([p in parties for p in self._as_list(members)]))

    def get_results(self, code, collapse_way=None):
        """Return the votes summed per value of the ``code`` column.

        With ``collapse_way=None`` every party keeps its own column;
        otherwise the parties are first added together into the groups of
        ``self.collapses[collapse_way]``. The result has the ``code`` column
        followed by one column per party or group.
        """
        if collapse_way is None:
            groups = [(p, [p]) for p in self.var_info['parties']]
        else:
            groups = list(self.collapses[collapse_way].items())
        # Work on a copy so the new columns are not written into a slice of
        # the stored table.
        votes_collapse = self.votes[[code]].copy()
        for group, parties in groups:
            # Always select with a list so the row-wise sum runs on a frame.
            members = self._as_list(parties)
            votes_collapse[group] = self.votes[members].sum(axis=1)
        # No geometry is involved, so a group-by sum does the aggregation.
        return votes_collapse.groupby(code).sum().reset_index()


In [2]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import folium

import matplotlib
%matplotlib inline

In [3]:
from splitters import Splitters
from parsing_utils import parse_eleccions_2015_csv, collapse_votes, collapse_info_mesas, left_filter_function

In [4]:
# Section geometries: the GeoJSON is read into `data_censo`; both the path and
# the loaded frame are reused by the mapping cells further down.
# NOTE(review): absolute, machine-specific path - adjust before re-running.
file_geojson = '/home/tono/code/Eleccions2017Cat/data/trio_llobregat_2015.geojson'
data_censo = gpd.read_file(file_geojson)

In [5]:
# Parse the 2015 results CSV into two tables with the project-local parser.
# NOTE(review): presumably `data_info2015` holds the descriptive/turnout
# columns and `votos2015` the per-party votes - confirm in parsing_utils.
# NOTE(review): absolute, machine-specific path - adjust before re-running.
votos_2015_file = '/home/tono/code/Eleccions2017Cat/data/A20151_ME/Columnes_A20151_ME_ca_ES.csv'
data_info2015, votos2015 = parse_eleccions_2015_csv(votos_2015_file)

In [6]:
# Collapse both tables with the project helpers, overwriting the parsed ones.
# NOTE(review): presumably this aggregates polling-station ("mesa") rows to
# one row per CUSEC section - confirm in parsing_utils.
votos2015 = collapse_votes(votos2015)
data_info2015 = collapse_info_mesas(data_info2015)

In [7]:
# Show the column labels of the collapsed info table.
data_info2015.columns

Index([u'CUSEC', u'Codi Prov�ncia', u'Nom Prov�ncia', u'Codi Vegueria',
       u'Nom Vegueria', u'Codi Comarca', u'Nom Comarca', u'Codi Municipi',
       u'Nom Municipi', u'Districte', u'Secci�', u'Cens electoral',
       u'Participaci� 13:00', u'Participaci� 18.00', u'Participaci� 20.00',
       u'Abstenci�', u'Vots nuls', u'Vots en blanc', u'Vots a candidatures',
       u'Vots v�lids'],
      dtype='object')

In [8]:
# Display names of the three municipalities under study.
# NOTE(review): "\xe0" is the Latin-1 byte for "a grave" in a Python 2 str and
# the character itself in Python 3 - make sure it matches the data encoding.
trio_cities = ["Cornell\xe0 de Llobregat", "Esplugues de Llobregat",
               "Hospitalet de Llobregat, l'"]
# Five-character municipality codes; they are matched below against the first
# five characters of each CUSEC section code.
codes_trio = {'Cornella': '08073',
              'Esplugues': '08077',
              'Hospitalet': '08101'}
def cmun_filter(data, cumuns):
    """Keep the rows of ``data`` whose CUSEC starts with one of ``cumuns``.

    ``cumuns`` is a collection of five-character municipality codes; each
    CUSEC value is matched on its first five characters.
    """
    def in_municipalities(cusec):
        return cusec[:5] in cumuns

    keep = data.CUSEC.apply(in_municipalities)
    return data.loc[keep]
# Restrict both 2015 tables to the sections of the three municipalities.
trio_data_info2015 = cmun_filter(data_info2015, codes_trio.values())
trio_votos2015 = cmun_filter(votos2015, codes_trio.values())


### Check for non-coincident keys

In [9]:
# Match the vote table against the census geometries on the CUSEC key.
# NOTE(review): the meaning and order of the three return values is assumed
# from the variable names (filtered votes, codes missing from the census,
# codes missing from the votes) - confirm in parsing_utils.
votos2015_trio_ll, codes_not_in_censo, codes_not_votos2015 =\
    left_filter_function(data_censo, trio_votos2015, filter_code='CUSEC')

# Display both sets of unmatched codes.
codes_not_in_censo, codes_not_votos2015

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  to_filter_data[filter_code] = to_filter_data[filter_code].apply(to_str)


(set(), set())

In [10]:
# Inspect the info rows of two specific sections, selected by full CUSEC code.
trio_data_info2015.loc[trio_data_info2015.CUSEC.apply(lambda x: x in ['0810103026', '0810103027']), :]

Unnamed: 0,CUSEC,Codi Prov�ncia,Nom Prov�ncia,Codi Vegueria,Nom Vegueria,Codi Comarca,Nom Comarca,Codi Municipi,Nom Municipi,Districte,Secci�,Cens electoral,Participaci� 13:00,Participaci� 18.00,Participaci� 20.00,Abstenci�,Vots nuls,Vots en blanc,Vots a candidatures,Vots v�lids
1752,810103026,8,Barcelona,1,Barcelona,13,Barcelon�s,8101,"Hospitalet de Llobregat, l'",3,26,663,220,426,543,120,1,2,540,542
1753,810103027,8,Barcelona,1,Barcelona,13,Barcelon�s,8101,"Hospitalet de Llobregat, l'",3,27,902,266,574,732,170,2,1,729,730


### Manual data checks

In [75]:
# Count the sections of one district of the third city in `trio_cities`,
# working from the info table.
# NOTE(review): the positional indices (column 7 compared with a city name,
# and column 20 in the selection) do not line up with the 20-column listing
# shown earlier, so this cell appears to have run against a differently
# shaped `data_info2015` - confirm before reuse.
lh_territories = data_info2015.iloc[:, 7].apply(lambda x: x in [trio_cities[2]])
codes_data_info = data_info2015.iloc[np.where(lh_territories)[0], [0, 2, 4, 6, 8, 9, 10, 20]]
# One row per section: keep the first record found for each CUSEC.
data_filtered = codes_data_info.groupby(by='CUSEC').first()
# Keep district 2 only (compared as an integer here).
postfilter = data_filtered['Districte'].apply(lambda x: x == 2)
# Number of sections left after the district filter.
len(data_filtered.iloc[np.where(postfilter)[0], :-1])

37

In [123]:
# Same kind of check on the geometry side: rows of `data_censo` whose CUMUN
# equals the Hospitalet municipality code.
lh_territories = data_censo['CUMUN'].apply(lambda x: x in [codes_trio['Hospitalet']])
geos_data = data_censo.loc[lh_territories, :]
# Keep district '03' (CDIS is compared as a zero-padded string).
postfilter = geos_data['CDIS'].apply(lambda x: x == '03')
# Show the matching rows without the last column.
geos_data.iloc[np.where(postfilter)[0], :-1]


Unnamed: 0,OBJECTID,CUSEC,CUMUN,CSEC,CDIS,CMUN,CPRO,CCA,CUDIS,OBS,...,CNUT1,CNUT2,CNUT3,CLAU2,NPRO,NCA,NMUN,Shape_Leng,Shape_area,Shape_len
188,5468,810103001,8101,1,3,101,8,9,810103,,...,5,1,1,8101,Barcelona,Catalua,"Hospitalet de Llobregat, L'",2123.89082,233567.718319,2123.89082
189,5469,810103002,8101,2,3,101,8,9,810103,,...,5,1,1,8101,Barcelona,Catalua,"Hospitalet de Llobregat, L'",833.71036,35996.566769,833.71036
190,5470,810103003,8101,3,3,101,8,9,810103,,...,5,1,1,8101,Barcelona,Catalua,"Hospitalet de Llobregat, L'",610.232224,21545.110651,610.232224
191,5471,810103004,8101,4,3,101,8,9,810103,,...,5,1,1,8101,Barcelona,Catalua,"Hospitalet de Llobregat, L'",1802.861597,151432.401799,1802.861597
192,5472,810103005,8101,5,3,101,8,9,810103,,...,5,1,1,8101,Barcelona,Catalua,"Hospitalet de Llobregat, L'",1024.74997,40664.451741,1024.74997
193,5473,810103006,8101,6,3,101,8,9,810103,,...,5,1,1,8101,Barcelona,Catalua,"Hospitalet de Llobregat, L'",1035.388672,43244.099506,1035.388672
194,5474,810103007,8101,7,3,101,8,9,810103,,...,5,1,1,8101,Barcelona,Catalua,"Hospitalet de Llobregat, L'",543.880699,18468.786956,543.880699
195,5475,810103008,8101,8,3,101,8,9,810103,,...,5,1,1,8101,Barcelona,Catalua,"Hospitalet de Llobregat, L'",1115.305583,65292.507486,1115.305583
196,5476,810103009,8101,9,3,101,8,9,810103,,...,5,1,1,8101,Barcelona,Catalua,"Hospitalet de Llobregat, L'",919.712402,39517.730602,919.712402
197,5477,810103010,8101,10,3,101,8,9,810103,,...,5,1,1,8101,Barcelona,Catalua,"Hospitalet de Llobregat, L'",504.786206,15370.295343,504.786206


## Mapping

In [11]:
# Per-row ratio: votes in positional columns 5 and 8 divided by the sum of
# every column after the first one.
# NOTE(review): columns 5 and 8 are presumably the pro-independence lists
# (hence 'ProInd'); the positions depend on the column order of
# `trio_votos2015` - confirm.
d = trio_votos2015.iloc[:, [5, 8]].sum(1)/trio_votos2015.iloc[:, 1:].sum(1)
proind = pd.DataFrame(d, columns=['ProInd'])
# Put the CUSEC key next to the ratio so it can be matched to the geometries.
proind = pd.concat([trio_votos2015['CUSEC'], proind], axis=1)

In [12]:
def choropleth_map(center_loc, zoom_start, file_geojson, results, key_on, key_data):
    """Return a folium map with a choropleth layer of ``results``.

    ``file_geojson`` supplies the shapes, ``results`` the values; rows are
    matched through the ``key_on`` column and the GeoJSON property of the
    same name, and coloured by the ``key_data`` column.
    """
    base_map = folium.Map(center_loc, zoom_start=zoom_start,
                          tiles='cartodbpositron')
    layer_options = {
        'geo_data': file_geojson,
        'data': results,
        'columns': [key_on, key_data],
        # Path of the matching property inside each GeoJSON feature.
        'key_on': 'feature.properties.' + key_on,
        'fill_color': 'YlGn',
        'highlight': True,
    }
    base_map.choropleth(**layer_options)
    return base_map

choropleth_map([41.37, 2.1], 13, file_geojson, proind, 'CUSEC', 'ProInd')

In [13]:
import seaborn as sns
import matplotlib


def results_map(center_loc, zoom_start, geodata, results, key_on, key_data,
                n_splits=10, palette='RdYlGn'):
    """Return a folium map with one coloured, clickable shape per geodata row.

    Each row of ``geodata`` is matched to the first row of ``results`` that
    has the same ``key_on`` value; rows without a match are skipped. The
    shape is filled with the palette colour chosen by the splitter for the
    matched ``key_data`` value, and gets a popup showing its key.

    ``n_splits`` is the number of colour bins and ``palette`` the seaborn
    palette name; the defaults reproduce the original fixed settings.

    NOTE(review): ``Splitters`` is project-local; it is assumed to map a
    value within ``limits`` (0, 1) to a bin index in ``range(n_splits)``.
    """
    m = folium.Map(center_loc, tiles='cartodbpositron',
                   zoom_start=zoom_start)

    par_splits = {'n_splits': n_splits, 'limits': (0, 1),
                  'open_limits': False, 'reverse': False}
    splitter = Splitters('equispaced_splitters', par_splits,
                         ranges=list(range(n_splits)))
    colors = sns.color_palette(palette, n_splits)

    def color_map(x):
        return matplotlib.colors.rgb2hex(colors[splitter(x)])

    def create_function_style(value):
        # Resolve the colour once per shape; the factory also gives every
        # style function its own binding instead of sharing the loop variable.
        fill = color_map(value)

        def style_function(feature):
            return {'fillColor': fill,
                    'fillOpacity': 0.4,
                    'color': 'black',
                    'weight': 0.5,
                    'opacity': 1}
        return style_function

    def highlight_function(feature):
        return {'weight': 1.5,
                'fillOpacity': 0.75}

    # Build the key -> value lookup once (first occurrence wins) instead of
    # scanning the whole results table for every shape.
    value_by_key = {}
    for key, value in zip(results[key_on], results[key_data]):
        value_by_key.setdefault(key, value)

    # Iterate over the geodata that was passed in, not a notebook global.
    # Positions are needed because each shape is added as a one-row frame.
    for i in range(len(geodata)):
        geodata_i = geodata.iloc[[i], :]
        key_on_i = geodata_i[key_on].iloc[0]
        if key_on_i not in value_by_key:
            continue
        style_function = create_function_style(value_by_key[key_on_i])
        geo_cens = folium.GeoJson(geodata_i,
                                  style_function=style_function,
                                  highlight_function=highlight_function,
                                  overlay=True,
                                  control=False)
        geo_cens.add_child(folium.Popup(key_on_i))
        geo_cens.add_to(m, key_on_i)
    return m

results_map([41.37, 2.1], 13, data_censo, proind, 'CUSEC', 'ProInd')

In [157]:
# Scratch check: position(s) in `proind` whose CUSEC equals that of the row
# at position 324 of `data_censo`.
np.where(proind.CUSEC.apply(lambda x: x == data_censo.CUSEC.iloc[324]))[0]

array([285])

In [162]:
# Scratch check: the first census row as a one-row frame. Note that
# `key_on_i` here is the one-element CUSEC column, not a scalar.
geodata_i = data_censo.iloc[[0], :]
key_on_i = geodata_i['CUSEC']

In [7]:
# Display the census geometry table loaded from the GeoJSON file.
data_censo

Unnamed: 0,OBJECTID,CUSEC,CUMUN,CSEC,CDIS,CMUN,CPRO,CCA,CUDIS,OBS,...,CNUT2,CNUT3,CLAU2,NPRO,NCA,NMUN,Shape_Leng,Shape_area,Shape_len,geometry
0,5155,0807301001,08073,001,01,073,08,09,0807301,,...,1,1,08073,Barcelona,Catalua,Cornell de Llobregat,1352.569111,9.218862e+04,1352.569111,"POLYGON ((2.07086332857145 41.35740405853809, ..."
1,5156,0807301002,08073,002,01,073,08,09,0807301,,...,1,1,08073,Barcelona,Catalua,Cornell de Llobregat,1009.427596,5.111587e+04,1009.427596,"POLYGON ((2.070671819595459 41.35602864763179,..."
2,5157,0807301003,08073,003,01,073,08,09,0807301,,...,1,1,08073,Barcelona,Catalua,Cornell de Llobregat,690.436732,2.965648e+04,690.436732,"POLYGON ((2.077546728450129 41.35071454754457,..."
3,5158,0807301004,08073,004,01,073,08,09,0807301,,...,1,1,08073,Barcelona,Catalua,Cornell de Llobregat,5437.460220,9.279912e+05,5437.460220,"POLYGON ((2.049558400975442 41.34744746709448,..."
4,5159,0807301005,08073,005,01,073,08,09,0807301,,...,1,1,08073,Barcelona,Catalua,Cornell de Llobregat,923.226121,3.932151e+04,923.226121,"POLYGON ((2.076844113283871 41.35191659116927,..."
5,5160,0807301006,08073,006,01,073,08,09,0807301,,...,1,1,08073,Barcelona,Catalua,Cornell de Llobregat,757.690417,3.313607e+04,757.690417,"POLYGON ((2.072357123758789 41.35348683736885,..."
6,5161,0807301007,08073,007,01,073,08,09,0807301,,...,1,1,08073,Barcelona,Catalua,Cornell de Llobregat,1142.524590,7.279121e+04,1142.524590,"POLYGON ((2.071632524362656 41.35349974731189,..."
7,5162,0807301008,08073,008,01,073,08,09,0807301,,...,1,1,08073,Barcelona,Catalua,Cornell de Llobregat,2242.279095,2.072551e+05,2242.279095,"POLYGON ((2.067353723362825 41.35395055034011,..."
8,5163,0807301009,08073,009,01,073,08,09,0807301,,...,1,1,08073,Barcelona,Catalua,Cornell de Llobregat,950.901553,3.603848e+04,950.901554,"POLYGON ((2.073513537376484 41.35354297793097,..."
9,5164,0807301010,08073,010,01,073,08,09,0807301,,...,1,1,08073,Barcelona,Catalua,Cornell de Llobregat,881.616266,3.625071e+04,881.616266,"POLYGON ((2.076351447228844 41.35300580311168,..."
