In [1]:

import pandas as pd


class GeoDataCensal(object):
    """Census geodata described by a hierarchy of code/name columns.

    Wraps a table (``data_censal``) together with ``var_hierarchy``, a dict
    that says which columns of the table play which role:

    * ``'geometry'``: name (str) of the geometry column.
    * ``'codes'``: list of code columns, one per hierarchy level.
    * ``'names'``: list of name columns, same length as ``'codes'``.
    * ``'censo'``: list of census-variable columns (may be empty).

    A ``level`` of ``k`` keeps the last ``k`` entries of ``'codes'`` and
    ``'names'`` and aggregates by ``codes[levels - k]``.
    NOTE(review): this suggests the lists are ordered from the finest unit
    to the coarsest one -- confirm against the callers.
    """

    def __init__(self, data_censal, var_hierarchy):
        self._check_inputs(data_censal, var_hierarchy)
        self.data_censal = data_censal
        self.var_hierarchy = var_hierarchy
        # Number of hierarchy levels available in the data.
        self.levels = len(var_hierarchy['codes'])

    def _check_inputs(self, data_censal, var_hierarchy):
        """Assert that ``var_hierarchy`` has the expected keys and types.

        Raises AssertionError on a missing key, a wrongly typed value or
        when ``'codes'`` and ``'names'`` differ in length.
        """
        for key in ('geometry', 'codes', 'names', 'censo'):
            assert key in var_hierarchy, "missing key %r" % key
        assert isinstance(var_hierarchy['geometry'], str)
        for key in ('codes', 'names', 'censo'):
            assert isinstance(var_hierarchy[key], list)
        assert len(var_hierarchy['codes']) == len(var_hierarchy['names'])

    def _get_columns_level(self, level, outer=False):
        """Return the column labels that belong to hierarchy ``level``.

        ``level == 0`` yields only the geometry column; ``outer=True``
        additionally includes every column not described by the hierarchy.
        Raises IndexError when ``level`` is negative or above ``levels``.
        The order of the returned list is not defined for ``level > 0``
        (duplicates are removed through a set).
        """
        outer_cols = []
        if outer:
            outer_cols = self.outer_cols
        geo_cols = [self.var_hierarchy['geometry']]
        if level == 0:
            columns = outer_cols+geo_cols
        elif level > self.levels or level < 0:
            msg = "Not correct 'level' input."
            raise IndexError(msg)
        else:
            names_cols = self.var_hierarchy['names'][self.levels-level:]
            codes_cols = self.var_hierarchy['codes'][self.levels-level:]
            columns = list(set(outer_cols+names_cols+codes_cols+geo_cols))
        return columns

    def _filter4level(self, level, outer=False):
        """Return ``data_censal`` restricted to the columns of ``level``."""
        return self.data_censal[self._get_columns_level(level, outer)]

    @classmethod
    def _data_reduction(cls, data_censo, var_hierarchy):
        """Build a new instance from already reduced data."""
        return cls(data_censo, var_hierarchy)

    @property
    def inner_cols(self):
        """Columns described by ``var_hierarchy`` (deduplicated, unordered)."""
        # The hierarchy must be read from the instance; a bare
        # ``var_hierarchy`` name does not exist in this scope.
        var_hierarchy = self.var_hierarchy
        inner_cols = [var_hierarchy['geometry']]
        inner_cols += var_hierarchy['codes']
        inner_cols += var_hierarchy['names']
        inner_cols += var_hierarchy['censo']
        return list(set(inner_cols))

    @property
    def outer_cols(self):
        """Columns of ``data_censal`` not described by ``var_hierarchy``.

        Keeps the column order of ``data_censal``.
        """
        # Compute the inner set once instead of once per column.
        inner = set(self.inner_cols)
        return [col for col in self.data_censal if col not in inner]

    def _filter_censo_by_level(self, level, outer=False):
        """Aggregate geometry and census variables at hierarchy ``level``.

        Rows are dissolved by the code column of ``level``; census variables
        (if any are declared) are summed per code and merged back in.
        Raises IndexError when ``level`` is not in ``1..levels``.
        """
        # Level 0 has no code column to aggregate by; fail with the same
        # exception type and message used for the other invalid levels.
        if level < 1 or level > self.levels:
            raise IndexError("Not correct 'level' input.")
        data_censo = self._filter4level(level, outer)
        pivot_lvl_col = self.var_hierarchy['codes'][self.levels-level]
        data = data_censo.dissolve(by=pivot_lvl_col).reset_index(level=0)
        vars_censo = self.var_hierarchy['censo']
        if vars_censo:
            # This subset carries no geometry column, so it is aggregated
            # with a plain groupby instead of ``dissolve``; selecting all
            # columns at once also avoids assigning into a slice.
            census = pd.DataFrame(self.data_censal[[pivot_lvl_col] +
                                                   vars_censo])
            data_cns = census.groupby(pivot_lvl_col, as_index=False).sum()
            data = pd.merge(data, data_cns, on=pivot_lvl_col)
        return data

    def _filter_hierharchy_by_level(self, level):
        """Return a ``var_hierarchy`` dict truncated to ``level`` levels.

        ``'geometry'`` and ``'censo'`` are passed through unchanged; only the
        last ``level`` entries of ``'codes'`` and ``'names'`` are kept.
        """
        pivot_lvl = self.levels-level
        var_hierarchy = {}
        var_hierarchy['geometry'] = self.var_hierarchy['geometry']
        var_hierarchy['censo'] = self.var_hierarchy['censo']
        var_hierarchy['codes'] = self.var_hierarchy['codes'][pivot_lvl:]
        var_hierarchy['names'] = self.var_hierarchy['names'][pivot_lvl:]
        return var_hierarchy

    def filter_data_by_level(self, level, outer=False, raw=False):
        """Reduce the data to hierarchy ``level``.

        Returns the tuple ``(data, var_hierarchy)`` when ``raw`` is true,
        otherwise a new instance of this class built from both.
        """
        data_censo = self._filter_censo_by_level(level, outer)
        var_hierarchy = self._filter_hierharchy_by_level(level)
        if raw:
            return data_censo, var_hierarchy
        else:
            return self._data_reduction(data_censo, var_hierarchy)


class Votations(object):
    """Vote results with one column per party plus code columns.

    Parameters
    ----------
    votes : DataFrame-like
        Table holding the code columns and one vote-count column per party.
    var_info : dict
        ``'codes'``: list of code columns; ``'parties'``: list of party
        columns.
    collapse_ways : dict
        Named ways of grouping parties. ``get_results`` expects each value
        to be a dict ``{group_name: [party, ...]}``.
    """

    def __init__(self, votes, var_info, collapse_ways):
        self._check_inputs(votes, var_info, collapse_ways)
        self.collapses = collapse_ways
        self.var_info = var_info
        self.votes = votes

    @staticmethod
    def _as_party_list(parties):
        """Return ``parties`` as a list; a single name becomes ``[name]``."""
        if isinstance(parties, str):
            return [parties]
        return list(parties)

    def _check_inputs(self, votes, var_info, collapse_ways):
        """Assert the structure of ``var_info`` and ``collapse_ways``.

        Every party mentioned in a collapse way must be listed in
        ``var_info['parties']``. Raises AssertionError otherwise.
        """
        assert 'codes' in var_info
        assert 'parties' in var_info
        assert isinstance(var_info['parties'], list)
        assert isinstance(var_info['codes'], list)
        assert isinstance(collapse_ways, dict)
        known = var_info['parties']
        for groups in collapse_ways.values():
            if isinstance(groups, dict):
                # Validate the member parties, not the group names (group
                # names are new column labels and need not be parties).
                members = [p for ps in groups.values()
                           for p in self._as_party_list(ps)]
            else:
                # Flat list of parties: checked as before for compatibility.
                members = self._as_party_list(groups)
            assert all(p in known for p in members)

    def get_results(self, code, collapse_way=None):
        """Sum votes per value of the ``code`` column.

        With ``collapse_way=None`` every party keeps its own column;
        otherwise the parties are summed into the groups defined by
        ``self.collapses[collapse_way]`` (KeyError if unknown).

        Returns a new DataFrame with the ``code`` column followed by one
        column per party/group; ``self.votes`` is not modified.
        """
        if collapse_way is None:
            # Each party forms its own one-element group so that the row-wise
            # sum below always operates on a DataFrame.
            collapsing = dict((p, [p]) for p in self.var_info['parties'])
        else:
            collapsing = self.collapses[collapse_way]
        # Explicit copy: columns are added below and must not touch the
        # original votes table.
        votes_collapse = pd.DataFrame(self.votes[[code]]).copy()
        for group, parties in collapsing.items():
            members = self._as_party_list(parties)
            votes_collapse[group] = self.votes[members].sum(axis=1)
        # No geometry is involved here, so a plain groupby does the
        # aggregation; as_index=False keeps ``code`` as a regular column.
        votes = votes_collapse.groupby(code, as_index=False).sum()
        return votes


In [2]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import folium

import matplotlib
%matplotlib inline

In [3]:
from splitters import Splitters
from parsing_utils import parse_eleccions_2015_csv, collapse_votes, collapse_info_mesas, left_filter_function
from geoplotting import results_map

In [4]:
# Load the section geometries (GeoJSON) that the results are later mapped on.
# NOTE(review): absolute, machine-specific path -- the notebook cannot be
# re-run on another machine without editing this line.
file_geojson = '/home/tono/code/Eleccions2017Cat/data/trio_llobregat_2016.geojson'
data_censo = gpd.read_file(file_geojson)

In [5]:
# Read the two 2017 CSV files from the same folder: one with per-row
# metadata ("info") and one with the votes ("vots").
# NOTE(review): absolute, machine-specific folder -- see the GeoJSON path too.
votos_2017_folder = '/home/tono/code/Eleccions2017Cat/data/A20171_ME'
votos_2017_info = os.path.join(votos_2017_folder, '09-meses-info.csv')
votos_2017_vots = os.path.join(votos_2017_folder, '09-meses-vots.csv')
rawdata2017_info = pd.read_csv(votos_2017_info)
rawdata2017_vots = pd.read_csv(votos_2017_vots)

In [6]:
# Display the raw info table to inspect its columns and their positions
# (the positions are used below to build the CUSEC code).
rawdata2017_info

Unnamed: 0,Codi circumscripci�,Codi municipi,Nom municipi,Districte,Secci�,Mesa,Cens electoral,Votants,% votants,Abstenci�,...,Vots nuls,% vots nuls,Vots en blanc,% vots en blanc,Vots a candidatures,% vots a candidatures,Vots v�lids,% vots v�lids,Codi comarca,Nom comarca
0,8,1,Abrera ...,1,1,A,369.0,316.0,08564,53,...,0,00000,0,00000,316.0,10000,316.0,10000,11,Baix Llobregat
1,8,1,Abrera ...,1,1,B,366.0,318.0,08689,48,...,2,00063,3,00094,313.0,09843,316.0,09937,11,Baix Llobregat
2,8,1,Abrera ...,1,1,C,407.0,360.0,08845,47,...,2,00056,1,00028,357.0,09917,358.0,09944,11,Baix Llobregat
3,8,1,Abrera ...,1,2,A,420.0,366.0,08714,54,...,1,00027,2,00055,363.0,09918,365.0,09973,11,Baix Llobregat
4,8,1,Abrera ...,1,2,B,412.0,350.0,08495,62,...,0,00000,3,00086,347.0,09914,350.0,10000,11,Baix Llobregat
5,8,1,Abrera ...,1,2,C,432.0,360.0,08333,72,...,1,00028,1,00028,358.0,09944,359.0,09972,11,Baix Llobregat
6,8,1,Abrera ...,1,3,U,570.0,508.0,08912,62,...,1,00020,2,00039,505.0,09941,507.0,09980,11,Baix Llobregat
7,8,1,Abrera ...,1,4,A,832.0,728.0,08750,104,...,4,00055,2,00027,722.0,09918,724.0,09945,11,Baix Llobregat
8,8,1,Abrera ...,1,4,B,520.0,463.0,08904,57,...,2,00043,4,00086,457.0,09870,461.0,09957,11,Baix Llobregat
9,8,1,Abrera ...,1,5,A,671.0,546.0,08137,125,...,3,00055,5,00092,538.0,09853,543.0,09945,11,Baix Llobregat


In [7]:
def create_cusec_from_data(data, prov_col, muni_col, dist_col, sec_col):
    """Build the 10-character section code (CUSEC) for every row of ``data``.

    The code is the concatenation of four zero-padded integer codes:
    province (2 digits), municipality (3), district (2) and section (3).

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the four code columns.
    prov_col, muni_col, dist_col, sec_col : str or int
        Each column is given either by label (``str``) or by position
        (anything convertible with ``int``). Label and position may be
        mixed freely between the four arguments.

    Returns
    -------
    pandas.Series
        String codes, indexed like ``data``.

    Raises
    ------
    ValueError / TypeError
        If a value cannot be converted with ``int`` (e.g. missing values).
    """
    def select_col(col):
        # Each argument is resolved on its own type; previously the
        # municipality column was resolved by looking at ``prov_col``.
        if isinstance(col, str):
            return data[col]
        return data.iloc[:, int(col)]

    def transform_int_codes_col(series, n):
        # Go through int first so that e.g. 8.0 or '08' both become '08'.
        return series.apply(lambda x: str(int(x)).zfill(n))

    # (column, width) pairs in the order in which they are concatenated.
    layout = ((prov_col, 2), (muni_col, 3), (dist_col, 2), (sec_col, 3))
    parts = [transform_int_codes_col(select_col(col), width)
             for col, width in layout]
    cusec = parts[0]
    for part in parts[1:]:
        cusec = cusec + part
    return cusec
        

In [8]:
# Build the section code from positional columns 0, 1, 3 and 4 of the info
# table (circumscription, municipality, district and section codes according
# to the table displayed above; column 2 is the municipality name).
cusec = create_cusec_from_data(rawdata2017_info, 0, 1, 3, 4)

In [9]:
# Attach the section code to both raw tables (mutates them in place).
# NOTE(review): the code is computed from the info table only; assigning it
# to the votes table assumes both tables share the same index and row
# order -- TODO confirm.
rawdata2017_info['CUSEC'] = cusec
rawdata2017_vots['CUSEC'] = cusec

In [10]:
## Filter trio
# Five-character code of each of the three municipalities to keep; these are
# compared against the first five characters of CUSEC by cmun_filter.
codes_trio = {'Cornella': '08073',
              'Esplugues': '08077',
              'Hospitalet': '08101'}
def cmun_filter(data, cumuns):
    """Keep the rows whose CUSEC starts with one of the given codes.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with a string column named ``'CUSEC'``.
    cumuns : iterable of str
        Five-character codes to keep. Any iterable is accepted, including
        ``dict.values()`` views and one-shot generators.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the matching rows, so that later column
        assignments on the result neither touch ``data`` nor trigger
        pandas' SettingWithCopyWarning.
    """
    # Materialise once: the iterable is otherwise re-scanned for every row
    # and a generator would be exhausted after the first rows.
    codes = list(cumuns)
    mask = data['CUSEC'].str[:5].isin(codes)
    return data.loc[mask].copy()
# Restrict both raw tables to the three municipalities listed in codes_trio.
trio_data2017_info = cmun_filter(rawdata2017_info, codes_trio.values())
trio_data2017_vots = cmun_filter(rawdata2017_vots, codes_trio.values())


In [11]:
# Match the votes against the section geometries on CUSEC using the project
# helper left_filter_function, which returns three values.
# NOTE(review): judging by the variable names, the last two are the codes
# found on only one side of the match -- confirm in parsing_utils.
votos2017_trio_ll, codes_not_in_censo, codes_not_votos2017 =\
    left_filter_function(data_censo, trio_data2017_vots, filter_code='CUSEC')

# Show both code sets.
codes_not_in_censo, codes_not_votos2017

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  to_filter_data[filter_code] = to_filter_data[filter_code].apply(to_str)
  indexer = self._engine.get_indexer(target._values)


(set(), set())

In [12]:
# Sum the votes of all rows that share a CUSEC, giving one row per section.
trio_data2017_vots_c = pd.pivot_table(trio_data2017_vots, index='CUSEC', aggfunc='sum')
trio_data2017_vots_c.head()

Unnamed: 0_level_0,Candidatura d'Unitat Popular (CUP),Catalunya en Com�-Podem (CatCom�-Podem),Ciutadans-Partido de la Ciudadan�a (C's),Di�leg Republic� (Candidatura retirada) (DI�LEG (C. RETIRADA)),Esquerra Republicana-Catalunya S� (ERC-CatS�),Junts per Catalunya (JUNTSxCAT),Partit Animalista contra el Maltractament Animal (PACMA),Partit Popular / Partido Popular (PP),Partit dels Socialistes de Catalunya (PSC-PSOE) (PSC),Per un M�n M�s Just (PU M+J),Recortes Cero-Grupo Verde (RECORTES CERO-GRUPO VERDE)
CUSEC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
807301001,68,172,402,0,317,192,17,55,286,0,4
807301002,60,147,324,0,212,79,6,52,248,0,3
807301003,31,113,255,0,129,52,9,44,229,0,6
807301004,40,162,417,0,249,83,11,36,315,0,4
807301005,39,77,228,0,146,61,12,26,134,0,4


In [13]:
# Per-section share of the votes that went to the parties in positional
# columns 0, 4 and 5 of the pivoted table (CUP, ERC and JUNTSxCAT according
# to the column order shown in the previous output).
proind_votes = trio_data2017_vots_c.iloc[:, [0, 4, 5]].sum(axis=1)
total_votes = trio_data2017_vots_c.sum(axis=1)
d = proind_votes / total_votes

# One row per section with CUSEC as a regular column.
proind = d.to_frame('ProInd').reset_index(level=0)

# Drop two sections by code.
# NOTE(review): the reason for excluding them is not stated in the notebook.
logi = ~proind['CUSEC'].isin(['0810103026', '0810103027'])
proind = proind[logi]
proind

Unnamed: 0,CUSEC,ProInd
0,0807301001,0.381362
1,0807301002,0.310345
2,0807301003,0.244240
3,0807301004,0.282460
4,0807301005,0.338377
5,0807301006,0.458000
6,0807301007,0.449848
7,0807301008,0.344234
8,0807301009,0.381026
9,0807301010,0.327492


In [14]:
# Draw the ProInd share per section with the project helper results_map,
# joining geometries and values on CUSEC.
# NOTE(review): [41.37, 2.1] and 13 look like the map centre (lat, lon) and
# zoom level -- confirm in geoplotting.results_map.
results_map([41.37, 2.1], 13, data_censo, proind, 'CUSEC', 'ProInd')

In [15]:
# Display the section geometries table to inspect its columns.
data_censo

Unnamed: 0,MUNICIPI,DISTRICTE,SECCIO,MUNDISSEC,CUSEC,geometry
0,080734,01,001,08073401001,0807301001,"POLYGON ((2.074376083999041 41.35369666126039,..."
1,080734,01,002,08073401002,0807301002,"POLYGON ((2.070261628641537 41.35732904687536,..."
2,080734,01,003,08073401003,0807301003,"POLYGON ((2.074805573469447 41.35164380484778,..."
3,080734,01,004,08073401004,0807301004,"POLYGON ((2.072724928366699 41.3513274787128, ..."
4,080734,01,005,08073401005,0807301005,"POLYGON ((2.073997296246109 41.35151865113824,..."
5,080734,01,006,08073401006,0807301006,"POLYGON ((2.072702814039318 41.35172865467292,..."
6,080734,01,007,08073401007,0807301007,"POLYGON ((2.070900054437468 41.35258190627427,..."
7,080734,01,008,08073401008,0807301008,"POLYGON ((2.067388960109325 41.35394832544948,..."
8,080734,01,009,08073401009,0807301009,"POLYGON ((2.070671214546052 41.35602778890644,..."
9,080734,01,010,08073401010,0807301010,"POLYGON ((2.074062295279431 41.35262644157508,..."
