In [114]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from pathlib import Path
from IPython.display import display

# File locations used throughout the notebook; everything lives under one
# data directory that is addressed relative to the notebook's location.
data_dir = Path("../data")
# Raw input read by the load cell.
president_csv = data_dir.joinpath("president.csv")
# Feather cache written after aggregation and read back for exploration.
president_rates_fth = data_dir.joinpath("president_rates.fth")

def display_all(df):
    """Render `df` with the pandas display limits raised to 1000 rows/columns.

    The raised limits apply only while this call renders; the global pandas
    display options are restored when the context manager exits.
    """
    raised_limits = ("display.max_rows", 1000, "display.max_columns", 1000)
    with pd.option_context(*raised_limits):
        display(df)

# Data

In [83]:
# Load the raw results, pinning the count columns to 32-bit integers and the
# turnout percentage to a 32-bit float.
votes = pd.read_csv(
    president_csv,
    dtype={
        **dict.fromkeys(
            [
                "RECEIVED_VOTES",
                "VALID_VOTES",
                "INVALID_VOTES",
                "ISSUED_VOTES",
                "UNISSUED_VOTES",
                "POPULATION",
            ],
            np.int32,
        ),
        "VOTING_RATE": np.float32,
    },
)
votes

Unnamed: 0,YEAR,CITY_COUNTY,TOWNSHIP_CITY,VILLAGE,BOOTH_NUMBER,CANDIDATE,PARTY,RECEIVED_VOTES,VALID_VOTES,INVALID_VOTES,ISSUED_VOTES,UNISSUED_VOTES,POPULATION,VOTING_RATE
0,2008,臺北市,北投區,建民里,1,(1)謝長廷蘇貞昌,DPP,711,1312,12,1324,359,1683,78.669998
1,2008,臺北市,北投區,建民里,1,(2)馬英九蕭萬長,KMT,601,1312,12,1324,359,1683,78.669998
2,2008,臺北市,北投區,建民里,2,(1)謝長廷蘇貞昌,DPP,685,1391,4,1395,333,1728,80.730003
3,2008,臺北市,北投區,建民里,2,(2)馬英九蕭萬長,KMT,706,1391,4,1395,333,1728,80.730003
4,2008,臺北市,北投區,文林里,3,(1)謝長廷蘇貞昌,DPP,458,940,6,946,234,1180,80.169998
5,2008,臺北市,北投區,文林里,3,(2)馬英九蕭萬長,KMT,482,940,6,946,234,1180,80.169998
6,2008,臺北市,北投區,文林里,4,(1)謝長廷蘇貞昌,DPP,485,1103,9,1112,192,1304,85.279999
7,2008,臺北市,北投區,文林里,4,(2)馬英九蕭萬長,KMT,618,1103,9,1112,192,1304,85.279999
8,2008,臺北市,北投區,文林里,5,(1)謝長廷蘇貞昌,DPP,435,995,5,1000,233,1233,81.099998
9,2008,臺北市,北投區,文林里,5,(2)馬英九蕭萬長,KMT,560,995,5,1000,233,1233,81.099998


In [116]:
def compute_rates(votes):
    """Sum the vote counts per (year, county, township, village, party) and derive rates.

    Parameters
    ----------
    votes : pandas.DataFrame
        Must contain the grouping columns, the four summed count columns, and
        the BOOTH_NUMBER / CANDIDATE / VOTING_RATE columns, which are dropped
        before aggregation (a KeyError is raised if any of them is missing).
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per group with the summed counts plus
        RATE = RECEIVED_VOTES / ISSUED_VOTES and
        VOTING_RATE = ISSUED_VOTES / POPULATION (a fraction, recomputed from
        the summed counts rather than taken from the input column).
    """
    group_keys = ["YEAR", "CITY_COUNTY", "TOWNSHIP_CITY", "VILLAGE", "PARTY"]
    summed_columns = ["RECEIVED_VOTES", "VALID_VOTES", "ISSUED_VOTES", "POPULATION"]

    # drop() returns a new frame, so the caller's data stays untouched.
    trimmed = votes.drop(columns=["BOOTH_NUMBER", "CANDIDATE", "VOTING_RATE"])
    totals = trimmed.groupby(group_keys).aggregate(
        {column: "sum" for column in summed_columns}
    )

    rates = totals.reset_index()
    rates["RATE"] = rates["RECEIVED_VOTES"] / rates["ISSUED_VOTES"]
    rates["VOTING_RATE"] = rates["ISSUED_VOTES"] / rates["POPULATION"]
    return rates
    
# Aggregate the loaded rows, then show the result with the raised display limits.
rates = compute_rates(votes)
display_all(rates)

Unnamed: 0,YEAR,CITY_COUNTY,TOWNSHIP_CITY,VILLAGE,PARTY,RECEIVED_VOTES,VALID_VOTES,ISSUED_VOTES,POPULATION,RATE,VOTING_RATE
0,2008,南投縣,中寮鄉,中寮村,DPP,150,510,514,700,0.291829,0.734286
1,2008,南投縣,中寮鄉,中寮村,KMT,360,510,514,700,0.700389,0.734286
2,2008,南投縣,中寮鄉,內城村,DPP,115,310,313,473,0.367412,0.661734
3,2008,南投縣,中寮鄉,內城村,KMT,195,310,313,473,0.623003,0.661734
4,2008,南投縣,中寮鄉,八仙村,DPP,250,522,524,764,0.477099,0.685864
5,2008,南投縣,中寮鄉,八仙村,KMT,272,522,524,764,0.519084,0.685864
6,2008,南投縣,中寮鄉,和興村,DPP,221,445,450,692,0.491111,0.650289
7,2008,南投縣,中寮鄉,和興村,KMT,224,445,450,692,0.497778,0.650289
8,2008,南投縣,中寮鄉,崁頂村,DPP,166,364,369,511,0.449864,0.722114
9,2008,南投縣,中寮鄉,崁頂村,KMT,198,364,369,511,0.536585,0.722114


In [102]:
# Cache the aggregated rates on disk so the exploration cells can reload them
# without recomputing the aggregation.
rates.to_feather(president_rates_fth)

# Explore

In [142]:
# Reload the cached rates; this rebinds `rates` to the contents of the Feather file.
rates = pd.read_feather(president_rates_fth)

In [143]:
# Show one township across all years. Both conditions are combined into a
# single mask: chaining `rates[mask_a][mask_b]` would apply a mask built on
# the full frame to an already-filtered frame, which makes pandas reindex the
# mask and emit a "Boolean Series key will be reindexed" warning.
display_all(
    rates[(rates.CITY_COUNTY == "南投縣") & (rates.TOWNSHIP_CITY == "中寮鄉")]
    .sort_values(["VILLAGE", "PARTY", "YEAR"])
)

  


Unnamed: 0,YEAR,CITY_COUNTY,TOWNSHIP_CITY,VILLAGE,PARTY,RECEIVED_VOTES,VALID_VOTES,ISSUED_VOTES,POPULATION,RATE,VOTING_RATE
0,2008,南投縣,中寮鄉,中寮村,DPP,150,510,514,700,0.291829,0.734286
15596,2012,南投縣,中寮鄉,中寮村,DPP,199,468,469,699,0.424307,0.670959
39017,2016,南投縣,中寮鄉,中寮村,DPP,189,367,372,662,0.508065,0.561934
1,2008,南投縣,中寮鄉,中寮村,KMT,360,510,514,700,0.700389,0.734286
15597,2012,南投縣,中寮鄉,中寮村,KMT,261,468,469,699,0.556503,0.670959
39018,2016,南投縣,中寮鄉,中寮村,KMT,132,367,372,662,0.354839,0.561934
15598,2012,南投縣,中寮鄉,中寮村,PFP,8,468,469,699,0.017058,0.670959
39019,2016,南投縣,中寮鄉,中寮村,PFP,46,367,372,662,0.123656,0.561934
2,2008,南投縣,中寮鄉,內城村,DPP,115,310,313,473,0.367412,0.661734
15599,2012,南投縣,中寮鄉,內城村,DPP,137,302,307,469,0.446254,0.654584


In [156]:
def compute_rate_changes(rates):
    """Attach each row's RATE from the 2008 and 2012 elections as new columns.

    Rows are matched on (CITY_COUNTY, TOWNSHIP_CITY, VILLAGE, PARTY).

    * 2008 rows: RATE_2008 is the row's own RATE, RATE_2012 is 0.
    * 2012 rows: RATE_2008 is looked up in the 2008 rows, RATE_2012 is the
      row's own RATE.
    * 2016 rows: both RATE_2008 and RATE_2012 are looked up.

    A lookup that finds no matching past row yields 0.

    Parameters
    ----------
    rates : pandas.DataFrame
        Needs the YEAR, CITY_COUNTY, TOWNSHIP_CITY, VILLAGE, PARTY and RATE
        columns. The frame is copied, not modified; its index is preserved.

    Returns
    -------
    pandas.DataFrame
        A copy of `rates` with RATE_2008 and RATE_2012 added.

    Raises
    ------
    Exception
        If a row's YEAR is not 2008, 2012 or 2016, or if a lookup matches
        more than one past row.
    """
    key_cols = ["CITY_COUNTY", "TOWNSHIP_CITY", "VILLAGE", "PARTY"]

    changes = rates.copy()
    year = changes.YEAR

    unknown = changes[~year.isin([2008, 2012, 2016])]
    if len(unknown) > 0:
        raise Exception(f"Unknown year: {unknown.iloc[0]}")

    def past_rates(past_year, targets):
        """Return, in row order, the `past_year` RATE for every row selected by `targets`."""
        # Rows with a missing key never matched under `==` comparison, so they
        # are excluded here as well (merge would otherwise pair NaN with NaN).
        past = changes.loc[year == past_year, key_cols + ["RATE"]].dropna(subset=key_cols)
        # Remember which keys are ambiguous before collapsing to one row per
        # key, so an ambiguous key only raises when a target actually hits it.
        past = past.assign(
            IS_DUPLICATED=past.duplicated(subset=key_cols, keep=False)
        ).drop_duplicates(subset=key_cols)

        # One left merge replaces a full-frame scan per target row.
        matched = changes.loc[targets, key_cols].merge(
            past, on=key_cols, how="left", indicator=True
        )
        found = (matched["_merge"] == "both").to_numpy()
        ambiguous = found & matched["IS_DUPLICATED"].eq(True).to_numpy()
        if ambiguous.any():
            row = changes.loc[targets].iloc[int(ambiguous.argmax())]
            raise Exception(f"Found more rows in {past_year}: {row}")
        # Targets without a past counterpart get 0.
        return matched["RATE"].where(found, 0.0).to_numpy()

    # Start from the row's own rate; only the columns that refer to another
    # election year are overwritten below.
    changes["RATE_2008"] = changes.RATE
    changes["RATE_2012"] = changes.RATE
    changes.loc[year == 2008, "RATE_2012"] = 0

    after_2008 = year.isin([2012, 2016])
    if after_2008.any():
        changes.loc[after_2008, "RATE_2008"] = past_rates(2008, after_2008)

    in_2016 = year == 2016
    if in_2016.any():
        changes.loc[in_2016, "RATE_2012"] = past_rates(2012, in_2016)

    return changes

# Spot-check compute_rate_changes on a small slice: every row whose village is
# named "中寮村". The filter is on the village name only, so the slice spans
# several counties (see the output below).
r0 = rates[rates.VILLAGE == "中寮村"]
# The slice keeps the index labels of the full frame rather than 0..n-1.
print(r0.index)
display_all(compute_rate_changes(r0))

Int64Index([    0,     1,  2670,  2671, 10712, 10713, 11972, 11973, 15596,
            15597, 15598, 19529, 19530, 19531, 33440, 33441, 33442, 39017,
            39018, 39019, 42944, 42945, 42946, 56912, 56913, 56914],
           dtype='int64')


  


Unnamed: 0,YEAR,CITY_COUNTY,TOWNSHIP_CITY,VILLAGE,PARTY,RECEIVED_VOTES,VALID_VOTES,ISSUED_VOTES,POPULATION,RATE,VOTING_RATE,RATE_2008,RATE_2012
0,2008,南投縣,中寮鄉,中寮村,DPP,150,510,514,700,0.291829,0.734286,0.291829,0.0
1,2008,南投縣,中寮鄉,中寮村,KMT,360,510,514,700,0.700389,0.734286,0.700389,0.0
2670,2008,屏東縣,枋寮鄉,中寮村,DPP,275,485,490,670,0.561224,0.731343,0.561224,0.0
2671,2008,屏東縣,枋寮鄉,中寮村,KMT,210,485,490,670,0.428571,0.731343,0.428571,0.0
10712,2008,臺南縣,七股鄉,中寮村,DPP,322,544,550,776,0.585455,0.708763,0.585455,0.0
10713,2008,臺南縣,七股鄉,中寮村,KMT,222,544,550,776,0.403636,0.708763,0.403636,0.0
11972,2008,臺東縣,綠島鄉,中寮村,DPP,44,473,473,883,0.093023,0.535674,0.093023,0.0
11973,2008,臺東縣,綠島鄉,中寮村,KMT,429,473,473,883,0.906977,0.535674,0.906977,0.0
15596,2012,南投縣,中寮鄉,中寮村,DPP,199,468,469,699,0.424307,0.670959,0.291829,0.424307
15597,2012,南投縣,中寮鄉,中寮村,KMT,261,468,469,699,0.556503,0.670959,0.700389,0.556503
