In [1]:
import csv
import datetime
import itertools

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Total number of seats; the per-district seat counts are checked against it
# and the majority threshold is derived from it.
N_TOTAL_DEPUTADOS = 230
# Number of most recent polls combined in the rolling sum.
N_SONDAGENS = 4
# Margin added to / removed from one party at a time in the max/min scenarios.
ERRO_PCT = 1  # Percentage of the total votes
# Party columns taken from the polls table.
PARTIDOS = ['PS', 'PSD', 'BE', 'CDU', 'CDS', 'PAN', 'CH', 'IL', 'L']

In [4]:
# Load the number of seats per electoral district from a two-column CSV
# (district name, seat count).
# newline="" is what the csv module asks for, so that the reader itself deals
# with line endings (including ones embedded in quoted fields).
with open('numero_deputados.csv', encoding="utf-8", newline="") as csv_file:
    reader = csv.reader(csv_file)
    # Blank lines come back from csv.reader as empty rows; skip them instead of
    # failing on the two-value unpacking.
    n_deps = {dist: int(n) for dist, n in (row for row in reader if row)}

# Sanity check: the district seats must add up to the size of the parliament.
assert sum(n_deps.values()) == N_TOTAL_DEPUTADOS, (
    "seats per district do not add up to N_TOTAL_DEPUTADOS"
)

In [5]:
# Load the polls table (one row per poll: pollster, sampling dates, sample size
# and one percentage column per party) and display it.
sondagens = pd.read_csv("sondagens.csv")
sondagens

Unnamed: 0,sondagens,Data de amostragem,Amostra,Abstenção,PS,PSD,BE,CDU,CDS,PAN,CH,IL,L,O,V
0,Pitagórica (diário),11-14 jan 2022,608,,38.8,29.3,6.8,4.6,1.3,1.9,7.8,4.9,1.5,3.1,9.5
1,Pitagórica (diário),10-13 jan 2022,608,,38.9,29.7,6.2,4.6,1.7,2.1,6.9,5.4,1.2,3.3,9.2
2,Aximage,6-12 jan 2022,807,,38.1,28.5,7.4,4.8,1.8,2.1,9.0,3.7,,4.6,9.6
3,CESOP–UCP,6–10 jan 2022,1246,,39.0,30.0,6.0,5.0,2.0,3.0,6.0,4.0,2.0,3.0,9.0
4,Intercampus,4–10 jan 2022,615,,35.3,29.3,8.5,6.0,1.1,4.3,7.1,5.6,0.6,2.2,6.0
5,Pitagórica,30 dez 2021–9 jan 2022,600,,39.6,30.0,6.4,5.1,1.5,1.8,5.7,5.1,,4.8,9.6
6,CESOP–UCP,28 dez 2021–5 jan 2022,1238,,38.0,32.0,6.0,6.0,2.0,2.0,5.0,5.0,1.0,3.0,6.0
7,ICS/ISCTE,10–20 dez 2021,901,,38.0,31.0,5.0,6.0,2.0,2.0,7.0,4.0,,5.0,7.0
8,Intercampus,7–14 dez 2021,603,,35.5,26.8,6.6,4.5,1.6,3.5,8.9,6.4,0.8,5.3,8.7
9,Aximage,9–13 dez 2021,810,,35.4,33.2,7.3,5.1,1.3,2.5,6.2,3.7,,5.3,2.2


In [6]:
# Helpers to parse the start and end dates of the polls.
# Portuguese three-letter month abbreviations (lower case) -> month number.
# All twelve months are listed so that a poll dated in any month can be parsed
# instead of failing with a KeyError.
month_map = {
    "jan": 1,
    "fev": 2,
    "mar": 3,
    "abr": 4,
    "mai": 5,
    "jun": 6,
    "jul": 7,
    "ago": 8,
    "set": 9,
    "out": 10,
    "nov": 11,
    "dez": 12,
}


def _split_start_end(datas_sond):
    for splitter in ("–", "-"):
        if splitter in datas_sond:
            return datas_sond.split(splitter)
        

def _get_dt(dt_str):
    """Build a ``datetime.date`` from a "day month year" string.

    The string is split on whitespace into exactly three fields; the month
    field is lower-cased and looked up in ``month_map``.
    """
    day_txt, month_txt, year_txt = dt_str.split()
    return datetime.date(int(year_txt), month_map[month_txt.lower()], int(day_txt))


def get_start_sond(datas_sond):
    """Return the first sampling day of a poll as a ``datetime.date``.

    The start part of the period may be written as "day month year",
    "day month" or just "day"; whatever is missing is taken from the end date.

    Args:
        datas_sond: sampling-period string, e.g. "30 dez 2021–9 jan 2022",
            "28 dez–5 jan 2022" or "6–10 jan 2022".

    Returns:
        The start date of the sampling period.
    """
    start_str, end_str = _split_start_end(datas_sond)
    start_split = start_str.split()
    if len(start_split) == 3:
        # Start has day, month and year.
        return _get_dt(start_str)

    end_dt = _get_dt(end_str)
    if len(start_split) == 2:
        # Start has day and month; the year comes from the end date.
        dd, mm = start_split
        month = month_map[mm.lower()]
        day = int(dd)
        year = end_dt.year
        if (month, day) > (end_dt.month, end_dt.day):
            # The period crosses New Year (e.g. "28 dez–5 jan 2022"): the
            # start belongs to the previous year, otherwise it would fall
            # after the end date.
            year -= 1
        return datetime.date(year=year, month=month, day=day)

    # Start has only the day; year and month are those of the end date.
    return datetime.date(year=end_dt.year, month=end_dt.month, day=int(start_split[0]))


def get_end_sond(datas_sond):
    """Return the last sampling day of a poll as a ``datetime.date``.

    The end part is the second piece of the split period string and is
    parsed as a full "day month year" date.
    """
    period_parts = _split_start_end(datas_sond)
    return _get_dt(period_parts[1])

In [7]:
# Parse the free-text sampling period of every poll into two date columns:
# "Início" (first sampling day) and "Fim" (last sampling day).
sondagens["Início"] = sondagens["Data de amostragem"].apply(get_start_sond)
sondagens["Fim"] = sondagens["Data de amostragem"].apply(get_end_sond)
sondagens

Unnamed: 0,sondagens,Data de amostragem,Amostra,Abstenção,PS,PSD,BE,CDU,CDS,PAN,CH,IL,L,O,V,Início,Fim
0,Pitagórica (diário),11-14 jan 2022,608,,38.8,29.3,6.8,4.6,1.3,1.9,7.8,4.9,1.5,3.1,9.5,2022-01-11,2022-01-14
1,Pitagórica (diário),10-13 jan 2022,608,,38.9,29.7,6.2,4.6,1.7,2.1,6.9,5.4,1.2,3.3,9.2,2022-01-10,2022-01-13
2,Aximage,6-12 jan 2022,807,,38.1,28.5,7.4,4.8,1.8,2.1,9.0,3.7,,4.6,9.6,2022-01-06,2022-01-12
3,CESOP–UCP,6–10 jan 2022,1246,,39.0,30.0,6.0,5.0,2.0,3.0,6.0,4.0,2.0,3.0,9.0,2022-01-06,2022-01-10
4,Intercampus,4–10 jan 2022,615,,35.3,29.3,8.5,6.0,1.1,4.3,7.1,5.6,0.6,2.2,6.0,2022-01-04,2022-01-10
5,Pitagórica,30 dez 2021–9 jan 2022,600,,39.6,30.0,6.4,5.1,1.5,1.8,5.7,5.1,,4.8,9.6,2021-12-30,2022-01-09
6,CESOP–UCP,28 dez 2021–5 jan 2022,1238,,38.0,32.0,6.0,6.0,2.0,2.0,5.0,5.0,1.0,3.0,6.0,2021-12-28,2022-01-05
7,ICS/ISCTE,10–20 dez 2021,901,,38.0,31.0,5.0,6.0,2.0,2.0,7.0,4.0,,5.0,7.0,2021-12-10,2021-12-20
8,Intercampus,7–14 dez 2021,603,,35.5,26.8,6.6,4.5,1.6,3.5,8.9,6.4,0.8,5.3,8.7,2021-12-07,2021-12-14
9,Aximage,9–13 dez 2021,810,,35.4,33.2,7.3,5.1,1.3,2.5,6.2,3.7,,5.3,2.2,2021-12-09,2021-12-13


In [8]:
# Prepare the polls for the rolling aggregation, in one chain:
#   1. drop the row labelled 1, the earlier daily Pitagórica poll (part of its
#      data coincides with the later daily poll);
#   2. order by end date of the poll, start date breaking ties;
#   3. use the end date as index;
#   4. fill the empty Livre ("L") values with the previous poll's value.
# NOTE(review): this rebinds `sondagens`; once the index holds dates the label 1
# no longer exists, so the cell cannot be re-run without re-running the ones above.
sondagens = (
    sondagens
    .drop(index=1)
    .sort_values(by=["Fim", "Início"])
    .set_index("Fim")
    .assign(L=lambda frame: frame["L"].ffill())
)
# Show the last N polls.
sondagens.tail(N_SONDAGENS)

Unnamed: 0_level_0,sondagens,Data de amostragem,Amostra,Abstenção,PS,PSD,BE,CDU,CDS,PAN,CH,IL,L,O,V,Início
Fim,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2022-01-10,Intercampus,4–10 jan 2022,615,,35.3,29.3,8.5,6.0,1.1,4.3,7.1,5.6,0.6,2.2,6.0,2022-01-04
2022-01-10,CESOP–UCP,6–10 jan 2022,1246,,39.0,30.0,6.0,5.0,2.0,3.0,6.0,4.0,2.0,3.0,9.0,2022-01-06
2022-01-12,Aximage,6-12 jan 2022,807,,38.1,28.5,7.4,4.8,1.8,2.1,9.0,3.7,2.0,4.6,9.6,2022-01-06
2022-01-14,Pitagórica (diário),11-14 jan 2022,608,,38.8,29.3,6.8,4.6,1.3,1.9,7.8,4.9,1.5,3.1,9.5,2022-01-11


In [9]:
# Weight every poll by its sample size: each party percentage is multiplied by
# the "Amostra" value of the same row, so polls with larger samples count for
# more in the sums computed next. The sample sizes are passed as a plain array
# so the multiplication is positional, row by row.
sond_abs = sondagens[PARTIDOS].mul(sondagens["Amostra"].to_numpy(), axis=0)
sond_abs.tail()

Unnamed: 0_level_0,PS,PSD,BE,CDU,CDS,PAN,CH,IL,L
Fim,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2022-01-09,23760.0,18000.0,3840.0,3060.0,900.0,1080.0,3420.0,3060.0,600.0
2022-01-10,21709.5,18019.5,5227.5,3690.0,676.5,2644.5,4366.5,3444.0,369.0
2022-01-10,48594.0,37380.0,7476.0,6230.0,2492.0,3738.0,7476.0,4984.0,2492.0
2022-01-12,30746.7,22999.5,5971.8,3873.6,1452.6,1694.7,7263.0,2985.9,1614.0
2022-01-14,23590.4,17814.4,4134.4,2796.8,790.4,1155.2,4742.4,2979.2,912.0


In [10]:
# Rolling sum of the weighted results over the last N_SONDAGENS polls; rows in
# which every party is still NaN (window not yet full) are dropped.
sond_sum = (
    sond_abs
    .rolling(window=N_SONDAGENS)
    .sum()
    .dropna(how="all")
)
sond_sum

Unnamed: 0_level_0,PS,PSD,BE,CDU,CDS,PAN,CH,IL,L
Fim,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-11-04,136324.3,95003.2,22455.2,18640.6,6732.0,9360.8,28030.7,15418.1,
2021-11-11,121025.5,83930.4,21924.8,16127.4,5386.0,9175.2,22428.7,12334.5,
2021-11-15,113985.0,84399.7,18233.4,15683.6,4280.0,8114.3,20683.1,11185.4,
2021-12-12,105110.0,83412.2,17295.9,14571.1,4105.0,8389.3,16620.6,13272.9,
2021-12-13,99542.0,83964.2,17062.9,14312.1,3402.0,7780.3,17252.6,11879.9,534.4
2021-12-14,97080.5,82927.4,16330.3,13782.0,3142.8,7198.0,18763.7,13168.7,894.4
2021-12-20,107443.5,90795.9,17460.3,15938.0,4444.8,7812.5,20633.2,14147.7,1490.2
2022-01-05,131362.5,110599.4,21825.8,19678.5,6295.8,8413.5,22885.7,16650.2,2603.2
2022-01-09,126448.5,101707.4,19752.8,18607.5,6142.8,7468.5,21283.7,16713.2,3041.2
2022-01-10,126751.5,103566.5,21000.5,19584.0,5854.5,8002.5,20283.5,16298.0,2927.8


In [11]:
# Vote basis for the seat projection: the last row of the rolling sums,
# converted to a plain {party: weighted votes} dict.
votos_base = sond_sum.iloc[-1].to_dict()
votos_base

{'PS': 124640.6,
 'PSD': 96213.4,
 'BE': 22809.699999999997,
 'CDU': 16590.399999999998,
 'CDS': 5411.5,
 'PAN': 9232.400000000001,
 'CH': 23847.9,
 'IL': 14393.100000000002,
 'L': 5386.999999999999}

In [12]:
def dhont(n_seats, votes):
    """Allocate seats among parties with the D'Hondt highest-averages method.

    Seats are handed out one at a time to the party with the highest current
    quotient ``votes / (seats_won + 1)``.

    Args:
        n_seats: integer number of seats to allocate.
        votes: mapping of party -> number of votes. It is not modified.

    Returns:
        A dict of party -> seats won, containing every party in ``votes``.
        When quotients tie, the party that comes first in ``votes`` wins.

    Raises:
        ValueError: if seats have to be allocated but ``votes`` is empty.
    """
    seats = {party: 0 for party in votes}
    if n_seats > 0 and not seats:
        raise ValueError("cannot allocate seats: 'votes' has no parties")

    # Current quotient of every party; starts as the raw vote count.
    quotients = dict(votes)
    for _ in range(n_seats):
        # max() returns the first maximal key, which gives the tie-break
        # described above.
        winner = max(quotients, key=quotients.get)
        seats[winner] += 1
        quotients[winner] = votes[winner] / (seats[winner] + 1)

    return seats

In [13]:
# Seat projection per party, summed over all districts, in three scenarios:
#   "exp": the vote basis as it is;
#   "min": the party loses `votes_margin` votes (never below zero);
#   "max": the party gains `votes_margin` votes.
# In the min/max scenarios only the party being evaluated is changed.
# NOTE(review): the same vote totals (`votos_base`) are used for every district;
# only the number of seats differs between districts.
total = {p: {"exp": 0, "min": 0, "max": 0} for p in PARTIDOS}
# Margin in votes: ERRO_PCT percent of the total of the vote basis.
votes_margin = sum(votos_base.values()) * ERRO_PCT / 100
for dist, n in n_deps.items():
    # Expected result: it does not depend on the party being evaluated, so it
    # is computed once per district instead of once per party.
    result_exp = dhont(n, votos_base)
    for p in votos_base.keys():
        total[p]["exp"] += result_exp[p]
        # Pessimistic result
        votes_min = votos_base.copy()
        votes_min[p] = max(0, votes_min[p] - votes_margin)
        result_min = dhont(n, votes_min)
        total[p]["min"] += result_min[p]
        # Optimistic result
        votes_max = votos_base.copy()
        votes_max[p] += votes_margin
        result_max = dhont(n, votes_max)
        total[p]["max"] += result_max[p]

# Every seat must have been allocated in the expected scenario.
assert sum(i["exp"] for i in total.values()) == N_TOTAL_DEPUTADOS
total

{'PS': {'exp': 111, 'min': 109, 'max': 112},
 'PSD': {'exp': 85, 'min': 77, 'max': 86},
 'BE': {'exp': 9, 'min': 8, 'max': 15},
 'CDU': {'exp': 7, 'min': 3, 'max': 8},
 'CDS': {'exp': 0, 'min': 0, 'max': 2},
 'PAN': {'exp': 2, 'min': 1, 'max': 3},
 'CH': {'exp': 11, 'min': 8, 'max': 15},
 'IL': {'exp': 5, 'min': 2, 'max': 7},
 'L': {'exp': 0, 'min': 0, 'max': 2}}

In [14]:
# Smallest number of seats that is more than half of the parliament.
min_deps = N_TOTAL_DEPUTADOS // 2 + 1

def get_solutions(n_deputados, min_seats=None):
    """Find the combinations of parties that reach a majority of seats.

    Every subset of parties is sorted by seats (largest first) and cut at the
    first party with which the running total reaches the threshold, so no
    returned combination carries a party it does not need.

    Args:
        n_deputados: mapping of party -> number of seats.
        min_seats: seats needed for a majority. Defaults to the module-level
            ``min_deps`` when omitted.

    Returns:
        A set of tuples of ``(party, seats)`` pairs, each tuple ordered by
        seats in descending order.
    """
    threshold = min_deps if min_seats is None else min_seats
    parties = list(n_deputados.items())
    solutions = set()
    for size in range(1, len(parties) + 1):
        for subset in itertools.combinations(parties, size):
            # Stable sort: parties with equal seats keep their input order, so
            # the same group of parties always yields the same tuple.
            ranked = sorted(subset, key=lambda item: item[1], reverse=True)
            running = 0
            for count, (_, seats) in enumerate(ranked, start=1):
                running += seats
                if running >= threshold:
                    solutions.add(tuple(ranked[:count]))
                    break

    return solutions

In [15]:
# Majority combinations for the expected seat counts.
get_solutions({partido: cenarios["exp"] for partido, cenarios in total.items()})

{(('PS', 111), ('BE', 9)),
 (('PS', 111), ('CDU', 7)),
 (('PS', 111), ('CH', 11)),
 (('PS', 111), ('IL', 5)),
 (('PS', 111), ('PSD', 85)),
 (('PSD', 85), ('CH', 11), ('BE', 9), ('CDU', 7), ('IL', 5))}

In [16]:
# Majority combinations when every party gets its optimistic seat count.
get_solutions({partido: cenarios["max"] for partido, cenarios in total.items()})

{(('PS', 112), ('BE', 15)),
 (('PS', 112), ('CDS', 2), ('L', 2)),
 (('PS', 112), ('CDU', 8)),
 (('PS', 112), ('CH', 15)),
 (('PS', 112), ('IL', 7)),
 (('PS', 112), ('PAN', 3), ('CDS', 2)),
 (('PS', 112), ('PAN', 3), ('L', 2)),
 (('PS', 112), ('PSD', 86)),
 (('PSD', 86), ('BE', 15), ('CDU', 8), ('IL', 7)),
 (('PSD', 86), ('BE', 15), ('CDU', 8), ('PAN', 3), ('CDS', 2), ('L', 2)),
 (('PSD', 86), ('BE', 15), ('CH', 15)),
 (('PSD', 86), ('CH', 15), ('CDU', 8), ('IL', 7)),
 (('PSD', 86), ('CH', 15), ('CDU', 8), ('PAN', 3), ('CDS', 2), ('L', 2))}

In [17]:
# Majority combinations restricted to the parties listed in `esquerda`, using
# their optimistic seat counts.
esquerda = [
    'PS',
    'BE',
    'CDU',
    'PAN',
    'L',
]
get_solutions(dict((partido, total[partido]["max"]) for partido in esquerda))

{(('PS', 112), ('BE', 15)),
 (('PS', 112), ('CDU', 8)),
 (('PS', 112), ('PAN', 3), ('L', 2))}