In [61]:
import pandas as pd
from itertools import combinations
from collections import Counter
import math

# -------------------------------
# Ideological spectrum (manually defined, -5=far-left to +5=far-right)
# -------------------------------
IDEOLOGY_MAP = {
    "SP": -5,
    "PvdA": -3,
    "GL": -3,
    "PvdD": -2,
    "D66": -1,
    "Volt": -1,
    "CDA": 1,
    "CU": 1,
    "SGP": 2,
    "VVD": 3,
    "JA21": 4,
    "PVV": 5,
    "BBB": 2,
    "Forum voor Democratie": 5,
    "DENK": -4,
    "50PLUS": 0,
    "BIJ1": -5,
    "LP": 3,
    "NSC": 1
}

In [62]:
# -------------------------------
# Load and normalize data
# -------------------------------
def load_data():
    kabinetten = pd.read_csv('data/cabinets/kabinetten_schoongemaakt.csv')
    zetels_100 = pd.read_csv('data/zetelverdeling/zetel-data/tk_zetels100_1918-1956.csv')
    zetels_150 = pd.read_csv('data/zetelverdeling/zetel-data/tk_zetels150_1956-2023.csv')
    zetels = pd.concat([zetels_100, zetels_150], ignore_index=True)

    kabinetten['Partijen'] = kabinetten['Partijen'].dropna().str.split(', ')
    return kabinetten, zetels

In [63]:
# -------------------------------
# Build historical coalition frequency model
# -------------------------------
def build_coalition_frequency(kabinetten):
    coalition_counter = Counter()
    for partijen in kabinetten['Partijen'].dropna():
        for r in range(2, len(partijen) + 1):
            for combo in combinations(sorted(partijen), r):
                coalition_counter[combo] += 1
    return coalition_counter

In [64]:
# -------------------------------
# Ideological compatibility score (lower is better)
# -------------------------------
def ideological_distance(parties):
    ideologies = [IDEOLOGY_MAP.get(p, 0) for p in parties]
    if len(ideologies) <= 1:
        return 0
    variance = sum((i - sum(ideologies)/len(ideologies))**2 for i in ideologies) / len(ideologies)
    return math.sqrt(variance)

In [65]:
# # -------------------------------
# # Predict coalitions (Without historical data)
# # -------------------------------
# def predict_coalitions(seat_distribution, coalition_counter, threshold=76, top_k=5):
#     parties = list(seat_distribution.keys())
#     valid_coalitions = []

#     for r in range(2, len(parties)+1):
#         for combo in combinations(parties, r):
#             seats = sum(seat_distribution[p] for p in combo)
#             if seats >= threshold:
#                 historical_score = coalition_counter.get(tuple(sorted(combo)), 0)
#                 ideology_score = ideological_distance(combo)
#                 # Combine both into a final score: higher = better
#                 final_score = (historical_score * 2) - ideology_score
#                 valid_coalitions.append({
#                     "coalition": combo,
#                     "seats": seats,
#                     "historical_score": historical_score,
#                     "ideology_score": round(ideology_score, 2),
#                     "final_score": round(final_score, 2)
#                 })

#     valid_coalitions.sort(key=lambda x: (-x["final_score"], -x["seats"]))
#     return valid_coalitions[:top_k]

In [66]:
# -------------------------------
# Reduce impact of smaller parties
# -------------------------------
def scaled_seat_weight(seat_count):
    """Scale the impact of a party's seat count using a logarithmic function."""
    # We use log scale to reduce the influence of smaller parties.
    return math.log(seat_count + 1)  # +1 to avoid log(0)

In [None]:
# -------------------------------
# Define unrealistic combinations (only add the ones that are definitely unrealistic)
# -------------------------------
def is_unrealistic_combo(parties):
    # Add more logic if needed
    extremes = [
        ('FvD', 'Volt'),
        ('PVV', 'BIJ1'),
        ('SGP', 'BIJ1'),
        ('FvD', 'D66'),
        ('PVV', 'GL/PvdA'),
        ('PVV', 'DENK'),
        ('PVV', 'Volt'),
        ('SGP', 'Volt'),
        
    ]
    party_set = set(parties)
    for a, b in extremes:
        if a in party_set and b in party_set:
            return True
    return False

In [68]:
# -------------------------------
# Define new parties
# -------------------------------
PARTY_LINEAGE = {
    "GL/PvdA": ["GL", "PvdA"],  # Merged parties
    "NSC": ["CDA"],             # NSC is a breakaway from CDA
    "JA21": ["FvD"],            # JA21 split from FvD
    "Volt": [],                 # Volt is a new party (no historical link)
    "BBB": [],                  # BBB is a new party (no historical link)
    # Add other mappings if necessary
}

def expand_party(party):
    """Return historical equivalents for a party (e.g., GL/PvdA -> [GL, PvdA])"""
    return PARTY_LINEAGE.get(party, [party])

def get_expanded_coalition(combo):
    """Expand a coalition to include historical equivalents"""
    expanded = set()
    for party in combo:
        expanded.update(expand_party(party))
    return expanded

def calculate_historical_score(combo, coalition_counter, seat_distribution):
    """Compute adjusted historical overlap score using lineage info and seat scaling"""
    expanded_combo = get_expanded_coalition(combo)

    score = 0
    total_weight = 0  # To keep track of the total weight for normalization
    
    for historical_coalition in coalition_counter:
        overlap = expanded_combo & set(historical_coalition)
        
        if len(overlap) >= 2:  # If there's enough overlap
            # Calculate overlap score: how much overlap, divided by the total length of the coalition
            overlap_score = len(overlap) / len(historical_coalition)
            
            # Check if it's a lineage-based match (partial weight) or direct match (full weight)
            if any(party in PARTY_LINEAGE for party in combo):
                # If it's from a different lineage, give partial weight
                overlap_score *= 0.5  # Apply 50% weight for lineage-based matches
            
            # Sum the weighted overlap score, scaled by seat count
            for party in combo:
                party_weight = scaled_seat_weight(seat_distribution[party])
                total_weight += party_weight
                score += coalition_counter[historical_coalition] * overlap_score * party_weight
    
    # Normalize the score by the total weight of the parties in the combo
    if total_weight > 0:
        score /= total_weight
    return score

In [69]:
# -------------------------------
# Define main prediction function
# -------------------------------
def predict_coalitions(seat_distribution, coalition_counter, threshold=76, top_k=5):
    parties = list(seat_distribution.keys())
    valid_coalitions = []

    for r in range(2, len(parties) + 1):
        for combo in combinations(parties, r):
            seats = sum(seat_distribution[p] for p in combo)
            if seats >= threshold:

                if is_unrealistic_combo(combo):
                    continue

                # Calculate historical score with lineage adjustments and seat scaling
                historical_score = calculate_historical_score(combo, coalition_counter, seat_distribution)
                
                # Calculate ideology score
                ideology_score = ideological_distance(combo)

                # Apply penalties for party count and seat surplus
                party_penalty = max(0, len(combo) - 4) * 2
                surplus_penalty = max(0, seats - 85) * 0.5

                # Final score computation
                final_score = (historical_score * 2) - (ideology_score * 2) - party_penalty - surplus_penalty

                valid_coalitions.append({
                    "coalition": combo,
                    "seats": seats,
                    "historical_score": round(historical_score, 2),
                    "ideology_score": round(ideology_score, 2),
                    "party_penalty": round(party_penalty, 2),
                    "surplus_penalty": round(surplus_penalty, 2),
                    "final_score": round(final_score, 2)
                })

    valid_coalitions.sort(key=lambda x: (-x["final_score"], x["seats"]))  # Favor lower seat counts
    return valid_coalitions[:top_k]

In [72]:
# -------------------------------
# Example usage
# -------------------------------
if __name__ == "__main__":
    kabinetten, zetels = load_data()
    coalition_counter = build_coalition_frequency(kabinetten)

    # User Input
    seat_distribution = {
        'VVD': 24,
        'GL/PvdA': 25,
        'D66': 9,
        'CDA': 5,
        'CU': 3,
        'PVV': 37,
        'BBB': 7,
        'SP': 5,
        'PvdD': 3,
        'NSC': 20,
        'DENK': 3,
        'FvD': 3,
        'SGP': 3,
        'Volt': 2,
        'JA21': 1
}

    predictions = predict_coalitions(seat_distribution, coalition_counter, threshold=76, top_k=7)
    
    for p in predictions:
        print(f"Coalition: {p['coalition']}")
        print(f"  Seats: {p['seats']}")
        print(f"  History Score: {p['historical_score']}")
        print(f"  Ideology Score: {p['ideology_score']}")
        print(f"  Final Score: {p['final_score']}")
        print("")

Coalition: ('VVD', 'GL/PvdA', 'BBB', 'NSC')
  Seats: 76
  History Score: 1.05
  Ideology Score: 1.12
  Final Score: -0.13

Coalition: ('VVD', 'GL/PvdA', 'D66', 'NSC')
  Seats: 78
  History Score: 1.17
  Ideology Score: 1.48
  Final Score: -0.62

Coalition: ('VVD', 'PVV', 'NSC', 'JA21')
  Seats: 82
  History Score: 0.88
  Ideology Score: 1.48
  Final Score: -1.2

Coalition: ('VVD', 'PVV', 'NSC', 'SGP')
  Seats: 84
  History Score: 0.88
  Ideology Score: 1.48
  Final Score: -1.2

Coalition: ('VVD', 'PVV', 'NSC')
  Seats: 81
  History Score: 0.88
  Ideology Score: 1.63
  Final Score: -1.51

Coalition: ('VVD', 'CU', 'PVV', 'NSC')
  Seats: 84
  History Score: 0.88
  Ideology Score: 1.66
  Final Score: -1.56

Coalition: ('VVD', 'D66', 'CDA', 'PVV', 'SGP')
  Seats: 78
  History Score: 2.1
  Ideology Score: 2.0
  Final Score: -1.81



In [71]:
parties_2024= {
        'VVD': 24,
        'PvdA': 0,
        'GL': 25,
        'D66': 9,
        'CDA': 5,
        'CU': 3,
        'PVV': 37,
        'BBB': 7,
        'SP': 5,
        'PvdD': 3,
        'NSC': 20,
        'DENK': 3,
        'FvD': 3,
        'SGP': 3,
        'Volt': 2,
        'JA21': 1
}