In [1]:
import requests
from bs4 import BeautifulSoup
import re
import json
from datetime import datetime

url = "https://www.peilingennederland.nl/alle-peilingen.html"
# Bound the request so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of parsing an error page as if it were poll data.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Find poll metadata (pollster name and date).
# Both values stay "Unknown" when the page does not expose them in a recognised form.
poll_name = "Unknown"
poll_date = "Unknown"

# Scan the headings for a title such as "Peiling Maurice de Hond (04-10-2025)".
# The first heading that carries a parenthesised d-m-yyyy date wins.
headers = soup.find_all(['h1', 'h2', 'h3', 'h4'])
for header in headers:
    text = header.get_text().strip()
    date_match = re.search(r'\((\d{1,2})-(\d{1,2})-(\d{4})\)', text)
    if date_match:
        day, month, year = date_match.groups()
        # Normalise to zero-padded dd-mm-yyyy
        poll_date = f"{day.zfill(2)}-{month.zfill(2)}-{year}"

        # The pollster name is whatever sits between "Peiling" and the opening parenthesis
        pollster_match = re.search(r'Peiling\s+(.+?)\s*\(', text, re.IGNORECASE)
        if pollster_match:
            poll_name = pollster_match.group(1).strip()
            # Make the name filename-safe: drop punctuation, then remove all whitespace
            poll_name = re.sub(r'[^\w\s-]', '', poll_name)
            poll_name = re.sub(r'\s+', '', poll_name)
        break

# Fallback: take the date from the page <title>, but only when the headings did
# not yield one. The title pattern is looser (no parentheses required), so it
# must not overwrite a date that was already read from a poll heading.
if poll_date == "Unknown":
    title = soup.find('title')
    if title:
        title_text = title.get_text()
        date_match = re.search(r'(\d{1,2})-(\d{1,2})-(\d{4})', title_text)
        if date_match:
            day, month, year = date_match.groups()
            poll_date = f"{day.zfill(2)}-{month.zfill(2)}-{year}"

# Find the first poll block (the first <span> with party data).
# The style pattern must be a raw string: "\(" in a normal string literal is an
# invalid escape sequence and makes Python emit a warning when the cell is compiled.
spans = soup.find_all("span", style=re.compile(r"color:rgb\(42, 42, 42\)"))
# "<party>: <seats>" pairs; party names may contain letters, digits, "-" and "/"
party_pattern = re.compile(r"([A-Za-z0-9\-\/]+):\s*(\d+)")

seat_distribution = {}

# Only use the first block with multiple parties (most recent peiling)
for span in spans:
    text = span.decode_contents()
    matches = list(party_pattern.finditer(text))
    if len(matches) >= 10:  # Heuristic: at least 10 parties in a poll
        for match in matches:
            # NOTE(review): the character class in party_pattern cannot match U+200B,
            # so this replace is a no-op as written; kept to preserve behaviour.
            party = match.group(1).replace('\u200b', '').strip()
            seats = int(match.group(2))
            seat_distribution[party] = seats
        break  # Stop after the first relevant block

# Make a failed extraction visible instead of silently carrying an empty dict forward
if not seat_distribution:
    print("WARNING: no poll block with at least 10 parties was found; seat_distribution is empty")

# Normalize party names if needed
if 'GL-PvdA' in seat_distribution:
    seat_distribution['GL/PvdA'] = seat_distribution.pop('GL-PvdA')

# Sort by seat count descending (ties keep their extraction order)
seat_distribution = dict(sorted(seat_distribution.items(), key=lambda x: -x[1]))

# Report what was extracted
print(f"Poll Info: {poll_name} ({poll_date})")
print("Most Recent Seat Distribution:", seat_distribution)

# Name the output after the poll when both the date and the pollster are known;
# otherwise fall back to today's date with an "AutoExtracted" marker.
has_poll_metadata = "Unknown" not in (poll_date, poll_name)
filename_base = (
    f"{poll_date}-{poll_name}"
    if has_poll_metadata
    else datetime.now().strftime("%d-%m-%Y") + "-AutoExtracted"
)

# Write the seat distribution to its poll-specific JSON file
verdeling_filename = f"coalitions/verdeling-{filename_base}.json"
with open(verdeling_filename, "w", encoding="utf-8") as seats_file:
    json.dump(seat_distribution, seats_file, ensure_ascii=False, indent=2)

print(f"Saved seat distribution to: {verdeling_filename}")

# Poll metadata shared with the later cells of the notebook
poll_info = dict(name=poll_name, date=poll_date, filename_base=filename_base)

  spans = soup.find_all("span", style=re.compile("color:rgb\(42, 42, 42\)"))


Poll Info: MauricedeHond (10-10-2025)
Most Recent Seat Distribution: {'PVV': 31, 'GL/PvdA': 25, 'CDA': 22, 'VVD': 15, 'D66': 14, 'JA21': 13, 'FvD': 5, 'SP': 4, 'DENK': 4, 'BBB': 4, 'PvdD': 3, 'CU': 3, 'SGP': 3, 'Volt': 3, '50PLUS': 1, 'NSC': 0}
Saved seat distribution to: coalitions/verdeling-10-10-2025-MauricedeHond.json


In [2]:
# HANDMATIGE PEILING INVOER
# Gebruik deze cel om handmatig een peiling in te voeren in plaats van automatische extractie
import re
import json
from datetime import datetime
# Set this to True to use the manual data below instead of the automatically extracted poll
use_manual_poll = False

# Manual poll configuration
manual_poll_name = "Ipsos I&O"
manual_poll_date = "08-10-2025"  # Format: dd-mm-yyyy

# Manual seat distribution - adjust as needed
manual_seat_distribution = {
    'PVV': 31,
    'GL/PvdA': 22,
    'VVD': 13,
    'NSC': 0,
    'D66': 14,
    'BBB': 4,
    'CDA': 24,
    'SP': 4,
    'PvdD': 5,
    'CU': 2,
    'SGP': 4,
    'DENK': 4,
    'FvD': 4,
    'JA21': 13,
    'Volt': 4,
    '50PLUS': 2
}

# Activate the manual data when use_manual_poll is True
if use_manual_poll:
    # Override the automatically extracted data
    poll_name = manual_poll_name
    poll_date = manual_poll_date
    # Sort by seat count descending (ties keep their order from the dict above)
    seat_distribution = dict(sorted(manual_seat_distribution.items(), key=lambda x: -x[1]))

    # Build the filename base with the same sanitisation the automatic extraction
    # applies to pollster names: drop punctuation, then remove all whitespace.
    # This keeps characters such as "/" or "&" out of the file path.
    safe_poll_name = re.sub(r'[^\w\s-]', '', poll_name)
    safe_poll_name = re.sub(r'\s+', '', safe_poll_name)
    filename_base = f"{poll_date}-{safe_poll_name}"

    # Save manual poll data
    verdeling_filename = f"coalitions/verdeling-{filename_base}.json"
    with open(verdeling_filename, "w", encoding="utf-8") as f:
        json.dump(seat_distribution, f, ensure_ascii=False, indent=2)

    # Update poll_info so later cells pick up the manual poll
    poll_info = {
        "name": poll_name,
        "date": poll_date,
        "filename_base": filename_base
    }

    print("🔧 HANDMATIGE PEILING ACTIEF")
    print(f"Poll Info: {poll_name} ({poll_date})")
    print("Handmatige zetelverdeling:", seat_distribution)
    print(f"Opgeslagen als: {verdeling_filename}")
    print()

# Report which poll is in effect. When use_manual_poll is False these names must
# already exist in the notebook namespace (set by the automatic extraction cell).
print(f"Gebruikte peiling: {poll_info['name']} ({poll_info['date']})")
print(f"Filename base: {poll_info['filename_base']}")
print(f"Totaal zetels: {sum(seat_distribution.values())}")

Gebruikte peiling: MauricedeHond (10-10-2025)
Filename base: 10-10-2025-MauricedeHond
Totaal zetels: 150


In [3]:
%run coalition-calculations-no-biggest-party.py

if __name__ == "__main__":
    # load_data, build_coalition_frequency and predict_coalitions are not defined
    # in this notebook; presumably they come from the script executed with %run above.
    kabinetten, zetels, ek_zetels, topic_vectors = load_data()
    coalition_counter = build_coalition_frequency(kabinetten)

    # seat_distribution is taken from the notebook namespace (set by the poll
    # extraction / manual input cells). Assign a dict here to predict for a
    # different seat distribution.

    Jaar = 2025  # election year passed on for the Eerste Kamer seat distribution

    predictions = predict_coalitions(
        seat_distribution,
        coalition_counter,
        ek_zetels=ek_zetels,
        Jaar=Jaar,
        threshold=76,  # presumably a majority of the 150 seats - confirm
        top_k=7,
        topic_vectors=topic_vectors,
    )

    # (label, key) pairs printed for every prediction, in display order
    detail_fields = (
        ("Seats", "seats"),
        ("History Score", "historical_score"),
        ("Ideology Score", "ideology_score"),
        ("EK Score", "ek_score"),
        ("EK Seats", "ek_total_seats"),
        ("JSD Penalty", "jsd_penalty"),
        ("Party Penalty", "party_penalty"),
        ("Surplus Penalty", "surplus_penalty"),
    )

    for prediction in predictions:
        print(f"Coalition: {prediction['coalition']}")
        for label, key in detail_fields:
            print(f"  {label}: {prediction[key]}")
        print(f"  Final Score: {prediction['final_score']}%")
        print("")


Coalition: ('GL/PvdA', 'CDA', 'VVD', 'D66')
  Seats: 76
  History Score: 1.17
  Ideology Score: 2.25
  EK Score: 0.47
  EK Seats: 35
  JSD Penalty: 0.05
  Party Penalty: 0
  Surplus Penalty: 0.0
  Final Score: 33.0%

Coalition: ('GL/PvdA', 'CDA', 'VVD', 'D66', 'CU')
  Seats: 79
  History Score: 1.17
  Ideology Score: 2.03
  EK Score: 1.0
  EK Seats: 38
  JSD Penalty: 0.05
  Party Penalty: 2
  Surplus Penalty: 0.0
  Final Score: 0.2%

Coalition: ('GL/PvdA', 'CDA', 'VVD', 'JA21', '50PLUS')
  Seats: 76
  History Score: 1.05
  Ideology Score: 2.65
  EK Score: 0.41
  EK Seats: 31
  JSD Penalty: 0.11
  Party Penalty: 2
  Surplus Penalty: 0.0
  Final Score: 0%

Coalition: ('GL/PvdA', 'CDA', 'VVD', 'SP', 'DENK', 'PvdD', 'CU')
  Seats: 76
  History Score: 1.05
  Ideology Score: 2.29
  EK Score: 1.0
  EK Seats: 39
  JSD Penalty: 0.12
  Party Penalty: 6
  Surplus Penalty: 0.0
  Final Score: 0%

Coalition: ('GL/PvdA', 'CDA', 'VVD', 'SP', 'DENK', 'PvdD', 'Volt')
  Seats: 76
  History Score: 1.05
  

In [4]:
import json

# Enrich every prediction in place for the visualizer: attach the seats of the
# coalition's own parties and turn the coalition tuple into a JSON-friendly list.
for prediction in predictions:
    members = prediction['coalition']
    prediction['seat_distribution'] = {party: seat_distribution[party] for party in members}
    prediction['coalition'] = list(members)

# Keep the coalitions with a final score above 0%; if there are none,
# fall back to the three highest-scoring predictions.
filtered = [prediction for prediction in predictions if prediction['final_score'] > 0]
filtered = filtered or sorted(predictions, key=lambda x: -x['final_score'])[:3]

# Write the result to the poll-specific "any" file
any_filename = f"coalitions/coalition_data_any-{poll_info['filename_base']}.json"
with open(any_filename, 'w', encoding='utf-8') as out_file:
    json.dump(filtered, out_file, ensure_ascii=False, indent=2)

print(f"Saved coalition data (any) to: {any_filename}")

Saved coalition data (any) to: coalitions/coalition_data_any-10-10-2025-MauricedeHond.json


In [5]:
%run coalition-calculations.py

if __name__ == "__main__":
    # load_data, build_coalition_frequency and predict_coalitions are not defined
    # in this notebook; presumably they come from the script executed with %run above.
    kabinetten, zetels, ek_zetels, topic_vectors = load_data()
    coalition_counter = build_coalition_frequency(kabinetten)

    # seat_distribution is read from the notebook namespace (set by the poll
    # extraction / manual input cells). Assign a dict here to predict for a
    # different seat distribution.

    Jaar = 2025  # election year passed on for the Eerste Kamer seat distribution

    predictions = predict_coalitions(
        seat_distribution,
        coalition_counter,
        ek_zetels=ek_zetels,
        Jaar=Jaar,
        threshold=76,  # presumably a majority of the 150 seats - confirm
        top_k=7,
        topic_vectors=topic_vectors,
    )

    # Report every prediction: the coalition first, then its score components
    # in a fixed order, and the final score as a percentage last.
    score_lines = [
        ("Seats", "seats"),
        ("History Score", "historical_score"),
        ("Ideology Score", "ideology_score"),
        ("EK Score", "ek_score"),
        ("EK Seats", "ek_total_seats"),
        ("JSD Penalty", "jsd_penalty"),
        ("Party Penalty", "party_penalty"),
        ("Surplus Penalty", "surplus_penalty"),
    ]

    for prediction in predictions:
        print(f"Coalition: {prediction['coalition']}")
        for caption, field in score_lines:
            print(f"  {caption}: {prediction[field]}")
        print(f"  Final Score: {prediction['final_score']}%")
        print("")


In [6]:
import json

# Prepare the predictions for the visualizer (modifies them in place): add the
# seat counts of the member parties and store the coalition as a list so it
# serialises to JSON.
for prediction in predictions:
    coalition_parties = prediction['coalition']
    prediction['seat_distribution'] = {
        party: seat_distribution[party] for party in coalition_parties
    }
    prediction['coalition'] = list(coalition_parties)

# Export only coalitions scoring above 0%; when none qualify, export the top 3.
filtered = [prediction for prediction in predictions if prediction['final_score'] > 0]
if len(filtered) == 0:
    filtered = sorted(predictions, key=lambda x: -x['final_score'])[:3]

# Write the result to the poll-specific "with biggest" file
with_biggest_filename = f"coalitions/coalition_data_with_biggest-{poll_info['filename_base']}.json"
with open(with_biggest_filename, 'w', encoding='utf-8') as out_file:
    json.dump(filtered, out_file, ensure_ascii=False, indent=2)

print(f"Saved coalition data (with biggest) to: {with_biggest_filename}")

Saved coalition data (with biggest) to: coalitions/coalition_data_with_biggest-10-10-2025-MauricedeHond.json


In [7]:
# Summary of the files produced for the current poll
banner = "=" * 60
summary_base = poll_info['filename_base']

print(banner)
print("GENERATED FILES SUMMARY")
print(banner)
print("Poll Information:")
print(f"  Name: {poll_info['name']}")
print(f"  Date: {poll_info['date']}")
print(f"  Filename Base: {summary_base}")
print()
print("Generated Files:")
# One numbered line per output file, in the order the notebook writes them
generated_prefixes = ("verdeling", "coalition_data_any", "coalition_data_with_biggest")
for number, prefix in enumerate(generated_prefixes, start=1):
    print(f"  {number}. coalitions/{prefix}-{summary_base}.json")
print()
print("File naming pattern: dd-mm-yyyy-PollsterName")
print("Example: 04-10-2025-MauricedeHond")
print(banner)

# AUTO-UPDATE FALLBACK FILES
print("\n🔄 Updating fallback files...")

# The fallback seat file always mirrors the poll currently in seat_distribution
with open("coalitions/verdeling.json", "w", encoding="utf-8") as seats_file:
    json.dump(seat_distribution, seats_file, ensure_ascii=False, indent=2)
print("✅ Updated coalitions/verdeling.json")

# Copy each poll-specific coalition file onto its un-suffixed fallback name.
# A failure is reported and the loop moves on to the next file.
for fallback_stem in ("coalition_data_with_biggest", "coalition_data_any"):
    try:
        poll_specific_path = f"coalitions/{fallback_stem}-{poll_info['filename_base']}.json"
        with open(poll_specific_path, "r", encoding="utf-8") as source_file:
            coalition_data = json.load(source_file)
        with open(f"coalitions/{fallback_stem}.json", "w", encoding="utf-8") as target_file:
            json.dump(coalition_data, target_file, ensure_ascii=False, indent=2)
        print(f"✅ Updated coalitions/{fallback_stem}.json")
    except Exception as e:
        print(f"⚠️  Could not update {fallback_stem}.json: {e}")

# CREATE/UPDATE STATIC POLL INDEX FOR GITHUB PAGES
print("\n📁 Updating static poll index for GitHub Pages...")

import os
import glob
from datetime import datetime

# Collect every per-poll seat file. glob returns matches in arbitrary order, so
# sort them to keep the index stable when several polls share the same date.
# The un-suffixed fallback file (verdeling.json) does not match this pattern.
poll_files = sorted(glob.glob("coalitions/verdeling-*.json"))
poll_index = []

for file_path in poll_files:
    seats_filename = os.path.basename(file_path)
    # The glob pattern guarantees the "verdeling-" prefix and ".json" suffix;
    # slice them off so only the two ends are removed, never text in the middle.
    poll_id = seats_filename[len("verdeling-"):-len(".json")]

    # Poll IDs look like dd-mm-yyyy-PollsterName; anything shorter is skipped.
    parts = poll_id.split('-')
    if len(parts) < 4:
        continue

    # Loop-local names are prefixed with "index_" / "display_" on purpose: the
    # notebook-level poll_name, day, month and year must not be overwritten here.
    index_day, index_month, index_year = parts[0], parts[1], parts[2]
    pollster = '-'.join(parts[3:])

    try:
        date_obj = datetime(int(index_year), int(index_month), int(index_day))
        # date_obj is naive, so timestamp() interprets it in the machine's local timezone
        sort_timestamp = date_obj.timestamp()
    except ValueError:
        # Skip files with invalid date format
        continue

    # Create a readable name from the pollster part of the file name
    pollster_lower = pollster.lower()
    pollster_key = pollster_lower.replace('-', '').replace(' ', '')
    index_date = f"{index_day}-{index_month}-{index_year}"
    is_election = pollster_key == 'tweedekamerverkiezing'
    if is_election:
        display_name = f"Tweede Kamer verkiezing {index_year}"
    elif 'mauricedehond' in pollster_key:
        display_name = f"Maurice de Hond ({index_date})"
    elif 'ipsos' in pollster_lower:
        display_name = f"Ipsos I&O ({index_date})"
    elif 'verian' in pollster_lower:
        display_name = f"Verian ({index_date})"
    else:
        # Generic formatting: hyphens become spaces, words are title-cased
        clean_pollster = pollster.replace('-', ' ').title()
        display_name = f"{clean_pollster} ({index_date})"

    poll_index.append({
        "id": poll_id,
        "name": display_name,
        "date": index_date,
        "timestamp": sort_timestamp,
        "isElection": is_election,
        "files": {
            "seats": f"model/coalitions/verdeling-{poll_id}.json",
            "coalitionWithBiggest": f"model/coalitions/coalition_data_with_biggest-{poll_id}.json",
            "coalitionAny": f"model/coalitions/coalition_data_any-{poll_id}.json"
        }
    })

# Sort by date (newest first), elections last
poll_index.sort(key=lambda x: (x["isElection"], -x["timestamp"]))

# Save the poll index
with open("coalitions/poll-index.json", "w", encoding="utf-8") as f:
    json.dump(poll_index, f, ensure_ascii=False, indent=2)

print(f"✅ Updated coalitions/poll-index.json with {len(poll_index)} polls")
print("🎉 Fallback files and static index automatically updated!")
print("   Your GitHub Pages website will now work correctly!")

GENERATED FILES SUMMARY
Poll Information:
  Name: MauricedeHond
  Date: 10-10-2025
  Filename Base: 10-10-2025-MauricedeHond

Generated Files:
  1. coalitions/verdeling-10-10-2025-MauricedeHond.json
  2. coalitions/coalition_data_any-10-10-2025-MauricedeHond.json
  3. coalitions/coalition_data_with_biggest-10-10-2025-MauricedeHond.json

File naming pattern: dd-mm-yyyy-PollsterName
Example: 04-10-2025-MauricedeHond

🔄 Updating fallback files...
✅ Updated coalitions/verdeling.json
✅ Updated coalitions/coalition_data_with_biggest.json
✅ Updated coalitions/coalition_data_any.json

📁 Updating static poll index for GitHub Pages...
✅ Updated coalitions/poll-index.json with 6 polls
🎉 Fallback files and static index automatically updated!
   Your GitHub Pages website will now work correctly!
