In [56]:
import requests
from bs4 import BeautifulSoup
import re
import json
from datetime import datetime

# Scrape the most recent poll from peilingennederland.nl, derive the pollster
# name and poll date, and save the seat distribution as JSON.
# Leaves `seat_distribution` and `poll_info` in the namespace for later cells.

url = "https://www.peilingennederland.nl/alle-peilingen.html"
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() prevents an HTTP error page from being parsed as poll data.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Poll metadata (pollster name and date); "Unknown" marks a value not found.
poll_name = "Unknown"
poll_date = "Unknown"

# Scan the headings for a title such as "Peiling Maurice de Hond (04-10-2025)".
headers = soup.find_all(['h1', 'h2', 'h3', 'h4'])
for header in headers:
    text = header.get_text().strip()
    date_match = re.search(r'\((\d{1,2})-(\d{1,2})-(\d{4})\)', text)
    if date_match:
        day, month, year = date_match.groups()
        poll_date = f"{day.zfill(2)}-{month.zfill(2)}-{year}"

        # The pollster name sits between "Peiling" and the opening parenthesis.
        pollster_match = re.search(r'Peiling\s+(.+?)\s*\(', text, re.IGNORECASE)
        if pollster_match:
            poll_name = pollster_match.group(1).strip()
            # Make the name filename-safe: drop punctuation, then all whitespace.
            poll_name = re.sub(r'[^\w\s-]', '', poll_name)
            poll_name = re.sub(r'\s+', '', poll_name)
        # Only the first dated heading is considered.
        break

# Fallback: take the date from the page <title>, but only when no heading
# supplied one, so a date found above is never overwritten.
if poll_date == "Unknown":
    title = soup.find('title')
    if title:
        title_text = title.get_text()
        date_match = re.search(r'(\d{1,2})-(\d{1,2})-(\d{4})', title_text)
        if date_match:
            day, month, year = date_match.groups()
            poll_date = f"{day.zfill(2)}-{month.zfill(2)}-{year}"

# Poll blocks are <span> elements styled with this exact colour. The pattern is
# a raw string so that \( and \) are regex escapes, not invalid string escapes.
spans = soup.find_all("span", style=re.compile(r"color:rgb\(42, 42, 42\)"))
party_pattern = re.compile(r"([A-Za-z0-9\-\/]+):\s*(\d+)")

# Heuristic: a span only counts as a poll when it lists at least this many parties.
MIN_PARTIES_IN_POLL = 10

seat_distribution = {}

# Use only the first qualifying span (the most recent poll).
for span in spans:
    # Strip zero-width spaces BEFORE matching: the party pattern cannot match
    # them, so one inside a party name would otherwise truncate that name.
    span_html = span.decode_contents().replace('\u200b', '')
    matches = list(party_pattern.finditer(span_html))
    if len(matches) >= MIN_PARTIES_IN_POLL:
        for match in matches:
            seat_distribution[match.group(1)] = int(match.group(2))
        break  # Stop after the first relevant block

# Fail loudly instead of writing an empty file and feeding empty data to the
# coalition calculations in the following cells.
if not seat_distribution:
    raise ValueError(
        f"No poll block with at least {MIN_PARTIES_IN_POLL} parties found at {url}; "
        "the page layout may have changed."
    )

# Normalize the party name to the spelling with a slash.
if 'GL-PvdA' in seat_distribution:
    seat_distribution['GL/PvdA'] = seat_distribution.pop('GL-PvdA')

# Sort by seat count descending
seat_distribution = dict(sorted(seat_distribution.items(), key=lambda x: -x[1]))

print(f"Poll Info: {poll_name} ({poll_date})")
print("Most Recent Seat Distribution:", seat_distribution)

# Build the filename stem from the poll metadata when both parts are known.
if poll_date != "Unknown" and poll_name != "Unknown":
    filename_base = f"{poll_date}-{poll_name}"
else:
    # Fallback to current date
    filename_base = datetime.now().strftime("%d-%m-%Y") + "-AutoExtracted"

# Save with descriptive filename
verdeling_filename = f"coalitions/verdeling-{filename_base}.json"
with open(verdeling_filename, "w", encoding="utf-8") as f:
    json.dump(seat_distribution, f, ensure_ascii=False, indent=2)

print(f"Saved seat distribution to: {verdeling_filename}")

# Store for use in subsequent cells
poll_info = {
    "name": poll_name,
    "date": poll_date,
    "filename_base": filename_base
}

  spans = soup.find_all("span", style=re.compile("color:rgb\(42, 42, 42\)"))


Poll Info: MauricedeHond (04-10-2025)
Most Recent Seat Distribution: {'PVV': 30, 'GL/PvdA': 27, 'CDA': 23, 'VVD': 13, 'JA21': 13, 'D66': 12, 'SP': 6, 'FvD': 5, 'DENK': 4, 'BBB': 4, 'PvdD': 3, 'CU': 3, 'SGP': 3, 'Volt': 3, '50PLUS': 1, 'NSC': 0}
Saved seat distribution to: coalitions/verdeling-04-10-2025-MauricedeHond.json


In [57]:
%run coalition-calculations-no-biggest-party.py

if __name__ == "__main__":
    kabinetten, zetels, ek_zetels, topic_vectors = load_data()
    coalition_counter = build_coalition_frequency(kabinetten)

#     # User Input  # <<—— Add the (alleged) seat distribution for the election you want to predict
#     seat_distribution = {
# }

    Jaar = 2025  # <<—— Add the election year for Eerste Kamer seat distribution

    predictions = predict_coalitions(
        seat_distribution, 
        coalition_counter,
        ek_zetels=ek_zetels, 
        Jaar=Jaar, 
        threshold=76, 
        top_k=7,
        topic_vectors=topic_vectors
    )


    for p in predictions:
        print(f"Coalition: {p['coalition']}")
        print(f"  Seats: {p['seats']}")
        print(f"  History Score: {p['historical_score']}")
        print(f"  Ideology Score: {p['ideology_score']}")
        print(f"  EK Score: {p['ek_score']}")  # Optional: show EK alignment
        print(f"  EK Seats: {p['ek_total_seats']}")
        print(f"  JSD Penalty: {p['jsd_penalty']}")
        print(f"  Party Penalty: {p['party_penalty']}")
        print(f"  Surplus Penalty: {p['surplus_penalty']}")
        print(f"  Final Score: {p['final_score']}%")
        print("")


Coalition: ('GL/PvdA', 'CDA', 'VVD', 'JA21')
  Seats: 76
  History Score: 1.05
  Ideology Score: 2.83
  EK Score: 0.4
  EK Seats: 30
  JSD Penalty: 0.08
  Party Penalty: 0
  Surplus Penalty: 0.0
  Final Score: 16.2%

Coalition: ('GL/PvdA', 'CDA', 'VVD', 'D66', 'CU')
  Seats: 78
  History Score: 1.17
  Ideology Score: 2.03
  EK Score: 1.0
  EK Seats: 38
  JSD Penalty: 0.05
  Party Penalty: 2
  Surplus Penalty: 0.0
  Final Score: 0.2%

Coalition: ('GL/PvdA', 'CDA', 'VVD', 'D66', '50PLUS')
  Seats: 76
  History Score: 1.17
  Ideology Score: 2.11
  EK Score: 0.48
  EK Seats: 36
  JSD Penalty: 0.08
  Party Penalty: 2
  Surplus Penalty: 0.0
  Final Score: 0%

Coalition: ('GL/PvdA', 'CDA', 'JA21', 'D66', '50PLUS')
  Seats: 76
  History Score: 0.83
  Ideology Score: 2.77
  EK Score: 0.35
  EK Seats: 26
  JSD Penalty: 0.1
  Party Penalty: 2
  Surplus Penalty: 0.0
  Final Score: 0%

Coalition: ('GL/PvdA', 'CDA', 'VVD', 'SP', 'DENK', 'PvdD')
  Seats: 76
  History Score: 1.05
  Ideology Score: 2.5

In [58]:
import json

# Enrich every prediction for the visualizer: attach the seat count of each
# member party and store the coalition as a list (instead of a tuple) for JSON.
for prediction in predictions:
    members = list(prediction['coalition'])
    prediction['seat_distribution'] = {name: seat_distribution[name] for name in members}
    prediction['coalition'] = members

# Keep the coalitions whose final score is positive; when there are none,
# fall back to the three highest-scoring predictions.
filtered = [prediction for prediction in predictions if prediction['final_score'] > 0]
if len(filtered) == 0:
    filtered = sorted(predictions, key=lambda item: item['final_score'], reverse=True)[:3]

# The output path carries the poll-specific filename stem.
any_filename = f"coalitions/coalition_data_any-{poll_info['filename_base']}.json"
with open(any_filename, 'w', encoding='utf-8') as out_file:
    json.dump(filtered, out_file, ensure_ascii=False, indent=2)

print(f"Saved coalition data (any) to: {any_filename}")

Saved coalition data (any) to: coalitions/coalition_data_any-04-10-2025-MauricedeHond.json


In [59]:
%run coalition-calculations.py

if __name__ == "__main__":
    kabinetten, zetels, ek_zetels, topic_vectors = load_data()
    coalition_counter = build_coalition_frequency(kabinetten)

#     # User Input  # <<—— Add the (alleged) seat distribution for the election you want to predict
#     seat_distribution = {

# }

    Jaar = 2025  # <<—— Add the election year for Eerste Kamer seat distribution

    predictions = predict_coalitions(
        seat_distribution, 
        coalition_counter, 
        ek_zetels=ek_zetels, 
        Jaar=Jaar, 
        threshold=76, 
        top_k=7,
        topic_vectors=topic_vectors
    )


    for p in predictions:
        print(f"Coalition: {p['coalition']}")
        print(f"  Seats: {p['seats']}")
        print(f"  History Score: {p['historical_score']}")
        print(f"  Ideology Score: {p['ideology_score']}")
        print(f"  EK Score: {p['ek_score']}")  # Optional: show EK alignment
        print(f"  EK Seats: {p['ek_total_seats']}")
        print(f"  JSD Penalty: {p['jsd_penalty']}")
        print(f"  Party Penalty: {p['party_penalty']}")
        print(f"  Surplus Penalty: {p['surplus_penalty']}")
        print(f"  Final Score: {p['final_score']}%")
        print("")


In [60]:
import json

# Enrich every prediction for the visualizer: attach the seat count of each
# member party and store the coalition as a list (instead of a tuple) for JSON.
for prediction in predictions:
    members = list(prediction['coalition'])
    prediction['seat_distribution'] = {name: seat_distribution[name] for name in members}
    prediction['coalition'] = members

# Keep the coalitions whose final score is positive; when there are none,
# fall back to the three highest-scoring predictions.
filtered = [prediction for prediction in predictions if prediction['final_score'] > 0]
if len(filtered) == 0:
    filtered = sorted(predictions, key=lambda item: item['final_score'], reverse=True)[:3]

# The output path carries the poll-specific filename stem.
with_biggest_filename = f"coalitions/coalition_data_with_biggest-{poll_info['filename_base']}.json"
with open(with_biggest_filename, 'w', encoding='utf-8') as out_file:
    json.dump(filtered, out_file, ensure_ascii=False, indent=2)

print(f"Saved coalition data (with biggest) to: {with_biggest_filename}")

Saved coalition data (with biggest) to: coalitions/coalition_data_with_biggest-04-10-2025-MauricedeHond.json


In [61]:
# Print a summary of the poll-specific files, then refresh the undated
# "fallback" JSON files from the current poll's data.
print("="*60)
print("GENERATED FILES SUMMARY")
print("="*60)
print("Poll Information:")
print(f"  Name: {poll_info['name']}")
print(f"  Date: {poll_info['date']}")
print(f"  Filename Base: {poll_info['filename_base']}")
print()
print("Generated Files:")
print(f"  1. coalitions/verdeling-{poll_info['filename_base']}.json")
print(f"  2. coalitions/coalition_data_any-{poll_info['filename_base']}.json")
print(f"  3. coalitions/coalition_data_with_biggest-{poll_info['filename_base']}.json")
print()
print("File naming pattern: dd-mm-yyyy-PollsterName")
print("Example: 04-10-2025-MauricedeHond")
print("="*60)

# AUTO-UPDATE FALLBACK FILES
print("\n🔄 Updating fallback files...")

# Update fallback verdeling.json from the in-memory seat distribution.
with open("coalitions/verdeling.json", "w", encoding="utf-8") as f:
    json.dump(seat_distribution, f, ensure_ascii=False, indent=2)
print("✅ Updated coalitions/verdeling.json")

# Copy each poll-specific coalition file onto its undated fallback name.
# Each copy is best-effort: a failure is reported and remembered, so the
# closing message does not claim success when a copy did not happen.
failed_updates = []
for fallback_stem in ("coalition_data_with_biggest", "coalition_data_any"):
    dated_path = f"coalitions/{fallback_stem}-{poll_info['filename_base']}.json"
    fallback_path = f"coalitions/{fallback_stem}.json"
    try:
        # Load fully before opening the target, so a missing or unreadable
        # source never truncates the existing fallback file.
        with open(dated_path, "r", encoding="utf-8") as f:
            coalition_data = json.load(f)
        with open(fallback_path, "w", encoding="utf-8") as f:
            json.dump(coalition_data, f, ensure_ascii=False, indent=2)
        print(f"✅ Updated {fallback_path}")
    except Exception as e:
        failed_updates.append(fallback_path)
        print(f"⚠️  Could not update {fallback_stem}.json: {e}")

if failed_updates:
    print(f"\n⚠️  Fallback update incomplete; not updated: {', '.join(failed_updates)}")
else:
    print("\n🎉 Fallback files automatically updated!")
    print("   Your website will now use the latest poll data as default.")

GENERATED FILES SUMMARY
Poll Information:
  Name: MauricedeHond
  Date: 04-10-2025
  Filename Base: 04-10-2025-MauricedeHond

Generated Files:
  1. coalitions/verdeling-04-10-2025-MauricedeHond.json
  2. coalitions/coalition_data_any-04-10-2025-MauricedeHond.json
  3. coalitions/coalition_data_with_biggest-04-10-2025-MauricedeHond.json

File naming pattern: dd-mm-yyyy-PollsterName
Example: 04-10-2025-MauricedeHond

🔄 Updating fallback files...
✅ Updated coalitions/verdeling.json
✅ Updated coalitions/coalition_data_with_biggest.json
✅ Updated coalitions/coalition_data_any.json

🎉 Fallback files automatically updated!
   Your website will now use the latest poll data as default.
