# Suspicious Zero-Vote Cases Analysis

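This notebook looks at the 2025 Czech parliamentary election results published on volby.cz and finds polling commissions in which one of the top parties received zero votes. Each case is ranked by how improbable a zero result is, given the party's vote share in the same municipality (falling back to its overall share when no municipality-level share is available).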

In [None]:
import pandas as pd
import requests
import zipfile
import io
from pathlib import Path

## Load Data

In [None]:
# Cache file paths (relative to the notebook's working directory)
parquet_file = Path("election_data.parquet")
municipalities_file = Path("pscoco.csv")
parties_file = Path("cvs.csv")

# Load election data: reuse the local Parquet cache when it exists, otherwise
# download the zipped CSV once and cache it for later runs.
if parquet_file.exists():
    df = pd.read_parquet(parquet_file)
else:
    print("Downloading election data...")
    url = "https://www.volby.cz/opendata/ps2025/csv_od/pst4p.zip"
    # A browser-like User-Agent is sent with the request.
    # NOTE(review): presumably the server rejects the default client UA - confirm.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()

    # The context manager closes the in-memory archive once the CSV is parsed.
    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
        # Pick the CSV member explicitly instead of trusting the member order,
        # so an extra non-CSV entry in the archive does not get parsed as data.
        csv_members = [
            name for name in zip_file.namelist()
            if name.lower().endswith(".csv")
        ]
        if not csv_members:
            raise ValueError(f"No CSV file found in archive downloaded from {url}")
        csv_filename = csv_members[0]

        with zip_file.open(csv_filename) as f:
            df = pd.read_csv(f)

    df.to_parquet(parquet_file)

print(f"Loaded {len(df):,} rows")

In [None]:
# Build the municipality code -> name lookup from the cached code list.
# The file is fetched only when no local copy exists yet.
if not municipalities_file.exists():
    print("Downloading municipality data...")
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    url = "https://www.volby.cz/opendata/ps2025/csv_od/pscoco.csv"
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    # Store the raw bytes unchanged so the cache mirrors the download.
    municipalities_file.write_bytes(response.content)

municipalities = pd.read_csv(municipalities_file, encoding='utf-8')
# Keyed by the OBEC code, valued by the NAZEVOBCE name column.
municipality_names = {
    obec: nazev
    for obec, nazev in zip(municipalities['OBEC'], municipalities['NAZEVOBCE'])
}
print(f"Loaded {len(municipality_names):,} municipalities")

In [None]:
# Build the party code -> short name lookup from the cached party code list.
# The file is fetched only when no local copy exists yet.
if not parties_file.exists():
    print("Downloading party data...")
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    url = "https://www.volby.cz/opendata/ps2025/csv_od/cvs.csv"
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    # Store the raw bytes unchanged so the cache mirrors the download.
    parties_file.write_bytes(response.content)

parties = pd.read_csv(parties_file, encoding='utf-8')
# NOTE(review): this lookup is keyed by the VSTRANA column, while the election
# results in `df` identify parties by KSTRANA. Confirm both columns share the
# same code space; otherwise the names shown for a party code will be wrong.
party_names = {
    code: abbreviation
    for code, abbreviation in zip(parties['VSTRANA'], parties['ZKRATKAV8'])
}
print(f"Loaded {len(party_names):,} parties")

## Calculate Party Statistics

In [None]:
# Overall results per party, ordered from most to fewest votes
votes_by_party = df.groupby('KSTRANA')['POC_HLASU'].sum()
party_totals = votes_by_party.sort_values(ascending=False)
total_votes = party_totals.sum()

# Share of all votes, as a fraction and as a percentage rounded to 4 decimals
party_probabilities = party_totals / total_votes
party_percentages = (party_totals / total_votes * 100).round(4)

party_summary = party_totals.to_frame('Total_Votes').assign(
    Percentage=party_percentages,
    Probability=party_probabilities,
)

# The seven parties with the most votes are the ones analysed further on
top_parties = party_summary.index[:7].tolist()
print("Top 7 parties analyzed:")
for party_code in top_parties:
    # Fall back to the raw code when no name is known for it
    party_name = party_names.get(party_code, party_code)
    pct = party_summary.at[party_code, 'Percentage']
    print(f"  - {party_name} ({party_code}): {pct:.2f}%")

## Calculate Municipality-Specific Party Performance

In [None]:
# Per-party results inside each municipality (OBEC).
# Num_Commissions is a row count; it equals the number of commissions only if
# each party has a single row per commission (assumption - verify on the data).
party_by_obec = (
    df.groupby(['KSTRANA', 'OBEC'])['POC_HLASU']
    .agg(Total_Votes='sum', Num_Commissions='count')
    .reset_index()
    .rename(columns={'KSTRANA': 'Party'})
)

# Total votes cast for all parties in each municipality
obec_totals = (
    df.groupby('OBEC', as_index=False)['POC_HLASU']
    .sum()
    .rename(columns={'POC_HLASU': 'OBEC_Total_Votes'})
)

party_by_obec = party_by_obec.merge(obec_totals, on='OBEC')

# Party share within the municipality, as a percentage and as a fraction
share_pct = party_by_obec['Total_Votes'] / party_by_obec['OBEC_Total_Votes'] * 100
party_by_obec['Vote_Share_In_OBEC'] = share_pct
party_by_obec['Probability_In_OBEC'] = share_pct / 100

## Find Zero-Vote Cases

In [None]:
# Find zero-vote cases: every (top party, commission) pair for which the data
# records no positive vote count.
all_commissions = df['ID_OKRSKY'].unique()
all_combinations = pd.MultiIndex.from_product(
    [top_parties, all_commissions],
    names=['Party', 'Commission_ID']
).to_frame(index=False)

# A pair counts as "present" only if at least one of its rows has a positive
# vote count. Requiring POC_HLASU > 0 keeps the detection correct whether the
# source lists zero results explicitly or omits them, and drop_duplicates
# prevents repeated rows from multiplying in the merge below.
received_votes = df['KSTRANA'].isin(top_parties) & (df['POC_HLASU'] > 0)
actual_combinations = (
    df.loc[received_votes, ['KSTRANA', 'ID_OKRSKY']]
    .drop_duplicates()
    .rename(columns={'KSTRANA': 'Party', 'ID_OKRSKY': 'Commission_ID'})
    .assign(Present=True)
)

# Left-join the observed pairs onto the full grid; pairs without a match
# (Present is NaN) are the zero-vote cases.
combined = all_combinations.merge(actual_combinations, on=['Party', 'Commission_ID'], how='left')
zero_votes = combined[combined['Present'].isna()][['Party', 'Commission_ID']].copy()

# Get commission info: total votes across all parties and the municipality
# code taken from the commission's first row.
commission_info = df.groupby('ID_OKRSKY').agg({
    'POC_HLASU': 'sum',
    'OBEC': 'first'
}).reset_index()
commission_info.columns = ['Commission_ID', 'Total_Votes_In_Commission', 'OBEC']

zero_votes_df = zero_votes.merge(commission_info, on='Commission_ID')

print(f"Found {len(zero_votes_df):,} zero-vote cases")

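## Calculate Probabilities

For a commission with $n$ total votes and a party whose vote share is $p$, the probability of the party receiving zero votes is modelled as $(1 - p)^n$, i.e. every vote is treated as an independent draw with the same share $p$. The share $p$ is taken from the commission's municipality where available, otherwise from the overall result.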

In [None]:
# Merge OBEC-specific probabilities.
# Any copy of these columns left by a previous run of this cell is dropped
# first, so re-running the cell does not produce suffixed duplicate columns
# (`_x` / `_y`) and a KeyError on the lookups below.
obec_columns = ['Vote_Share_In_OBEC', 'Probability_In_OBEC']
zero_votes_df = zero_votes_df.drop(columns=obec_columns, errors='ignore').merge(
    party_by_obec[['Party', 'OBEC'] + obec_columns],
    on=['Party', 'OBEC'],
    how='left'
)

# Add overall party stats (vectorised lookup by party code)
zero_votes_df['Party_Percentage'] = zero_votes_df['Party'].map(party_summary['Percentage'])
zero_votes_df['Party_Probability_Overall'] = zero_votes_df['Party'].map(party_summary['Probability'])

# Calculate probabilities: chance that none of the commission's votes goes to
# the party, (1 - p) ** n, using the overall share and the municipality share.
zero_votes_df['Probability_of_Zero_Overall'] = (
    1 - zero_votes_df['Party_Probability_Overall']
) ** zero_votes_df['Total_Votes_In_Commission']

zero_votes_df['Probability_of_Zero_OBEC'] = (
    1 - zero_votes_df['Probability_In_OBEC']
) ** zero_votes_df['Total_Votes_In_Commission']

# True where the left merge found a municipality-level share for the party
zero_votes_df['Used_OBEC_Probability'] = zero_votes_df['Probability_In_OBEC'].notna()

# Prefer the municipality-based estimate; fall back to the overall one
zero_votes_df['Probability_of_Zero'] = zero_votes_df['Probability_of_Zero_OBEC'].fillna(
    zero_votes_df['Probability_of_Zero_Overall']
)

# Cases are flagged only when the municipality-level estimate was available
zero_votes_df['Is_Suspicious'] = (
    (zero_votes_df['Probability_of_Zero'] < 0.01) &
    zero_votes_df['Used_OBEC_Probability']
)

zero_votes_df['Is_Highly_Suspicious'] = (
    (zero_votes_df['Probability_of_Zero'] < 0.001) &
    zero_votes_df['Used_OBEC_Probability']
)

## Summary Statistics

In [None]:
# Headline counts for the zero-vote analysis
n_cases = len(zero_votes_df)
n_with_obec_data = zero_votes_df['Used_OBEC_Probability'].sum()
n_suspicious = zero_votes_df['Is_Suspicious'].sum()
n_highly_suspicious = zero_votes_df['Is_Highly_Suspicious'].sum()

print("Summary Statistics:")
print(f"Total zero-vote cases analyzed: {n_cases:,}")
print(f"Cases with municipality-specific data: {n_with_obec_data:,}")
print(f"Suspicious cases (P < 1%): {n_suspicious}")
print(f"Highly suspicious (P < 0.1%): {n_highly_suspicious}")

## Top 30 Most Suspicious Cases

In [None]:
# The 30 least probable zero-vote cases among those that have a
# municipality-level probability
has_obec_probability = zero_votes_df['Used_OBEC_Probability']
most_suspicious = (
    zero_votes_df.loc[has_obec_probability]
    .sort_values(by='Probability_of_Zero')
    .head(30)
)

# Turn each case into a display record with readable party/municipality names
summary_data = []
for rank, (_, case) in enumerate(most_suspicious.iterrows(), start=1):
    party_code = case['Party']
    obec_code = case['OBEC']

    # The stricter flag takes precedence over the weaker one
    if case['Is_Highly_Suspicious']:
        status = 'HIGHLY SUSPICIOUS'
    elif case['Is_Suspicious']:
        status = 'SUSPICIOUS'
    else:
        status = 'Notable'

    record = {
        'Rank': rank,
        'Party': party_names.get(party_code, str(party_code)),
        'Party_Code': party_code,
        'Municipality': municipality_names.get(obec_code, f"Unknown ({obec_code})"),
        'OBEC_Code': obec_code,
        'Commission_ID': case['Commission_ID'],
        'Commission_Size': int(case['Total_Votes_In_Commission']),
        'Party_Share_in_Municipality_%': f"{case['Vote_Share_In_OBEC']:.2f}",
        'Party_National_%': f"{case['Party_Percentage']:.1f}",
        'Probability_of_Zero': f"{case['Probability_of_Zero_OBEC']:.2e}",
        'Status': status
    }
    summary_data.append(record)

summary_df = pd.DataFrame(summary_data)
summary_df

## Export Results

In [None]:
# Persist the ranked table (without the DataFrame index) for further analysis
output_file = 'suspicious_cases_top30.csv'
summary_df.to_csv(path_or_buf=output_file, encoding='utf-8', index=False)
print(f"Results saved to {output_file}")

## Detailed View of Top 10 Cases

In [None]:
# Print a multi-line report for each of the ten highest-ranked cases
separator = "-" * 100

print("Top 10 Most Suspicious Cases (Detailed View)")
print("=" * 100)

for _, case in summary_df.head(10).iterrows():
    report_lines = [
        f"\n{case['Rank']}. {case['Party']} (Code: {case['Party_Code']})",
        f"   Municipality: {case['Municipality']} (OBEC: {case['OBEC_Code']})",
        f"   Commission ID: {case['Commission_ID']}",
        f"   Commission size: {case['Commission_Size']} votes",
        f"   Party's share in this municipality: {case['Party_Share_in_Municipality_%']}%",
        f"   Party's national share: {case['Party_National_%']}%",
        f"   Probability of 0 votes: {case['Probability_of_Zero']}",
        f"   Status: {case['Status']}",
        separator,
    ]
    # One print call emits the same lines as printing each entry separately
    print("\n".join(report_lines))