In [34]:
import os
import re
import json

import numpy as np
import pandas as pd
import geopandas as gpd
from tqdm import tqdm
tqdm.pandas()

In [29]:
# Values of the `region` column that are kept when computing region-level vote shares.
REGIONS = ['Montréal', 'Greater Toronto Area', 'Vancouver']

# Values of the `CSD` column that are kept when computing CSD-level vote shares.
# NOTE(review): "CSD" presumably stands for census subdivision — confirm.
CSDs = [
    'Halifax', 'Québec', 'Island of Montreal',
    'Ottawa', 'Toronto', 'Winnipeg',
    'Calgary', 'Edmonton', 'Vancouver',
]

# Full party name -> short tag; the tags become the keys of the exported JSON.
# Party names missing from this map are exported under their full name.
PARTY_TAGS_MAP = {
    # Parties with their own tag
    'Liberal Party of Canada':      'lib',
    'Conservative Party of Canada': 'con',
    'New Democratic Party':         'ndp',
    'Bloc Québécois':               'bloc',
    'Green Party of Canada':        'grn',
    "People's Party of Canada":     'ppc',
    # Catch-all bucket
    'Other':                        'oth',
}

In [36]:
# Fixed seed for the placeholder 2025 vote counts, so that a Restart & Run All
# regenerates exactly the same CSV/JSON outputs instead of new random ones.
DUMMY_VOTES_SEED = 2025

df_results_raw = pd.read_csv('../data/fed_results.csv')
df_ridings = pd.read_csv('../data/fed_ridings_tagged.csv')

# Filter out rejected ballots. The explicit copy makes df_results an independent
# frame, so the column added below is never written into a slice of the raw data.
df_results = df_results_raw[df_results_raw['party_name'] != 'Rejected'].copy()

# Add dummy results for 2025: one integer in [0, 50000) per remaining row,
# drawn from a seeded generator (same value range as the previous
# np.random.choice(50000, n) call, but reproducible).
dummy_rng = np.random.default_rng(DUMMY_VOTES_SEED)
df_results['2025_num_votes'] = dummy_rng.integers(0, 50000, size=len(df_results))

Compute each party's percentage of the vote in every CSD we target and in every region we target, and save the CSD results and the region results as separate files.

In [27]:
# Attach each riding's CSD and region labels to its result rows.
# Left join on FED_NUM: every result row is kept, even without riding metadata.
riding_labels = df_ridings[['FED_NUM', 'CSD', 'region']]
df = pd.merge(df_results, riding_labels, how='left', on='FED_NUM')

def compute_vote_shares(df, group_col, output_path, groups=None):
    """
    Compute percentage vote shares by group_col (CSD or region) and save them as CSV.

    Parameters
    ----------
    df : pandas.DataFrame
        Result rows with the columns `group_col`, 'party_name',
        '2021_num_votes' and '2025_num_votes'. The frame is not modified.
    group_col : str
        Column to aggregate over ('CSD' or 'region' in this notebook).
    output_path : str or path-like or file-like
        Destination handed to `DataFrame.to_csv`. When it is a filesystem path,
        missing parent directories are created first.
    groups : iterable, optional
        Values of `group_col` to keep. When omitted, the module-level `CSDs`
        list is used for group_col == 'CSD' and `REGIONS` for anything else.

    Returns
    -------
    pandas.DataFrame
        One row per (group, party) with the columns `group_col`, 'party_name',
        '2021_pct_vote', '2025_pct_vote' and 'pct_vote_change'
        (2025 minus 2021, in percentage points), all rounded to 4 decimals.
        A group whose total is 0 yields NaN percentages (0 / 0).
    """
    vote_cols = ['2021_num_votes', '2025_num_votes']
    pct_cols = ['2021_pct_vote', '2025_pct_vote', 'pct_vote_change']

    # Filter for desired regions or CSDs
    if groups is None:
        groups = CSDs if group_col == 'CSD' else REGIONS
    df = df[df[group_col].isin(list(groups))]

    # Sum votes by group and party
    grouped = df.groupby([group_col, 'party_name'])[vote_cols].sum().reset_index()

    # Compute total votes per group (whatever rows the caller passed in)
    totals = grouped.groupby(group_col)[vote_cols].sum().rename(columns={
        '2021_num_votes': '2021_total',
        '2025_num_votes': '2025_total'
    }).reset_index()

    # Merge totals back
    merged = grouped.merge(totals, on=group_col)

    # Calculate percentages
    merged['2021_pct_vote'] = (merged['2021_num_votes'] / merged['2021_total']) * 100
    merged['2025_pct_vote'] = (merged['2025_num_votes'] / merged['2025_total']) * 100
    merged['pct_vote_change'] = merged['2025_pct_vote'] - merged['2021_pct_vote']

    # Final formatting. Take an explicit copy before rounding: assigning into a
    # column slice of `merged` raises SettingWithCopyWarning on pandas < 3.
    final = merged[[group_col, 'party_name'] + pct_cols].copy()
    final[pct_cols] = final[pct_cols].round(4)

    # Save to CSV, creating the target directory on a fresh checkout
    if isinstance(output_path, (str, os.PathLike)):
        out_dir = os.path.dirname(os.fspath(output_path))
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
    final.to_csv(output_path, index=False)
    return final

# Build (and save) the vote-share table for each grouping column, CSDs first
csd_final, region_final = [
    compute_vote_shares(df.copy(), column, csv_path)
    for column, csv_path in [
        ('CSD', '../data/results/votes/csd_results.csv'),
        ('region', '../data/results/votes/region_results.csv'),
    ]
]

In [35]:
# Helper function to convert a dataframe to the nested dictionary format
def df_to_nested_dict(df, group_col, party_tags=None):
    """
    Convert a vote-share table into a nested, JSON-serializable dictionary.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns `group_col`, 'party_name', '2021_pct_vote',
        '2025_pct_vote' and 'pct_vote_change'.
    group_col : str
        Column whose distinct values become the top-level keys.
    party_tags : mapping, optional
        Full party name -> short tag. Defaults to the module-level
        `PARTY_TAGS_MAP`. Party names missing from the mapping are kept as-is.

    Returns
    -------
    dict
        {group: {metric: {party_tag: value}}} for the three metric columns.
        Values are plain floats; missing values (NaN) become None so that
        `json.dump` writes `null` instead of the non-standard `NaN` token.
    """
    if party_tags is None:
        party_tags = PARTY_TAGS_MAP

    metrics = ('2021_pct_vote', '2025_pct_vote', 'pct_vote_change')

    output = {}
    for name, group in df.groupby(group_col):
        # Resolve the tags once per group, then walk each metric column directly
        # instead of materializing a Series per row with iterrows().
        tags = [party_tags.get(party, party) for party in group['party_name']]
        output[name] = {
            metric: {
                tag: None if pd.isna(value) else float(value)
                for tag, value in zip(tags, group[metric])
            }
            for metric in metrics
        }
    return output

# Nested {group: {metric: {party_tag: value}}} dictionaries for each grouping
csd_results_dict = df_to_nested_dict(csd_final, 'CSD')
region_results_dict = df_to_nested_dict(region_final, 'region')

# Write each dictionary to its JSON file (CSD results first, then regions)
for json_path, payload in (
    ('../data/results/votes/csd_results.json', csd_results_dict),
    ('../data/results/votes/region_results.json', region_results_dict),
):
    with open(json_path, 'w') as out_file:
        json.dump(payload, out_file, indent=4)