Some dates are missing from each scenario. For now, I forward-fill each gap using the latest available estimate. In a later iteration of the model, I would try to leverage national poll data (after some preprocessing) to capture more recent trends, since national polls are run more frequently.

In [1]:
import os
import pandas as pd

In [2]:
# Locate the scenario result files (national and provincial) and build the
# daily calendar that each scenario file is later merged onto.

directory = '/Users/kbeebe/Desktop/economist_forecasting/election_forecaster/election_forecaster/results/'

# List the results folder once, then split the entries by filename prefix
result_files = os.listdir(directory)
filelist_national = [name for name in result_files if name.startswith('national')]
filelist_provincial = [name for name in result_files if name.startswith('provincial')]

# Campaign window
campaign_start = "2024-04-02"
election_day = "2024-06-15"

# One entry per calendar day, beginning on campaign_start.
# NOTE(review): the range ends on 2024-06-16, one day after election_day --
# confirm the extra day is intentional.
date_range = pd.date_range(start=campaign_start, end='2024-06-16', freq='D')

# Single-column frame used as the left side of the merges below, so that
# every calendar day is present in the merged output
template_df = pd.DataFrame({'date': date_range})

In [3]:
# Fill the date gaps in every national scenario file and overwrite the file
# in place.
for file in filelist_national:
    path = os.path.join(directory, file)

    # Convert the 'date' column to datetimes so it joins against the
    # datetime-typed template
    df = pd.read_csv(path)
    df['date'] = pd.DatetimeIndex(df['date'])

    # Left-join onto the calendar so every template date is kept, then carry
    # the most recent values forward into the gaps. Dates earlier than the
    # first available row have nothing to carry forward and stay empty.
    df = template_df.merge(df, on='date', how='left').ffill()

    # Write the filled frame back over the original file
    df.to_csv(path, index=False)

In [6]:
# Fill the date gaps in every provincial scenario file, one province at a
# time, and overwrite the file in place. The filled frames are also kept in
# memory in dfs_by_province (keyed by file name, one frame per file holding
# all of that file's provinces).
dfs_by_province = {}
for file in filelist_provincial:
    path = os.path.join(directory, file)

    # Convert the 'date' column to datetimes so it joins against the
    # datetime-typed template
    df = pd.read_csv(path)
    df['date'] = pd.DatetimeIndex(df['date'])

    # Handle each province separately so values are never carried forward
    # from one province into another
    grouped = df.groupby('province')
    filled_dfs = []
    for province, group_df in grouped:
        # Left-join onto the calendar so every template date is kept, then
        # carry the most recent values forward into the gaps
        merged_df = pd.merge(template_df, group_df, on='date', how='left')
        filled_df = merged_df.ffill()

        # Forward fill cannot reach calendar dates that precede this
        # province's first row, which would leave 'province' empty there and
        # make those rows unattributable once all provinces are concatenated.
        # Label every row with the group key explicitly.
        filled_df['province'] = province
        filled_dfs.append(filled_df)

    # Stack the per-province frames and keep the combined result in memory
    result_df = pd.concat(filled_dfs)
    dfs_by_province[file] = result_df

    # Write the filled frame back over the original file
    result_df.to_csv(path, index=False)