In [1]:
import pandas as pd

# Location of the odds CSV inside the Kaggle input directory.
ODDS = '/kaggle/input/2024-us-presidential-election-odds/2024ElectionOdds.csv'

# Raw table: the 'date' column is parsed as a datetime, reading day before month.
df = pd.read_csv(filepath_or_buffer=ODDS, parse_dates=['date'], dayfirst=True)

# Our data is quoted in odds against and we need percent chances, so every
# column after the first is replaced by 100 / quote.
# NOTE(review): 100 / quote is the implied percentage for decimal-style quotes;
# confirm that this matches how the CSV quotes its odds.
# NOTE(review): assumes the first column is 'date' and all others are numeric.
odds_df = df.copy()
candidate_columns = odds_df.columns[1:]
# One vectorised division over the whole block of columns instead of a
# per-element lambda; `df` itself is left untouched.
odds_df[candidate_columns] = 100 / odds_df[candidate_columns]

Most of these names are silly, but let's graph the whole thing before we proceed any further. If we graph the data as is, we see the two front-runners and also a small rotating cast of people who take turns being the third choice. If we graph on a log scale, we can see everyone, even though the difference between having a 0.1% chance and having a 0% chance is really a matter of opinion.

In [2]:
from plotly.express import line
# Draw every candidate twice: first on a linear y-axis, then on a log y-axis.
# The title names the scale so the two otherwise identical figures can be told
# apart, and the labels replace plotly's generic wide-form names
# ('value' / 'variable') on the y-axis and legend.
for log_y in [False, True]:
    scale_name = 'log' if log_y else 'linear'
    line(
        data_frame=odds_df,
        x='date',
        y=odds_df.columns[1:],  # every column after the first
        log_y=log_y,
        height=900,
        title=f'Implied chance of winning by candidate ({scale_name} scale)',
        labels={'value': 'implied chance (%)', 'variable': 'candidate'},
    ).show()

We can pick one candidate to be our floor candidate - meaning that anyone who doesn't do as well as or better than that candidate has chances too small to graph - and we get a much clearer graph.

In [3]:
import numpy as np
# Floor candidate: on any given date, a candidate whose value is strictly below
# this candidate's value is hidden for that date.
FLOOR_CANDIDATE = 'Kamala Harris'

t_df = odds_df.copy()
candidate_columns = t_df.columns[1:]  # every column after the first

# Row-wise comparison of each candidate column against the floor column.
# Comparisons involving NaN are False, so rows where the floor value is missing
# are left untouched, and the floor column itself is never masked because a
# value is never strictly below itself.
below_floor = t_df[candidate_columns].lt(t_df[FLOOR_CANDIDATE], axis='index')
t_df[candidate_columns] = t_df[candidate_columns].mask(below_floor)

# Columns that are now entirely NaN have nothing left to draw, so drop them.
t_df = t_df.dropna(axis='columns', how='all')

line(
    data_frame=t_df,
    x='date',
    y=t_df.columns[1:],
    log_y=False,
    title=f'Implied chance of winning: candidates at or above {FLOOR_CANDIDATE}',
    labels={'value': 'implied chance (%)', 'variable': 'candidate'},
).show()