In [None]:
import pandas as pd

# 1. Define the base election dates
events = pd.DataFrame({
    'year': [2000, 2004, 2008, 2012, 2016, 2020, 2024],
    'election_date': pd.to_datetime([
        '2000-11-07', '2004-11-02', '2008-11-04',
        '2012-11-06', '2016-11-08', '2020-11-03', '2024-11-05'
    ])
})  

# 2. Compute the start date (three months before each election)
#    pd.DateOffset: used to add or subtract a fixed period (here, 3 months)
events['start_date'] = events['election_date'] - pd.DateOffset(months=3)

# 3. Generate a row for each day between start_date and election_date
events = (
    events
    .assign(
        # For each row, create a daily date range from start_date to election_date
        date=lambda df: df.apply(
            lambda row: pd.date_range(
                start=row.start_date,      # range start
                end=row.election_date,     # range end
                freq='D'                   # 'D' = daily frequency
            ),
            axis=1                       # apply per row
        )
    )
    # explode turns each list in the 'date' column into separate rows
    .explode('date')
    # reset_index drops the old index and reindexes from 0,1,...
    .reset_index(drop=True)
)

# 4. Preview the first few rows
events.to_csv('events.csv', index=False)


   year       date
0  2000 2000-08-07
1  2000 2000-08-08
2  2000 2000-08-09
3  2000 2000-08-10
4  2000 2000-08-11
5  2000 2000-08-12
6  2000 2000-08-13
7  2000 2000-08-14
8  2000 2000-08-15
9  2000 2000-08-16


In [None]:
import pandas as pd

# 1968-2016 General (fivethirtyeight)
polls = pd.read_csv(
    'https://raw.githubusercontent.com/fivethirtyeight/data/master/polls/pres_pollaverages_1968-2016.csv',
    parse_dates=['modeldate']
)

# 2020 General (fivethirtyeight)
polls_2 = pd.read_csv(
    'https://raw.githubusercontent.com/fivethirtyeight/data/refs/heads/master/polls/2024-averages/presidential_general_averages_2024-09-12_uncorrected.csv',
    parse_dates=['date']
)

#2024

# National Average
National = polls[polls['state'] == 'National'].copy()
National2 = polls_2[polls_2['state'] == 'National'].copy()

National2['pct_trend_adjusted'] = National2['pct_trend_adjusted'].fillna(
    National2['pct_estimate']
)


data1 = National[['cycle','state','modeldate','candidate_name','pct_trend_adjusted']]
data2 = National2[['cycle','state','date','candidate','pct_trend_adjusted']]


# align column names
data2.columns=['cycle','state','modeldate','candidate_name','pct_trend_adjusted']
data2.sort_values('modeldate', ascending=True, inplace=True)

# main data for trading
polldata = pd.concat([data2,data1],axis=0)

polldata.to_csv('polldata.csv')


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data2.sort_values('modeldate', ascending=True, inplace=True)


In [19]:
polldata[polldata['cycle']==2016]

Unnamed: 0,cycle,state,modeldate,candidate_name,pct_trend_adjusted
12,2016,National,2016-03-03,Donald Trump,42.44001
38,2016,National,2016-03-03,Hillary Rodham Clinton,47.13026
65,2016,National,2016-03-04,Donald Trump,42.10922
92,2016,National,2016-03-04,Hillary Rodham Clinton,47.07659
119,2016,National,2016-03-05,Donald Trump,41.84980
...,...,...,...,...,...
28303,2016,National,2016-11-07,Donald Trump,42.01038
28360,2016,National,2016-11-07,Hillary Rodham Clinton,45.81397
28415,2016,National,2016-11-08,Gary Johnson,4.83034
28472,2016,National,2016-11-08,Donald Trump,42.01038


In [None]:
polls_2[(polls_2['cycle']==2016)]ㅈ

Unnamed: 0,candidate,date,pct_trend_adjusted,state,cycle,party,pct_estimate,hi,lo
