# Iowa Electronic Markets (IEM) 2000 Presidential Winner-Takes-All Market

## Data wrangling

In [1]:
import datetime 

# helper function to standardize date formatting between datasets
def format_date(date):
    """Convert a ``MM/DD/YY`` date string into ISO ``YYYY-MM-DD`` form.

    Parameters
    ----------
    date : str
        Date written as month/day/two-digit-year, e.g. ``'05/01/00'``.

    Returns
    -------
    str
        The same calendar date formatted as ``'YYYY-MM-DD'``.

    Raises
    ------
    ValueError
        If ``date`` does not match the ``'%m/%d/%y'`` pattern.
    """
    # Emit the century that strptime actually resolved for the two-digit year
    # instead of hard-coding a '20' prefix, which would mislabel years that
    # '%y' parses into the 1900s (e.g. '99' -> 1999).
    return datetime.datetime.strptime(date, '%m/%d/%y').strftime('%Y-%m-%d')

In [2]:
import pandas as pd

# Read the tab-delimited IEM file, discard the volume and price-range columns,
# rewrite each Date through format_date, and index the rows by that date.
# The column labels carry leading spaces; they are used exactly as they appear.
iem_prices_df = (
    pd.read_csv('iem_2000.txt', delimiter = '\t')
    .drop(columns = ['    Units', '    $Volume', '    LowPrice', '    HighPrice', '    AvgPrice'])
    .assign(Date = lambda prices: prices['Date'].apply(format_date))
    .set_index('Date')
)

### Democratic candidate prices 

In [3]:
# Select the rows whose contract label contains 'Dem'.
dem_prices_df = iem_prices_df.loc[
    lambda prices: prices['    Contract'].str.contains('Dem')
]
dem_prices_df

Unnamed: 0_level_0,Contract,LastPrice
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-05-01,Dem,0.550
2000-05-02,Dem,0.508
2000-05-03,Dem,0.510
2000-05-04,Dem,0.513
2000-05-05,Dem,0.524
...,...,...
2000-10-27,Dem,0.387
2000-10-28,Dem,0.287
2000-10-29,Dem,0.354
2000-10-30,Dem,0.375


### Republican candidate prices

In [4]:
# Select the rows whose contract label contains 'Rep'.
rep_prices_df = iem_prices_df.loc[
    lambda prices: prices['    Contract'].str.contains('Rep')
]
rep_prices_df

Unnamed: 0_level_0,Contract,LastPrice
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-05-01,Rep,0.500
2000-05-02,Rep,0.498
2000-05-03,Rep,0.491
2000-05-04,Rep,0.490
2000-05-05,Rep,0.482
...,...,...
2000-10-27,Rep,0.620
2000-10-28,Rep,0.681
2000-10-29,Rep,0.670
2000-10-30,Rep,0.610


In [5]:
# Democratic last price as a share of the combined Dem + Rep last price.
# The two series are combined by pandas on their shared Date index.
dem_normalized_prices = dem_prices_df['    LastPrice'].div(
    dem_prices_df['    LastPrice'].add(rep_prices_df['    LastPrice'])
)
# Wrap the ratio in a single-column frame labelled 'NormalizedPrice'.
dem_normalized_prices_df = dem_normalized_prices.to_frame(name = 'NormalizedPrice')
dem_normalized_prices_df

Unnamed: 0_level_0,NormalizedPrice
Date,Unnamed: 1_level_1
2000-05-01,0.523810
2000-05-02,0.504970
2000-05-03,0.509491
2000-05-04,0.511466
2000-05-05,0.520875
...,...
2000-10-27,0.384310
2000-10-28,0.296488
2000-10-29,0.345703
2000-10-30,0.380711


## Election coverage vs. election forecast

In [None]:
# Place the normalized Democratic price next to the topic-coverage columns,
# keeping only the index labels present in both frames (inner join).
# NOTE(review): `topic_coverage` is not defined in the cells visible here;
# confirm it is created before this cell runs on a fresh kernel.
election_prices_topics = pd.concat(
    [dem_normalized_prices_df, topic_coverage],
    axis = 'columns',
    join = 'inner',
)

election_prices_topics