In [1214]:
import pandas as pd
import numpy as np

from mlgear.utils import show, display_columns
from itertools import chain


def clean_party(party):
    if not isinstance(party, str):
        return 'OTH'
    party = party.lower()
    if party.startswith('dem'):
        return 'DEM'
    elif party.startswith('rep'):
        return 'REP'
    elif party.startswith('ind'):
        return 'IND'
    else:
        return 'OTH'


def clean_election(election):
    if '-' in election:
        election = election.split('-')[0]
    if election == 'Sen':
        return 'senate'
    elif election == 'Pres':
        return 'president'
    elif election == 'Gov':
        return 'governor'
    else:
        return election
    
    
def clean_grade(grade):
    if grade in ['A+', 'A', 'A-']:
        return 'A'
    elif grade in ['A/B', 'B+', 'B', 'B-']:
        return 'B'
    elif grade in ['B/C', 'C+', 'C', 'C/D']:
        return 'C'
    elif grade in ['C-', 'D+', 'D', 'D-', 'F']:
        return 'D/F'
    else:
        return grade


def clean_candidate(candidate):
    if ',' in candidate:
        candidate = candidate.split(',')[0].strip()
    candidate = candidate.replace('.', '')
    candidate = candidate.title()
    if candidate.count(' ') == 3 and (candidate.endswith('Jr') or candidate.endswith('Sr')):
        candidate = candidate.split(' ')[2].strip()
    elif candidate.count(' ') == 3:
        candidate = candidate.split(' ')[3].strip()
    elif candidate.count(' ') == 1 or \
       (candidate.count(' ') == 2 and (candidate.endswith('Jr') or candidate.endswith('Sr'))):
            candidate = candidate.split(' ')[1].strip()
    elif candidate.count(' ') == 2:
        candidate = candidate.split(' ')[2].strip()
    return candidate


us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maine-1': 'M1',
    'Maine-2': 'M2',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nebraska-1': 'N1',
    'Nebraska-2': 'N2',
    'Nebraska-3': 'N3',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}

abbrev_us_state = dict(map(reversed, us_state_abbrev.items()))

def state_to_abbrev(state):
    return us_state_abbrev.get(state.title(), state.title())

def abbrev_to_state(abbrev):
    return abbrev_us_state.get(abbrev.upper(), abbrev.upper())


In [1215]:
polls = pd.read_csv('data/polls_auxiliary_dataset.tsv', sep='\t')
polls2 = pd.read_csv('data/polls_main_dataset.tsv', sep='\t')
polls = pd.concat((polls, polls2)).reset_index(drop=True)

polls['populationType'] = polls['populationType'].fillna('LV')
polls = polls[polls['populationType'].isin(['LV', 'Likely Voters', 'lv'])]
polls = polls.drop('populationType', axis=1)

polls['dem_actual'] = polls['finalTwoPartyVSDemocratic']
polls['rep_actual'] = polls['finalTwoPartyVSRepublican']
polls['dem_polled'] = polls['democratic']
polls['rep_polled'] = polls['republican']
polls['ind_polled'] = 0
polls['ind_actual'] = -1
polls['dem_candidate'] = 'NoName'
polls['undecided_polled'] = 100 - polls['dem_polled'] - polls['rep_polled']
polls['N'] = polls['numberOfRespondents']
polls['methodology'] = polls['Mode'].fillna('Unknown')
polls['pollster'] = polls['pollName']
polls['election'] = polls['election'].apply(clean_election)
polls['stage'] = 'general'
polls['population'] = 'lv'
polls['state'] = polls['state'].fillna('National').apply(abbrev_to_state)
polls['538grade'] = 'Unknown'
polls['days_until'] = (pd.to_datetime(polls['electionDate']) - pd.to_datetime(polls['endDate'])).dt.days

polls = polls[['election', 'state', 'year', 'stage', 'dem_candidate', 'pollster', 'N', 'methodology', 'population',
               'days_until', '538grade', 'dem_polled', 'rep_polled', 'ind_polled', 'undecided_polled',
               'dem_actual', 'rep_actual', 'ind_actual']]
show(polls)

        election          state  year    stage dem_candidate  \
0      president        Alabama  2004  general        NoName   
1      president        Alabama  2004  general        NoName   
2      president        Alabama  2004  general        NoName   
3      president        Alabama  2004  general        NoName   
4      president        Alabama  2004  general        NoName   
...          ...            ...   ...      ...           ...   
14660     senate  New Hampshire  2014  general        NoName   
14661     senate  New Hampshire  2014  general        NoName   
14662     senate  New Hampshire  2014  general        NoName   
14663     senate  New Hampshire  2014  general        NoName   
14664     senate  New Hampshire  2014  general        NoName   

                               pollster    N methodology population  \
0                            SurveyUSA   735     Unknown         lv   
1                       Capital Survey   645     Unknown         lv   
2                 

In [1216]:
polls_538_all = None

for p in ['senate', 'president', 'house', 'governor']:
    print('{}...'.format(p))
    polls_538 = pd.read_csv('data/{}_polls.csv'.format(p))
    polls_538['candidate'] = polls_538['candidate_name']
    polls_538['party'] = polls_538['candidate_party'].apply(clean_party)
    polls_538['poll_date'] = pd.to_datetime(polls_538['start_date'])
    polls_538['election_date'] = pd.to_datetime(polls_538['election_date'])
    polls_538['days_until'] = (polls_538['election_date'] - polls_538['poll_date']).dt.days
    polls_538['year'] = polls_538['cycle']
    polls_538['election'] = p
    polls_538['candidate'] = polls_538['candidate_name'].apply(clean_candidate)
    polls_538['N'] = polls_538['sample_size']
    polls_538['538grade'] = polls_538['fte_grade'].fillna('Unknown')
    
    polls_538 = polls_538[['election', 'state', 'year', 'stage', 'candidate', 'pollster', 'N', 'methodology',
                           'population', 'days_until', '538grade', 'party', 'pct']]
    if polls_538_all is None:
        polls_538_all = polls_538
    else:
        polls_538_all = pd.concat((polls_538_all, polls_538)).reset_index(drop=True)
        
show(polls_538_all)

senate...
president...
house...
governor...
       election       state  year    stage candidate  \
0        senate     Georgia  2020   runoff    Ossoff   
1        senate     Georgia  2020   runoff    Perdue   
2        senate     Georgia  2020   runoff   Warnock   
3        senate     Georgia  2020   runoff  Loeffler   
4        senate     Georgia  2020   runoff    Ossoff   
...         ...         ...   ...      ...       ...   
25989  governor      Nevada  2018  general    Laxalt   
25990  governor  New Mexico  2018  general   Grisham   
25991  governor  New Mexico  2018  general    Pearce   
25992  governor    New York  2018  general     Cuomo   
25993  governor    New York  2018  general    Gibson   

                       pollster           N        methodology population  \
0                    AtlasIntel  857.000000  Live Phone/Online         lv   
1                    AtlasIntel  857.000000  Live Phone/Online         lv   
2                    AtlasIntel  857.000000  Live Ph

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [1217]:
polls_538_all_ = polls_538_all
polls_538_all = polls_538_all[polls_538_all['party'] == 'DEM'].merge(polls_538_all[polls_538_all['party'] == 'REP'],
                                                                     how='left',
                                                                     on=['election', 'state', 'year', 'pollster', 'N',
                                                                        'stage', 'population', '538grade',
                                                                         'methodology', 'days_until'])

polls_538_all = polls_538_all.merge(polls_538_all_[polls_538_all_['party'] == 'IND'],
                                    how='left',
                                    on=['election', 'state', 'year', 'pollster', 'N',
                                        'stage', 'population', '538grade', 'methodology',
                                        'days_until'])

polls_538_all.columns = ['election', 'state', 'year', 'stage', 'dem_candidate', 'pollster', 'N', 'methodology',
                         'population', 'days_until', '538grade', 'dem_party', 'dem_polled', 'rep_candidate',
                         'rep_party', 'rep_polled', 'ind_candidate', 'ind_party', 'ind_polled']

show(polls_538_all)

       election       state  year    stage dem_candidate  \
0        senate     Georgia  2020   runoff        Ossoff   
1        senate     Georgia  2020   runoff        Ossoff   
2        senate     Georgia  2020   runoff       Warnock   
3        senate     Georgia  2020   runoff       Warnock   
4        senate     Georgia  2020   runoff        Ossoff   
...         ...         ...   ...      ...           ...   
24966  governor        Ohio  2018  general       Cordray   
24967  governor     Arizona  2018  general        Garcia   
24968  governor      Nevada  2018  general       Sisolak   
24969  governor  New Mexico  2018  general       Grisham   
24970  governor    New York  2018  general         Cuomo   

                       pollster           N        methodology population  \
0                    AtlasIntel  857.000000  Live Phone/Online         lv   
1                    AtlasIntel  857.000000  Live Phone/Online         lv   
2                    AtlasIntel  857.000000  Liv

In [1218]:
results = pd.read_csv('data/1976-2020-president.csv', encoding='ISO-8859-1')
results['party'] = results['party_simplified'].apply(clean_party)
results['election'] = 'president'
results['candidate'] = results['candidate'].fillna('None').apply(clean_candidate)
results['state'] = results['state'].apply(lambda s: s.title())
results['stage'] = 'general'
results['actual_pct'] = results['candidatevotes'] / results['totalvotes'] * 100
results = results[['election', 'state', 'year', 'stage', 'candidate', 'party', 'actual_pct']]

results_sen = pd.read_csv('data/1976-2020-senate.csv', encoding='ISO-8859-1')
results_sen['party'] = results_sen['party_simplified'].apply(clean_party)
results_sen['election'] = 'senate'
results_sen['candidate'] = results_sen['candidate'].fillna('None').apply(clean_candidate)
results_sen['state'] = results_sen['state'].apply(lambda s: s.title())
results_sen['stage'] = results_sen['stage'].apply(lambda x: 'general' if x == 'gen' else x)
results_sen['actual_pct'] = results_sen['candidatevotes'] / results_sen['totalvotes'] * 100
results_sen = results_sen[['election', 'state', 'year', 'stage', 'candidate', 'party', 'actual_pct']]

results = pd.concat((results, results_sen)).reset_index(drop=True)
show(results)

       election    state  year    stage candidate party  actual_pct
0     president  Alabama  1976  general    Carter   DEM   55.727269
1     president  Alabama  1976  general      Ford   REP   42.614871
2     president  Alabama  1976  general    Maddox   OTH    0.777613
3     president  Alabama  1976  general     Bubar   OTH    0.563808
4     president  Alabama  1976  general      Hall   OTH    0.165194
...         ...      ...   ...      ...       ...   ...         ...
7911     senate  Wyoming  2020  general     Votes   OTH    2.298359
7912     senate  Georgia  2021   runoff  Loeffler   REP   48.956147
7913     senate  Georgia  2021   runoff   Warnock   DEM   51.043853
7914     senate  Georgia  2021   runoff    Perdue   REP   49.383448
7915     senate  Georgia  2021   runoff    Ossoff   DEM   50.616552

[7916 rows x 7 columns]
(7916, 7)


In [1219]:
results_ = results
results = results[results['party'] == 'DEM'].merge(results[results['party'] == 'REP'],
                                                   how='left',
                                                   on=['election', 'state', 'year', 'stage'])

results = results.merge(results_[results_['party'] == 'OTH'],
                        how='left',
                        on=['election', 'state', 'year', 'stage'])

results.columns = ['election', 'state', 'year', 'stage', 'dem_candidate', 'dem_party', 'dem_actual', 'rep_candidate',
                   'rep_party', 'rep_actual', 'ind_candidate', 'ind_party', 'ind_actual']

show(results)

       election          state  year    stage dem_candidate dem_party  \
0     president        Alabama  1976  general        Carter       DEM   
1     president        Alabama  1976  general        Carter       DEM   
2     president        Alabama  1976  general        Carter       DEM   
3     president        Alabama  1976  general        Carter       DEM   
4     president        Alabama  1976  general        Carter       DEM   
...         ...            ...   ...      ...           ...       ...   
6706     senate  West Virginia  2020  general    Swearengin       DEM   
6707     senate        Georgia  2021   runoff       Warnock       DEM   
6708     senate        Georgia  2021   runoff       Warnock       DEM   
6709     senate        Georgia  2021   runoff        Ossoff       DEM   
6710     senate        Georgia  2021   runoff        Ossoff       DEM   

      dem_actual rep_candidate rep_party  rep_actual ind_candidate ind_party  \
0      55.727269          Ford       REP   

In [1220]:
results['candidate_sum'] = results['dem_actual'] + results['rep_actual'] + results['ind_actual']
results = results[results['candidate_sum'] <= 100]
results = results.sort_values('candidate_sum', ascending=False).drop_duplicates(['election', 'state', 'year', 'stage'],
                                                                                keep='first')

In [1221]:
results.loc[(results['election'] == 'senate') & (results['year'] == 2020) & (results['state'] == 'Arizona')]

Unnamed: 0,election,state,year,stage,dem_candidate,dem_party,dem_actual,rep_candidate,rep_party,rep_actual,ind_candidate,ind_party,ind_actual,candidate_sum
6024,senate,Arizona,2020,general,Kelly,DEM,51.156779,Mcsally,REP,48.808082,Dorchester,OTH,0.011296,99.976157


In [1222]:
polls_538_all = polls_538_all.merge(results, on=['election', 'state', 'year', 'stage', 'dem_candidate'], how='left')
polls_538_all['ind_polled'] = polls_538_all['ind_polled'].fillna(0)
polls_538_all['ind_actual'] = polls_538_all['ind_actual'].fillna(0)
polls_538_all['undecided_polled'] = 100 - polls_538_all['dem_polled'] - polls_538_all['rep_polled'] - polls_538_all['ind_polled']
polls_538_all = polls_538_all[['election', 'state', 'year', 'stage', 'dem_candidate', 'pollster', 'N',
                               'methodology', 'population', 'days_until', '538grade', 'dem_polled',
                               'rep_polled', 'ind_polled', 'undecided_polled' , 'dem_actual', 'rep_actual',
                               'ind_actual']]
show(polls_538_all)

       election       state  year    stage dem_candidate  \
0        senate     Georgia  2020   runoff        Ossoff   
1        senate     Georgia  2020   runoff        Ossoff   
2        senate     Georgia  2020   runoff       Warnock   
3        senate     Georgia  2020   runoff       Warnock   
4        senate     Georgia  2020   runoff        Ossoff   
...         ...         ...   ...      ...           ...   
24966  governor        Ohio  2018  general       Cordray   
24967  governor     Arizona  2018  general        Garcia   
24968  governor      Nevada  2018  general       Sisolak   
24969  governor  New Mexico  2018  general       Grisham   
24970  governor    New York  2018  general         Cuomo   

                       pollster           N        methodology population  \
0                    AtlasIntel  857.000000  Live Phone/Online         lv   
1                    AtlasIntel  857.000000  Live Phone/Online         lv   
2                    AtlasIntel  857.000000  Liv

In [1223]:
raw_polls = pd.read_csv('data/538_raw_polls.csv')
raw_polls = raw_polls.loc[raw_polls['type_simple'].isin(['Pres-G', 'Sen-G', 'Gov-G', 'House-G'])]
raw_polls['state'] = raw_polls['location'].apply(abbrev_to_state)
raw_polls['election'] = raw_polls['type_simple'].apply(clean_election)
raw_polls['stage'] = 'general'
raw_polls['N'] = raw_polls['samplesize']
raw_polls = raw_polls.merge(ratings_538, left_on='pollster_rating_id', right_on='Pollster Rating ID')
raw_polls['methodology'] = raw_polls['Methodology']
raw_polls['population'] = 'lv'
raw_polls['538grade'] = raw_polls['538 Grade']
raw_polls['days_until'] = (pd.to_datetime(raw_polls['electiondate']) - pd.to_datetime(raw_polls['polldate'])).dt.days
raw_polls = raw_polls[raw_polls['cand1_party'] == 'DEM']
raw_polls['dem_candidate'] = raw_polls['cand1_name'].apply(clean_candidate)
raw_polls['dem_polled'] = raw_polls['cand1_pct']
raw_polls['rep_polled'] = raw_polls['cand2_pct']
raw_polls['ind_polled'] = 0
raw_polls['dem_actual'] = raw_polls['cand1_actual']
raw_polls['rep_actual'] = raw_polls['cand2_actual']
raw_polls['ind_actual'] = 0
raw_polls['undecided_polled'] = 100 - raw_polls['dem_polled'] - raw_polls['rep_polled']
raw_polls = raw_polls[['election', 'state', 'year', 'stage', 'dem_candidate', 'pollster', 'N', 'methodology',
                       'population', 'days_until', '538grade', 'dem_polled', 'rep_polled', 'ind_polled',
                       'undecided_polled', 'dem_actual', 'rep_actual', 'ind_actual']]
show(raw_polls)

      election        state  year    stage dem_candidate  \
0     governor     New York  1998  general       Vallone   
1       senate     New York  1998  general       Schumer   
2     governor     New York  1998  general       Vallone   
3     governor        Texas  1998  general         Mauro   
4       senate     New York  1998  general       Schumer   
...        ...          ...   ...      ...           ...   
7714  governor    Louisiana  2019  general       Edwards   
7715  governor    Louisiana  2019  general       Edwards   
7716  governor  Mississippi  2019  general          Hood   
7717  governor    Louisiana  2019  general       Edwards   
7718  governor    Louisiana  2019  general       Edwards   

                               pollster           N      methodology  \
0              Blum & Weprin Associates  364.000000             Live   
1              Blum & Weprin Associates  364.000000             Live   
2              Blum & Weprin Associates  662.000000            

In [1224]:
polls_538_gcb = pd.read_csv('data/generic_ballot_polls.csv')
polls_538_gcb['poll_date'] = pd.to_datetime(polls_538_gcb['start_date'])
polls_538_gcb['election_date'] = pd.to_datetime('11/3/2020') # 2020
polls_538_gcb['year'] = polls_538_gcb['cycle']
polls_538_gcb.loc[polls_538_gcb['year'] == 2018, 'election_date'] = pd.to_datetime('11/6/2018')
polls_538_gcb['days_until'] = (polls_538_gcb['election_date'] - polls_538_gcb['poll_date']).dt.days
polls_538_gcb['election'] = 'GCB'
polls_538_gcb['state'] = 'National'
polls_538_gcb['N'] = polls_538_gcb['sample_size']
polls_538_gcb['stage'] = 'general'
polls_538_gcb['dem_candidate'] = 'GCB'
polls_538_gcb['dem_polled'] = polls_538_gcb['dem']
polls_538_gcb['rep_polled'] = polls_538_gcb['rep']
polls_538_gcb['ind_polled'] = 0
polls_538_gcb['dem_actual'] = 50.8 # 2020 
polls_538_gcb['rep_actual'] = 47.7 # 2020
polls_538_gcb['ind_actual'] = 0
polls_538_gcb.loc[polls_538_gcb['year'] == 2018, 'dem_actual'] = 53.4
polls_538_gcb.loc[polls_538_gcb['year'] == 2018, 'rep_actual'] = 44.8
polls_538_gcb['undecided_polled'] = 100 - polls_538_gcb['dem_polled'] - polls_538_gcb['rep_polled']
polls_538_gcb['538grade'] = polls_538_gcb['fte_grade'].fillna('Unknown')
polls_538_gcb = polls_538_gcb[['election', 'state', 'year', 'stage', 'dem_candidate', 'pollster', 'N', 'methodology',
                               'population', 'days_until', '538grade', 'dem_polled', 'rep_polled', 'ind_polled',
                               'undecided_polled', 'dem_actual', 'rep_actual', 'ind_actual']]
show(polls_538_gcb)

     election     state  year    stage dem_candidate  \
0         GCB  National  2020  general           GCB   
1         GCB  National  2020  general           GCB   
2         GCB  National  2020  general           GCB   
3         GCB  National  2020  general           GCB   
4         GCB  National  2020  general           GCB   
...       ...       ...   ...      ...           ...   
2701      GCB  National  2018  general           GCB   
2702      GCB  National  2018  general           GCB   
2703      GCB  National  2018  general           GCB   
2704      GCB  National  2018  general           GCB   
2705      GCB  National  2018  general           GCB   

                                          pollster           N methodology  \
0                                           YouGov 1500.000000      Online   
1                                           YouGov 1363.000000      Online   
2     John Zogby Strategies/EMI Research Solutions 1008.000000      Online   
3              

In [1225]:
show(polls_538_all[polls_538_all['undecided_polled'] >= 20])

       election          state  year           stage dem_candidate  \
131      senate        Georgia  2020  jungle primary       Warnock   
132      senate        Georgia  2020  jungle primary       Warnock   
133      senate        Georgia  2020  jungle primary        Tarver   
134      senate        Georgia  2020  jungle primary        Tarver   
135      senate        Georgia  2020  jungle primary     Lieberman   
...         ...            ...   ...             ...           ...   
24957  governor  Massachusetts  2018         general      Gonzalez   
24961  governor       Illinois  2018         general      Pritzker   
24964  governor       Michigan  2018         general       Whitmer   
24965  governor  Massachusetts  2018         general      Gonzalez   
24970  governor       New York  2018         general         Cuomo   

                      pollster           N        methodology population  \
131    Landmark Communications  500.000000     IVR/Live Phone         lv   
132    

In [1226]:
polls = pd.concat((polls, polls_538_all, polls_538_gcb, raw_polls)).reset_index(drop=True)
polls['days_until'] = polls['days_until'].astype(int)
polls['N'] = polls['N'].fillna(-1).astype(int)
polls = polls[polls['N'] != -1]
polls = polls[polls['population'] == 'lv']
polls = polls[polls['undecided_polled'] < 20]
polls = polls.dropna(subset=['dem_actual'])
polls = polls[['election', 'state', 'year', 'stage', 'dem_candidate', 'pollster', 'N', 'methodology',
               'days_until', '538grade', 'dem_polled', 'rep_polled', 'ind_polled', 'undecided_polled',
               'dem_actual', 'rep_actual', 'ind_actual']]
polls['dem_diff'] = polls['dem_polled'] + polls['undecided_polled'] / 2 - polls['dem_actual']
polls['rep_diff'] = polls['rep_polled'] + polls['undecided_polled'] / 2 - polls['rep_actual']
polls['polled_margin'] = polls['dem_polled'] - polls['rep_polled']
polls['actual_margin'] = polls['dem_actual'] - polls['rep_actual']
polls['margin_diff'] = polls['polled_margin'] - polls['actual_margin']
polls['margin_abs_diff'] = np.abs(polls['polled_margin'] - polls['actual_margin'])
polls['methodology'] = polls['methodology'].fillna('Unknown')
polls['538grade'] = polls['538grade'].fillna('Unknown')
polls['538grade_simple'] = polls['538grade'].apply(clean_grade)
polls = polls.drop_duplicates()
show(polls)

        election        state  year    stage dem_candidate  \
0      president      Alabama  2004  general        NoName   
1      president      Alabama  2004  general        NoName   
2      president      Alabama  2004  general        NoName   
3      president      Alabama  2004  general        NoName   
4      president      Alabama  2004  general        NoName   
...          ...          ...   ...      ...           ...   
48182      House         NC-9  2019  general      Mccready   
48184   governor    Louisiana  2019  general       Edwards   
48186   governor  Mississippi  2019  general          Hood   
48187   governor    Louisiana  2019  general       Edwards   
48188   governor    Louisiana  2019  general       Edwards   

                                pollster     N  methodology  days_until  \
0                             SurveyUSA    735      Unknown           6   
1                        Capital Survey    645      Unknown          19   
2                             

In [1227]:
display_columns(polls)

## 538grade ##
Unknown    51.681881
B/C        10.220499
D-          6.506607
C+          4.201576
B           4.176566
B+          3.492977
A           3.322079
A+          2.650994
B-          2.513443
A/B         2.446751
C           2.250844
F           2.179984
C-          1.708974
A-          1.392189
C/D         1.092076
D+          0.162561
Name: 538grade, dtype: float64
-
-
## 538grade_simple ##
Unknown    51.681881
C          17.764995
B          12.629736
D/F        10.558126
A           7.365262
Name: 538grade_simple, dtype: float64
-
-
## N ##
Mean: 1053.1250468925848 Median: 699.0 SD: 2028.3532036076006
-
-
## actual_margin ##
Mean: 1.9309549005921287 Median: 2.3500000000000014 SD: 14.164612071185505
-
-
## days_until ##
Mean: 45.834312867325245 Median: 16.0 SD: 77.8533498086873
-
-
## dem_actual ##
Mean: 50.33889503417561 Median: 50.46248715310001 SD: 7.253225650594355
-
-
## dem_candidate ##
NoName       50.485599
Biden        11.929474
Obama         4.322454
GCB       

In [1228]:
polls.to_csv('polls.csv', index=False)