# How did polling averages and results differ in 2020?

#### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import us
import tabula
import altair as alt
import altair_grid as altgrid

In [3]:
# Register the theme object exposed by altair_grid under the name "grid" ...
alt.themes.register("grid", altgrid.theme)
# ... then enable it by that name so Altair uses it as the active theme.
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [4]:
# Raise pandas' display limits to 1,000 columns and 1,000 rows so that frames
# shown in this notebook are not truncated.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

---

## Read data

#### Make a list of the most 'competitive' states from 2020, [according to Inside Elections](http://www.insideelections.com/ratings/president/2020-presidential-ratings-october-28-2020)

In [5]:
# CSV export of the published Google Sheet with the competitive-state ratings.
competetive_url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vStoirfqFdugAT8mfQFlmbVzgm8IKA2GS1_nfsysTMp2oXx7SpR6Sz5MiNoaRYPPcB5Fz7ZiN3Hx35U/pub?gid=0&single=true&output=csv"

# Read `year` as a string so it can be matched against the literal "2020".
competetive_src = pd.read_csv(competetive_url, dtype={"year": str})

In [6]:
# Keep only the 2020 rows and pull the state names out as a plain Python list.
is_2020 = competetive_src["year"] == "2020"
competetive = competetive_src.loc[is_2020, "state"].tolist()

In [7]:
# Display the list of 2020 state names built in the previous cell.
competetive

['Iowa',
 'Ohio',
 'Texas',
 'Arizona',
 'Florida',
 'Georgia',
 'North Carolina',
 'Michigan',
 'Pennsylvania',
 'Wisconsin',
 'Minnesota',
 'New Hampshire',
 'Nevada',
 'Alaska',
 'Kansas',
 'Missouri',
 'Montana',
 'South Carolina',
 'Utah']

#### Polls

In [8]:
# Load the 2020 state-level polling averages from the processed-data folder.
polls = pd.read_csv("data/processed/2020_polling_average_states_538.csv")

In [9]:
# Preview the first five rows of the polling averages.
polls.head()

Unnamed: 0,state,gop_polling,dem_polling,gop_polling_margin,dem_polling_margin,year,description
0,Alabama,57.36,37.83,19.53,-19.53,2020,538 polling average
1,Alaska,51.23,43.57,7.66,-7.66,2020,538 polling average
2,Arizona,46.1,48.71,-2.6,2.6,2020,538 polling average
3,Arkansas,58.95,36.18,22.77,-22.77,2020,538 polling average
4,California,32.44,61.62,-29.19,29.19,2020,538 polling average


#### Results

In [10]:
# Load the 2020 state-level election results from the processed-data folder.
results = pd.read_csv("data/processed/2020_election_results_states_fec.csv")

In [11]:
# Preview the first five rows of the election results.
results.head()

Unnamed: 0,state,gop_vote_pct,dem_vote_pct,other_vote_pct,year
0,Alabama,62.03,36.57,1.4,2020
1,Alaska,52.83,42.77,4.39,2020
2,Arizona,49.06,49.36,1.58,2020
3,Arkansas,62.4,34.78,2.83,2020
4,California,34.32,63.48,2.2,2020


In [12]:
# Join the polling averages to the election results on state and year.
# how="inner" is the pandas default, spelled out here because any row whose
# state label appears in only one of the two files is dropped by the join.
# validate="one_to_one" makes pandas raise a MergeError, instead of silently
# multiplying rows, if either file repeats a (state, year) pair.
df = pd.merge(
    polls,
    results,
    how="inner",
    on=["state", "year"],
    validate="one_to_one",
)

In [13]:
# Keep only the columns used in the polling-vs-result comparison.
# The explicit .copy() makes `df` an independent frame, so adding columns to
# it in later cells does not raise SettingWithCopyWarning.
df = df[
    ["year", "state", "gop_polling", "gop_vote_pct", "dem_polling", "dem_vote_pct"]
].copy()

In [14]:
# Polling error per party: polling average minus actual vote share, so a
# negative value means the polls understated that party's share.
# .assign() returns a new frame instead of writing columns in place onto a
# frame produced by column selection, which avoids SettingWithCopyWarning and
# keeps this cell safe to re-run.
df = df.assign(
    gop_diff=df["gop_polling"] - df["gop_vote_pct"],
    dem_diff=df["dem_polling"] - df["dem_vote_pct"],
)

In [15]:
# Name of the column holding the larger vote share in each row
# ("gop_vote_pct" or "dem_vote_pct").
winning_column = df[["gop_vote_pct", "dem_vote_pct"]].idxmax(axis=1)

# Strip the column suffix and upper-case what is left to get "GOP" / "DEM".
df["winner"] = winning_column.str.replace("_vote_pct", "").str.upper()

#### How far off were the polls for each party?

In [16]:
# Mean GOP polling error (polling minus result) over all rows of `df`.
df["gop_diff"].mean().round(2)

-4.11

In [17]:
# Mean Democratic polling error (polling minus result) over all rows of `df`.
df["dem_diff"].mean().round(2)

1.17

---

#### Just the competitive states

In [18]:
# Boolean mask: True for rows whose state is on the competitive-states list.
in_competetive = df["state"].isin(competetive)
competetive_df = df.loc[in_competetive]

In [19]:
# Preview the first five rows of the competitive-states subset.
competetive_df.head()

Unnamed: 0,year,state,gop_polling,gop_vote_pct,dem_polling,dem_vote_pct,gop_diff,dem_diff,winner
1,2020,Alaska,51.23,52.83,43.57,42.77,-1.6,0.8,GOP
2,2020,Arizona,46.1,49.06,48.71,49.36,-2.96,-0.65,DEM
8,2020,Florida,46.62,51.22,49.08,47.86,-4.6,1.22,GOP
9,2020,Georgia,47.37,49.24,48.54,49.47,-1.87,-0.93,DEM
14,2020,Iowa,47.6,53.09,46.31,44.89,-5.49,1.42,GOP


#### How far off were the polls for each party in competitive states?

In [20]:
# Mean GOP polling error (polling minus result) in the competitive states only.
competetive_df["gop_diff"].mean().round(2)

-3.92

In [21]:
# Mean Democratic polling error (polling minus result) in the competitive states only.
competetive_df["dem_diff"].mean().round(2)

1.06

---

#### Melt the table for competitive races for charting

In [22]:
# Reshape to long form: one row per (state, party) pair, with the party's
# polling error in `diff_value` and the source column name in `party`.
df_long = competetive_df.melt(
    id_vars=["state"],
    value_vars=["dem_diff", "gop_diff"],
    var_name="party",
    value_name="diff_value",
)

In [23]:
# Swap the melted column names for the labels shown in the chart facets.
party_labels = {"dem_diff": "Democrats", "gop_diff": "Republicans"}
df_long["party"] = df_long["party"].replace(party_labels)

In [24]:
# Horizontal bar chart of polling error (polling average minus result) by
# state, split into one facet per party. Bars are colored by sign: values
# above zero (polls overstated the party's share) get the first color, and
# everything else (zero or understated) gets the second.
alt.Chart(df_long).mark_bar().encode(
    x=alt.X("diff_value", title=" ", axis=alt.Axis()),
    y=alt.Y("state", title=" "),
    color=alt.condition(
        alt.datum.diff_value > 0,
        alt.value("#00d4d8"),  # The positive color
        alt.value("#d95f1a"),  # The negative color
    ),
    facet=alt.Facet(
        "party",
        columns=2,
        title=" ",
        header=alt.Header(labelFontSize=15, labelFont="Summit Sans"),
    ),
).properties(
    height=400,
    width=300,
    # Spelling of "competitive" corrected in the reader-facing title.
    title="2020 presidential in competitive states: PPT difference between polling and result",
)

---

#### Export

In [25]:
# Write the competitive-states comparison table to CSV, omitting the index.
# NOTE(review): the file name keeps the "competetive" spelling; confirm whether
# anything else reads this path before renaming it.
competetive_df.to_csv("data/processed/competetive_races_2020.csv", index=False)