# How do polling averages and results differ?

#### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import us
import tabula
import altair as alt
import altair_stiles as altstiles

In [3]:
# Register the altair_stiles theme under the name "stiles", then enable the theme named "grid".
# NOTE(review): the name registered here ("stiles") is not the one enabled ("grid") — confirm which theme is intended.
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [4]:
# Raise pandas' display limits so wide/long frames are shown without truncation.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

In [5]:
# Election year for this notebook, kept as a string (the ratings sheet's "year" column is read with dtype str).
year = "2016"

---

## Read data

#### Make a list of the most 'competitive' states from 2016, [according to Inside Elections](https://www.insideelections.com/ratings/president/2016-presidential-ratings-november-7-2016)

In [6]:
# Published Google Sheets CSV export holding the competitive-state ratings.
ratings_url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vStoirfqFdugAT8mfQFlmbVzgm8IKA2GS1_nfsysTMp2oXx7SpR6Sz5MiNoaRYPPcB5Fz7ZiN3Hx35U/pub?gid=0&single=true&output=csv"

# Read "year" as text so it can be compared against string year values.
competetive_src = pd.read_csv(ratings_url, dtype={"year": str})

In [7]:
# List the states rated competitive for the configured election year.
# The filter follows `year` from the configuration cell instead of a hard-coded
# "2016", so changing the year in one place is enough.
is_target_year = competetive_src["year"] == year
competetive = competetive_src.loc[is_target_year, "state"].tolist()

In [8]:
competetive

['Ohio',
 'Florida',
 'North Carolina',
 'Nevada',
 'Wisconsin',
 'Michigan',
 'New Hampshire',
 'Pennsylvania',
 'Colorado',
 'Maine',
 'Minnesota',
 'Virginia',
 'Arizona',
 'Iowa',
 'Georgia',
 'Indiana',
 'Utah',
 'Missouri',
 'Nebraska']

#### Polls

In [9]:
# Load the processed state-level polling averages for the configured year.
# The path is built from `year` so it stays in sync with the configuration cell
# (for year = "2016" this is the same file as before).
polls = pd.read_csv(f"data/processed/{year}_polling_average_states_538.csv")

In [10]:
polls.head()

Unnamed: 0,state,description,gop_polling,dem_polling,other_polling,gop_polling_margin,dem_polling_margin,year
0,Alabama,538 polling average,55.4,32.6,5.2,22.8,-22.8,2016
1,Alaska,538 polling average,42.1,36.4,9.0,5.7,-5.7,2016
2,Arizona,538 polling average,44.4,42.7,5.6,1.7,-1.7,2016
3,Arkansas,538 polling average,52.6,32.4,5.6,20.2,-20.2,2016
4,California,538 polling average,31.1,54.8,4.6,-23.7,23.7,2016


#### Results

In [11]:
# Load the processed state-level election results for the configured year.
# The path is built from `year` so it stays in sync with the configuration cell
# (for year = "2016" this is the same file as before).
results = pd.read_csv(f"data/processed/{year}_election_results_states_fec.csv")

In [12]:
results.head()

Unnamed: 0,state,gop_vote_pct,dem_vote_pct,other_vote_pct,year
0,Alabama,62.08,34.36,3.56,2016
1,Alaska,51.28,36.55,12.17,2016
2,Arizona,48.67,45.13,6.2,2016
3,Arkansas,60.57,33.65,5.78,2016
4,California,31.62,61.73,6.66,2016


#### Merge 'em

In [13]:
# Join polling averages to results; only state/year pairs present in both frames are kept.
df = polls.merge(results, how="inner", on=["state", "year"])

In [14]:
# Keep only the identifiers plus the two major-party polling and vote-share columns.
analysis_columns = [
    "year",
    "state",
    "gop_polling",
    "gop_vote_pct",
    "dem_polling",
    "dem_vote_pct",
]
df = df[analysis_columns]

In [15]:
# Polling figure minus actual vote share, per party.
# A negative value means the polling figure was below the vote share.
df["gop_diff"] = df["gop_polling"].sub(df["gop_vote_pct"])
df["dem_diff"] = df["dem_polling"].sub(df["dem_vote_pct"])

In [16]:
# Label each state with the party whose vote share is larger ("GOP" or "DEM").
# idxmax returns the name of the winning column; the "_vote_pct" suffix is then
# stripped and the remainder upper-cased.
# regex=False makes the suffix removal a plain literal replacement regardless of
# the pandas version's default for `regex`.
df["winner"] = (
    df[["gop_vote_pct", "dem_vote_pct"]]
    .idxmax(axis=1)
    .str.replace("_vote_pct", "", regex=False)
    .str.upper()
)

#### How off were the polls for each party?

In [17]:
# Average Democratic polling-minus-result difference across all states, to two decimals.
round(df["dem_diff"].mean(), 2)

-2.51

In [18]:
# Average Republican polling-minus-result difference across all states, to two decimals.
round(df["gop_diff"].mean(), 2)

-6.71

#### Just the competitive states

In [19]:
# Restrict the merged frame to the states on the competitive list.
in_competetive = df["state"].isin(competetive)
competetive_df = df.loc[in_competetive]

In [20]:
competetive_df.head()

Unnamed: 0,year,state,gop_polling,gop_vote_pct,dem_polling,dem_vote_pct,gop_diff,dem_diff,winner
2,2016,Arizona,44.4,48.67,42.7,45.13,-4.27,-2.43,GOP
5,2016,Colorado,39.7,43.25,43.7,48.16,-3.55,-4.46,DEM
8,2016,Florida,44.4,49.02,45.7,47.82,-4.62,-2.12,GOP
9,2016,Georgia,47.2,50.77,43.5,45.64,-3.57,-2.14,GOP
13,2016,Indiana,47.5,56.94,37.1,37.77,-9.44,-0.67,GOP


#### How off were the polls for each party in competitive states?

In [21]:
# Average Republican polling-minus-result difference in the competitive states, to two decimals.
round(competetive_df["gop_diff"].mean(), 2)

-6.27

In [22]:
# Average Democratic polling-minus-result difference in the competitive states, to two decimals.
round(competetive_df["dem_diff"].mean(), 2)

-1.77

---

#### Melt the table for competitive races for charting

In [23]:
# Reshape to long form: one row per (state, party) with the difference in "diff_value".
diff_columns = ["dem_diff", "gop_diff"]
df_long = competetive_df.melt(
    id_vars=["state"],
    value_vars=diff_columns,
    var_name="party",
    value_name="diff_value",
)

In [24]:
# Turn the melted column names into display labels for the chart facets.
# An exact-match value mapping is used rather than chained substring replaces:
# it states the intent directly, does not depend on regex handling, and leaves
# already-relabelled values untouched if the cell is run again.
party_labels = {"dem_diff": "Democrats", "gop_diff": "Republicans"}
df_long["party"] = df_long["party"].replace(party_labels)

In [25]:
# Faceted bar chart: one panel per party, one bar per state, with the bar length
# given by "diff_value" (polling minus result).
# Changes from the previous version: the title spelling is fixed ("competitive"),
# the year in the title comes from `year`, and the empty `alt.Axis()` argument
# (which set nothing) is dropped.
alt.Chart(df_long).mark_bar().encode(
    x=alt.X("diff_value", title=" "),
    y=alt.Y("state", title=" "),
    color=alt.condition(
        alt.datum.diff_value > 0,
        alt.value("#00d4d8"),  # difference above zero
        alt.value("#d95f1a"),  # difference at or below zero
    ),
    facet=alt.Facet(
        "party",
        columns=2,
        title=" ",
        header=alt.Header(labelFontSize=15, labelFont="Summit Sans"),
    ),
).properties(
    height=300,
    width=300,
    title=f"{year} presidential in competitive states: PPT difference between polling and result",
)

#### Export

In [26]:
# Write the competitive-state comparison table to the processed-data folder.
# The year in the file name comes from `year` (same path as before for "2016");
# the existing file-name spelling is kept so downstream readers of this file are unaffected.
competetive_df.to_csv(f"data/processed/competetive_races_{year}.csv", index=False)