# How did polling averages and results differ in 2018?

#### Import Python tools

In [1]:
# Load the lab_black IPython extension (presumably Black auto-formatting of cells — confirm).
%load_ext lab_black

In [2]:
import pandas as pd
import us
import tabula
import altair as alt
import altair_stiles as altstiles

In [3]:
# Register the altair_stiles theme callable under the name "stiles".
alt.themes.register("stiles", altstiles.theme)
# NOTE(review): the theme enabled here is "grid", not the "stiles" name registered
# above — presumably "grid" is also supplied by altair_stiles; confirm this is intended.
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [4]:
# Raise pandas' display limits so frames shown below are not truncated.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

---

## Read data

#### Make a list of the most competitive states from 2018, [according to Inside Elections](https://www.insideelections.com/ratings/senate/2018-senate-ratings-november-1-2018)

In [5]:
# Read the published Google Sheet (CSV export) of race ratings. `year` is forced
# to text so it can be compared against the string "2018" when filtering.
competetive_src = pd.read_csv(
    filepath_or_buffer="https://docs.google.com/spreadsheets/d/e/2PACX-1vStoirfqFdugAT8mfQFlmbVzgm8IKA2GS1_nfsysTMp2oXx7SpR6Sz5MiNoaRYPPcB5Fz7ZiN3Hx35U/pub?gid=0&single=true&output=csv",
    dtype=dict(year=str),
)

In [6]:
# Keep only the 2018 rows and pull their state names out as a plain Python list.
competetive = competetive_src.loc[
    competetive_src["year"] == "2018", "state"
].tolist()

In [7]:
# Display the list of 2018 states selected above.
competetive

['Indiana',
 'Arizona',
 'Florida',
 'Montana',
 'Nevada',
 'West Virginia',
 'Minnesota',
 'New Jersey',
 'Missouri',
 'North Dakota',
 'Tennessee',
 'Texas']

#### Polls

In [8]:
# Load the processed 2018 state-level polling averages.
polls = pd.read_csv(
    filepath_or_buffer="data/processed/2018_polling_average_states_RCP.csv"
)

In [9]:
# Preview the first five polling rows.
polls.head(n=5)

Unnamed: 0,state,dem_polling,gop_polling,year,dem_polling_margin,gop_polling_margin,description
0,Arizona,46.5,47.5,2018,-1.0,1.0,RCP polling average
1,Florida,48.8,46.4,2018,2.4,-2.4,RCP polling average
2,Indiana,44.0,43.3,2018,0.7,-0.7,RCP polling average
3,Massachusetts,55.8,31.0,2018,24.8,-24.8,RCP polling average
4,Michigan,52.0,43.7,2018,8.3,-8.3,RCP polling average


#### Results

In [10]:
# Load the processed 2018 state-level election results.
results = pd.read_csv(
    filepath_or_buffer="data/processed/2018_election_results_states_fec.csv"
)

In [11]:
# Preview the first five result rows.
results.head(n=5)

Unnamed: 0,state,gop_vote_pct,dem_vote_pct,other_vote_pct,winner,year,gop_vote_margin,dem_vote_margin
0,Arizona,47.61,49.96,2.43,dem,2018,-2.35,2.35
1,California,0.0,100.0,0.0,dem,2018,-100.0,100.0
2,Connecticut,39.35,56.8,3.85,dem,2018,-17.45,17.45
3,Delaware,37.82,59.95,2.23,dem,2018,-22.13,22.13
4,Florida,50.05,49.93,0.01,gop,2018,0.12,-0.12


#### Merge 'em

In [12]:
# Inner-join polls to results, keeping only state/year pairs present in both.
df = polls.merge(results, how="inner", on=["state", "year"])

In [13]:
# Narrow the merged frame to the identifying columns and the polling / vote-share
# pairs for each party.
df = df.loc[
    :,
    ["year", "state", "gop_polling", "gop_vote_pct", "dem_polling", "dem_vote_pct"],
]

In [14]:
# Polling error per party: polling average minus actual vote share, so a negative
# value means the polling average was below that party's final share.
df["gop_diff"] = df["gop_polling"].sub(df["gop_vote_pct"])
df["dem_diff"] = df["dem_polling"].sub(df["dem_vote_pct"])

In [15]:
# Label each row with the party whose vote share is larger: idxmax yields the
# winning column name, which is stripped of its suffix and upper-cased
# ("gop_vote_pct" -> "GOP", "dem_vote_pct" -> "DEM").
df["winner"] = (
    df.loc[:, ["gop_vote_pct", "dem_vote_pct"]]
    .idxmax(axis="columns")
    .str.replace("_vote_pct", "", regex=False)
    .str.upper()
)

#### How far off were the polls for each party?

In [16]:
# Mean GOP polling error (polling minus result) across all merged states.
df["gop_diff"].mean().round(2)

-2.7

In [17]:
# Mean Democratic polling error (polling minus result) across all merged states.
df["dem_diff"].mean().round(2)

-2.27

---

#### Just the competitive states

In [18]:
# Restrict the comparison to states that appear in the `competetive` list.
competetive_df = df.loc[df["state"].isin(competetive)]

In [19]:
# Preview the first five rows of the filtered frame.
competetive_df.head(n=5)

Unnamed: 0,year,state,gop_polling,gop_vote_pct,dem_polling,dem_vote_pct,gop_diff,dem_diff,winner
0,2018,Arizona,47.5,47.61,46.5,49.96,-0.11,-3.46,DEM
1,2018,Florida,46.4,50.05,48.8,49.93,-3.65,-1.13,GOP
2,2018,Indiana,43.3,50.73,44.0,44.84,-7.43,-0.84,GOP
5,2018,Missouri,46.8,51.38,46.2,45.57,-4.58,0.63,GOP
6,2018,Montana,45.7,46.78,49.0,50.33,-1.08,-1.33,DEM


#### How far off were the polls for each party in competitive states?

In [20]:
# Mean GOP polling error (polling minus result) within the filtered states only.
competetive_df["gop_diff"].mean().round(2)

-2.82

In [21]:
# Mean Democratic polling error (polling minus result) within the filtered states only.
competetive_df["dem_diff"].mean().round(2)

-1.9

---

#### Melt the table for competitive races for charting

In [22]:
# Reshape to long form for charting: one row per state and party, with the
# source column name in `party` and the polling error in `diff_value`.
df_long = competetive_df.melt(
    id_vars=["state"],
    value_vars=["dem_diff", "gop_diff"],
    var_name="party",
    value_name="diff_value",
)

In [23]:
# Swap the melted column names for the display labels used as facet headers.
df_long["party"] = df_long["party"].replace(
    {"dem_diff": "Democrats", "gop_diff": "Republicans"}
)

In [24]:
# Bar chart of polling error (polling average minus result) per state, with one
# facet per party. Bars are coloured by sign so over- and under-estimates are
# distinguishable at a glance. Axis titles are blanked with a single space.
alt.Chart(df_long).mark_bar().encode(
    x=alt.X("diff_value", title=" "),
    y=alt.Y("state", title=" "),
    color=alt.condition(
        alt.datum.diff_value > 0,
        alt.value("#00d4d8"),  # polling average above the actual vote share
        alt.value("#d95f1a"),  # polling average at or below the actual vote share
    ),
    facet=alt.Facet(
        "party",
        columns=2,
        title=" ",
        header=alt.Header(labelFontSize=15, labelFont="Summit Sans"),
    ),
).properties(
    height=300,
    width=300,
    # Title spelling corrected ("competitive"); this text is rendered on the chart.
    title="2018 U.S. Senate competitive elections: PPT difference between polling and result",
)

---

#### Export

In [25]:
# Write the filtered comparison table to disk; index=False omits the row index.
competetive_df.to_csv(
    path_or_buf="data/processed/competetive_races_2018.csv",
    index=False,
)