# How did polling averages and results differ in 2020?

#### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import us
import tabula
import altair as alt
import altair_stiles as altstiles

In [3]:
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [4]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000

In [5]:
def color_negative_red(val):
    """
    Takes a scalar and returns a string with
    the css property `'color: red'` for negative
    strings, black otherwise.
    """
    color = "red" if val < 0 else "black"
    return "color: %s" % color

---

## Read data

In [6]:
df_2016 = pd.read_csv("data/processed/competetive_races_2016.csv", dtype={"year": str})
df_2018 = pd.read_csv("data/processed/competetive_races_2018.csv", dtype={"year": str})
df_2020 = pd.read_csv("data/processed/competetive_races_2020.csv", dtype={"year": str})

In [7]:
df = pd.concat([df_2016, df_2018, df_2020])

In [8]:
df[df["year"] == "2016"]

Unnamed: 0,year,state,gop_polling,gop_vote_pct,dem_polling,dem_vote_pct,gop_diff,dem_diff,winner
0,2016,Arizona,44.4,48.67,42.7,45.13,-4.27,-2.43,GOP
1,2016,Colorado,39.7,43.25,43.7,48.16,-3.55,-4.46,DEM
2,2016,Florida,44.4,49.02,45.7,47.82,-4.62,-2.12,GOP
3,2016,Georgia,47.2,50.77,43.5,45.64,-3.57,-2.14,GOP
4,2016,Indiana,47.5,56.94,37.1,37.77,-9.44,-0.67,GOP
5,2016,Iowa,43.5,51.15,40.4,41.74,-7.65,-1.34,GOP
6,2016,Maine,37.1,44.87,45.1,47.83,-7.77,-2.73,DEM
7,2016,Michigan,40.4,47.5,44.5,47.27,-7.1,-2.77,GOP
8,2016,Minnesota,37.6,44.92,44.9,46.44,-7.32,-1.54,DEM
9,2016,Missouri,49.1,56.77,38.8,38.14,-7.67,0.66,GOP


---

#### Try some groupings

In [9]:
df_grouped = (
    df.groupby(["year"])
    .agg(
        {
            "gop_polling": "mean",
            "gop_vote_pct": "mean",
            "gop_diff": "mean",
            "dem_polling": "mean",
            "dem_vote_pct": "mean",
            "dem_diff": "mean",
        }
    )
    .round(2)
    .reset_index()
)

In [10]:
df_grouped["gop_dem_diff"] = df_grouped["gop_diff"] - df_grouped["dem_diff"]

In [11]:
df_grouped.rename(
    columns={
        "gop_diff": "gop_diff_polls_vs_results",
        "dem_diff": "dem_diff_polls_vs_results",
    },
    inplace=True,
)

In [12]:
df_grouped.head()

Unnamed: 0,year,gop_polling,gop_vote_pct,gop_diff_polls_vs_results,dem_polling,dem_vote_pct,dem_diff_polls_vs_results,gop_dem_diff
0,2016,42.86,49.13,-6.27,41.89,43.66,-1.77,-4.5
1,2018,46.43,49.25,-2.82,46.38,48.29,-1.9,-0.92
2,2020,47.54,51.46,-3.92,47.62,46.55,1.06,-4.98


In [13]:
years = df_grouped.melt(
    id_vars="year",
    value_vars=["dem_polling", "gop_polling"],
    value_name="share",
    var_name="party",
)

In [14]:
years

Unnamed: 0,year,party,share
0,2016,dem_polling,41.89
1,2018,dem_polling,46.38
2,2020,dem_polling,47.62
3,2016,gop_polling,42.86
4,2018,gop_polling,46.43
5,2020,gop_polling,47.54


In [15]:
alt.Chart(years).mark_bar().encode(
    x=alt.X("share", stack="normalize", title="Vote share", axis=alt.Axis(format="%")),
    y=alt.Y("year:O", title=""),
    color="party",
).properties(height=100, width=400)

In [16]:
# bar = (
#     alt.Chart(years)
#     .mark_bar()
#     .encode(x="share", y="party")
#     .properties(width=alt.Step(40))  # controls width of bar.
# )

# tick = (
#     alt.Chart(df_grouped)
#     .mark_tick(
#         color="black",
#         thickness=2,
#         size=20 * 0.5,  # controls width of tick.
#     )
#     .encode(x="dem_vote_pct", y="year")
# )

# (bar + tick).properties(width=200, height=100)

---

#### Melt the table for competetive races for charting

In [17]:
df_long = pd.melt(
    competetive_df,
    id_vars=["state"],
    value_vars=["dem_diff", "gop_diff"],
    var_name="party",
    value_name="diff_value",
)

NameError: name 'competetive_df' is not defined

In [None]:
df_long["party"] = (
    df_long["party"]
    .str.replace("dem_diff", "Democrats")
    .str.replace("gop_diff", "Republicans")
)

In [None]:
alt.Chart(df_long).mark_bar().encode(
    x=alt.X("diff_value", title=" ", axis=alt.Axis()),
    y=alt.Y("state", title=" "),
    color=alt.condition(
        alt.datum.diff_value > 0,
        alt.value("#00d4d8"),  # The positive color
        alt.value("#d95f1a"),  # The negative color
    ),
    facet=alt.Facet(
        "party",
        columns=2,
        title=" ",
        header=alt.Header(labelFontSize=15, labelFont="Summit Sans"),
    ),
).properties(
    height=400,
    width=300,
    title="2020 presidential in competetive states: PPT difference between polling and result",
)

---

#### Export

In [None]:
competetive_df.to_csv("data/processed/competetive_races_2020.csv", index=False)