# 2020 presidential election results

### Import Python tools

In [1]:
# !pip install nb_black
%load_ext lab_black

In [2]:
import pandas as pd

### Read the data

In [3]:
df = pd.read_json("../data/elections/election_results_2020.json")

In [4]:
df.columns

Index(['state_name', 'county_fips', 'county_name', 'votes_gop', 'votes_dem',
       'total_votes'],
      dtype='object')

### Just the first five rows

In [5]:
df.head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes
0,Alabama,1001,Autauga County,19838,7503,27770
1,Alabama,1003,Baldwin County,83544,24578,109679
2,Alabama,1005,Barbour County,5622,4816,10518
3,Alabama,1007,Bibb County,7525,1986,9595
4,Alabama,1009,Blount County,24711,2640,27588


### Let's change data types

In [6]:
df.dtypes

state_name     object
county_fips     int64
county_name    object
votes_gop       int64
votes_dem       int64
total_votes     int64
dtype: object

In [7]:
df["county_fips"] = df["county_fips"].astype(str)

### Deal with four-digit FIPS codes

In [8]:
df["county_fips"] = df["county_fips"].str.zfill(5)

### Basics about the dataset

In [9]:
df.describe().round()

Unnamed: 0,votes_gop,votes_dem,total_votes
count,3152.0,3152.0,3152.0
mean,23543.0,25782.0,50264.0
std,54040.0,96930.0,149378.0
min,60.0,4.0,66.0
25%,3662.0,1320.0,5415.0
50%,8123.0,3690.0,12336.0
75%,20509.0,11944.0,33304.0
max,1145530.0,3028885.0,4263443.0


### Look at a slim version of the data

In [10]:
df[["county_name", "total_votes"]].head()

Unnamed: 0,county_name,total_votes
0,Autauga County,27770
1,Baldwin County,109679
2,Barbour County,10518
3,Bibb County,9595
4,Blount County,27588


### Let's change the county_name string

In [11]:
df["county_name"] = df["county_name"].str.replace(" County", "", regex=False)

### Did it work? Subset the data

In [12]:
df[df["state_name"] == "Louisiana"].head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes
1123,Louisiana,22001,Acadia Parish,22596,5443,28425
1124,Louisiana,22003,Allen Parish,7574,2108,9810
1125,Louisiana,22005,Ascension Parish,40687,20399,62325
1126,Louisiana,22007,Assumption Parish,7271,3833,11235
1127,Louisiana,22009,Avoyelles Parish,12028,4979,17292


In [13]:
df["county_name"] = (
    df["county_name"]
    .str.replace(" County", "", regex=False)
    .str.replace(" Parish", "", regex=False)
)

### Subset some more. Just large Texas counties?

In [14]:
df[(df["state_name"] == "Texas") & (df["total_votes"] > 500000)].head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes
2547,Texas,48029,Bexar,308618,448452,768952
2589,Texas,48113,Dallas,307076,598576,921638
2633,Texas,48201,Harris,700630,918193,1640818
2752,Texas,48439,Tarrant,409741,411567,834697
2759,Texas,48453,Travis,161337,435860,610349


---

### Calculate the margin for each party

In [15]:
df["margin_dem"] = df["votes_dem"] - df["votes_gop"]
df["margin_gop"] = df["votes_gop"] - df["votes_dem"]

In [16]:
df.head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes,margin_dem,margin_gop
0,Alabama,1001,Autauga,19838,7503,27770,-12335,12335
1,Alabama,1003,Baldwin,83544,24578,109679,-58966,58966
2,Alabama,1005,Barbour,5622,4816,10518,-806,806
3,Alabama,1007,Bibb,7525,1986,9595,-5539,5539
4,Alabama,1009,Blount,24711,2640,27588,-22071,22071


### Calculate the vote share — or percentage — for each party

In [17]:
df["pct_dem"] = round((df["votes_dem"] / df["total_votes"]) * 100)
df["pct_gop"] = round((df["votes_gop"] / df["total_votes"]) * 100)

In [18]:
df["dem_diff"] = df["pct_dem"] - df["pct_gop"]
df["gop_diff"] = df["pct_gop"] - df["pct_dem"]

In [19]:
df.head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes,margin_dem,margin_gop,pct_dem,pct_gop,dem_diff,gop_diff
0,Alabama,1001,Autauga,19838,7503,27770,-12335,12335,27.0,71.0,-44.0,44.0
1,Alabama,1003,Baldwin,83544,24578,109679,-58966,58966,22.0,76.0,-54.0,54.0
2,Alabama,1005,Barbour,5622,4816,10518,-806,806,46.0,53.0,-7.0,7.0
3,Alabama,1007,Bibb,7525,1986,9595,-5539,5539,21.0,78.0,-57.0,57.0
4,Alabama,1009,Blount,24711,2640,27588,-22071,22071,10.0,90.0,-80.0,80.0


### Which party won each county — the Python way?

In [20]:
def winner(row):
    if row["votes_dem"] > row["votes_gop"]:
        return "dem"
    elif row["votes_gop"] > row["votes_dem"]:
        return "gop"
    else:
        return "tie"

In [21]:
df["winner"] = df.apply(winner, axis=1)

### Or, the Pandas way? 

In [22]:
df["winner_nu"] = (
    df[["votes_gop", "votes_dem"]]
    .idxmax(axis=1)
    .str.replace("Mrs.", "Ms.", regex=False)
)

### Did it work? 

In [23]:
df.winner.value_counts()

gop    2595
dem     557
Name: winner, dtype: int64

In [24]:
df.winner_nu.value_counts("normalize")

votes_gop    0.823287
votes_dem    0.176713
Name: winner_nu, dtype: float64

### We don't need that 'winner_nu' column

In [25]:
df.drop(["winner_nu"], axis=1, inplace=True)

---

### Where was the Republicans' largest margin? 

In [26]:
df.sort_values("margin_gop", ascending=False).head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes,margin_dem,margin_gop,pct_dem,pct_gop,dem_diff,gop_diff,winner
2702,Texas,48339,Montgomery,193382,74377,271451,-119005,119005,27.0,71.0,-44.0,44.0,gop
2811,Utah,49049,Utah,192812,76033,284480,-116779,116779,27.0,68.0,-41.0,41.0,gop
1798,New Jersey,34029,Ocean,217740,119456,342746,-98284,98284,35.0,64.0,-29.0,29.0,gop
365,Florida,12071,Lee,233247,157695,393899,-75552,75552,40.0,59.0,-19.0,19.0,gop
1174,Louisiana,22103,St. Tammany,99666,37746,140110,-61920,61920,27.0,71.0,-44.0,44.0,gop


### Which counties were most Democratic?

In [27]:
df.sort_values("pct_dem", ascending="True").head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes,margin_dem,margin_gop,pct_dem,pct_gop,dem_diff,gop_diff,winner
2729,Texas,48393,Roberts,529,17,550,-512,512,3.0,96.0,-93.0,93.0,gop
2549,Texas,48033,Borden,397,16,416,-381,381,4.0,95.0,-91.0,91.0,gop
1624,Montana,30033,Garfield,764,41,813,-723,723,5.0,94.0,-89.0,89.0,gop
997,Kansas,20199,Wallace,762,44,817,-718,718,5.0,93.0,-88.0,88.0,gop
2667,Texas,48269,King,151,8,159,-143,143,5.0,95.0,-90.0,90.0,gop


### Which counties were most Republican?

In [28]:
df.sort_values("pct_dem", ascending="False").head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes,margin_dem,margin_gop,pct_dem,pct_gop,dem_diff,gop_diff,winner
2729,Texas,48393,Roberts,529,17,550,-512,512,3.0,96.0,-93.0,93.0,gop
2549,Texas,48033,Borden,397,16,416,-381,381,4.0,95.0,-91.0,91.0,gop
1624,Montana,30033,Garfield,764,41,813,-723,723,5.0,94.0,-89.0,89.0,gop
997,Kansas,20199,Wallace,762,44,817,-718,718,5.0,93.0,-88.0,88.0,gop
2667,Texas,48269,King,151,8,159,-143,143,5.0,95.0,-90.0,90.0,gop


### Which counties were most Democratic? 

In [29]:
df[df["total_votes"] > 500000].sort_values("pct_dem", ascending=False).head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes,margin_dem,margin_gop,pct_dem,pct_gop,dem_diff,gop_diff,winner
1868,New York,36061,New York,85185,603040,695263,517855,-517855,87.0,12.0,75.0,-75.0,dem
2304,Pennsylvania,42101,Philadelphia,132740,603790,741377,471050,-471050,81.0,18.0,63.0,-63.0,dem
197,California,6001,Alameda,136309,617659,769864,481350,-481350,80.0,18.0,62.0,-62.0,dem
1217,Maryland,24031,Montgomery,101222,419569,533743,318347,-318347,79.0,19.0,60.0,-60.0,dem
1861,New York,36047,Kings,202772,703310,913690,500538,-500538,77.0,22.0,55.0,-55.0,dem


---

### Difference between counties won and votes won

In [30]:
df.winner.value_counts()

gop    2595
dem     557
Name: winner, dtype: int64

### Aggregate by winner

In [31]:
df.votes_gop.sum()

74208670

In [32]:
df.votes_dem.sum()

81265196

In [33]:
df.groupby(["winner"]).agg({"total_votes": "mean"}).round(0).reset_index()

Unnamed: 0,winner,total_votes
0,dem,169433.0
1,gop,24685.0


---

### Aggregate by state

In [34]:
df.groupby(["state_name"]).agg(
    {"votes_gop": "sum", "votes_dem": "sum", "total_votes": "sum"}
).reset_index()

Unnamed: 0,state_name,votes_gop,votes_dem,total_votes
0,Alabama,1441168,849648,2323304
1,Alaska,189892,153405,391346
2,Arizona,1661686,1672143,3387326
3,Arkansas,760647,423932,1219069
4,California,6005961,11109764,17495906
5,Colorado,1364607,1804352,3256953
6,Connecticut,715291,1080680,1824280
7,Delaware,200603,296268,504010
8,District of Columbia,18586,317323,344356
9,Florida,5668731,5297045,11067456


### Assign the new dataframe to a variable called states

In [35]:
states = (
    df.groupby(["state_name"])
    .agg({"votes_gop": "sum", "votes_dem": "sum", "total_votes": "sum"})
    .reset_index()
)

### Calculate vote share by state

In [36]:
states["pct_dem"] = round((states["votes_dem"] / states["total_votes"]) * 100)
states["pct_gop"] = round((states["votes_gop"] / states["total_votes"]) * 100)

In [37]:
states.head()

Unnamed: 0,state_name,votes_gop,votes_dem,total_votes,pct_dem,pct_gop
0,Alabama,1441168,849648,2323304,37.0,62.0
1,Alaska,189892,153405,391346,39.0,49.0
2,Arizona,1661686,1672143,3387326,49.0,49.0
3,Arkansas,760647,423932,1219069,35.0,62.0
4,California,6005961,11109764,17495906,63.0,34.0


### And the winner is...

In [38]:
states["winner"] = (
    states[["votes_gop", "votes_dem"]]
    .idxmax(axis=1)
    .str.replace("votes_", "", regex=False)
)

In [39]:
states.head()

Unnamed: 0,state_name,votes_gop,votes_dem,total_votes,pct_dem,pct_gop,winner
0,Alabama,1441168,849648,2323304,37.0,62.0,gop
1,Alaska,189892,153405,391346,39.0,49.0,gop
2,Arizona,1661686,1672143,3387326,49.0,49.0,dem
3,Arkansas,760647,423932,1219069,35.0,62.0,gop
4,California,6005961,11109764,17495906,63.0,34.0,dem


In [40]:
states["winner"].value_counts()

dem    26
gop    25
Name: winner, dtype: int64

---

### Export the states

In [41]:
states.to_csv("../data/elections/states_election_results_2020.csv", index=False)

In [42]:
states.to_json(
    "../data/elections/states_election_results_2020.json", orient="records", indent=4
)

### Counties

In [43]:
df.to_csv("../data/elections/counties_election_results_2020.csv", index=False)

In [44]:
df.to_json(
    "../data/elections/counties_election_results_2020.json", orient="records", indent=4
)