# 2020 presidential election results

### Import Python tools

In [1]:
# !pip install nb_black

In [2]:
%load_ext lab_black

In [3]:
import pandas as pd

### Read the data

In [4]:
df = pd.read_json("../../data/elections/election_results_2020.json")

### Just the first five rows

In [5]:
df.head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes
0,Alabama,1001,Autauga County,19838,7503,27770
1,Alabama,1003,Baldwin County,83544,24578,109679
2,Alabama,1005,Barbour County,5622,4816,10518
3,Alabama,1007,Bibb County,7525,1986,9595
4,Alabama,1009,Blount County,24711,2640,27588


### Let's change data types

In [6]:
df.dtypes

state_name     object
county_fips     int64
county_name    object
votes_gop       int64
votes_dem       int64
total_votes     int64
dtype: object

In [7]:
df["county_fips"] = df["county_fips"].astype(str)

### Deal with four-digit FIPS codes

In [8]:
df["county_fips"] = df["county_fips"].str.zfill(5)

In [9]:
df.head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes
0,Alabama,1001,Autauga County,19838,7503,27770
1,Alabama,1003,Baldwin County,83544,24578,109679
2,Alabama,1005,Barbour County,5622,4816,10518
3,Alabama,1007,Bibb County,7525,1986,9595
4,Alabama,1009,Blount County,24711,2640,27588


### Basics about the dataset

In [10]:
df.describe().round(0)

Unnamed: 0,votes_gop,votes_dem,total_votes
count,3152.0,3152.0,3152.0
mean,23543.0,25782.0,50264.0
std,54040.0,96930.0,149378.0
min,60.0,4.0,66.0
25%,3662.0,1320.0,5415.0
50%,8123.0,3690.0,12336.0
75%,20509.0,11944.0,33304.0
max,1145530.0,3028885.0,4263443.0


### Look at a slim version of the data

In [11]:
df[["county_fips", "total_votes"]].tail()

Unnamed: 0,county_fips,total_votes
3147,56037,16603
3148,56039,14677
3149,56041,9402
3150,56043,4012
3151,56045,3542


In [12]:
### Let's change the county_name string

In [13]:
df["county_name"] = (
    df["county_name"].str.replace(" County", "").str.replace(" Parish", "")
)

In [14]:
df.head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes
0,Alabama,1001,Autauga,19838,7503,27770
1,Alabama,1003,Baldwin,83544,24578,109679
2,Alabama,1005,Barbour,5622,4816,10518
3,Alabama,1007,Bibb,7525,1986,9595
4,Alabama,1009,Blount,24711,2640,27588


### Did it work? Subset the data

In [15]:
df[df["state_name"] == "Louisiana"]

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes
1123,Louisiana,22001,Acadia,22596,5443,28425
1124,Louisiana,22003,Allen,7574,2108,9810
1125,Louisiana,22005,Ascension,40687,20399,62325
1126,Louisiana,22007,Assumption,7271,3833,11235
1127,Louisiana,22009,Avoyelles,12028,4979,17292
...,...,...,...,...,...,...
1182,Louisiana,22119,Webster,11830,6172,18216
1183,Louisiana,22121,West Baton Rouge,7684,6200,14097
1184,Louisiana,22123,West Carroll,4317,710,5052
1185,Louisiana,22125,West Feliciana,3863,2298,6268


In [22]:
df[df["votes_gop"] > 500000]

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes
114,Arizona,4013,Maricopa,995665,1040774,2069475
215,California,6037,Los Angeles,1145530,3028885,4263443
226,California,6059,Orange,676498,814009,1521725
233,California,6073,San Diego,600094,964650,1601722
373,Florida,12086,Miami-Dade,532833,617864,1156816
620,Illinois,17031,Cook,558269,1725973,2321399
2633,Texas,48201,Harris,700630,918193,1640818


### Subset some more. Just large Texas counties?

In [29]:
df_texas_big = df[(df["state_name"] == "Texas") & (df["total_votes"] > 250000)]

In [30]:
df_texas_big

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes
2547,Texas,48029,Bexar,308618,448452,768952
2575,Texas,48085,Collin,252318,230945,492216
2589,Texas,48113,Dallas,307076,598576,921638
2593,Texas,48121,Denton,222480,188695,417964
2603,Texas,48141,El Paso,84331,178126,267215
2611,Texas,48157,Fort Bend,157718,195552,357514
2633,Texas,48201,Harris,700630,918193,1640818
2702,Texas,48339,Montgomery,193382,74377,271451
2752,Texas,48439,Tarrant,409741,411567,834697
2759,Texas,48453,Travis,161337,435860,610349


---

### Calculate the margin for each party

In [40]:
df["pct_dem"] = round((df["votes_dem"] / df["total_votes"]) * 100, 0)
df["pct_gop"] = round((df["votes_gop"] / df["total_votes"]) * 100, 0)

In [42]:
df["dem_diff"] = df["pct_dem"] - df["pct_gop"]
df["gop_diff"] = df["pct_gop"] - df["pct_dem"]

In [43]:
df.head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes,pct_dem,pct_gop,dem_diff,gop_diff
0,Alabama,1001,Autauga,19838,7503,27770,27.0,71.0,-44.0,44.0
1,Alabama,1003,Baldwin,83544,24578,109679,22.0,76.0,-54.0,54.0
2,Alabama,1005,Barbour,5622,4816,10518,46.0,53.0,-7.0,7.0
3,Alabama,1007,Bibb,7525,1986,9595,21.0,78.0,-57.0,57.0
4,Alabama,1009,Blount,24711,2640,27588,10.0,90.0,-80.0,80.0


### Which party won each county — the Python way?

In [48]:
def winner(r):
    if r["votes_dem"] > r["votes_gop"]:
        return "dem"
    elif r["votes_gop"] > r["votes_dem"]:
        return "gop"
    else:
        return "tie"

In [49]:
df["winner"] = df.apply(winner, axis=1)

In [56]:
df["winner"].value_counts()

gop    2595
dem     557
Name: winner, dtype: int64

In [55]:
df["winner"].value_counts("normalize").round(2)

gop    0.82
dem    0.18
Name: winner, dtype: float64

### Or, the Pandas way? 

In [69]:
df["winner_nu"] = (
    df[["votes_dem", "votes_gop"]].idxmax(axis=1).str.replace("votes_", "")
)

### Did it work? 

In [70]:
df.head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes,pct_dem,pct_gop,dem_diff,gop_diff,winner,winner_nu
0,Alabama,1001,Autauga,19838,7503,27770,27.0,71.0,-44.0,44.0,gop,gop
1,Alabama,1003,Baldwin,83544,24578,109679,22.0,76.0,-54.0,54.0,gop,gop
2,Alabama,1005,Barbour,5622,4816,10518,46.0,53.0,-7.0,7.0,gop,gop
3,Alabama,1007,Bibb,7525,1986,9595,21.0,78.0,-57.0,57.0,gop,gop
4,Alabama,1009,Blount,24711,2640,27588,10.0,90.0,-80.0,80.0,gop,gop


### We don't need that 'winner_nu' column

In [73]:
df.drop(["winner_nu"], axis=1, inplace=True)

In [74]:
df.head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes,pct_dem,pct_gop,dem_diff,gop_diff,winner
0,Alabama,1001,Autauga,19838,7503,27770,27.0,71.0,-44.0,44.0,gop
1,Alabama,1003,Baldwin,83544,24578,109679,22.0,76.0,-54.0,54.0,gop
2,Alabama,1005,Barbour,5622,4816,10518,46.0,53.0,-7.0,7.0,gop
3,Alabama,1007,Bibb,7525,1986,9595,21.0,78.0,-57.0,57.0,gop
4,Alabama,1009,Blount,24711,2640,27588,10.0,90.0,-80.0,80.0,gop


---

### Where was the Republicans' largest margin? 

### Which counties were most Democratic?

### Which counties were most Republican?

### Which counties were most Democratic? 

---

### Difference between counties won and votes won

### Aggregate by winner

---

### Aggregate by state

### Assign the new dataframe to a variable called states

### Calculate vote share by state

### And the winner is...

---

### Export the states

In [16]:
states.to_csv("../data/elections/states_election_results_2020.csv", index=False)

NameError: name 'states' is not defined

In [None]:
states.to_json(
    "../data/elections/states_election_results_2020.json", orient="records", indent=4
)

### Counties

In [None]:
df.to_csv("../data/elections/counties_election_results_2020.csv", index=False)

In [None]:
df.to_json(
    "../data/elections/counties_election_results_2020.json", orient="records", indent=4
)

---

---

### Export the states

In [None]:
states.to_csv("../data/elections/states_election_results_2020.csv", index=False)

In [None]:
states.to_json(
    "../data/elections/states_election_results_2020.json", orient="records", indent=4
)

### Counties

In [None]:
df.to_csv("../data/elections/counties_election_results_2020.csv", index=False)

In [None]:
df.to_json(
    "../data/elections/counties_election_results_2020.json", orient="records", indent=4
)

---