In [17]:
## Install gender package and linked database
install.packages("gender")
install.packages("genderdata", repos = "https://dev.ropensci.org", type = "source")

## Packages
library(dplyr)
library(gender)
library(stringr)
library(broom)
library(tidyr)
library(ggplot2)


Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done

Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done



In [16]:
## Read data in, change path as necessary
elections_2016 <- read.csv("2016_election_results.csv")

head(elections_2016)

Unnamed: 0_level_0,X,Democrat,Republican,Other,State,Year,District,Democrat.Incumbent,Democrat.Votes,Republican.Incumbent,Republican.Votes,Other.Incumbent,Other.Votes
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<int>,<chr>,<int>,<chr>,<int>
1,0,Scott J. Kawasaki,No candidate,No candidate,Alaska,2016,1,True,1,False,0,0,0
2,1,Truno Holdaway,Steve M. Thompson,No candidate,Alaska,2016,2,False,1153,True,3268,0,0
3,2,Christina M. Sinclair,Tammie Wilson,Jeanne Olson,Alaska,2016,3,False,537,True,4291,False,2270
4,3,David Guttenberg,No candidate,No candidate,Alaska,2016,4,True,1,False,0,0,0
5,4,Adam Wool,Aaron Lojewski,No candidate,Alaska,2016,5,True,3812,False,3384,0,0
6,5,Jason T. Land,David M. Talerico,No candidate,Alaska,2016,6,False,2327,True,5126,0,0


In [3]:
## Function to fill predicted gender
gender_fill <- function(x) { 
  n <- length(x)
  x <- word(x) ## removes last names, gender() only works on first name strings
  gender_rep <- rep("a", n)
  for (i in 1:n) {
      if (x[i] == "No") {
        gender_rep[i] <- "N/A" ## handles no candidate
      } else if (length(pull(gender(x[i]), var = gender)) == 0) {
        gender_rep[i] <- "Unknown" ## handles names the function can't predict
      }  else {
      gender_rep[i] <- pull(gender(as.character(x[i])), var = gender)
      }
    }
    gender_rep
  }


## First if checks for "No" because "No candidate" has been truncated to "No", needs to be changed if databases have different indicators for no candidate
## Second if checks for length == 0 because gender() generates an empty table if it is unable to predict a gender
## Else applies gender() to the rest of the cases and pulls the gender result out of the results (gender function creates a table with ancillary info), coercion to character 
## because gender() only works on character type


In [4]:
## Testing function

test_string <- c("Michael B", "Jenny A", "No candidate", "xyzo")

print(gender_fill(test_string) == c("male", "female", "N/A", "Unknown"))


[1] TRUE TRUE TRUE TRUE


In [5]:
## Create gender columns, extremely extremely slow (like 20-30min at least), but works

elections_2016$gender_dem <- gender_fill(elections_2016$Democrat)

elections_2016$gender_rep <- gender_fill(elections_2016$Republican)

head(elections_2016)

Unnamed: 0_level_0,X,Democrat,Republican,Other,State,Year,District,Democrat.Incumbent,Democrat.Votes,Republican.Incumbent,Republican.Votes,Other.Incumbent,Other.Votes,gender_dem,gender_rep
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<int>,<chr>,<int>,<chr>,<int>,<chr>,<chr>
1,0,Scott J. Kawasaki,No candidate,No candidate,Alaska,2016,1,True,1,False,0,0,0,male,
2,1,Truno Holdaway,Steve M. Thompson,No candidate,Alaska,2016,2,False,1153,True,3268,0,0,Unknown,male
3,2,Christina M. Sinclair,Tammie Wilson,Jeanne Olson,Alaska,2016,3,False,537,True,4291,False,2270,female,female
4,3,David Guttenberg,No candidate,No candidate,Alaska,2016,4,True,1,False,0,0,0,male,
5,4,Adam Wool,Aaron Lojewski,No candidate,Alaska,2016,5,True,3812,False,3384,0,0,male,male
6,5,Jason T. Land,David M. Talerico,No candidate,Alaska,2016,6,False,2327,True,5126,0,0,male,male


In [6]:
## Generate winner and gender of winner
elections_2016 <- elections_2016 %>%
    mutate(winner_party = case_when(Democrat.Votes > Republican.Votes & Democrat.Votes > Other.Votes ~ "Democrat",
                              Republican.Votes > Democrat.Votes & Republican.Votes > Other.Votes ~ "Republican",
                              TRUE ~ "Other"),
          winner_gender = case_when(winner_party == "Democrat" ~ gender_dem,
                                   winner_party == "Republican" ~ gender_rep,
                                   TRUE ~ "Other candidate")) %>%
    select(-X)

## Generate vote proportion
elections_2016 <- elections_2016 %>%
    mutate(vote_prop_dem = Democrat.Votes / (Democrat.Votes + Republican.Votes + Other.Votes),
          vote_prop_rep = Republican.Votes / (Democrat.Votes + Republican.Votes + Other.Votes),
          vote_prop_other = Other.Votes / (Democrat.Votes + Republican.Votes + Other.Votes))


# Rename variables for consistency
names(elections_2016) <- tolower(names(elections_2016))
names(elections_2016) <-gsub("\\.", "_", names(elections_2016))

head(elections_2016)


Unnamed: 0_level_0,democrat,republican,other,state,year,district,democrat_incumbent,democrat_votes,republican_incumbent,republican_votes,other_incumbent,other_votes,gender_dem,gender_rep,winner_party,winner_gender,vote_prop_dem,vote_prop_rep,vote_prop_other
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<int>,<chr>,<int>,<chr>,<int>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
1,Scott J. Kawasaki,No candidate,No candidate,Alaska,2016,1,True,1,False,0,0,0,male,,Democrat,male,1.0,0.0,0.0
2,Truno Holdaway,Steve M. Thompson,No candidate,Alaska,2016,2,False,1153,True,3268,0,0,Unknown,male,Republican,male,0.26080072,0.7391993,0.0
3,Christina M. Sinclair,Tammie Wilson,Jeanne Olson,Alaska,2016,3,False,537,True,4291,False,2270,female,female,Republican,female,0.07565511,0.6045365,0.3198084
4,David Guttenberg,No candidate,No candidate,Alaska,2016,4,True,1,False,0,0,0,male,,Democrat,male,1.0,0.0,0.0
5,Adam Wool,Aaron Lojewski,No candidate,Alaska,2016,5,True,3812,False,3384,0,0,male,male,Democrat,male,0.52973874,0.4702613,0.0
6,Jason T. Land,David M. Talerico,No candidate,Alaska,2016,6,False,2327,True,5126,0,0,male,male,Republican,male,0.31222327,0.6877767,0.0


In [7]:
## Created file with gender column so you don't have to run the function every time, commented out to prevent accidental overwrite
# write.csv(elections_2016, "2016_election_results_wide.csv")

In [8]:
## Create dataset of one line per candidate (not the ideal way to do it but works)

Dem_data <- elections_2016 %>%
    select(democrat, state, district, year, democrat_incumbent, democrat_votes, vote_prop_dem, gender_dem, winner_party)

Rep_data <- elections_2016 %>%
    select(republican, state, district, year, republican_incumbent, republican_votes, vote_prop_rep, gender_rep, winner_party)



Dem_data <- Dem_data %>%
    rename(candidate = democrat,
          incumbent = democrat_incumbent,
          total_votes = democrat_votes,
          vote_prop = vote_prop_dem,
          gender = gender_dem) %>%
    mutate(candidate_party = "Democrat")

Rep_data <- Rep_data %>%
    rename(candidate = republican,
          incumbent = republican_incumbent,
          total_votes = republican_votes,
          vote_prop = vote_prop_rep,
          gender = gender_rep) %>%
    mutate(candidate_party = "Republican")


elections_2016_long <- rbind(Dem_data, Rep_data)

head(elections_2016_long)




Unnamed: 0_level_0,candidate,state,district,year,incumbent,total_votes,vote_prop,gender,winner_party,candidate_party
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<int>,<chr>,<int>,<dbl>,<chr>,<chr>,<chr>
1,Scott J. Kawasaki,Alaska,1,2016,True,1,1.0,male,Democrat,Democrat
2,Truno Holdaway,Alaska,2,2016,False,1153,0.26080072,Unknown,Republican,Democrat
3,Christina M. Sinclair,Alaska,3,2016,False,537,0.07565511,female,Republican,Democrat
4,David Guttenberg,Alaska,4,2016,True,1,1.0,male,Democrat,Democrat
5,Adam Wool,Alaska,5,2016,True,3812,0.52973874,male,Democrat,Democrat
6,Jason T. Land,Alaska,6,2016,False,2327,0.31222327,male,Republican,Democrat


In [9]:
## Joins for race

race_data <- read.csv("officeholders_race_ethnicity-1606938979.csv")

race_data <- race_data %>%
    select(first_name, middle_name, last_name, race_ethnicity) %>%
    mutate(full_name = paste(first_name, middle_name, last_name, sep = " "),
          alt_name = paste(first_name, last_name, sep = " "))

head(race_data)

race_data_2 <- unique(race_data[, c("full_name", "alt_name", "race_ethnicity")])

head(race_data_2)



Unnamed: 0_level_0,first_name,middle_name,last_name,race_ethnicity,full_name,alt_name
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,Kim,K.,Abbott,White,Kim K. Abbott,Kim Abbott
2,Kim,K.,Abbott,White,Kim K. Abbott,Kim Abbott
3,Kim,K.,Abbott,White,Kim K. Abbott,Kim Abbott
4,Kim,K.,Abbott,White,Kim K. Abbott,Kim Abbott
5,Robin,A.,Abbott,White,Robin A. Abbott,Robin Abbott
6,Robin,A.,Abbott,White,Robin A. Abbott,Robin Abbott


Unnamed: 0_level_0,full_name,alt_name,race_ethnicity
Unnamed: 0_level_1,<chr>,<chr>,<chr>
1,Kim K. Abbott,Kim Abbott,White
5,Robin A. Abbott,Robin Abbott,White
7,Roberta Abdul-Salaam,Roberta Abdul-Salaam,Black/African American
12,Theresa Abed,Theresa Abed,White
14,Catherine F. Abercrombie,Catherine Abercrombie,White
27,Paula Aboud,Paula Aboud,White


In [10]:
## 2016 race join

head(elections_2016_long)


new_data_2016 <- elections_2016_long %>%
    left_join(race_data_2, by = c("candidate" = "full_name")) %>%
    left_join(race_data_2, by = c("candidate" = "alt_name")) %>%
    mutate(race_ethnicity.x = replace_na(race_ethnicity.x, "Missing"),
          race_ethnicity.y = replace_na(race_ethnicity.y, "Missing"),
          race_ethnicity.x = ifelse(race_ethnicity.x == "Missing", race_ethnicity.y, "Missing")) %>%
    select(-c("full_name", "alt_name", "race_ethnicity.y")) %>%
    rename(race_ethnicity = race_ethnicity.x)


head(new_data_2016)

new_data_2016 %>%
    group_by(race_ethnicity) %>%
    summarize(n())



Unnamed: 0_level_0,candidate,state,district,year,incumbent,total_votes,vote_prop,gender,winner_party,candidate_party
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<int>,<chr>,<int>,<dbl>,<chr>,<chr>,<chr>
1,Scott J. Kawasaki,Alaska,1,2016,True,1,1.0,male,Democrat,Democrat
2,Truno Holdaway,Alaska,2,2016,False,1153,0.26080072,Unknown,Republican,Democrat
3,Christina M. Sinclair,Alaska,3,2016,False,537,0.07565511,female,Republican,Democrat
4,David Guttenberg,Alaska,4,2016,True,1,1.0,male,Democrat,Democrat
5,Adam Wool,Alaska,5,2016,True,3812,0.52973874,male,Democrat,Democrat
6,Jason T. Land,Alaska,6,2016,False,2327,0.31222327,male,Republican,Democrat


Unnamed: 0_level_0,candidate,state,district,year,incumbent,total_votes,vote_prop,gender,winner_party,candidate_party,race_ethnicity
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<int>,<chr>,<int>,<dbl>,<chr>,<chr>,<chr>,<chr>
1,Scott J. Kawasaki,Alaska,1,2016,True,1,1.0,male,Democrat,Democrat,Missing
2,Truno Holdaway,Alaska,2,2016,False,1153,0.26080072,Unknown,Republican,Democrat,Missing
3,Christina M. Sinclair,Alaska,3,2016,False,537,0.07565511,female,Republican,Democrat,Missing
4,David Guttenberg,Alaska,4,2016,True,1,1.0,male,Democrat,Democrat,Missing
5,Adam Wool,Alaska,5,2016,True,3812,0.52973874,male,Democrat,Democrat,Missing
6,Jason T. Land,Alaska,6,2016,False,2327,0.31222327,male,Republican,Democrat,Missing


`summarise()` ungrouping output (override with `.groups` argument)



race_ethnicity,n()
<chr>,<int>
Asian/Pacific Islander,15
Black/African American,110
Hispanic/Latina,48
Missing,6560
Multiracial,1
"Multiracial, Black/African American, Hispanic/Latina",1
"Multiracial, White/Caucasian, Asian/Pacific Islander",2
"Multiracial, White/Caucasian, Black/African American",2
"Multiracial, White/Caucasian, Native American",2
Native American,8


In [11]:
## Save long dataset 2016, commented out to prevent overwrite
# write.csv(new_data_2016, "2016_election_results_long.csv")

In [12]:
## 2018 data prep

# Read data in
state_elections_2018 <- read.csv("2018_election_results.csv")


head(state_elections_2018)

# Filter out races for other positions, special elections, write-ins, NAs
filter_candidates <- state_elections_2018 %>%
    group_by(candidate) %>%
    summarize(count = n()) %>%
    filter(count > 25) %>% # Candidate with most appearances has 24, above that are aggregates/NAs (NA for no opposing candidate is filled back in later)
    select(candidate)


# Remove non-legislative offices, special elctions, write-in candidates, aggregates/NAs
state_elections_2018_filtered <- state_elections_2018 %>%
    filter(office %in% c("State Assembly Member", "State Representative", "State Senator", "State Representative A", "State Representative B"),
          special == FALSE,
           writein == FALSE,
           !candidate %in% filter_candidates$candidate,
          ) %>%
    select(!c("writein","special","unofficial","version"))

dim(state_elections_2018)
dim(state_elections_2018_filtered)

head(state_elections_2018_filtered)



Unnamed: 0_level_0,X,year,state,state_po,state_fips,state_cen,state_ic,office,district,stage,special,candidate,party,writein,mode,candidatevotes,totalvotes,unofficial,version
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<int>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<lgl>,<chr>,<int>,<int>,<lgl>,<int>
1,1,2018,Alabama,AL,1,63,41,"Associate Justice of the Supreme Court, Place 1",statewide,gen,False,Over Votes,,False,absentee,8,1705793,False,20200508
2,2,2018,Alabama,AL,1,63,41,"Associate Justice of the Supreme Court, Place 1",statewide,gen,False,Over Votes,,False,election day,241,1705793,False,20200508
3,3,2018,Alabama,AL,1,63,41,"Associate Justice of the Supreme Court, Place 1",statewide,gen,False,Over Votes,,False,provisional,1,1705793,False,20200508
4,4,2018,Alabama,AL,1,63,41,"Associate Justice of the Supreme Court, Place 1",statewide,gen,False,Sarah Hicks Stewart,republican,False,absentee,32836,1705793,False,20200508
5,5,2018,Alabama,AL,1,63,41,"Associate Justice of the Supreme Court, Place 1",statewide,gen,False,Sarah Hicks Stewart,republican,False,election day,1060731,1705793,False,20200508
6,6,2018,Alabama,AL,1,63,41,"Associate Justice of the Supreme Court, Place 1",statewide,gen,False,Sarah Hicks Stewart,republican,False,provisional,3568,1705793,False,20200508


`summarise()` ungrouping output (override with `.groups` argument)



Unnamed: 0_level_0,X,year,state,state_po,state_fips,state_cen,state_ic,office,district,stage,candidate,party,mode,candidatevotes,totalvotes
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<int>,<int>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<int>
1,241,2018,Alabama,AL,1,63,41,State Representative,District 1,gen,Bobby James Dolan III,independent,absentee,125,11684
2,242,2018,Alabama,AL,1,63,41,State Representative,District 1,gen,Bobby James Dolan III,independent,election day,4175,11684
3,243,2018,Alabama,AL,1,63,41,State Representative,District 1,gen,Bobby James Dolan III,independent,provisional,36,11684
4,244,2018,Alabama,AL,1,63,41,State Representative,District 1,gen,Phillip Pettus,republican,absentee,266,11684
5,245,2018,Alabama,AL,1,63,41,State Representative,District 1,gen,Phillip Pettus,republican,election day,7034,11684
6,246,2018,Alabama,AL,1,63,41,State Representative,District 1,gen,Phillip Pettus,republican,provisional,48,11684


In [13]:
# Identify candidates with mode and total vote counts
total_list <- state_elections_2018_filtered %>%
    filter(mode == "total") %>%
    select(candidate)

non_total_list <- state_elections_2018_filtered %>%
    filter(!mode == "total") %>%
    select(candidate)

sum(unique(total_list$candidate) %in% unique(non_total_list$candidate))

match_list <- unique(total_list[total_list$candidate %in% non_total_list$candidate,])

# Remove candidates who have total and individual vote mode counts, loss of ~12 candidates to make coding much easier
state_elections_2018_filtered <- state_elections_2018_filtered %>%
    filter(!candidate %in% match_list) %>%
    group_by(state, district, office, party, candidate) %>%
    summarize(total_votes = sum(candidatevotes)) %>%
    arrange(state, district, office)

# Transform non-major parties to other
state_elections_2018_filtered <- state_elections_2018_filtered %>%
     mutate(party = replace(party, !party %in% c("democrat","republican"), "other"))

head(state_elections_2018_filtered)


`summarise()` regrouping output by 'state', 'district', 'office', 'party' (override with `.groups` argument)



state,district,office,party,candidate,total_votes
<chr>,<chr>,<chr>,<chr>,<chr>,<int>
Alabama,District 1,State Representative,other,Bobby James Dolan III,4336
Alabama,District 1,State Representative,republican,Phillip Pettus,7348
Alabama,District 1,State Senator,democrat,Caroline Self,15830
Alabama,District 1,State Senator,republican,Tim Melson,33141
Alabama,District 10,State Representative,democrat,J.B. King,8565
Alabama,District 10,State Representative,other,Elijah J. Boyd,1130


In [14]:
## Reformat data to single line per election and generate vote totals/vote shares/winner columns
# Reformat and generate vote share, winner 
state_elections_2018_wide <- state_elections_2018_filtered %>%
    mutate(row = row_number()) %>%
    pivot_wider(id_cols = c("state", "district", "office", "row"), names_from = party, values_from = c("candidate", "total_votes")) %>%
    mutate(candidate_democrat = replace_na(candidate_democrat, "No candidate"), 
            candidate_republican = replace_na(candidate_republican, "No candidate"),
            candidate_other = replace_na(candidate_other, "No candidate"),
            total_votes_democrat = replace_na(total_votes_democrat, 0),
            total_votes_republican = replace_na(total_votes_republican, 0),
            total_votes_other = replace_na(total_votes_other, 0),
            vote_share_dem = total_votes_democrat/(total_votes_democrat+total_votes_republican+total_votes_other),
            vote_share_rep = total_votes_republican/(total_votes_democrat+total_votes_republican+total_votes_other),
            vote_share_other = total_votes_other/(total_votes_democrat+total_votes_republican+total_votes_other),
            winner_party = case_when(total_votes_democrat > total_votes_republican & total_votes_democrat > total_votes_other ~ "Democrat",
                              total_votes_republican > total_votes_democrat & total_votes_republican > total_votes_other ~ "Republican",
                              TRUE ~ "Other")) %>%
    select(-row)


head(state_elections_2018_wide)




state,district,office,candidate_other,candidate_republican,candidate_democrat,total_votes_other,total_votes_republican,total_votes_democrat,vote_share_dem,vote_share_rep,vote_share_other,winner_party
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
Alabama,District 1,State Representative,Bobby James Dolan III,Phillip Pettus,No candidate,4336,7348,0,0.0,0.6288942,0.37110579,Republican
Alabama,District 1,State Senator,No candidate,Tim Melson,Caroline Self,0,33141,15830,0.3232525,0.6767475,0.0,Republican
Alabama,District 10,State Representative,Elijah J. Boyd,Mike Ball,J.B. King,1130,11240,8565,0.4091235,0.5368999,0.05397659,Republican
Alabama,District 10,State Senator,Craig Ford,Andrew Jones,No candidate,16759,25902,0,0.0,0.6071588,0.39284124,Republican
Alabama,District 100,State Representative,No candidate,Victor Gaston,No candidate,0,12086,0,0.0,1.0,0.0,Republican
Alabama,District 101,State Representative,No candidate,Chris Pringle,No candidate,0,10274,0,0.0,1.0,0.0,Republican


In [15]:
## Reformatting continued, gender identification
# Generate gender (takes absolutely forever, do not run this if you don't need to)
state_elections_2018_wide$gender_democrat <- gender_fill(state_elections_2018_wide$candidate_democrat)
state_elections_2018_wide$gender_republican <- gender_fill(state_elections_2018_wide$candidate_republican)


head(state_elections_2018_wide)


ERROR: Error in utils::packageVersion("genderdata"): there is no package called ‘genderdata’


In [None]:
state_elections_2018_wide <- state_elections_2018_wide %>%
    mutate(winner_gender = case_when(winner_party == "Democrat" ~ gender_democrat,
                                    winner_party == "Republican" ~ gender_republican,
                                    TRUE ~ "Other candidate"),
          year = 2018)

head(state_elections_2018_wide)

In [None]:
## Save short version of dataset with gender (one line per election), commented out to prevent overwrite
# write.csv(state_elections_2018_wide, "2018_election_results_wide.csv")


In [None]:
## Reformat to one line per candidate, major parties only

Dem_data_2018 <- state_elections_2018_wide %>%
    select(year, state, district, office, candidate_democrat, total_votes_democrat, vote_share_dem, gender_democrat, winner_party) %>%
    rename(Candidate = candidate_democrat,
          Votes = total_votes_democrat,
          Vote.Prop = vote_share_dem,
          Gender = gender_democrat) %>%
    mutate(Candidate.Party = "Democrat")

Rep_data_2018 <- state_elections_2018_wide %>%
    select(year, state, district, office, candidate_republican, total_votes_republican, vote_share_rep, gender_republican, winner_party) %>%
    rename(Candidate = candidate_republican,
          Votes = total_votes_republican,
          Vote.Prop = vote_share_rep,
          Gender = gender_republican) %>%
    mutate(Candidate.Party = "Republican")

state_elections_2018_long <- rbind(Dem_data_2018, Rep_data_2018)

head(state_elections_2018_long)
tail(state_elections_2018_long)

In [None]:
# Race join, 2018
new_data_2018 <- state_elections_2018_long %>%
    left_join(race_data_2, by = c("Candidate" = "full_name")) %>%
    left_join(race_data_2, by = c("Candidate" = "alt_name")) %>%
    mutate(race_ethnicity.x = replace_na(race_ethnicity.x, "Missing"),
          race_ethnicity.y = replace_na(race_ethnicity.y, "Missing"),
          race_ethnicity.x = ifelse(race_ethnicity.x == "Missing", race_ethnicity.y, "Missing")) %>%
    select(-c("full_name", "alt_name", "race_ethnicity.y")) %>% 
    rename(race_ethnicity = race_ethnicity.x,
           candidate = Candidate,
           total_votes = Votes,
           vote_prop = Vote.Prop,
           gender = Gender,
           candidate_party = Candidate.Party)
# renamed variables for better consistency

head(new_data_2018)

new_data_2018 %>%
    group_by(race_ethnicity) %>%
    summarize(n())


In [None]:
## Save long version of dataset with gender (one line per candidate)
# Commented out to prevent overwrite
# write.csv(state_elections_2018_long, "2018_election_results_long.csv")
