In [55]:
library(ggplot2)
library(tidyverse)
library(broom)
library(dplyr)
womenwinning_df <- read.csv("officeholders_race_ethnicity.csv")

### Intial exploring

In [56]:
head(womenwinning_df)

Unnamed: 0_level_0,id,year,first_name,middle_name,last_name,party,level,position,state,district,race_ethnicity
Unnamed: 0_level_1,<chr>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,566417k,2017,Kim,K.,Abbott,Democrat,State Legislative,State Representative,Montana - MT,83,White
2,566417k,2018,Kim,K.,Abbott,Democrat,State Legislative,State Representative,Montana - MT,83,White
3,566417k,2019,Kim,K.,Abbott,Democrat,State Legislative,State Representative,Montana - MT,83,White
4,566417k,2020,Kim,K.,Abbott,Democrat,State Legislative,State Representative,Montana - MT,83,White
5,14772c,2010,Robin,A.,Abbott,Democrat,State Legislative,State Representative,Virginia - VA,93,White
6,14772c,2011,Robin,A.,Abbott,Democrat,State Legislative,State Representative,Virginia - VA,93,White


In [57]:
table_of_race <- table(womenwinning_df["race_ethnicity"])
head(table_of_race)


                                     Asian/Pacific Islander 
                                                        634 
                                     Black/African American 
                                                       3695 
                                            Hispanic/Latina 
                                                       1397 
                               Middle Eastern/North African 
                                                         22 
                                                Multiracial 
                                                         15 
Multiracial, Black/African American, Asian/Pacific Islander 
                                                         18 

In [58]:
table_of_levels <- table(womenwinning_df["level"])
head(table_of_levels)


   Federal/Congress   State Legislative Statewide Executive    Territorial/D.C. 
               1381               24540                1017                 112 

In [59]:
table_of_positions <- table(womenwinning_df["position"])
head(table_of_positions)


             Agriculture         Attorney General                  Auditor 
                       9                       98                      106 
 Chief Financial Officer              Comptroller Corporation Commissioner 
                       3                       40                       34 

### Making a dataframe with only the State Legislators

In [60]:
state_leg_df <- womenwinning_df%>%filter(level == "State Legislative")
        
head(state_leg_df)

Unnamed: 0_level_0,id,year,first_name,middle_name,last_name,party,level,position,state,district,race_ethnicity
Unnamed: 0_level_1,<chr>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,566417k,2017,Kim,K.,Abbott,Democrat,State Legislative,State Representative,Montana - MT,83,White
2,566417k,2018,Kim,K.,Abbott,Democrat,State Legislative,State Representative,Montana - MT,83,White
3,566417k,2019,Kim,K.,Abbott,Democrat,State Legislative,State Representative,Montana - MT,83,White
4,566417k,2020,Kim,K.,Abbott,Democrat,State Legislative,State Representative,Montana - MT,83,White
5,14772c,2010,Robin,A.,Abbott,Democrat,State Legislative,State Representative,Virginia - VA,93,White
6,14772c,2011,Robin,A.,Abbott,Democrat,State Legislative,State Representative,Virginia - VA,93,White


In [61]:
dim(state_leg_df) 

In [62]:
tbl_of_positions2 <- table(state_leg_df["position"])
print(tbl_of_positions2) #checking for cleanness


State Representative        State Senator 
               18620                 5920 


### Looking at Women by State, Year, Race, Region

In [63]:
tbl_by_state <- table(state_leg_df["state"]) #gotta convert this to a string, 
                                             #and then we could do percents too
print(tbl_by_state) #raw number based on location


       Alabama - AL         Alaska - AK        Arizona - AZ       Arkansas - AR 
                262                 218                 428                 369 
    California - CA       Colorado - CO    Connecticut - CT       Delaware - DE 
                425                 542                 729                 202 
       Florida - FL        Georgia - GA           Guam - GU         Hawaii - HI 
                559                 750                  11                 316 
         Idaho - ID       Illinois - IL        Indiana - IN           Iowa - IA 
                392                 763                 423                 466 
        Kansas - KS       Kentucky - KY      Louisiana - LA          Maine - ME 
                596                 325                 280                 770 
      Maryland - MD  Massachusetts - MA       Michigan - MI      Minnesota - MN 
                805                 684                 467                 872 
   Mississippi - MS       M

In [64]:
state_leg_yr <- state_leg_df%>%
    group_by(year) %>%
    summarize(count = n())
print(state_leg_yr)

`summarise()` ungrouping output (override with `.groups` argument)



[90m# A tibble: 13 x 2[39m
    year count
   [3m[90m<int>[39m[23m [3m[90m<int>[39m[23m
[90m 1[39m  [4m2[24m008  [4m1[24m782
[90m 2[39m  [4m2[24m009  [4m1[24m839
[90m 3[39m  [4m2[24m010  [4m1[24m844
[90m 4[39m  [4m2[24m011  [4m1[24m786
[90m 5[39m  [4m2[24m012  [4m1[24m778
[90m 6[39m  [4m2[24m013  [4m1[24m828
[90m 7[39m  [4m2[24m014  [4m1[24m829
[90m 8[39m  [4m2[24m015  [4m1[24m846
[90m 9[39m  [4m2[24m016  [4m1[24m828
[90m10[39m  [4m2[24m017  [4m1[24m893
[90m11[39m  [4m2[24m018  [4m1[24m914
[90m12[39m  [4m2[24m019  [4m2[24m169
[90m13[39m  [4m2[24m020  [4m2[24m204


In [65]:
state_leg_race <- state_leg_df%>%
    group_by(race_ethnicity == "White") %>%
    summarize(count = n())
print(state_leg_race)

`summarise()` ungrouping output (override with `.groups` argument)



[90m# A tibble: 2 x 2[39m
  `race_ethnicity == "White"` count
  [3m[90m<lgl>[39m[23m                       [3m[90m<int>[39m[23m
[90m1[39m FALSE                        [4m5[24m483
[90m2[39m TRUE                        [4m1[24m[4m9[24m057


In [66]:
state_leg_race <- state_leg_df%>%       #it's a little weird that "White" doesn't come up here?
group_by(race_ethnicity) %>%
    summarize(count = n())
print(state_leg_race)

`summarise()` ungrouping output (override with `.groups` argument)



[90m# A tibble: 21 x 2[39m
   race_ethnicity                                              count
   [3m[90m<chr>[39m[23m                                                       [3m[90m<int>[39m[23m
[90m 1[39m Asian/Pacific Islander                                        497
[90m 2[39m Black/African American                                       [4m3[24m412
[90m 3[39m Hispanic/Latina                                              [4m1[24m181
[90m 4[39m Middle Eastern/North African                                   18
[90m 5[39m Multiracial                                                    13
[90m 6[39m Multiracial, Black/African American, Asian/Pacific Islander     2
[90m 7[39m Multiracial, Black/African American, Hispanic/Latina           13
[90m 8[39m Multiracial, Black/African American, Native American           13
[90m 9[39m Multiracial, Hispanic/Latina, Asian/Pacific Islander           15
[90m10[39m Multiracial, Hispanic/Latina, Native American        

In [67]:
head(state_leg_df)

Unnamed: 0_level_0,id,year,first_name,middle_name,last_name,party,level,position,state,district,race_ethnicity
Unnamed: 0_level_1,<chr>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,566417k,2017,Kim,K.,Abbott,Democrat,State Legislative,State Representative,Montana - MT,83,White
2,566417k,2018,Kim,K.,Abbott,Democrat,State Legislative,State Representative,Montana - MT,83,White
3,566417k,2019,Kim,K.,Abbott,Democrat,State Legislative,State Representative,Montana - MT,83,White
4,566417k,2020,Kim,K.,Abbott,Democrat,State Legislative,State Representative,Montana - MT,83,White
5,14772c,2010,Robin,A.,Abbott,Democrat,State Legislative,State Representative,Virginia - VA,93,White
6,14772c,2011,Robin,A.,Abbott,Democrat,State Legislative,State Representative,Virginia - VA,93,White


In [76]:
states_northeast <- c("Connecticut - CT", "Delaware - DE", "Maine - ME", "Massachusetts - MA",
                      "New Hampshire - NH", "New Jersey - NJ", "New York - NY", "Pennsylvania - PA", 
                      "Rhode Island - RI", "Vermont - VT")
states_west <- c("Alaska - AK", "Arizona - AZ", "California - CA", "Colorado - CO", "Hawaii - HI", "Idaho - ID", "Nevada - NV", "Montana - MT",
                 "New Mexico - NM", "Oregon - OR", "Washington - WA", "Wyoming - WY")
states_midwest <- c("Indiana - IN", "Illinois - IL", "Iowa - IA", "Kansas - KS", "Michigan - MI", "Minnesota - MN", "Missouri - MO", 
                    "Nebraska - NE", "North Dakota - ND", "Ohio - OH", "South Dakota - SD", "Wisconsin - WI")
states_south <- c("Alabama - AL", "Arkansas - AR", 
                  "Florida - FL", "Georgia - GA", "Kentucky - KY", "Louisiana - LA", "Maryland - MD", "Mississippi - MS", 
                  "North Carolina - NC", "Oklahoma - OK", "South Carolina - SC", "Tennessee - TN", "Texas - TX", 
                  "Virginia - VA", "West Virginia - WV")


# Replace "dataset" with name of your dataset
state_leg_df <- state_leg_df %>%
  mutate(region = case_when(state %in% states_northeast ~ "Northeast",
                            state %in% states_west ~ "West",
                            state %in% states_midwest ~ "Midwest",
                            state %in% states_south ~ "South",
                            TRUE ~ "Other"))

head(state_leg_df)

Unnamed: 0_level_0,id,year,first_name,middle_name,last_name,party,level,position,state,district,race_ethnicity,region
Unnamed: 0_level_1,<chr>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,566417k,2017,Kim,K.,Abbott,Democrat,State Legislative,State Representative,Montana - MT,83,White,West
2,566417k,2018,Kim,K.,Abbott,Democrat,State Legislative,State Representative,Montana - MT,83,White,West
3,566417k,2019,Kim,K.,Abbott,Democrat,State Legislative,State Representative,Montana - MT,83,White,West
4,566417k,2020,Kim,K.,Abbott,Democrat,State Legislative,State Representative,Montana - MT,83,White,West
5,14772c,2010,Robin,A.,Abbott,Democrat,State Legislative,State Representative,Virginia - VA,93,White,South
6,14772c,2011,Robin,A.,Abbott,Democrat,State Legislative,State Representative,Virginia - VA,93,White,South


In [78]:
state_leg_region <- state_leg_df%>%
    group_by(region) %>%
    summarize(count = n())
print(state_leg_region)

`summarise()` ungrouping output (override with `.groups` argument)



[90m# A tibble: 5 x 2[39m
  region    count
  [3m[90m<chr>[39m[23m     [3m[90m<int>[39m[23m
[90m1[39m Midwest    [4m5[24m808
[90m2[39m Northeast  [4m7[24m313
[90m3[39m Other       286
[90m4[39m South      [4m6[24m285
[90m5[39m West       [4m4[24m848
