In [None]:
#install.packages("reactable")
#library(reactable)
library(ggplot2)
library(tidyverse)
library(broom)
library(dplyr)
library(knitr)
# Load the officeholder race/ethnicity CSV into a data frame.
womenwinning_df <- read.csv(file = "officeholders_race_ethnicity.csv")

### Initial exploring

In [None]:
# Preview the first rows of the raw data.
head(womenwinning_df)

In [None]:
# Frequency of each race/ethnicity value; head() shows only the first six.
table_of_race <- table(womenwinning_df[, "race_ethnicity", drop = FALSE])
head(table_of_race)

In [None]:
# Frequency of each office level.
table_of_levels <- table(womenwinning_df[, "level", drop = FALSE])
head(table_of_levels)

In [None]:
# Frequency of each position title.
table_of_positions <- table(womenwinning_df[, "position", drop = FALSE])
head(table_of_positions)

### Making a dataframe with only the State Legislators and Removing Guam and Puerto Rico for consistency

In [None]:
# Keep only state-legislative rows, leaving out Guam and Puerto Rico.
# filter() combines its comma-separated conditions with a logical AND.
state_leg_df <- womenwinning_df %>%
  filter(
    level == "State Legislative",
    state != "Guam - GU",
    state != "Puerto Rico - PR"
  )

head(state_leg_df)

In [None]:
# Number of rows and columns left after the filter.
dim(state_leg_df)

In [None]:
# Position titles remaining after the filter, printed as a cleanliness check.
tbl_of_positions2 <- table(state_leg_df[, "position", drop = FALSE])
print(tbl_of_positions2)

### Looking at Women by State, Year, Race, Region

In [None]:
# Raw number of rows per state.
# TODO (from the original author): convert this to a string, and then
# percentages could be computed too.
tbl_by_state <- table(state_leg_df[, "state", drop = FALSE])
print(tbl_by_state)

In [None]:
# Number of rows per year.
state_leg_yr <- state_leg_df %>%
  group_by(year) %>%
  summarise(count = n())
print(state_leg_yr)

In [None]:
# Count rows split by whether race_ethnicity is exactly "White".
# The grouping column is named after the expression itself.
state_leg_race <- state_leg_df %>%
  group_by(race_ethnicity == "White") %>%
  summarise(count = n())
print(state_leg_race)

In [None]:
# Number of rows for every distinct race_ethnicity value.
state_leg_race <- state_leg_df %>%
  group_by(race_ethnicity) %>%
  summarize(count = n())
# NOTE(review): a tibble with more than 20 rows prints only its first 10, so
# values late in the sort order (such as "White") can be cut off. That is
# presumably why "White" did not seem to come up here; n = Inf prints every
# row so no category is hidden.
print(state_leg_race, n = Inf)


In [None]:
# Preview the filtered data again (first six rows).
head(state_leg_df, n = 6L)

In [None]:
# State labels, in the "Name - XX" form used by the `state` column, grouped
# into four regions.
states_northeast <- c(
  "Connecticut - CT", "Maine - ME", "Massachusetts - MA",
  "New Hampshire - NH", "New Jersey - NJ", "New York - NY",
  "Pennsylvania - PA", "Rhode Island - RI", "Vermont - VT"
)
states_west <- c(
  "Alaska - AK", "Arizona - AZ", "California - CA", "Colorado - CO",
  "Hawaii - HI", "Idaho - ID", "Nevada - NV", "Montana - MT",
  "New Mexico - NM", "Oregon - OR", "Utah - UT", "Washington - WA",
  "Wyoming - WY"
)
states_midwest <- c(
  "Indiana - IN", "Illinois - IL", "Iowa - IA", "Kansas - KS",
  "Michigan - MI", "Minnesota - MN", "Missouri - MO", "Nebraska - NE",
  "North Dakota - ND", "Ohio - OH", "South Dakota - SD", "Wisconsin - WI"
)
states_south <- c(
  "Alabama - AL", "Arkansas - AR", "Delaware - DE", "Florida - FL",
  "Georgia - GA", "Kentucky - KY", "Louisiana - LA", "Maryland - MD",
  "Mississippi - MS", "North Carolina - NC", "Oklahoma - OK",
  "South Carolina - SC", "Tennessee - TN", "Texas - TX", "Virginia - VA",
  "West Virginia - WV"
)

# Tag every row with its region; any state label not listed above falls
# through to "Other".
state_leg_df <- state_leg_df %>%
  mutate(
    region = case_when(
      state %in% states_northeast ~ "Northeast",
      state %in% states_west ~ "West",
      state %in% states_midwest ~ "Midwest",
      state %in% states_south ~ "South",
      TRUE ~ "Other"
    )
  )

head(state_leg_df)



In [None]:

# Number of rows per region, printed as a table and drawn as a pie chart.
state_leg_region_df <- state_leg_df %>%
  group_by(region) %>%
  summarise(count = n())
print(state_leg_region_df)

pie(
  x = state_leg_region_df[["count"]],
  labels = state_leg_region_df[["region"]]
)


### Looking at Women Winning by State, Year, Race, Region from 2016-present

In [None]:

# Per-state, per-year summary for years after 2015: the row count plus the
# share of rows in each race/ethnicity category and in each major party.
# NOTE(review): despite "party" in the name, this table is keyed by state
# and year.
state_leg_party_2016 <- state_leg_df %>%
  filter(year > 2015) %>%
  group_by(state, year) %>%
  summarize(
    count = n(),
    prop_white = mean(race_ethnicity == "White"),
    prop_nonwhite = mean(race_ethnicity != "White"),
    prop_black = mean(race_ethnicity == "Black/African American"),
    prop_hispanic = mean(race_ethnicity == "Hispanic/Latina"),
    prop_mideastern = mean(race_ethnicity == "Middle Eastern/North African"),
    prop_asian = mean(race_ethnicity == "Asian/Pacific Islander"),
    prop_repub = mean(party == "Republican"),
    prop_dem = mean(party == "Democrat"),
    # Without this, summarize() leaves the result grouped by `state` and
    # emits a message about the leftover grouping.
    .groups = "drop"
  )
head(state_leg_party_2016)

In [None]:
print(kable(state_leg_party_2016))

In [None]:
# Per-region, per-year summary for years after 2015: the row count plus the
# share of rows in each race/ethnicity category and in each major party.
state_leg_region_2016 <- state_leg_df %>%
  filter(year > 2015) %>%
  group_by(region, year) %>%
  summarize(
    count = n(),
    prop_white = mean(race_ethnicity == "White"),
    prop_nonwhite = mean(race_ethnicity != "White"),
    prop_black = mean(race_ethnicity == "Black/African American"),
    prop_hispanic = mean(race_ethnicity == "Hispanic/Latina"),
    prop_mideastern = mean(race_ethnicity == "Middle Eastern/North African"),
    prop_asian = mean(race_ethnicity == "Asian/Pacific Islander"),
    prop_repub = mean(party == "Republican"),
    prop_dem = mean(party == "Democrat"),
    # Without this, summarize() leaves the result grouped by `region` and
    # emits a message about the leftover grouping.
    .groups = "drop"
  )
head(state_leg_region_2016)

In [None]:
# Count per region and year since 2016: jittered points (geom_jitter adds a
# small random displacement) plus one linear trend line per region. The plot
# is then saved to a PNG.
ggplot(
  data = state_leg_region_2016,
  mapping = aes(x = year, y = count, color = region)
) +
  geom_jitter(alpha = 1) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "Year", y = "Number of Women in State Legislature")
ggsave(
  filename = "Number of Women in State Legislature by Region 2016-present.png",
  width = 5,
  height = 5
)



In [None]:
# Share of rows with race_ethnicity "White", per region and year since 2016,
# with a linear trend line per region; saved to a PNG.
ggplot(
  data = state_leg_region_2016,
  mapping = aes(x = year, y = prop_white, color = region)
) +
  geom_jitter(alpha = 1) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Year",
    y = "Proportion of White Women of the Women in State Legislatures"
  )
ggsave(
  filename = "Proportion White Women in State Legislature by Region 2016-present.png",
  width = 5,
  height = 5
)

In [None]:
# Same plot for the share of rows whose race_ethnicity is not "White".
ggplot(
  data = state_leg_region_2016,
  mapping = aes(x = year, y = prop_nonwhite, color = region)
) +
  geom_jitter(alpha = 1) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Year",
    y = "Proportion of Non-White Women of the Women in State Legislatures"
  )
ggsave(
  filename = "Proportion Non-White Women in State Legislature by Region 2016-present.png",
  width = 5,
  height = 5
)

In [None]:
# Share of "Black/African American" rows per region and year since 2016,
# with a linear trend line per region (displayed only, not saved).
ggplot(
  data = state_leg_region_2016,
  mapping = aes(x = year, y = prop_black, color = region)
) +
  geom_jitter(alpha = 1) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Year",
    y = "Proportion of Black/African-American Women of Women in State Legislatures"
  )

In [None]:
# Same plot for the share of "Hispanic/Latina" rows.
ggplot(
  data = state_leg_region_2016,
  mapping = aes(x = year, y = prop_hispanic, color = region)
) +
  geom_jitter(alpha = 1) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Year",
    y = "Proportion of Hispanic/Latina Women of the Women in State Legislatures"
  )

In [None]:
# Share of "Middle Eastern/North African" rows per region and year since
# 2016; points only, no trend line.
ggplot(
  data = state_leg_region_2016,
  mapping = aes(x = year, y = prop_mideastern, color = region)
) +
  geom_jitter(alpha = 1) +
  labs(
    x = "Year",
    y = "Proportion of Middle Eastern Women of the Women in State Legislatures"
  )

In [None]:
# Share of "Asian/Pacific Islander" rows per region and year since 2016;
# points only, no trend line.
ggplot(
  data = state_leg_region_2016,
  mapping = aes(x = year, y = prop_asian, color = region)
) +
  geom_jitter(alpha = 1) +
  labs(
    x = "Year",
    y = "Proportion of Asian/Pacific Islander Women of the Women in State Legislatures"
  )

In [None]:
# Share of "Democrat" rows per region and year since 2016, with a linear
# trend line per region; saved to a PNG.
ggplot(
  data = state_leg_region_2016,
  mapping = aes(x = year, y = prop_dem, color = region)
) +
  geom_jitter(alpha = 1) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Year",
    y = "Proportion of Democratic Women of the Women in State Legislatures"
  )
ggsave(
  filename = "Proportion Democratic Women in State Legislature by Region 2016-present.png",
  width = 5,
  height = 5
)

In [None]:
# Same plot for the share of "Republican" rows.
ggplot(
  data = state_leg_region_2016,
  mapping = aes(x = year, y = prop_repub, color = region)
) +
  geom_jitter(alpha = 1) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Year",
    y = "Proportion of Republican Women of the Women in State Legislatures"
  )
ggsave(
  filename = "Proportion Republican Women in State Legislature by Region 2016-present.png",
  width = 5,
  height = 5
)

In [None]:
# Full region-by-year summary rendered as a text table.
print(kable(state_leg_region_2016))

In [None]:
# Per-party, per-year summary for years after 2015: the row count plus the
# share of rows in each race/ethnicity category (women winning by party
# affiliation and race since 2016).
state_leg_partyaff_2016 <- state_leg_df %>%
  filter(year > 2015) %>%
  group_by(party, year) %>%
  summarize(
    count = n(),
    prop_white = mean(race_ethnicity == "White"),
    prop_nonwhite = mean(race_ethnicity != "White"),
    prop_black = mean(race_ethnicity == "Black/African American"),
    prop_hispanic = mean(race_ethnicity == "Hispanic/Latina"),
    prop_mideastern = mean(race_ethnicity == "Middle Eastern/North African"),
    prop_asian = mean(race_ethnicity == "Asian/Pacific Islander"),
    # Without this, summarize() leaves the result grouped by `party` and
    # emits a message about the leftover grouping.
    .groups = "drop"
  )

In [None]:
print(kable(state_leg_partyaff_2016))

In [None]:
# Region/year row with the largest count since 2016 (first one if tied).
state_leg_region_2016[which.max(state_leg_region_2016[["count"]]), ]

In [None]:
# Region/year row with the smallest count since 2016.
state_leg_region_2016[which.min(state_leg_region_2016[["count"]]), ]

In [None]:
# State/year row with the largest count since 2016.
state_leg_party_2016[which.max(state_leg_party_2016[["count"]]), ]

In [None]:
# State/year row with the smallest count since 2016.
state_leg_party_2016[which.min(state_leg_party_2016[["count"]]), ]

In [None]:
# State/year row with the highest non-"White" share since 2016.
state_leg_party_2016[which.max(state_leg_party_2016[["prop_nonwhite"]]), ]

In [None]:
# State/year row with the lowest non-"White" share since 2016.
state_leg_party_2016[which.min(state_leg_party_2016[["prop_nonwhite"]]), ]

In [None]:
# Export the party-by-year summary since 2016.
write.csv(x = state_leg_partyaff_2016, file = "statelegpartyaff_2016.csv")

In [None]:
# Export the region-by-year summary since 2016.
write.csv(x = state_leg_region_2016, file = "statelegregion_2016.csv")

In [None]:
# Export the state-by-year summary since 2016.
write.csv(x = state_leg_party_2016, file = "statelegparty_2016.csv")

In [None]:
# Region summaries for 2017 only (women serving after the 2016 election).
state_leg_region_in2017 <- state_leg_region_2016 %>%
  filter(year == 2017)

# Region where the most women were serving.
state_leg_region_in2017[which.max(state_leg_region_in2017[["count"]]), ]



In [None]:
# Region where the fewest women were serving.
state_leg_region_in2017[which.min(state_leg_region_in2017[["count"]]), ]

In [None]:
# Regions with the highest and the lowest non-"White" share in 2017.
state_leg_region_in2017[which.max(state_leg_region_in2017[["prop_nonwhite"]]), ]
state_leg_region_in2017[which.min(state_leg_region_in2017[["prop_nonwhite"]]), ]

In [None]:
# Region summaries for 2019 only (women serving after the 2018 midterms).
state_leg_region_in2019 <- state_leg_region_2016 %>%
  filter(year == 2019)

# Region where the most women were serving.
state_leg_region_in2019[which.max(state_leg_region_in2019[["count"]]), ]

In [None]:
# Region where the fewest women were serving.
state_leg_region_in2019[which.min(state_leg_region_in2019[["count"]]), ]

In [None]:
# Regions with the highest and the lowest non-"White" share in 2019.
state_leg_region_in2019[which.max(state_leg_region_in2019[["prop_nonwhite"]]), ]
state_leg_region_in2019[which.min(state_leg_region_in2019[["prop_nonwhite"]]), ]

### Investigating 2008-present Trends

In [None]:
# Per-party, per-year summary over every year present in state_leg_df (the
# original notes describe this as 2008 onward): the row count plus the share
# of rows in each race/ethnicity category.
state_leg_partyaff <- state_leg_df %>%
  group_by(party, year) %>%
  summarize(
    count = n(),
    prop_white = mean(race_ethnicity == "White"),
    prop_nonwhite = mean(race_ethnicity != "White"),
    prop_black = mean(race_ethnicity == "Black/African American"),
    prop_hispanic = mean(race_ethnicity == "Hispanic/Latina"),
    prop_mideastern = mean(race_ethnicity == "Middle Eastern/North African"),
    prop_asian = mean(race_ethnicity == "Asian/Pacific Islander"),
    # Without this, summarize() leaves the result grouped by `party` and
    # emits a message about the leftover grouping.
    .groups = "drop"
  )



In [None]:
print(kable(state_leg_partyaff))

In [None]:
# Per-region, per-year summary over every year present in state_leg_df (the
# original notes describe this as 2008 onward): the row count plus the share
# of rows in each race/ethnicity category and in each major party.
state_leg_region <- state_leg_df %>%
  group_by(region, year) %>%
  summarize(
    count = n(),
    prop_white = mean(race_ethnicity == "White"),
    prop_nonwhite = mean(race_ethnicity != "White"),
    prop_black = mean(race_ethnicity == "Black/African American"),
    prop_hispanic = mean(race_ethnicity == "Hispanic/Latina"),
    prop_mideastern = mean(race_ethnicity == "Middle Eastern/North African"),
    prop_asian = mean(race_ethnicity == "Asian/Pacific Islander"),
    prop_repub = mean(party == "Republican"),
    prop_dem = mean(party == "Democrat"),
    # Without this, summarize() leaves the result grouped by `region` and
    # emits a message about the leftover grouping.
    .groups = "drop"
  )
print(kable(state_leg_region))

In [None]:
# Count per region and year: jittered points plus one linear trend line per
# region; saved to a PNG.
ggplot(
  data = state_leg_region,
  mapping = aes(x = year, y = count, color = region)
) +
  geom_jitter(alpha = 1) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "Year", y = "Number of Women in State Legislature")
ggsave(
  filename = "Scatterplot of Number of Women in State Legislature by Region.png",
  width = 5,
  height = 5
)


In [None]:
# The same counts as stacked bars, one segment per region; saved to a PNG.
# geom_col() draws bars whose heights are the y values as given.
ggplot(
  data = state_leg_region,
  mapping = aes(x = year, y = count, color = region, fill = region)
) +
  geom_col() +
  labs(x = "Year", y = "Number of Women in State Legislature")
ggsave(
  filename = "Bar Graph of Number of Women in State Legislature by Region.png",
  width = 5,
  height = 5
)

In [None]:
# Share of "White" rows per region and year, with a linear trend line per
# region; saved to a PNG.
ggplot(
  data = state_leg_region,
  mapping = aes(x = year, y = prop_white, color = region)
) +
  geom_jitter(alpha = 1) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Year",
    y = "Proportion of White Women of Women in State Legislature"
  )
ggsave(
  filename = "Proportion of White Women of Women in State Legislature by Region.png",
  width = 5,
  height = 5
)

In [None]:
# Restrict the party-by-year summary to Democrats and Republicans.
state_leg_partyaff_dorr <- state_leg_partyaff %>%
  filter(party %in% c("Democrat", "Republican"))

# Count per party and year, with a linear trend line per party; saved to a
# PNG.
ggplot(
  data = state_leg_partyaff_dorr,
  mapping = aes(x = year, y = count, color = party)
) +
  geom_jitter(alpha = 1) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "Year", y = "Number of Women in State Legislature")
ggsave(
  filename = "Number of Women in State Legislature by Party.png",
  width = 5,
  height = 5
)

In [None]:
# Share of "White" rows per party and year, with a linear trend line per
# party; saved to a PNG.
ggplot(
  data = state_leg_partyaff_dorr,
  mapping = aes(x = year, y = prop_white, color = party)
) +
  geom_jitter(alpha = 1) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Year",
    y = "Proportion of White Women of Women in State Legislature"
  )
ggsave(
  filename = "Proportion of White Women of the Women in State Legislature by Party.png",
  width = 5,
  height = 5
)

In [None]:
# Same plot for the share of rows whose race_ethnicity is not "White".
ggplot(
  data = state_leg_partyaff_dorr,
  mapping = aes(x = year, y = prop_nonwhite, color = party)
) +
  geom_jitter(alpha = 1) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Year",
    y = "Proportion of Non-White Women of Women in State Legislature"
  )
ggsave(
  filename = "Proportion of Non-White Women of the Women in State Legislature by Party.png",
  width = 5,
  height = 5
)

In [None]:
# Same plot for the share of "Black/African American" rows.
ggplot(
  data = state_leg_partyaff_dorr,
  mapping = aes(x = year, y = prop_black, color = party)
) +
  geom_jitter(alpha = 1) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Year",
    y = "Proportion of Black Women of Women in State Legislature"
  )
ggsave(
  filename = "Proportion of Black Women of the Women in State Legislature by Party.png",
  width = 5,
  height = 5
)

In [None]:
# Same plot for the share of "Hispanic/Latina" rows.
ggplot(
  data = state_leg_partyaff_dorr,
  mapping = aes(x = year, y = prop_hispanic, color = party)
) +
  geom_jitter(alpha = 1) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Year",
    y = "Proportion of Hispanic Women of Women in State Legislature"
  )
ggsave(
  filename = "Proportion of Hispanic Women of the Women in State Legislature by Party.png",
  width = 5,
  height = 5
)

In [None]:
# 2017 rows of state_leg_party_2016.
# NOTE(review): the "partyaff_DorR" name notwithstanding, nothing here
# filters on party; the lookups below are described in terms of states.
state_leg_partyaff_DorR_2017 <- state_leg_party_2016 %>%
  filter(year == 2017)

# Which state had the most women serving in 2017, and which the fewest?
state_leg_partyaff_DorR_2017[
  which.max(state_leg_partyaff_DorR_2017[["count"]]),
]
state_leg_partyaff_DorR_2017[
  which.min(state_leg_partyaff_DorR_2017[["count"]]),
]

In [None]:
# Rows with the highest and the lowest non-"White" share in 2017.
state_leg_partyaff_DorR_2017[
  which.max(state_leg_partyaff_DorR_2017[["prop_nonwhite"]]),
]
state_leg_partyaff_DorR_2017[
  which.min(state_leg_partyaff_DorR_2017[["prop_nonwhite"]]),
]

In [None]:
# 2019 rows of state_leg_party_2016.
# NOTE(review): the "partyaff_DorR" name notwithstanding, nothing here
# filters on party; the lookups below are described in terms of states.
state_leg_partyaff_DorR_2019 <- state_leg_party_2016 %>%
  filter(year == 2019)

# Which state had the most women serving in 2019, and which the fewest?
state_leg_partyaff_DorR_2019[
  which.max(state_leg_partyaff_DorR_2019[["count"]]),
]
state_leg_partyaff_DorR_2019[
  which.min(state_leg_partyaff_DorR_2019[["count"]]),
]

In [None]:
# Rows with the highest and the lowest non-"White" share in 2019.
state_leg_partyaff_DorR_2019[
  which.max(state_leg_partyaff_DorR_2019[["prop_nonwhite"]]),
]
state_leg_partyaff_DorR_2019[
  which.min(state_leg_partyaff_DorR_2019[["prop_nonwhite"]]),
]