Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
cdcr-population-data/analysis/measuring_overcrowding.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
241 lines (222 sloc)
8.58 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(plotly) | |
plotly_barchart_of_pcts <- function(df, path_to_save=NULL, overwrite=FALSE) { | |
num_institutions <- length(unique(df$institution_name)) | |
font_for_labels <- list(color = '#cdc5bf', | |
family = 'sans serif', | |
size = 12) | |
in_order <- df %>% arrange(percent_occupied) | |
order_level <- in_order$institution_name | |
df$factor_name <- factor(df$institution_name, levels = order_level) | |
# This is probably more confusing than it needs to be, but basically we want the damn thing to be in order based | |
# on the `percent_occupied` value. This does that. | |
df <- df %>% arrange(factor_name) | |
p <- df %>% | |
plot_ly() %>% | |
add_trace( | |
x = ~percent_occupied, | |
y = ~factor_name, | |
type = "bar", | |
hoverinfo = 'text', | |
text = ~paste( | |
'Institution: ', | |
institution_name, | |
'<br>', | |
'Capacity: ', designed_capacity, ', Actual: ', total_population, | |
'<br>', | |
percent_occupied, | |
'% occupied', | |
sep = '' | |
) | |
) %>% | |
add_annotations( | |
# All the way to the left | |
x = 0, | |
y = ~institution_name, | |
text = ~institution_name, | |
# See https://github.com/plotly/plotly.js/blob/master/src/components/annotations/attributes.js#L349 | |
xanchor = 'left', | |
font = font_for_labels, | |
showarrow = FALSE | |
) %>% | |
# Include a label showing what the 137.5% bar means | |
add_annotations( | |
x = 137.5, | |
y = num_institutions - 1, | |
text = '137.5% of designed capacity', | |
xref = "x", | |
yref = "y", | |
showarrow = TRUE, | |
arrowhead = 4, | |
arrowsize = .5, | |
ax = 20, | |
ay = -40 | |
) %>% | |
layout( | |
title = 'CA State Prison Population Overcrowding, Dec 2017', | |
xaxis = list(title = "Percent Occupied"), | |
yaxis = list(title = "", | |
zeroline = FALSE, | |
showline = FALSE, | |
showticklabels = FALSE, | |
showgrid = FALSE), | |
barmode = 'group', | |
# Include a vertical line showing the 137.5% threshold | |
shapes = list(type=line, x0=137.5, y0=0, x1=137.5, y1=num_institutions) | |
) | |
if (!is.null(path_to_save) && overwrite) { | |
api_create(p, filename = path_to_save) | |
} | |
} | |
plotly_scatter <- function(df, path_to_save=NULL, overwrite=FALSE) { | |
# Used to set axis limits, we want a square grid | |
axes_limit <- max(df$total_population) * 1.1 | |
p <- plot_ly( | |
data = df, | |
x = ~designed_capacity, | |
y = ~total_population, | |
type = 'scatter', | |
name = 'Actual Population', | |
hoverinfo = 'text', | |
text = ~paste( | |
'Institution: ', | |
institution_name, | |
'<br>', | |
'Capacity: ', designed_capacity, ', Actual: ', total_population, | |
'<br>', | |
percent_occupied, | |
'% occupied', | |
sep = '' | |
) | |
) %>% | |
# Disable hovering over the lines, it's just confusing - we only want in enabled for points | |
add_trace(y = ~overcrowded_limit, mode = 'lines', name = '"Overcrowding" Threshold', line = list(color = 'red'), hoverinfo='none') %>% | |
add_trace(y = ~designed_capacity, mode = 'lines', name = 'Population = Capacity', hoverinfo='none') %>% | |
# Styling stuff | |
layout( | |
title = 'Prison Population vs. Capacities, CA Dec 2017', | |
xaxis = list(title = "Capacity of Institution", range = c(0, axes_limit)), | |
yaxis = list(title = "Actual Population in Institution", range = c(0, axes_limit)) | |
) | |
if (!is.null(path_to_save) && overwrite) { | |
api_create(p, filename = path_to_save) | |
} | |
} | |
example_distributions <- function() { | |
institution_name <- c('A', 'B', 'C', 'D', 'E') | |
designed_capacity <- c(100, 100, 100, 100, 100) | |
total_population <- c(120, 120, 120, 120, 120) | |
balanced_example <- data.frame(institution_name, designed_capacity, total_population) | |
print(paste("Aggregate crowding:", sum(balanced_example$total_population) / sum(balanced_example$designed_capacity))) | |
total_population <- c(100, 100, 100, 100, 200) | |
unbalanced_example <- data.frame(institution_name, designed_capacity, total_population) | |
print(paste("Aggregate crowding:", sum(unbalanced_example$total_population) / sum(unbalanced_example$designed_capacity))) | |
} | |
calc_pct_in_overcrowded <- function(df) { | |
end_of_year_pct_in_overcrowded <- df %>% | |
filter(month == '12') %>% | |
group_by(year) %>% | |
summarise( | |
# Total number of people | |
population_across_all = sum(total_population), | |
designed_cap_across_all = sum(designed_capacity), | |
num_prisons_exceeding_limit = sum(ifelse(num_exceeding_limit > 0, 1, 0)), | |
total_num_prisons = n(), | |
# Number of people in a prison that is exceeding the 137.5% limit | |
total_in_prison_exceeding_limit = sum(ifelse(num_exceeding_limit > 0, total_population, 0)), | |
# Number of people greater than the 137.5% limit. E.g. if designed cap is 100, 137.5 is limit, and prison has | |
# 150 people, then this stat would be 150 - 137.5 = 12.5. If there were 12.5 fewer people in this prison, then | |
# it wouldn't be technically "overcrowded" | |
total_excession = sum(ifelse(num_exceeding_limit > 0, num_exceeding_limit, 0)), | |
pct_overpopulated = (total_excession / population_across_all) * 100, | |
pct_of_population_in_an_overcrowded = (total_in_prison_exceeding_limit / population_across_all) * 100, | |
aggregate_pct_of_capacity = population_across_all / designed_cap_across_all | |
) | |
return(end_of_year_pct_in_overcrowded) | |
} | |
plot_pct_in_overcrowded <- function(df, path_to_save=NULL, overwrite=FALSE) { | |
p <- plot_ly( | |
data = df %>% filter(year > 2000), | |
x = ~year, | |
y = ~pct_of_population_in_an_overcrowded, | |
type = 'scatter', | |
mode = 'lines', | |
hoverinfo = 'text', | |
text = ~paste( | |
'End of ', | |
year, | |
'<br>', | |
num_prisons_exceeding_limit, ' out of ', total_num_prisons, ' exceeded 137.5% desgined capacity', | |
'<br>', | |
# Thanks to https://stackoverflow.com/a/12135122 | |
format(round(pct_of_population_in_an_overcrowded, 2), nsmall = 2), | |
'% of prison population lived in an overcrowded prison', | |
sep = '' | |
) | |
) %>% | |
# Styling stuff | |
layout( | |
title = 'Percent of CA Prison Population Living in an Overcrowded Prison', | |
xaxis = list(title = "Year"), | |
yaxis = list(title = "% of people in prisons exceeding 137.5% threshold", range=c(0, 101)) | |
) | |
if (!is.null(path_to_save) && overwrite) { | |
api_create(p, filename = path_to_save) | |
} | |
} | |
plot_pct_exceeding_overcrowding <- function(df, path_to_save=NULL, overwrite=FALSE) { | |
p <- plot_ly( | |
data = df %>% filter(year > 2000), | |
x = ~year, | |
y = ~pct_overpopulated, | |
type = 'scatter', | |
mode = 'lines', | |
hoverinfo = 'text', | |
text = ~paste( | |
'End of ', | |
year, | |
'<br>', | |
num_prisons_exceeding_limit, ' out of ', total_num_prisons, ' exceeded 137.5% desgined capacity', | |
'<br>', | |
# Thanks to https://stackoverflow.com/a/12135122 | |
format(round(pct_overpopulated, 2), nsmall = 2), | |
'% of prison population is exceeding 137.5% limit', | |
sep = '' | |
) | |
) %>% | |
# Styling stuff | |
layout( | |
title = 'Percent of Prison Population Exceeding 137.5%', | |
xaxis = list(title = "Year"), | |
yaxis = list(title = "% of people exceeding 137.5% limit", range = c(0, max(df$pct_overpopulated))) | |
) | |
if (!is.null(path_to_save) && overwrite) { | |
api_create(p, filename = path_to_save) | |
} | |
} | |
summary_stats <- function(df) { | |
total_prisons <- nrow(df) | |
above_137 <- df %>% filter(percent_occupied > 137.5) | |
num_above_137 <- nrow(above_137) | |
print(paste(num_above_137, ' out of ', total_prisons, ' were above 137.5% ', num_above_137 / total_prisons, '%', sep='')) | |
total_in_overcrowded <- sum(above_137$total_population) | |
total_in_all <- sum(df$total_population) | |
print(paste(total_in_overcrowded, ' out of ', total_in_all, ' were in an overcrowded prison, or ', total_in_overcrowded / total_in_all, '%', sep='')) | |
} | |
prep_data <- function() { | |
data <- read.csv('data/monthly_cdcr_population.csv') | |
with_stats <- data %>% | |
mutate( | |
overcrowded_limit = 1.375 * designed_capacity, | |
num_exceeding_design = total_population - designed_capacity, | |
num_exceeding_limit = total_population - overcrowded_limit | |
) | |
return(with_stats) | |
} | |
data <- prep_data() | |
end_of_2017 <- data %>% filter(year == '2017') %>% filter(month == '12') | |
summary_stats(end_of_2017) | |
plotly_barchart_of_pcts(end_of_2017, 'overcrowding-barchart-pcts') | |
plotly_scatter(end_of_2017, 'overcrowding-scatter-end-of-2017') | |
overcrowding_stats <- calc_pct_in_overcrowded(data) | |
plot_pct_in_overcrowded(overcrowding_stats, 'overcrowding-pct-in-overcrowded') | |
plot_pct_exceeding_overcrowding(overcrowding_stats, 'overcrowding-pct-exceeding-limit') |