-
Notifications
You must be signed in to change notification settings - Fork 0
/
figure_2_barriers_overall.R
244 lines (222 loc) · 11.1 KB
/
figure_2_barriers_overall.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# Create "overall frequency of barriers" figure for Survey 2,
# modeled on Figure 4 of Williams et al. (2019) using figure_04.R

# prerequisites ----
# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later with a less helpful message.
# The former `rm(list = ls())` was dropped: it only removes objects (loaded
# packages and options stay as they are), so it does not provide a clean
# state. Run this script in a fresh R session instead.
library(tidyverse)
library(ggthemes)
library(infer)

# load the `Merged_Data` set produced in Chapter 0
# NOTE(review): the `...12`, `...22`, etc. suffixes on the column names
# selected further down look like readr's automatic repair of duplicated
# headers -- confirm against the CSV before renaming anything there.
Merged_Data <- read_csv("Merged_Data_Anonymous.csv")
# Pull the yes/no "I lack ..." barrier columns out of Merged_Data, give them
# short display names, and reshape to long format with one `Barrier` column
# (the short name) and one `Response` column (the answer).
barrier_df <- Merged_Data %>%
  select(all_of(c(
    # short display name = column name as it appears in Merged_Data
    "I lack expertise in bioinformatics" =
      "I lack expertise in bioinformatics",
    "I lack experience teaching bioinformatics" =
      "I lack experience in teaching bioinformatics....12",
    "I lack time to restructure course(s)" =
      "I lack time to restructure course(s)",
    "I lack autonomy to add course content" =
      "I lack the autonomy to add content to my course(s)....14",
    "I lack space to add course content" =
      "I lack space in my course(s) to add content....15",
    "I lack curricular materials" =
      "I lack curricular materials....16",
    "I lack technical resources" =
      "I lack appropriate technical resources (internet access/software/hardware/IT support)....17",
    "My students lack technical resources" =
      "My student population lacks access to appropriate technical resources (internet access/software/hardware/IT support)....18",
    "My students lack prerequisite skills" =
      "My student population lacks prerequisite skills.",
    "My students lack interest" =
      "My student population lacks interest in bioinformatics....20"
  ))) %>%
  pivot_longer(
    cols = everything(),
    names_to = "Barrier",
    values_to = "Response"
  ) %>%
  # a missing answer is recorded as disagreement with the statement
  mutate(
    Response = replace_na(Response, "I do NOT agree with this statement")
  )
# Is there an association between barrier and response?
chisq_test(barrier_df, Response ~ Barrier)

# Which barriers are cited more often than the others?
# Pooled share of each response across all barriers; these shares serve as
# the expected proportions in the per-barrier tests below.
barrier_props_df <- barrier_df %>%
  count(Response, name = "count") %>%
  mutate(proportion = count / sum(count))
barrier_props <- barrier_props_df$proportion

# Goodness-of-fit test per barrier: observed responses for that barrier
# against the pooled proportions.
barrier_names <- unique(barrier_df$Barrier)
test_results_bar <- map_df(barrier_names, function(one_barrier) {
  one_barrier_rows <- filter(barrier_df, Barrier == one_barrier)
  chisq_test(one_barrier_rows, response = Response, p = barrier_props)
})

# Label each result row with its barrier (rows come back in `barrier_names`
# order), adjust the p-values with the "fdr" method, and flag adjusted
# p-values below 0.01 with a star.
test_results_bar <- test_results_bar %>%
  mutate(
    Barrier = barrier_names,
    adj.p_val = p.adjust(p_value, method = "fdr"),
    `adj. p-val` = if_else(adj.p_val < 0.01, "*", "")
  ) %>%
  select(Barrier, everything())

sig_results_bar <- filter(test_results_bar, adj.p_val < 0.01)
# Pull the "level of challenge" versions of the "I lack ..." columns out of
# Merged_Data, give them the same short display names as the yes/no columns,
# and reshape to long format with columns `Barrier` and `Level of Challenge`.
Challenge_df <- Merged_Data %>%
  select(all_of(c(
    # short display name = column name as it appears in Merged_Data
    "I lack expertise in bioinformatics" =
      "I lack expertise in bioinformatics.",
    "I lack experience teaching bioinformatics" =
      "I lack experience in teaching bioinformatics....22",
    "I lack time to restructure course(s)" =
      "I lack time to restructure course(s).",
    "I lack autonomy to add course content" =
      "I lack the autonomy to add content to my course(s)....24",
    "I lack space to add course content" =
      "I lack space in my course(s) to add content....25",
    "I lack curricular materials" =
      "I lack curricular materials....26",
    "I lack technical resources" =
      "I lack appropriate technical resources (internet access/software/hardware/IT support)....27",
    "My students lack technical resources" =
      "My student population lacks access to appropriate technical resources (internet access/software/hardware/IT support)....28",
    "My students lack prerequisite skills" =
      "My student population lacks prerequisite skills",
    "My students lack interest" =
      "My student population lacks interest in bioinformatics....30"
  ))) %>%
  pivot_longer(
    cols = everything(),
    names_to = "Barrier",
    values_to = "Level of Challenge"
  ) %>%
  # a missing answer is recorded as "Not a challenge"; the factor levels run
  # from most to least severe
  mutate(
    `Level of Challenge` = factor(
      replace_na(`Level of Challenge`, "Not a challenge"),
      levels = c(
        "Severe challenge",
        "Moderate challenge",
        "Minor challenge",
        "Not a challenge"
      )
    )
  )
# Is there an association between barrier and level of challenge?
chisq_test(Challenge_df, `Level of Challenge` ~ Barrier)

# Which barriers are a significantly greater challenge?
# Pooled share of each challenge level across all barriers; these shares
# serve as the expected proportions in the per-barrier tests below.
challenge_props_df <- Challenge_df %>%
  count(`Level of Challenge`, name = "count") %>%
  mutate(proportion = count / sum(count))
challenge_props <- challenge_props_df$proportion

# Goodness-of-fit test per barrier: observed challenge levels for that
# barrier against the pooled proportions.
challenge_barrier_names <- unique(Challenge_df$Barrier)
test_results <- map_df(challenge_barrier_names, function(one_barrier) {
  one_barrier_rows <- filter(Challenge_df, Barrier == one_barrier)
  chisq_test(
    one_barrier_rows,
    response = `Level of Challenge`,
    p = challenge_props
  )
})

# Label each result row with its barrier (rows come back in
# `challenge_barrier_names` order), adjust the p-values with the "fdr"
# method, and build a printable label for adjusted p-values below 0.01.
test_results <- test_results %>%
  mutate(
    Barrier = challenge_barrier_names,
    adj.p_val = p.adjust(p_value, method = "fdr"),
    `adj. p-val` = if_else(
      adj.p_val < 0.01,
      paste0("adj. p-val = ", as.character(signif(adj.p_val, 2))),
      ""
    )
  ) %>%
  select(Barrier, everything())

sig_results <- filter(test_results, adj.p_val < 0.01)
# plot challenge levels for each barrier
# Challenge_df %>%
# ggplot(aes(x=`Level of Challenge`)) +
# geom_bar() +
# labs(y = "Number of responses", x= "") +
# geom_text(data = test_results,
# mapping = aes(label = `adj. p-val`, x = 4, y = 400), size = 6) +
# coord_flip() +
# facet_wrap(vars(Barrier), ncol = 2) +
# theme_fivethirtyeight(base_size = 20, base_family = "sans") +
# theme(panel.background = element_rect(fill = "white")) +
# theme(plot.background = element_rect(fill = "white")) +
# theme(axis.title.x=element_blank(),
# axis.ticks.x=element_blank()) +
# theme(strip.text.x = element_text(size = 18, face = "bold"))+
# theme(axis.line = element_line(colour = "black",
# linewidth = 0.5, linetype = "solid"))+
# theme(panel.grid.major.y = element_blank(),
# panel.grid.minor.y = element_blank()) +
# theme(axis.line.x = element_blank()) +
# theme(axis.text.y = element_text(hjust = 1.1))
# Challenge-level percentages for all barriers ----

# Share of each challenge level within each barrier. The grouping is needed
# only so that sum(count) is taken per barrier; it is dropped afterwards so
# that no grouped tibble leaks into the objects derived below.
challenge_percents <- Challenge_df %>%
  group_by(Barrier) %>%
  count(`Level of Challenge`, name = "count") %>%
  mutate(proportion = count / sum(count)) %>%
  ungroup()

# Per-barrier share of "Not a challenge" answers, largest share first.
# This ordering determines the bar order in the figure.
challenge_percents_Not <- challenge_percents %>%
  filter(`Level of Challenge` == "Not a challenge") %>%
  count(Barrier, wt = proportion, name = "Not_pct") %>%
  arrange(desc(Not_pct))

# Per-barrier combined share of "Moderate" and "Severe" answers, smallest
# share first. Only referenced by a commented-out alternative bar ordering.
challenge_percents_ModSev <- challenge_percents %>%
  filter(
    `Level of Challenge` %in% c("Moderate challenge", "Severe challenge")
  ) %>%
  count(Barrier, wt = proportion, name = "Sum_Mod_Sev") %>%
  arrange(Sum_Mod_Sev)
# Fill palette for the flipped, reordered bar chart: four greys listed from
# darkest to lightest. "#DCDCDC" is kept on record as a commented-out
# alternative for the last (lightest) entry.
greys <- c("#595959", "#778899", "#a6a6a6", "#ededed")
# plot `Level of Challenge` for each barrier
# challenge_percents %>%
# ggplot(aes(x=`Level of Challenge`,
# y=proportion,
# # fill=`Level of Challenge` # this conflicts with geom_text()
# )) +
# geom_bar(stat = "Identity") +
# # light bars to indicate overall proportions
# # geom_bar(data = challenge_props_df,
# # aes(x=`Level of Challenge`,
# # y=proportion, fill = "grey", alpha = 0.3),
# # stat = "Identity") +
# labs(y = "percentage of respondents", x= "")+
# facet_wrap(vars(factor(Barrier)), ncol = 2) +
# scale_y_continuous(labels = scales::percent) +
# theme_gray(base_size = 20, base_family = "sans") +
# theme(line = element_line(colour = "black"),
# rect = element_rect(fill = "white", linetype = 0, colour = NA))+
# theme(legend.position = "None") +
# theme(panel.grid.major =
# element_line(colour = "grey"),
# panel.grid.minor = element_blank(),
# # unfortunately, can't mimic subtitles
# plot.title = element_text(hjust = 0, size = rel(1.5), face = "bold"),
# plot.margin = unit(c(1, 1, 1, 1), "lines"),
# strip.background = element_rect())+
# theme(axis.title.x=element_blank(),
# axis.ticks.x=element_blank()) +
# theme(strip.text.x = element_text(size = 18, face = "bold"))+
# theme(plot.background = element_rect(fill = "white"))+
# theme(panel.background = element_rect(fill = "white"))+
# theme(panel.grid.major.y = element_blank())+
# theme(axis.line = element_line(colour = "black", linewidth = 0.5))+
# geom_text(data = test_results,
# mapping = aes(label = `adj. p-val`, x = 4, y = 0.75), size = 6) +
# coord_flip()+
# # scale_fill_manual(values = greys, labels= unique(challenge_percents$`Level of Challenge`))+
# theme(axis.text.y = element_text(hjust = 1.1)) +
# theme(panel.grid.minor=element_blank())
#
# ggsave() the last plot displayed
# ggsave("figure_02_survey2.png",
# units = "in",
# height = 15,
# width = 18)
# Plot `Level of Challenge` for each barrier as one stacked bar per barrier
# and save it as a PNG.

# Bar order: barriers sorted by their "Not a challenge" share, as arranged in
# challenge_percents_Not.
# (alternative ordering: levels = challenge_percents_ModSev$Barrier)
barrier_order <- unique(challenge_percents_Not$Barrier)

# Tie every grey to its challenge level BY NAME. The previous code passed
# `labels = unique(...)`, i.e. labels in order of first appearance in the
# data; if the first barrier in the table lacked a level, the legend text
# would no longer match the colours. With named values and default labels
# the legend always shows the level that each colour actually encodes.
challenge_fill <- setNames(
  greys,
  levels(challenge_percents$`Level of Challenge`)
)

# Note: the former `scale_x_discrete(limits = levels(...$Barrier))` was
# removed. `Barrier` is a character column, so levels() returned NULL and the
# call had no effect; bar order is controlled by `barrier_order` alone.
barriers_plot <- challenge_percents %>%
  ggplot(aes(
    x = factor(Barrier, levels = barrier_order),
    y = proportion,
    fill = `Level of Challenge`
  )) +
  geom_col() +
  labs(y = "", x = "") +
  scale_y_continuous(n.breaks = 6, labels = scales::percent) +
  scale_fill_manual(values = challenge_fill) +
  guides(fill = guide_legend(reverse = TRUE)) +
  # optional significance markers, currently disabled:
  # geom_text(data = test_results_bar,
  #           mapping = aes(label = `adj. p-val`, x = Barrier, y = 0.75),
  #           size = 6) +
  coord_flip() +
  theme_gray(base_size = 24, base_family = "sans") +
  theme(
    line = element_line(colour = "black"),
    rect = element_rect(fill = "white", linetype = 0, colour = NA),
    legend.text = element_text(size = 20),
    legend.background = element_rect(),
    legend.position = "bottom",
    legend.title = element_blank(),
    panel.grid.major = element_line(colour = "grey"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    # unfortunately, can't mimic subtitles
    plot.title = element_text(hjust = 0, size = rel(1.5), face = "bold"),
    plot.margin = unit(c(1, 1, 1, 1), "lines"),
    strip.background = element_rect(),
    axis.title.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.line = element_line(colour = "black", linewidth = 0.5),
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white")
  )

# display the figure (auto-printed at top level, as before)
barriers_plot

# Save this specific plot object instead of relying on whichever plot
# happened to be created last.
ggsave("figure_02alt_survey2.png",
  plot = barriers_plot,
  units = "in",
  height = 8,
  width = 18
)