-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
1,431 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: No
AlwaysSaveHistory: No

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
# Meta-analysis of the rodent object-in-context task: Get flowchart numbers
# Written by Milou Sep

# Start from a clean workspace (kept from the original interactive workflow).
rm(list = ls())

library(dplyr)   # data-manipulation verbs used throughout this script
library(osfr)    # to interact with Open Science Framework
# instructions: https://github.com/CenterForOpenScience/osfr
# Authenticate to OSF (see: http://centerforopenscience.github.io/osfr/articles/auth.html).
# Via osf_auth("PAT") in the commentline. (note PAT can be derived from OSF)
library(readxl)  # to read the Excel data-extraction workbook
|
||
|
||
# retrieve OSF files ------------------------------------------------------
# Ensure the download target exists first: osf_download() errors when the
# directory given via `path` is missing.
dir.create("data", showWarnings = FALSE)

# Search hits
osf_retrieve_file("dvwhn") %>% osf_download(path = "data", conflicts = "overwrite") # search hits search string thesis MV
osf_retrieve_file("umz5k") %>% osf_download(path = "data", conflicts = "overwrite") # search hits new search string (25.5.20)
# Screening
osf_retrieve_file("j7sfm") %>% osf_download(path = "data", conflicts = "overwrite") # screening search string thesis MV
osf_retrieve_file("bz3sj") %>% osf_download(path = "data", conflicts = "overwrite") # screening new search string
# Data extraction file
osf_retrieve_file("qadch") %>% osf_download(path = "data", conflicts = "overwrite")
|
||
# Load data ---------------------------------------------------------------
# Search documents (PMIDs); read.table puts one PMID per row in column V1.
search1.thesis <- read.table("data/hits.search.thesis.MV.txt", quote = "\"", comment.char = "")
search2.meta <- read.table("data/hits.new.search.meta.oic.v25.5.20.txt", quote = "\"", comment.char = "")
# Screening documents (semicolon-separated; empty cells read as NA).
screeing.search1 <- read.csv2("data/Screening S1 thesis search PMIDs.csv", na.strings = c(""))
screeing.search2 <- read.csv2("data/Screening S2 new.in.new.search.PMIDs.csv", na.strings = c(""))
# Data extraction file (left-assignment `<-` instead of the original `->data`
# for consistency with the rest of the script).
data <- read_excel("data/280121_Data_Extraction_RoB.xlsx", sheet = "Extraction_converted", na = c("N/A", "#VALUE!"))
|
||
|
||
# Compare PMIDs -----------------------------------------------------------
# Left-assignment `<-` replaces the original `->` for readability.
# NOTE: %in%-based masks are kept on purpose (rather than setdiff/intersect)
# so that duplicated PMIDs -- if any -- are not silently dropped.
not.in.new <- search1.thesis$V1[!search1.thesis$V1 %in% search2.meta$V1] # only in thesis search
new.in.new <- search2.meta$V1[!search2.meta$V1 %in% search1.thesis$V1]   # only in new search
old.in.new <- search2.meta$V1[search2.meta$V1 %in% search1.thesis$V1]    # overlap of both searches

# Count hits and screened papers
nrow(search2.meta)     # 253
nrow(search1.thesis)   # 54
nrow(screeing.search1) # 54
length(new.in.new)     # 219
nrow(screeing.search2) # 219 (so correct)
length(not.in.new)     # 20
length(old.in.new)     # 34
# new in new + old in new
219 + 34 # = 253
# not in new + old in new
20 + 34 # = 54
|
||
|
||
# Counts for Flowchart ----------------------------------------------------
# Total screened records per search, restricted to the PMID sets above.
screeing.search1 %>%
  filter(PMID %in% old.in.new) %>%
  nrow() # 34
screeing.search2 %>%
  filter(PMID %in% new.in.new) %>%
  nrow() # 219
34 + 219 # = 253
|
||
|
||
# Counts search 1 (thesis) ------------------------------------------------
# str(screeing.search1)

# Included overall (restricted to records that also appear in the new search).
screeing.search1 %>%
  filter(PMID %in% old.in.new, inclusie_july2020 == "yes") %>%
  nrow() # 30 included

# Included without a full-text check.
screeing.search1 %>%
  filter(PMID %in% old.in.new) %>%
  filter(full.text.checked.july2020 == "no") %>% # nrow() # 26
  filter(inclusie_july2020 == "yes") %>%
  nrow() # 26 included without full text check

# Full-text checked records; of those, the inclusions.
screeing.search1 %>%
  filter(PMID %in% old.in.new) %>%
  filter(full.text.checked.july2020 == "yes") %>% # nrow() # 8 full text checked
  filter(inclusie_july2020 == "yes") # of that 4 included (so 4 excluded)

# Exclusions (reason for all: no OIC).
screeing.search1 %>%
  filter(PMID %in% old.in.new) %>%
  filter(inclusie_july2020 == "no?" | inclusie_july2020 == "no") # 4 excluded
|
||
|
||
# Counts search 2 (new) ---------------------------------------------------
# str(screeing.search2)

# Total included.
screeing.search2 %>%
  filter(PMID %in% new.in.new, final.inclusion.screening == "Yes") %>%
  nrow() # 10 included

# Included without full text check.
screeing.search2 %>% # View()
  filter(PMID %in% new.in.new, full.text.checked.MS == "no") %>% # nrow() # 140 full text NOT checked
  filter(final.inclusion.screening == "Yes") # of that 10 included

# Excluded without full text check in s2 (+ reasons for exclusion).
screeing.search2 %>% # View()
  filter(PMID %in% new.in.new, full.text.checked.MS == "no") %>%
  filter(final.inclusion.screening == "No") %>% # nrow() # 130
  mutate(Reason.for.exclusion = factor(Reason.for.exclusion)) %>%
  select(Reason.for.exclusion) %>%
  table()

# Included after full text check.
screeing.search2 %>%
  filter(PMID %in% new.in.new) %>%
  filter(full.text.checked.MS == "yes" | is.na(full.text.checked.MS)) %>% # nrow() # 79 full text checked
  filter(final.inclusion.screening == "Yes") # of that 0 included

# Excluded after full text check in s2 (+ reason for exclusion).
screeing.search2 %>%
  filter(PMID %in% new.in.new) %>%
  filter(full.text.checked.MS == "yes" | is.na(full.text.checked.MS)) %>%
  filter(final.inclusion.screening == "No") %>% # nrow() # 79 excluded
  mutate(Reason.for.exclusion = factor(Reason.for.exclusion)) %>%
  select(Reason.for.exclusion) %>%
  table()
|
||
|
||
|
||
# Unique inclusions thesis search + new search ----------------------------
30 + 10 # total 40 inclusions following screening

# Compare to numbers in data extraction file ------------------------------
data %>% distinct(PMID) %>% nrow() # 41 studies

# Difference inclusions vs data extraction: snowballing -------------------
inclusions.S1 <- screeing.search1 %>% filter(inclusie_july2020 == "yes") %>% select(PMID)
inclusions.S2 <- screeing.search2 %>% filter(final.inclusion.screening == "Yes") %>% select(PMID)
# Concatenate the PMID columns directly into one vector. The original
# `c(df1, df2) %>% unlist()` yielded the same values, but carried
# list-derived element names that nothing downstream uses.
inclusions <- c(inclusions.S1$PMID, inclusions.S2$PMID)

data %>% filter(!PMID %in% inclusions) # 1 identified via snowballing

# To check: all inclusions from screening files are in dataset
inclusions[!inclusions %in% data$PMID]
|
||
|
||
# Total inclusions --------------------------------------------------------
# of these 41 studies, 4 studies are included in the systematic review but
# excluded for the meta-analyses due to missing data. Also see the
# prepare_data.rmd script.

# save file local ---------------------------------------------------------
# Note these files are also on OSF (https://osf.io/d9eu4/)
# Create the output folder if needed; write.table() errors when the target
# directory does not exist.
dir.create("processed_data", showWarnings = FALSE)
write.table(not.in.new, file = "processed_data/not.in.new.search.csv", sep = ';', row.names = FALSE)
write.table(new.in.new, file = "processed_data/new.in.new.search.csv", sep = ';', row.names = FALSE)
write.table(old.in.new, file = "processed_data/old.in.new.search.csv", sep = ';', row.names = FALSE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
---
title: "Meta-analysis of the rodent object-in-context task: Random forest-based Meta-analysis (MetaForest)"
author: "Milou Sep"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output: html_document
---
|
||
# Prepare environment
```{r setup, include=FALSE}
# Start from a clean environment (original interactive workflow).
rm(list = ls())
# Packages
library(dplyr)
library(caret)      # for cross-validation
library(metaforest) # for exploratory analysis
# To check which packages are loaded:
# (.packages())
```
|
||
# Metaforest
https://www.rdocumentation.org/packages/metaforest/versions/0.1.3

## Data preparation
```{r metaforest preparation}
data6 <- readRDS("processed_data/data_with_effect_size.RDS")
str(data6)
# Create the dataset for MetaForest: drop identifiers, constant variables and
# alternative outcome measures that must not act as moderators.
datForest <- data6 %>%
  select(-c(PMID, EXP_group,
            SmellCntrl, # only 1 level, so non-informative
            nC,
            # other outcome variables
            DR, DR_A, DR_B, SEM, SEM_DR_A, SEM_DR_B, SD, label,
            no.Affiliation, # captures mostly the same variation as `PMID` variable (no.Affiliation: 29 levels, PMID: 37 levels)
            Program # captures mostly the same variation as in the 'scoring' variable.
  )) %>%
  droplevels()
# Sanity check: should be FALSE, MetaForest cannot handle missing values.
is.na(datForest) %>% any()
```
|
||
## Save data for WS follow-up plots
```{r save data WS plots}
# Re-attach sample sizes (nC) and PMID to the moderator set, matched on ID.
sample.sizes <- data6 %>% select(nC, ID, PMID)
datForest_for_WS_Plot <- full_join(sample.sizes, datForest, by = "ID")
saveRDS(datForest_for_WS_Plot, "processed_data/datForest_for_WS_Plot.RDS")
```
|
||
## MetaForest: Tuning
Note, tuning assumes that convergence is reached. To set-up metaforest, also see: https://www.rdocumentation.org/packages/metaforest/versions/0.1.3/topics/ModelInfo_mf
```{r metaforest tuning}
set.seed(94372)
ModelInfo_mf()$notes # information on method from MetaForest
# Set up 9-fold grouped cross-validation: groupKFold keeps all effect sizes
# of one study (ID) inside the same fold.
fit_control <- trainControl(method = "cv", number = 9,
                            index = groupKFold(datForest$ID, k = 9))
# Custom tuning grid for the three tuning parameters of MetaForest.
rf_grid <- expand.grid(whichweights = c("random", "fixed", "unif"),
                       mtry = c(2, 4, 6),
                       min.node.size = c(2, 4, 6))
```
|
||
## MetaForest: Train Model
Cross-validated clustered MetaForest
```{r MetaForest training}
set.seed(94372)
# Train the model
cv.mf.cluster <- train(y = datForest$yi,
                       x = datForest[names(datForest) != "yi"],
                       study = "ID",
                       method = ModelInfo_mf(), # the method from MetaForest (https://rdrr.io/cran/metaforest/man/MetaForest.html)
                       trControl = fit_control,
                       tuneGrid = rf_grid,
                       num.trees = 600,
                       data = datForest)
# Save the fitted model
saveRDS(cv.mf.cluster, file = "processed_data/fitted.MetaForest.RDS")
```

To load saved model directly
```{r eval=FALSE, include=FALSE}
cv.mf.cluster <- readRDS("processed_data/fitted.MetaForest.RDS")
```
|
||
## MetaForest: Inspect model

### Tuning parameters
#### Show model: The final values used for the model were whichweights = unif, mtry = 4 and min.node.size = 2
```{r inspect model 1}
cv.mf.cluster
```
#### Plot grid parameter models
```{r inspect model 2}
plot(cv.mf.cluster)
```

### Model Summary
```{r inspect model 3}
summary(cv.mf.cluster)
```

### R(oob)
https://towardsdatascience.com/what-is-out-of-bag-oob-score-in-random-forest-a7fa23d710
```{r inspect model 4}
cv.mf.cluster$finalModel
```

### Cross validated R2 with SD
details for the "best" model: unif, mtry = 4 and min.node.size = 2
```{r inspect model 5}
# Logical mask for the row of the results grid that matches the best model.
best.row <- cv.mf.cluster$results$whichweights == "unif" &
  cv.mf.cluster$results$mtry == 4 &
  cv.mf.cluster$results$min.node.size == 2
cv.mf.cluster$results[which(best.row), ]
```
|
||
### Convergence plot
Good model convergence
```{r convergence plot}
jpeg(file = "results/metaforest_convergencePlot.jpeg", height = 3000, width = 3000, res = 500)
plot(cv.mf.cluster$finalModel)
dev.off()
```

## Variable importance (based on metaForest)
```{r variable importance plot}
tiff(file = "results/metaforest_varImportance.tiff", height = 1500, width = 1500, pointsize = 8, res = 300)
# Horizontal line + label mark the cut-off of the top 50% most important
# moderators in the importance plot.
VarImpPlot(cv.mf.cluster$finalModel) +
  geom_hline(yintercept = 11.5) +
  annotate(geom = "text", x = 0.0017, y = 12.5, label = "Top 50%", color = "darkblue")
dev.off()
```
|
||
## Export important variables
```{r metaforest varimportance scores}
imp.scores <- varImp(cv.mf.cluster)
# Export variable importance scores (ordered) to csv
imp <- as.data.frame(imp.scores$importance)
imp <- data.frame(overall = imp$Overall,
                  names = rownames(imp))
write.csv2(imp[order(imp$overall, decreasing = TRUE), ],
           "results/important_variables_metaforest.csv")
# Select top 50% most important variables.
# `%/%` keeps `n` a whole number: identical to the original `nrow(.) / 2`
# when the moderator count is even, and avoids passing a fractional `n` to
# slice_head() when it is odd.
important.vars <- imp[order(imp$overall, decreasing = TRUE), ] %>%
  slice_head(n = nrow(.) %/% 2)
variable <- important.vars$names
variable
# Save top 50% most important variables
saveRDS(variable, "processed_data/important_variables.RDS")
```
Oops, something went wrong.