# unidue_include.r
# Forked from OpenAPC/openapc-de.
require(plyr)
require(RJSONIO)
require(RCurl)
# Load the unidue records and enrich them with the publisher, journal and
# ISSN metadata that doi_fetch() returns for each DOI.
unidue <- read.csv("data/unidue/unidue13.csv", header = TRUE, sep = ",")

# Only records that carry a DOI can be looked up.
unidue.doi <- unidue[!is.na(unidue$doi), ]

# doi_fetch() comes from R/doi_fetch.r; ldply() row-binds its per-DOI results
# into one data frame (.inform = TRUE asks plyr for informative error
# messages).
source("R/doi_fetch.r")
tt.doi <- ldply(unidue.doi$doi, doi_fetch, .inform = TRUE)

# transform: for every input row, the row of tt.doi holding the same DOI
# (NA where the lookup returned nothing)
matches <- match(unidue$doi, tt.doi$doi)
has.match <- !is.na(matches)
hit.rows <- matches[has.match]

# Factor levels to character. Every column that is overwritten below is
# converted, including issn.2: assigning a string into a factor column yields
# NA (with an "invalid factor level" warning) whenever the string is not
# already one of its levels. lapply() keeps one list element per column,
# whereas sapply() would collapse the columns into a matrix first.
text.cols <- c("Publisher", "Journal", "issn.1", "issn.2")
unidue[text.cols] <- lapply(unidue[text.cols], as.character)

# publisher
unidue$Publisher[has.match] <- as.character(tt.doi$publisher[hit.rows])
# journal
unidue$Journal[has.match] <- as.character(tt.doi$journal[hit.rows])
# issn
unidue$issn.1[has.match] <- as.character(tt.doi$ISSN.1[hit.rows])
unidue$issn.2[has.match] <- as.character(tt.doi$ISSN.2[hit.rows])

# Flag the records whose DOI came back from the lookup. Ambiguous CrossRef
# publisher and journal names still need a manual clean-up afterwards.
unidue$indexed_in_CrossRef <- unidue$doi %in% tt.doi$doi
# get pmid and pmcid with rebi
# install_github() expects the repository as "owner/repo". Install rebi only
# when it is missing rather than on every run; library() (unlike require())
# stops immediately if a package cannot be loaded.
library(devtools)
if (!requireNamespace("rebi", quietly = TRUE)) {
  install_github("ropensci/rebi")
}
library(rebi)

# Query real DOIs only: a missing DOI would otherwise be sent as "doi:NA".
my.doi <- unidue$doi[!is.na(unidue$doi)]
my.pmc <- ldply(
  my.doi,
  function(x) search_publications(query = paste0("doi:", x))
)
my.pmc <- my.pmc[, c("pmid", "pmcid", "doi")]

# dirty handle NULL: after as.character() absent ids show up as the string
# "NULL" (presumably pmcid arrives as a list column - TODO confirm).
# %in% never yields NA, so NA entries cannot break the assignment the way a
# logical subscript built with == would.
my.pmc$pmcid <- as.character(my.pmc$pmcid)
my.pmc$pmcid[my.pmc$pmcid %in% "NULL"] <- NA
my.pmc$pmid <- unlist(my.pmc$pmid)

# Left join on doi: keep every unidue record, with NA ids where the search
# returned nothing.
my.tmp <- merge(unidue, my.pmc, by.x = "doi", by.y = "doi", all.x = TRUE)

# empty columns
my.tmp$record.id <- NA
my.tmp$base.url <- NA
my.tmp$ut <- NA
# a bit of sorting: select the columns in the order they are appended in
my.df <- my.tmp[, c(
  "Institution", "Period", "EURO", "Publisher",
  "Journal", "issn.1", "issn.2", "doi",
  "indexed_in_CrossRef", "pmid", "pmcid",
  "record.id", "base.url", "ut"
)]

# Read the existing data set and drop its first column.
# NOTE(review): the file is written back below with row.names = FALSE, so this
# drop presumably targets a leading index column - confirm the stored file
# still has one before re-running.
my.all <- read.csv("data/apc_de.csv", header = TRUE, sep = ",")
my.all <- my.all[, -c(1)]

# The rename is purely positional, so stop before touching the data set when
# the column counts disagree.
stopifnot(ncol(my.df) == ncol(my.all))
colnames(my.df) <- colnames(my.all)

# Append the new records and overwrite the data set in place.
my.all.t <- rbind(my.all, my.df)
write.csv(my.all.t, "data/apc_de.csv", row.names = FALSE)
# make a sankey for Universität Duisburg-Essen
# rCharts is used below but is not attached anywhere else in this script.
library(rCharts)

institution <- "Universität Duisburg-Essen"

# %in% treats a missing institution as "no match"; == would inject all-NA
# rows into the subset.
uniduean <- my.df[my.df$uni %in% institution, ]
# select columns
uniduean <- uniduean[, c("Publisher", "Journal", "EURO")]
# rename to the source/target/value layout used for the plot data
colnames(uniduean) <- c("source", "target", "value")
# as.numeric() on a factor returns the level codes rather than the amounts,
# so convert a factor to character first.
if (is.factor(uniduean$value)) {
  uniduean$value <- as.character(uniduean$value)
}
uniduean$value <- as.numeric(uniduean$value)

# get affiliation: one institution -> publisher link per publisher, carrying
# the summed value of that publisher's rows
tt <- as.data.frame(as.matrix(tapply(uniduean$value, uniduean$source, sum)))
tt$target <- rownames(tt)
tt$source <- rep(institution, times = nrow(tt))
colnames(tt) <- c("value", "target", "source")
# rbind() matches data-frame columns by name, so the differing column order
# of tt and uniduean is fine.
uniduean.sub <- rbind(tt, uniduean)

# now we finally have the data in the form we need
sankeyPlot <- rCharts$new()
sankeyPlot$setLib("http://timelyportfolio.github.io/rCharts_d3_sankey")
sankeyPlot$set(
  data = uniduean.sub,
  nodeWidth = 15,
  nodePadding = 10,
  layout = 32,
  width = 960,
  height = 800,
  unit = "EURO",
  # The title previously named Leibniz Universität Hannover, although the
  # data is filtered for the institution above.
  title = paste0(
    "Author fees paid by ", institution, " Publication Fund since 2013"
  )
)
sankeyPlot