-
Notifications
You must be signed in to change notification settings - Fork 2
/
utils.r
83 lines (70 loc) · 2.64 KB
/
utils.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#' Check data availability for an individual source
#'
#' Looks up the filing for `ein` in `year` and reports whether data from the
#' requested source can be found for it.
#'
#' @param ein An Employer Identification Number (EIN).
#' @param year A year in which a form was filed. The default value is 2019.
#' @param source The data source. One of "irs", "website", "twitter", or
#'   "facebook". When omitted, the first option ("irs") is used. Any other
#'   value raises an error.
#' @return A data frame that contains four columns: `EIN`, `Source` (the data
#'   source), `Availability`, and `Year` (the `year` argument that was
#'   checked). `Availability` is a dummy variable. 1 = data exist.
#'   0 = data don't exist.
#' @export
#'
check_data_availability <- function(ein, year = 2019, source = c("irs", "website", "twitter", "facebook")) {
  # Collapse the enumerated default to a single string and reject unknown
  # sources up front, so every comparison below is a scalar test.
  source <- match.arg(source)

  # All branches report the same four columns; only the availability differs.
  make_row <- function(availability) {
    data.frame(
      "EIN" = ein,
      "Source" = source,
      "Availability" = availability,
      "Year" = year
    )
  }

  # No filing URL for this EIN/year: nothing can be available, whatever the
  # source.
  if (is.null(get_aws_url(ein, year))) {
    return(make_row(0))
  }

  form <- get_990(ein = ein, year)

  if (source == "irs") {
    return(make_row(if (inherits(form, "XMLNode")) 1 else 0))
  }

  website <- get_value_990(form, "website")

  # ifelse() is kept for the website-based checks because the length of the
  # values returned by the helpers is not visible from here.
  if (source == "website") {
    return(make_row(ifelse(is.na(website), 0, 1)))
  }

  # "twitter" and "facebook" share the same check: a website must be listed
  # and a handle must be found on it. The handle lookup only runs when a
  # website is present.
  # NOTE(review): the "facebook" source reuses the Twitter handle lookup; no
  # Facebook-specific helper is visible here -- confirm this is intended.
  make_row(
    ifelse(
      is.na(website),
      0,
      ifelse(is.na(find_twitter_handle_from_org_page(website)), 0, 1)
    )
  )
}
#' Check data availability across several sources
#'
#' Calls `check_data_availability()` once for each entry of `sources` and
#' row-binds the per-source results into a single data frame.
#'
#' @param ein An Employer Identification Number (EIN).
#' @param year A year in which a form was filed. The default value is 2019.
#' @param sources The data sources to check. The default value is `c("irs", "website")`.
#' @importFrom purrr map_dfr
#' @return A data frame with the rows returned for each source, in the order
#'   given by `sources`, and four columns: `EIN`, `Source`, `Availability`,
#'   and `Year`. `Availability` is a dummy variable. 1 = data exist.
#'   0 = data don't exist.
#' @export
#'
check_data_irs_web <- function(ein, year = 2019, sources = c("irs", "website")) {
  map_dfr(
    sources,
    function(src) check_data_availability(ein, year, src)
  )
}