/
tongfen_ca_deprecated.R
190 lines (168 loc) · 8.08 KB
/
tongfen_ca_deprecated.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#' Canadian census CT level tongfen
#'
#' @description
#' \lifecycle{deprecated}
#'
#' Grab variables from several censuses on a common geography. Requires sf package to be available
#' Will return CT level data
#'
#' @param regions census region list, should be inclusive list of GeoUIDs across censuses
#' @param vectors List of cancensus vectors, can come from different census years
#' @param geo_format geographic format for returned data, 'sf' for sf format and `NA``
#' @param na.rm remove NA values when aggregating up values, default is `TRUE`
#' @param quiet suppress download progress output, default is `FALSE`
#' @param refresh optional character, refresh data cache for this call
#' @return dataframe with census variables on common geography
#' @export
get_tongfen_census_ct <- function(regions,
vectors,
geo_format = NA,
na.rm = TRUE,
quiet = TRUE,
refresh = FALSE){
lifecycle::deprecate_warn("0.2.0", "get_tongfen_census_ct()", "get_tongfen_ca_census()")
#warning("This method is deprecated, use `get_tongfen_ca_census(regions,vectors,level='CT', method = 'identifier', tolerance = 500, ...)` instead")
meta <- meta_for_ca_census_vectors(vectors)
base_geo <- ifelse(is.na(geo_format),NULL,meta$geo_dataset %>% unique %>% sort %>% first )
get_tongfen_ca_census(regions = regions,
meta = meta,
level = 'CT',
method = 'identifier',
tolerance = 500,
base_geo = base_geo,
na.rm = na.rm,
quiet = quiet,
refresh = refresh)
}
#' Canadian Census DA level tongfen
#'
#' @description
#' \lifecycle{deprecated}
#'
#' Grab variables from several censuses on a common geography. Requires sf package to be available
#' Will return CT level data
#'
#' @param regions census region list, should be inclusive list of GeoUIDs across censuses
#' @param vectors List of cancensus vectors, can come from different census years
#' @param geo_format `NA` to only get the variables or 'sf' to also get geographic data
#' @param na.rm logical, determines how NA values should be treated when aggregating variables
#' @param use_cache logical, passed to `cancensus::get_census` to regulate caching
#' @param quiet suppress download progress output, default is `TRUE`
#' @return dataframe with variables on common geography
#' @export
get_tongfen_census_da <- function(regions,vectors,geo_format=NA,use_cache=TRUE,na.rm=TRUE,quiet=TRUE) {
lifecycle::deprecate_warn("0.2.0", "get_tongfen_census_da()", "get_tongfen_ca_census()")
meta <- meta_for_ca_census_vectors(vectors)
base_geo <- ifelse(is.na(geo_format),NULL,meta$geo_dataset %>% unique %>% sort %>% first )
get_tongfen_ca_census(regions = regions,
meta = meta,
level = 'DA',
method = 'statcan',
base_geo = base_geo,
na.rm = na.rm,
quiet = quiet,
refresh = !use_cache)
}
#' Canadian census CT level tongfen via DA correspondence
#'
#' @description
#' \lifecycle{deprecated}
#'
#' Grab variables from several censuses on a common geography. Requires sf package to be available
#' Will return CT level data
#'
#' @param regions census region list, should be inclusive list of GeoUIDs across censuses
#' @param vectors List of cancensus vectors, can come from different census years
#' @param geo_format `NA` to only get the variables or 'sf' to also get geographic data
#' @param use_cache logical, passed to `cancensus::get_census` to regulate caching
#' @param quiet suppress download progress output, default is `TRUE`
#' @param na.rm logical, determines how NA values should be treated when aggregating variables
#' @return dataframe with variables on common geography
#' @export
get_tongfen_ca_census_ct_from_da <- function(regions,vectors,geo_format=NA,use_cache=TRUE,na.rm=TRUE,quiet=TRUE) {
lifecycle::deprecate_warn("0.2.0", "get_tongfen_census_da()", "get_tongfen_census_ca()")
meta <- meta_for_ca_census_vectors(vectors)
base_geo <- ifelse(is.na(geo_format),NULL,meta$geo_dataset %>% unique %>% sort %>% first )
get_tongfen_ca_census(regions = regions,
meta = meta,
level = 'CT',
method = 'statcan',
base_geo = base_geo,
na.rm = na.rm,
quiet = quiet,
refresh = !use_cache)
}
#' Canadian census CT level tongfen via identifier matching
#'
#' @description
#' \lifecycle{deprecated}
#'
#' Aggregate variables to common CTs, returns data2 on new tiling matching data1 geography
#' @param data1 cancensus CT level datatset for year1 < year2 to serve as base for common geography
#' @param data2 cancensus CT level datatset for year2 to be aggregated to common geography
#' @param data2_sum_vars vector of variable names to by summed up when aggregating geographies
#' @param data2_group_vars optional vector of grouping variables
#' @param na.rm optional parameter to remove NA values when summing, default = `TRUE`
#' @export
tongfen_ca_census_ct <- function(data1,data2,data2_sum_vars,data2_group_vars=c(),na.rm=TRUE) {
lifecycle::deprecate_warn("0.2.0", "tongfen_ca_census_ct()", "tongfen_aggregate()")
cts_1 <- data1$GeoUID
cts_2 <- data2$GeoUID
cts_diff_1 <- setdiff(cts_1,cts_2) %>% sort
cts_diff_2 <- setdiff(cts_2,cts_1) %>% sort
d<-st_intersection(
data2 %>% filter(.data$GeoUID %in% cts_diff_2) %>%
rename(GeoUID2=.data$GeoUID) %>%
select(.data$GeoUID2) %>% mutate(area2=st_area(.data$geometry)),
data1 %>% filter(.data$GeoUID %in% cts_diff_1) %>%
select(.data$GeoUID) %>% mutate(area=st_area(.data$geometry))
)
d <- d %>% mutate(area3=st_area(.data$geometry)) %>%
mutate(ratio=as.numeric(.data$area3/.data$area2)) %>%
filter(.data$ratio>0.1) %>%
arrange(.data$ratio)
dd<- d %>%
sf::st_set_geometry(NULL) %>%
group_by(.data$GeoUID) %>%
summarize(ratio=sum(.data$ratio)/n(),n=n())
if(dd %>% filter(.data$n<=1) %>% nrow >0) {base::stop("problem with computing common ct data")}
ct_translation <- lapply(split(d, d$GeoUID), function(x) x$GeoUID2 %>% unique)
ct_translation2 <- lapply(split(d, d$GeoUID2), function(x) x$GeoUID %>% unique)
new2 <- data2 %>%
filter(.data$GeoUID %in% cts_diff_2) %>%
mutate(GeoUID2=.data$GeoUID) %>%
mutate(GeoUID=as.character(ct_translation2[.data$GeoUID2])) %>%
group_by(!!!c("GeoUID",data2_group_vars) %>% purrr::map(as.name) %>% unlist)
nnew <- new2 %>% summarize_at(data2_sum_vars,function(x)sum(x,na.rm=na.rm))
data_2 <- rbind(data2 %>% filter(!(.data$GeoUID %in% cts_diff_2)) %>%
select("GeoUID",data2_group_vars,data2_sum_vars), nnew)
return(data_2)
}
#' Get StatCan DA or DB level correspondence file
#'
#' @description
#' \lifecycle{deprecated}
#' Joins the StatCan correspodence files for several census years
#'
#' @param years list of census years
#' @param level geographic level, DA or DB
#' @param refresh reload the correspondence files, default is `FALSE`
#' @return tibble with correspondence table`spanning all years
get_correspondence_ca_census_for <- function(years,level,refresh=FALSE){
#if (length(years)!=2) stop("Sorry, right now this only works for two years")
years<-as.integer(years)
all_years=seq(min(years),max(years),5)[-1]
d <- get_single_correspondence_ca_census_for(all_years[1],level,refresh) %>%
rename(!!paste0("flag",all_years[1]):=.data$flag)
all_years=all_years[-1]
while (length(all_years)>0) {
d <- left_join(d,get_single_correspondence_ca_census_for(all_years[1],level,refresh) %>%
rename(!!paste0("flag",all_years[1]):=.data$flag))
all_years=all_years[-1]
}
dd<-d %>% select_if(grepl(years %>% paste0(collapse = "|"),names(.))) %>%
select_if(!grepl("flag",names(.))) %>%
unique
ddd <- get_tongfen_correspondence(dd)
ddd
}