This repository has been archived by the owner on Jan 25, 2023. It is now read-only.
/
auto_style.R
251 lines (181 loc) · 9.12 KB
/
auto_style.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# Functions in this file are used to attempt to auto-detect styles from the information in the table
#' Use the data type of the columns to choose an automatic Excel format
#' Number formats are only applied to body cells
#'
#' Note this reads currency styling from the styles defined [here](https://github.com/moj-analytical-services/xltabr/blob/dev/inst/extdata/number_formats.xlsx?raw=true)
#' @param tab a table object
#' @param overrides a list containing any manual overrides where the user wants to provide their own style name
#' @example auto_style_number_formatting(overrides = list("colname1" = "currency1"))
#' @export
auto_style_number_formatting <- function(tab, overrides = list()) {
# Want to add number style to meta_col_ on body only - tab$body$meta_col_
col_classes <- sapply(tab$body$body_df_to_write, class)
# This lookup table coverts
path <- system.file("extdata", "number_format_defaults.csv", package = "xltabr" )
lookup_df <- read.csv(path, stringsAsFactors = FALSE)
# Convert to a named vector that can be used as a lookup
lookup <- lookup_df$style_name
names(lookup) <- lookup_df$class
# Iterate through body columns applying lookup or override if exists
body_cols <- names(tab$body$body_df_to_write)
for (this_col_name in body_cols) {
if (this_col_name %in% names(overrides)) {
this_style <- overrides[[this_col_name]]
} else {
this_style <- lookup[col_classes[[this_col_name]][1]]
if (is.na(this_style)) {
stop(paste0("Trying to autoformat column of class ", col_classes[[this_col_name]], "but no style defined in number_format_defaults.csv"))
}
}
if (is_null_or_blank(tab$body$meta_col_[this_col_name])) {
tab$body$meta_col_[this_col_name] <- this_style
} else {
tab$body$meta_col_[this_col_name] <- paste(tab$body$meta_col_[this_col_name], lookup[this_col_name], sep = "|")
}
}
tab
}
# Uses the presence of '(all)' in the leftmost columns of data to detect that these
# columns are really left headers rather than body colummns
auto_style_body_rows <- function(tab, indent = FALSE, keyword = "(all)") {
# If headers haven't been provided by the user, attempt to autodetect them
if (is.null(tab$body$left_header_colnames)) {
tab <- auto_detect_left_headers(tab)
}
# Autodetect the 'summary level' e.g. header 1 is most prominent, header 2 next etc.
tab <- auto_detect_body_title_level(tab, keyword)
tab
}
# Auto detect which of the columns are left headers
auto_detect_left_headers <- function(tab, keyword = "(all)") {
# Looking to write tab$body$left_header_colnames
# These must be character columns - stop if you hit a non character column
# First find all leftmost character columns, then iterate from right to left, finding the first column with keyword in it.
# This is the last left_header
col_classes <- sapply(tab$body$body_df_to_write, class)
rightmost_character <- min(which(col_classes != "character")) -1
rightmost_character_cols <- rightmost_character:1
found <- FALSE
for (col in rightmost_character_cols) {
this_col <- tab$body$body_df_to_write[,col]
if (any(keyword == this_col)) {
found <- TRUE
break
}
}
# TODO: this is not robust when multiple columns have the same name
if (found) {
left_col_names <- names(tab$body$body_df_to_write)[1:col]
} else {
left_col_names <- NULL
}
tab$body$left_header_colnames <- left_col_names
tab
}
# For title level
# Note we assume there are a maximum of 5 title levels
get_inv_title_count_title <- function(left_headers_df, keyword) {
# +-------+-------+-------+-----------+-------------+--------------+
# | col1 | col2 | col3 | all_count | title_level | indent_level |
# +-------+-------+-------+-----------+-------------+--------------+
# | (all) | (all) | (all) | 3 | title_3 | |
# | (all) | (all) | - | 2 | title_4 | indent_1 |
# | (all) | - | - | 1 | title_5 | indent_2 |
# | - | - | - | 0 | | indent_3 |
# +-------+-------+-------+-----------+-------------+--------------+
to_count <- (left_headers_df == keyword)
all_count <- rowSums(to_count)
#rows with higher (all) count should have a lower title value because title_1 is the most emphasized
all_count_inv <- 6 - all_count
all_count_inv[all_count_inv == 6] <- NA
all_count_inv
}
# For indent level
get_inv_title_count_indent <- function(left_headers_df, keyword) {
# +-------+-------+-------+-----------+-------------+--------------+
# | col1 | col2 | col3 | all_count | title_level | indent_level |
# +-------+-------+-------+-----------+-------------+--------------+
# | (all) | (all) | (all) | 3 | title_3 | |
# | (all) | (all) | - | 2 | title_4 | indent_1 |
# | (all) | - | - | 1 | title_5 | indent_2 |
# | - | - | - | 0 | | indent_3 |
# +-------+-------+-------+-----------+-------------+--------------+
to_count <- (left_headers_df == keyword)
all_count <- rowSums(to_count)
#rows with higher (all) count should have a lower title value because title_1 is the most emphasized
all_count_inv <- max(all_count) - all_count
all_count_inv[all_count_inv == 0] <- NA
all_count_inv
}
# Autodetect the 'title level' e.g. title 1 is most prominent, title 2 next etc.
auto_detect_body_title_level <- function(tab, keyword = "(all)") {
left_headers_df <- tab$body$body_df_to_write[tab$body$left_header_colnames]
all_count_inv <- get_inv_title_count_title(left_headers_df, keyword)
# Append title level to both meta_row_ and meta_left_title_row_
col <- tab$body$body_df$meta_row_[not_na(all_count_inv)]
concat <- all_count_inv[not_na(all_count_inv)]
concat <- paste0("title_", concat)
tab$body$body_df[not_na(all_count_inv),"meta_row_"] <- paste(col, concat,sep = "|")
col <- tab$body$body_df$meta_left_header_row_[not_na(all_count_inv)]
concat <- all_count_inv[not_na(all_count_inv)]
concat <- paste0("title_", concat)
tab$body$body_df[not_na(all_count_inv),"meta_left_header_row_"] <- paste(col, concat,sep = "|")
tab
}
# Consolidate the header columns into one, taking the rightmost value and applying indent
# e.g. a | b | (all) -> b
# e.g. (all) | (all) | (all) -> Grand Total
auto_style_indent <- function(tab, keyword = "(all)", total_text = "Grand Total", left_header_colname = "-") {
tab$misc$coalesce_left_header_colname = left_header_colname
if (is.null(tab$body$left_header_colnames )) {
Stop("You've called auto_style_indent, but there are no left_header_colnames to work with")
}
left_headers_df <- tab$body$body_df_to_write[tab$body$left_header_colnames]
orig_left_header_colnames <- tab$body$left_header_colnames
# count '(all)'
to_count <- (left_headers_df == keyword)
all_count <- rowSums(to_count)
# paste together all left headers
concat <- do.call(paste, c(left_headers_df, sep="=|="))
# Split concatenated string into elements, and find last element that's not (all)
elems <- strsplit(concat, "=\\|=", perl=TRUE)
last_elem <- lapply(elems, function(x) {
x <- x[x != keyword]
if (length(x) == 0) {
x <- total_text
}
tail(x,1)
})
new_left_headers <- unlist(last_elem)
# Remove original left_header_columns and replace with new
cols <- !(names(tab$body$body_df_to_write) %in% tab$body$left_header_colnames)
tab$body$body_df_to_write <- tab$body$body_df_to_write[cols]
tab$body$body_df_to_write <- cbind(new_left_headers = new_left_headers, tab$body$body_df_to_write, stringsAsFactors = FALSE)
names(tab$body$body_df_to_write)[1] <- left_header_colname
tab$body$left_header_colnames <- c(left_header_colname)
# Now need to fix meta_left_header_row_ to include indents
#Set meta_left_header_row_ to include relevant indents
all_count_inv <- get_inv_title_count_indent(left_headers_df, keyword = keyword)
col <- tab$body$body_df$meta_left_header_row_
rows_indices_to_change <- not_na(all_count_inv)
concat <- all_count_inv[rows_indices_to_change]
concat <- paste0("indent_", concat)
tab$body$body_df[rows_indices_to_change,"meta_left_header_row_"] <- paste(col[rows_indices_to_change], concat,sep = "|")
# Update body$meta_col_
tab$body$meta_col_ <- c(left_header_colname = tab$body$meta_col_[[1]], tab$body$meta_col_[cols])
names(tab$body$meta_col_)[1] <- left_header_colname
# Finally, if tab$top_headers has too many cols, remove the extra cols, removing from 2:n
if (not_null(tab$top_headers$top_headers_list)) {
len_th_cols <- length(tab$top_headers$top_headers_list[[1]])
if (len_th_cols > length(colnames(tab$body$body_df_to_write))) {
cols_to_delete <- 2:length(orig_left_header_colnames)
cols_to_retain <- !(1:len_th_cols %in% cols_to_delete)
tab$top_headers$top_headers_col_style_names <- tab$top_headers$top_headers_col_style_names[cols_to_retain]
for (r in length(tab$top_headers$top_headers_list)) {
tab$top_headers$top_headers_list[[r]] <- tab$top_headers$top_headers_list[[r]] [cols_to_retain]
tab$top_headers$top_headers_list[[r]][1] <- left_header_colname
}
}
}
tab
}