/
missing-data.Rmd
151 lines (117 loc) · 4.18 KB
/
missing-data.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
---
title: "Missing Data"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Missing Data}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---
```{r setup, echo=FALSE, results='hide', warning=FALSE}
# Chunk defaults for the rest of the vignette: messages and warnings are
# switched off, figures are centred PNGs, and printed output is prefixed
# with "#>".
knitr::opts_chunk$set(
  comment = "#>",
  dev = "png",
  fig.align = "center",
  background = "#F7F7F7",
  warning = FALSE,
  message = FALSE
)

# Cap data.table threads: use the OMP_THREAD_LIMIT environment variable when
# it is set, otherwise fall back to 2.
data.table::setDTthreads(
  threads = Sys.getenv("OMP_THREAD_LIMIT", unset = 2)
)

# Session-wide options used while building the vignette.
options(
  width = 100,
  stringsAsFactors = FALSE,
  timeout = 600
)
```
```{r}
library(aqp)
library(soilDB)
```
```{r}
# Load the example data set used throughout this chunk.
data("jacobs2000")

# Sketch the collection three times, colouring horizons by a different
# variable each time; the variables differ in how completely they are
# populated.

# time_saturated: fully populated
plotSPC(
  jacobs2000,
  color = "time_saturated",
  name.style = "center-center",
  cex.names = 0.8
)

# concentration_color: some data missing
plotSPC(
  jacobs2000,
  color = "concentration_color",
  name.style = "center-center",
  cex.names = 0.8
)

# matrix_color: very nearly complete
plotSPC(
  jacobs2000,
  color = "matrix_color",
  name.style = "center-center",
  cex.names = 0.8
)
# Variables whose completeness is evaluated.
v <- c("time_saturated", "concentration_color", "matrix_color")

# Completeness by profile, relative method.
# `p = "2C"` makes the evaluation ignore 2C horizons.
jacobs2000$data.complete <- evalMissingData(
  jacobs2000,
  vars = v,
  p = "2C",
  method = "relative"
)

# Completeness by profile, absolute method; 2C horizons ignored as above.
jacobs2000$data.complete.abs <- evalMissingData(
  jacobs2000,
  vars = v,
  p = "2C",
  method = "absolute"
)

# Completeness by horizon; 2C horizons ignored as above.
jacobs2000$hz.data.complete <- evalMissingData(
  jacobs2000,
  vars = v,
  p = "2C",
  method = "horizon"
)

# Sketch the profiles, colouring each horizon by its "fraction complete".
plotSPC(
  jacobs2000,
  color = "hz.data.complete",
  name.style = "center-center",
  cex.names = 0.8
)
# Rank profiles on profile-level (relative) completeness.
new.order <- order(jacobs2000$data.complete)

# Sketch profiles along the completeness ranking, horizons coloured by
# horizon-level completeness.
plotSPC(
  jacobs2000, name.style = 'center-center',
  cex.names = 0.8, color = 'hz.data.complete',
  plot.order = new.order
)

# Add relative completeness axis.
# Labels are indexed by `new.order` so they line up with the re-ordered
# profiles. seq_len() is used for tick positions because 1:length(x) would
# give c(1, 0) for an empty collection.
axis(
  side = 1, at = seq_len(length(jacobs2000)),
  labels = round(jacobs2000$data.complete[new.order], 2),
  line = 0, cex.axis = 0.75
)

# Add absolute completeness (cm) on a second axis row, same ordering.
axis(
  side = 1, at = seq_len(length(jacobs2000)),
  labels = jacobs2000$data.complete.abs[new.order],
  line = 2.5, cex.axis = 0.75
)

# Label the two axis rows, placed to the left of the first profile.
mtext('Relative\nCompleteness', side = 1, at = 0.25, line = 0.25, cex = 0.8)
mtext('Absolute\nCompleteness (cm)', side = 1, at = 0.25, line = 2.75, cex = 0.8)
```
```{r}
# Fetch data for the 'pierre' series with fetchKSSL().
x <- fetchKSSL(series = "pierre")

# Plot margins (bottom, left, top, right) for the sketches below.
par(mar = c(0, 0, 3, 2))

# One sketch per property; every sketch labels profiles with
# `pedon_completeness_index` and differs only in the colouring variable.

# clay
plotSPC(
  x,
  color = "clay",
  label = "pedon_completeness_index",
  name.style = "center-center",
  width = 0.3
)

# cec7
plotSPC(
  x,
  color = "cec7",
  label = "pedon_completeness_index",
  name.style = "center-center",
  width = 0.3
)

# estimated_oc
plotSPC(
  x,
  color = "estimated_oc",
  label = "pedon_completeness_index",
  name.style = "center-center",
  width = 0.3
)

# ph_h2o
plotSPC(
  x,
  color = "ph_h2o",
  label = "pedon_completeness_index",
  name.style = "center-center",
  width = 0.3
)

# db_13b
plotSPC(
  x,
  color = "db_13b",
  label = "pedon_completeness_index",
  name.style = "center-center",
  width = 0.3
)
# Use a bottom margin of 1 line so there is room for the annotation below.
par(mar = c(1, 0, 3, 2))
plotSPC(
  x, color = 'ph_h2o', width = 0.3,
  name.style = 'center-center', label = 'pedon_completeness_index'
)

# Bottom depth of the last horizon of each profile (aqp `.LAST` / `.BOTTOM`
# keywords); used as the y-position for labels drawn beneath each profile.
.b <- x[, , .LAST, .BOTTOM]

# Annotate each profile with its `pi` value.
# seq_len() is used because 1:length(x) would give c(1, 0) for an empty
# collection.
# NOTE(review): `x$pi` is not assigned anywhere in the visible code -- confirm
# that fetchKSSL() supplies it; otherwise it must be computed before this call.
text(x = seq_len(length(x)), y = .b, labels = x$pi, cex = 0.85, pos = 1)
mtext('Profile Information Index (bytes)', side = 1, line = -0.5)
# Properties whose completeness is evaluated for the KSSL profiles.
v <- c("clay", "db_13b", "cec7", "ph_h2o")

# Evaluate non-missing data with each of the three methods and store the
# results on the collection.
x$rel.not.missing <- evalMissingData(
  x,
  vars = v,
  method = "relative"
)

x$abs.not.missing <- evalMissingData(
  x,
  vars = v,
  method = "absolute"
)

x$hz.not.missing <- evalMissingData(
  x,
  vars = v,
  method = "horizon"
)
# Sketch 1: profiles ordered by relative non-missing fraction, horizons
# coloured by horizon-level non-missing fraction.
o <- order(x$rel.not.missing)
plotSPC(
  x, color = 'hz.not.missing', width = 0.33,
  name.style = 'center-center', label = 'pedon_completeness_index',
  plot.order = o
)

# Label each profile beneath its deepest horizon; both `.b` and the labels are
# indexed by `o` so they follow the plotting order. seq_len() is used because
# 1:length(x) would give c(1, 0) for an empty collection.
text(
  x = seq_len(length(x)), y = .b[o],
  labels = round(x$rel.not.missing[o], 2), cex = 0.85, pos = 1
)
mtext('Relative Non-Missing Fraction', side = 1, line = -0.5)

# Sketch 2: same figure, ordered and labelled by absolute non-missing (cm).
o <- order(x$abs.not.missing)
plotSPC(
  x, color = 'hz.not.missing', width = 0.33,
  name.style = 'center-center', label = 'pedon_completeness_index',
  plot.order = o
)
text(
  x = seq_len(length(x)), y = .b[o],
  labels = x$abs.not.missing[o], cex = 0.85, pos = 1
)
mtext('Absolute Non-Missing (cm)', side = 1, line = -0.5)
```