In [1]:
library(haven)
library(tidyverse)
library(broom)
library(lme4) 

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.2.1 ──
[32m✔[39m [34mggplot2[39m 3.2.0     [32m✔[39m [34mpurrr  [39m 0.3.2
[32m✔[39m [34mtibble [39m 2.1.3     [32m✔[39m [34mdplyr  [39m 0.8.2
[32m✔[39m [34mtidyr  [39m 0.8.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.4.0
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
Loading required package: Matrix

Attaching package: ‘Matrix’

The following object is masked from ‘package:tidyr’:

    expand



# Missing at random

In [2]:
# Read the two long-format CSV tables from the local `datos` directory.
AA_E_Long <- read.csv(file = "./datos/AA_E_Long.csv")
AA_NE_Long <- read.csv(file = "./datos/AA_NE_Long.csv")

# Show the first rows of AA_NE_Long to check its columns.
AA_NE_Long %>% head()

X,id,group,age,sex,weekNames,level,week,AA
<int>,<int>,<fct>,<int>,<fct>,<fct>,<int>,<int>,<fct>
1,1,teen,17,girl,ARGCalostrum,1,1,ARG
2,2,teen,16,boy,ARGCalostrum,1,1,ARG
3,3,teen,17,boy,ARGCalostrum,1,1,ARG
4,4,teen,16,boy,ARGCalostrum,1,1,ARG
5,5,teen,16,,ARGCalostrum,2,1,ARG
6,6,teen,16,boy,ARGCalostrum,2,1,ARG


In [173]:
# Flag every row whose `level` is NA: `missing data` is 1 when the
# measurement is absent and 0 otherwise (stored as a double).
missingData <- mutate(
  AA_NE_Long,
  `missing data` = as.numeric(is.na(level))
)
missingData %>% head()

X,id,group,age,sex,weekNames,level,week,AA,missing data
<int>,<int>,<fct>,<int>,<fct>,<fct>,<int>,<int>,<fct>,<dbl>
1,1,teen,17,girl,ARGCalostrum,1,1,ARG,0
2,2,teen,16,boy,ARGCalostrum,1,1,ARG,0
3,3,teen,17,boy,ARGCalostrum,1,1,ARG,0
4,4,teen,16,boy,ARGCalostrum,1,1,ARG,0
5,5,teen,16,,ARGCalostrum,2,1,ARG,0
6,6,teen,16,boy,ARGCalostrum,2,1,ARG,0


### Not many patients have more than 3 missing values

There is no clear distinction between patients with missing data and patients without.

In [180]:
# Count the missing measurements for every id / sex / age / week / AA
# combination by summing the 0/1 `missing data` flag.
# `summarise()` only peels off the last grouping variable, so the result
# would otherwise stay grouped by id, sex, age and week; `ungroup()` returns
# a plain tibble so later verbs are not silently applied per group.
numNA <- missingData %>%
  group_by(id, sex, age, week, AA) %>%
  summarise(miss = sum(`missing data`)) %>%
  ungroup()
head(numNA)

“Factor `sex` contains implicit NA, consider using `forcats::fct_explicit_na`”

id,sex,age,week,AA,miss
<int>,<fct>,<int>,<int>,<fct>,<dbl>
1,girl,17,1,ALA,0
1,girl,17,1,ARG,0
1,girl,17,1,ASN,0
1,girl,17,1,ASP,0
1,girl,17,1,CYS,0
1,girl,17,1,GLN,0


In [192]:
# Completeness per id / sex / age combination, using the GLU rows only:
# rows with unknown sex are dropped, the missing counts are added up over
# all weeks, and `complete` is "yes" when nothing is missing, "no" otherwise.
numNA_CI <- numNA %>%
  filter(!is.na(sex)) %>%
  group_by(id, sex, age, AA) %>%
  summarise(total_miss = sum(miss)) %>%
  filter(AA == "GLU") %>%
  mutate(complete = ifelse(total_miss == 0, "yes", "no"))

# Number of rows in each completeness category.
numNA_CI %>%
  group_by(complete) %>%
  summarise(n())

complete,n()
<chr>,<int>
no,32
yes,29


In [194]:
# Break the completeness counts down by sex.
numNA_CI %>%
  group_by(complete, sex) %>%
  summarise(n())

complete,sex,n()
<chr>,<fct>,<int>
no,boy,19
no,girl,13
yes,boy,16
yes,girl,13


In [227]:
# Number of boys and girls per week, split by whether the GLU measurement
# for that week is present (complete == "yes") or missing (complete == "no").
# Rows with unknown sex are excluded.
weekSexNA <- numNA %>%
  filter(!is.na(sex)) %>%
  group_by(id, sex, age, week, AA) %>%
  summarise(total_miss = sum(miss)) %>%
  filter(AA == "GLU") %>%
  mutate(complete = ifelse(total_miss == 0, "yes", "no")) %>%
  group_by(complete, week, sex) %>%
  summarise(total = n())

# Wide table with the boy count and the overall total per complete / week.
# `fill = 0L` makes a complete / week cell that contains only one sex count
# as zero for the other; without it the absent cell is NA and `boy + girl`
# becomes NA as well.
# `spread()` is kept because the loaded tidyr (0.8.3) predates `pivot_wider()`.
weekSexNA %>%
  spread(sex, total, fill = 0L) %>%
  mutate(total = boy + girl) %>%
  select(complete, boy, total, week)

complete,boy,total,week
<chr>,<int>,<int>,<int>
no,8,14,2
no,12,23,8
no,15,24,16
yes,35,61,1
yes,27,47,2
yes,23,38,8
yes,20,37,16


In [230]:
# Two-sample tests for equality of the proportion of boys between the
# group with a missing measurement (complete == "no", first element) and
# the group without (complete == "yes", second element), one test per week.
# x = number of boys, n = group total; both are copied by hand from the
# complete / boy / total / week table printed above, so they must be
# updated manually if the data change.
# Week 1 is not tested because that table has no "no" row for week 1.
prop.test(x = c(8, 27), n = c(14, 47)) # week 2
prop.test(x = c(12, 23), n = c(23, 38)) # week 8
prop.test(x = c(15, 20), n = c(24, 37)) # week 16


	2-sample test for equality of proportions with continuity correction

data:  c(8, 27) out of c(14, 47)
X-squared = 9.3936e-31, df = 1, p-value = 1
alternative hypothesis: two.sided
95 percent confidence interval:
 -0.3013375  0.2952585
sample estimates:
   prop 1    prop 2 
0.5714286 0.5744681 



	2-sample test for equality of proportions with continuity correction

data:  c(12, 23) out of c(23, 38)
X-squared = 0.13853, df = 1, p-value = 0.7097
alternative hypothesis: two.sided
95 percent confidence interval:
 -0.3749921  0.2079440
sample estimates:
   prop 1    prop 2 
0.5217391 0.6052632 



	2-sample test for equality of proportions with continuity correction

data:  c(15, 20) out of c(24, 37)
X-squared = 0.14948, df = 1, p-value = 0.699
alternative hypothesis: two.sided
95 percent confidence interval:
 -0.2014811  0.3704001
sample estimates:
   prop 1    prop 2 
0.6250000 0.5405405 


In [221]:
# Proportion of boys per week, with one column per completeness category
# ("no" = measurement missing, "yes" = measurement present).
# `fill = 0L` in the first `spread()` treats a complete / week cell with
# only one sex as a zero count for the other, so the proportion is 0 or 1
# instead of NA.
# The second `spread()` deliberately keeps NA for a week / category
# combination that has no rows at all.
weekSexPlot <- weekSexNA %>%
  spread(sex, total, fill = 0L) %>%
  mutate(proportion = round(boy / (boy + girl), 2)) %>%
  select(complete, proportion, week) %>%
  spread(complete, proportion)

weekSexPlot

week,no,yes
<int>,<dbl>,<dbl>
1,,0.57
2,0.57,0.57
8,0.52,0.61
16,0.62,0.54


# Missing data increases with time

The number of observations per week is at least 37 (week 16 has exactly 37).

In [140]:
# Count, for each week, the GLU rows that have a recorded (non-NA) level.
AA_NE_Long %>%
  filter(AA == "GLU", !is.na(level)) %>%
  group_by(week) %>%
  summarise(n_obs = n())

week,n_obs
<int>,<int>
1,65
2,48
8,39
16,37
