In [1]:
library(haven)
library(tidyverse)
library(broom)
library(lme4) 

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.2.1 ──
[32m✔[39m [34mggplot2[39m 3.2.0     [32m✔[39m [34mpurrr  [39m 0.3.2
[32m✔[39m [34mtibble [39m 2.1.3     [32m✔[39m [34mdplyr  [39m 0.8.2
[32m✔[39m [34mtidyr  [39m 0.8.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.4.0
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
Loading required package: Matrix

Attaching package: ‘Matrix’

The following object is masked from ‘package:tidyr’:

    expand



# Missing at random

In [2]:
AA_E_Long <- read.csv('./datos/AA_E_Long.csv')
AA_NE_Long <- read.csv('./datos/AA_NE_Long.csv')

moms <- read.csv('./datos/moms.csv')

head(AA_NE_Long)
head(moms)

X,id,group,age,sex,weekNames,level,week,AA
<int>,<int>,<fct>,<int>,<fct>,<fct>,<int>,<int>,<fct>
1,1,teen,17,girl,ARGCalostrum,1,1,ARG
2,2,teen,16,boy,ARGCalostrum,1,1,ARG
3,3,teen,17,boy,ARGCalostrum,1,1,ARG
4,4,teen,16,boy,ARGCalostrum,1,1,ARG
5,5,teen,16,,ARGCalostrum,2,1,ARG
6,6,teen,16,boy,ARGCalostrum,2,1,ARG


X,id,group,momAge,sex,momHeight,momWeight,BMI
<int>,<int>,<fct>,<int>,<fct>,<dbl>,<dbl>,<dbl>
1,1,teen,17,girl,1.48,54.4,24.84
2,2,teen,16,boy,1.52,74.7,32.33
3,3,teen,17,boy,1.55,56.4,23.48
4,4,teen,16,boy,1.6,65.9,25.74
5,5,teen,16,,1.51,56.5,24.78
6,6,teen,16,boy,1.57,71.2,28.89


In [3]:
weightLong <- read.csv('./datos/weightLong.csv')
headCLong <-read.csv('./datos/headCLong.csv')

In [4]:
missingData <- AA_NE_Long %>% mutate(`missing data` = ifelse(is.na(level), 1, 0))
head(missingData)

X,id,group,age,sex,weekNames,level,week,AA,missing data
<int>,<int>,<fct>,<int>,<fct>,<fct>,<int>,<int>,<fct>,<dbl>
1,1,teen,17,girl,ARGCalostrum,1,1,ARG,0
2,2,teen,16,boy,ARGCalostrum,1,1,ARG,0
3,3,teen,17,boy,ARGCalostrum,1,1,ARG,0
4,4,teen,16,boy,ARGCalostrum,1,1,ARG,0
5,5,teen,16,,ARGCalostrum,2,1,ARG,0
6,6,teen,16,boy,ARGCalostrum,2,1,ARG,0


There is not a clear distinction between patients with missing data patients and patients without:

In [5]:
numNA <- missingData %>% group_by(id, sex, age, week, AA) %>% summarise(miss = sum(`missing data`))
head(numNA)

“Factor `sex` contains implicit NA, consider using `forcats::fct_explicit_na`”

id,sex,age,week,AA,miss
<int>,<fct>,<int>,<int>,<fct>,<dbl>
1,girl,17,1,ALA,0
1,girl,17,1,ARG,0
1,girl,17,1,ASN,0
1,girl,17,1,ASP,0
1,girl,17,1,CYS,0
1,girl,17,1,GLN,0


In [6]:
numNA_CI <- numNA %>% filter(!is.na(sex)) %>% group_by(id, sex, age, AA) %>% 
                    summarise(total_miss = sum(miss)) %>% filter(AA == 'GLU') %>%
                    mutate(complete = ifelse(total_miss == 0, 'yes', 'no'))

numNA_CI %>% group_by(complete) %>% summarise(n())

complete,n()
<chr>,<int>
no,32
yes,29


In [7]:
numNA_CI %>% group_by(complete, sex) %>% summarise(n())

complete,sex,n()
<chr>,<fct>,<int>
no,boy,19
no,girl,13
yes,boy,16
yes,girl,13


In [8]:
completeMising <- numNA %>% filter(!is.na(sex)) %>% group_by(id, sex, age, week, AA) %>% 
                    summarise(total_miss = sum(miss)) %>% 
                    filter(AA == 'GLU') %>%
                    mutate(complete = ifelse(total_miss == 0, 'yes', 'no'))

weekSexNA <- completeMising %>%
                    group_by(complete, week, sex) %>% summarise(total = n())

## Characteristics for mothers with complete and incomplete data

In [9]:
df_incomplete <- completeMising %>% filter(complete == 'no') %>% group_by(id) %>% summarise(mean(total_miss))
id_incomplete <- df_incomplete$id
id_incomplete

In [10]:
moms_complete <- moms %>% mutate(complete = ifelse(id %in% id_incomplete, 'no', 'yes'))

In [11]:
moms_complete %>% group_by(complete) %>% 
    summarise(age = round(mean(momAge), 1), sdAge = 2*round(sd(momAge), 1), 
              height = round(mean(momHeight, na.rm = T), 1), sdHeight = 2*round(sd(momHeight, na.rm = T), 1),
              weight = round(mean(momWeight, na.rm = T), 1), sdWeight = 2*round(sd(momWeight, na.rm = T), 1))

complete,age,sdAge,height,sdHeight,weight,sdWeight
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
no,18.0,5.6,1.6,0.2,59.5,14.4
yes,18.7,6.2,1.5,0.2,59.4,19.8


## Weigth and Head Circumference for babies with complete and incomplete data

In [12]:
weightLongComplete <- weightLong %>% mutate(complete = ifelse(id %in% id_incomplete, 'no', 'yes'))
headCLongComplete <- headCLong %>% mutate(complete = ifelse(id %in% id_incomplete, 'no', 'yes'))

head(weightLongComplete)
head(headCLongComplete)

X,id,group,sex,weekNames,weight,week,complete
<int>,<int>,<fct>,<fct>,<fct>,<dbl>,<int>,<chr>
1,1,teen,girl,BABYWeigth1g,3492.7,1,yes
2,2,teen,boy,BABYWeigth1g,3392.9,1,yes
3,3,teen,boy,BABYWeigth1g,2993.8,1,no
4,4,teen,boy,BABYWeigth1g,3692.3,1,no
5,5,teen,,BABYWeigth1g,3592.5,1,yes
6,6,teen,boy,BABYWeigth1g,3093.5,1,yes


X,id,group,sex,weekNames,head,week,complete
<int>,<int>,<fct>,<fct>,<fct>,<int>,<int>,<chr>
1,1,teen,girl,HeadCircumcm1,34,1,yes
2,2,teen,boy,HeadCircumcm1,35,1,yes
3,3,teen,boy,HeadCircumcm1,35,1,no
4,4,teen,boy,HeadCircumcm1,37,1,no
5,5,teen,,HeadCircumcm1,34,1,yes
6,6,teen,boy,HeadCircumcm1,35,1,yes


In [13]:
weightLongComplete %>% filter(!is.na(sex), week == 1) %>% group_by(complete) %>%
                    summarise(weightMean = round(mean(weight, na.rm = T), 2), 
                              weightSd = round(sd(weight, na.rm = T), 2))

complete,weightMean,weightSd
<chr>,<dbl>,<dbl>
no,3366.23,700.09
yes,3148.01,421.78


In [14]:
headCLongComplete %>% filter(!is.na(sex), week == 1) %>% group_by(complete) %>%
                    summarise(headMean = round(mean(head, na.rm = T), 2),
                              headSd = round(sd(head, na.rm = T), 2))

complete,headMean,headSd
<chr>,<dbl>,<dbl>
no,33.85,1.46
yes,33.65,0.88


### Sex distribution for patients with missing data and without by week:

In [15]:
weekSexNA %>% spread(sex, total) %>% filter(week == 1) %>% select(-week)
weekSexNA %>% spread(sex, total) %>% filter(week == 2) %>% select(-week)
weekSexNA %>% spread(sex, total) %>% filter(week == 8) %>% select(-week)
weekSexNA %>% spread(sex, total) %>% filter(week == 16) %>% select(-week)

Adding missing grouping variables: `week`


week,complete,boy,girl
<int>,<chr>,<int>,<int>
1,yes,35,26


Adding missing grouping variables: `week`


week,complete,boy,girl
<int>,<chr>,<int>,<int>
2,no,8,6
2,yes,27,20


Adding missing grouping variables: `week`


week,complete,boy,girl
<int>,<chr>,<int>,<int>
8,no,12,11
8,yes,23,15


Adding missing grouping variables: `week`


week,complete,boy,girl
<int>,<chr>,<int>,<int>
16,no,15,9
16,yes,20,17


In [16]:
weekSexPlot <- weekSexNA %>% spread(sex, total) %>% mutate(proportion = round(boy/(boy + girl), 2)) %>%
                    select(complete, proportion, week) %>% spread(complete, proportion)

weekSexPlot

week,no,yes
<int>,<dbl>,<dbl>
1,,0.57
2,0.57,0.57
8,0.52,0.61
16,0.62,0.54


In [17]:
# Score statistic:
# p0 - p1 / sqrt(p(1 - p)(1/n0 + 1/n1))

scoreTestStat <- function(p0, p1, n0, n1, n0_S, n1_S){
    "
    Compare two binomials Bin(n0, p0) and Bin(n1, p1).
    
    Under H0: p0 = p1, TS is approx N(0, 1).

    inputs:
        pi = probability of success for variable i.
        ni = number of trials for variable i.
        ni_S = number of succesful trials for variable i.
    "

    p <- (n0_S + n1_S) / (n0 + n1)
    TS <- (p0 - p1) / sqrt( p*(1 - p)*(1/n0 + 1/n1))
    
    return(TS)
}

In [18]:
#week 8:
scoreTestStat(p0 =0.52, p1 = 0.61, n0 = 23, n1 = 38, n0_S = 12, n1_S = 23)
#week 16:
scoreTestStat(p0 =0.62, p1 = 0.54, n0 = 24, n1 = 37, n0_S = 15, n1_S = 20)

In [19]:
1 - pnorm(q = 0.67, mean = 0, sd = 1) + pnorm(q = -0.67, mean = 0, sd = 1)
1 - pnorm(q = 0.62, mean = 0, sd = 1) + pnorm(q = -0.62, mean = 0, sd = 1)

In [20]:
# week 8:
fisher.test(rbind(c(12, 23-12), c(23, 38-23)))
# week 16:
fisher.test(rbind(c(15, 24-15), c(20, 37-20)))


	Fisher's Exact Test for Count Data

data:  rbind(c(12, 23 - 12), c(23, 38 - 23))
p-value = 0.598
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 0.2209799 2.3049276
sample estimates:
odds ratio 
 0.7155006 



	Fisher's Exact Test for Count Data

data:  rbind(c(15, 24 - 15), c(20, 37 - 20))
p-value = 0.6008
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 0.441436 4.660701
sample estimates:
odds ratio 
  1.408597 


### The number of observations per week is bigger than 37

In [21]:
AA_NE_Long %>% filter(!is.na(sex), !is.na(level), AA == 'GLU') %>% group_by(week) %>% summarise(n_obs = n())

week,n_obs
<int>,<int>
1,61
2,47
8,38
16,37
