In [1]:
library(haven)
library(tidyverse)
library(broom)
library(lme4) 

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.2.1 ──
[32m✔[39m [34mggplot2[39m 3.2.0     [32m✔[39m [34mpurrr  [39m 0.3.2
[32m✔[39m [34mtibble [39m 2.1.3     [32m✔[39m [34mdplyr  [39m 0.8.2
[32m✔[39m [34mtidyr  [39m 0.8.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.4.0
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
Loading required package: Matrix

Attaching package: ‘Matrix’

The following object is masked from ‘package:tidyr’:

    expand



# Missing at random

In [2]:
AA_E_Long <- read.csv('./datos/AA_E_Long.csv')
AA_NE_Long <- read.csv('./datos/AA_NE_Long.csv')

moms <- read.csv('./datos/moms.csv')

head(AA_NE_Long)
head(moms)

X,id,group,age,sex,weekNames,level,week,AA
<int>,<int>,<fct>,<int>,<fct>,<fct>,<int>,<int>,<fct>
1,1,teen,17,girl,ARGCalostrum,1,1,ARG
2,2,teen,16,boy,ARGCalostrum,1,1,ARG
3,3,teen,17,boy,ARGCalostrum,1,1,ARG
4,4,teen,16,boy,ARGCalostrum,1,1,ARG
5,5,teen,16,,ARGCalostrum,2,1,ARG
6,6,teen,16,boy,ARGCalostrum,2,1,ARG


X,id,group,momAge,sex,momHeight,momWeight,kcal,prot,cho,lipids,glu,choles,trig,hdl,ldl,album,BMI
<int>,<int>,<fct>,<int>,<fct>,<dbl>,<dbl>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,1,teen,17,girl,1.48,54.4,4204.0,103.0,759.0,86.0,115.0,205,304.9,23.7,120.4,4.44,24.84
2,2,teen,16,boy,1.52,74.7,1586.0,27.0,242.0,34.0,107.8,196,193.0,34.5,122.9,4.42,32.33
3,3,teen,17,boy,1.55,56.4,2052.0,88.0,281.0,67.0,87.2,346,285.0,23.0,266.0,4.79,23.48
4,4,teen,16,boy,1.6,65.9,,,,,95.7,273,143.2,39.9,204.4,4.47,25.74
5,5,teen,16,,1.51,56.5,1621.0,63.0,270.0,38.0,73.9,266,123.0,37.7,203.7,4.39,24.78
6,6,teen,16,boy,1.57,71.2,1932.0,100.0,311.0,28.0,100.7,224,289.0,21.9,144.3,4.49,28.89


In [3]:
weightLong <- read.csv('./datos/weightLong.csv')
headCLong <- read.csv('./datos/headCLong.csv')

In [4]:
missingData <- AA_NE_Long %>% mutate(`missing data` = ifelse(is.na(level), 1, 0))
head(missingData)

X,id,group,age,sex,weekNames,level,week,AA,missing data
<int>,<int>,<fct>,<int>,<fct>,<fct>,<int>,<int>,<fct>,<dbl>
1,1,teen,17,girl,ARGCalostrum,1,1,ARG,0
2,2,teen,16,boy,ARGCalostrum,1,1,ARG,0
3,3,teen,17,boy,ARGCalostrum,1,1,ARG,0
4,4,teen,16,boy,ARGCalostrum,1,1,ARG,0
5,5,teen,16,,ARGCalostrum,2,1,ARG,0
6,6,teen,16,boy,ARGCalostrum,2,1,ARG,0


There is not a clear distinction between patients with missing data patients and patients without:

In [5]:
numNA <- missingData %>% group_by(id, sex, age, week, AA) %>% summarise(miss = sum(`missing data`))
head(numNA)

“Factor `sex` contains implicit NA, consider using `forcats::fct_explicit_na`”

id,sex,age,week,AA,miss
<int>,<fct>,<int>,<int>,<fct>,<dbl>
1,girl,17,1,ALA,0
1,girl,17,1,ARG,0
1,girl,17,1,ASN,0
1,girl,17,1,ASP,0
1,girl,17,1,CYS,0
1,girl,17,1,GLN,0


In [6]:
numNA_CI <- numNA %>% filter(!is.na(sex)) %>% group_by(id, sex, age, AA) %>% 
                    summarise(total_miss = sum(miss)) %>% filter(AA == 'GLU') %>%
                    mutate(complete = ifelse(total_miss == 0, 'yes', 'no'))

numNA_CI %>% group_by(complete) %>% summarise(n())

complete,n()
<chr>,<int>
no,32
yes,29


In [7]:
numNA_CI %>% group_by(complete, sex) %>% summarise(n())

complete,sex,n()
<chr>,<fct>,<int>
no,boy,19
no,girl,13
yes,boy,16
yes,girl,13


In [8]:
completeMising <- numNA %>% filter(!is.na(sex)) %>% group_by(id, sex, age, week, AA) %>% 
                    summarise(total_miss = sum(miss)) %>% 
                    filter(AA == 'GLU') %>%
                    mutate(complete = ifelse(total_miss == 0, 'yes', 'no'))

weekSexNA <- completeMising %>%
                    group_by(complete, week, sex) %>% summarise(total = n())

## Characteristics for mothers with complete and incomplete data

In [26]:
df_incomplete <- completeMising %>% filter(complete == 'no') %>% group_by(id) %>% summarise(mean(total_miss))
id_incomplete <- df_incomplete$id
n_obs <- length(id_incomplete)
id_incomplete

In [10]:
qt(c(.025, .975), df = n_obs-1)

In [11]:
moms_complete <- moms %>% mutate(complete = ifelse(id %in% id_incomplete, 'no', 'yes'))

In [12]:
moms_complete %>% group_by(complete) %>% 
    summarise(age = round(mean(momAge), 1), sdAge = 2*round(sd(momAge), 1), 
              height = round(mean(momHeight, na.rm = T), 1), sdHeight = 2*round(sd(momHeight, na.rm = T), 1),
              weight = round(mean(momWeight, na.rm = T), 1), sdWeight = 2*round(sd(momWeight, na.rm = T), 1))

complete,age,sdAge,height,sdHeight,weight,sdWeight
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
no,18.0,5.6,1.6,0.2,59.5,14.4
yes,18.7,6.2,1.5,0.2,59.4,19.8


## Diet for mothers with complete and incomplete data

In [50]:
moms_complete %>% group_by(complete) %>% filter(!is.na(sex)) %>% summarise(number = n())

moms_complete %>% group_by(complete) %>% filter(!is.na(sex)) %>% 
    summarise(age = sum(!is.na(momAge)), height = sum(!is.na(momHeight)), weight = sum(!is.na(momWeight)), 
              kcal = sum(!is.na(kcal)), prot = sum(!is.na(prot)), trig = sum(!is.na(trig)),
              cho = sum(!is.na(cho)), lipids = sum(!is.na(lipids)), glu = sum(!is.na(glu)),
              choles = sum(!is.na(choles)), hdl = sum(!is.na(hdl)),
              ldl = sum(!is.na(ldl)), album = sum(!is.na(album)))

complete,number
<chr>,<int>
no,32
yes,29


complete,age,height,weight,kcal,prot,trig,cho,lipids,glu,choles,hdl,ldl,album
<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
no,32,30,30,13,13,25,13,13,25,25,25,25,25
yes,29,27,29,22,22,29,22,22,29,29,29,29,29


In [51]:
names(moms_complete)
#moms_complete %>% group_by(complete) %>% filter(is.na(glu))

In [52]:
moms_complete %>% group_by(complete) %>% 
    summarise(mKcal = round(mean(kcal, na.rm = T), 1), sdKcal = 2*round(sd(kcal, na.rm = T), 1), 
              mProt = round(mean(prot, na.rm = T), 1), sdProt = 2*round(sd(prot, na.rm = T), 1),
              mGlu = round(mean(glu, na.rm = T), 1), sdGlu = 2*round(sd(glu, na.rm = T), 1), 
              mTrig = round(mean(trig, na.rm = T), 1), sdTrig = 2*round(sd(trig, na.rm = T), 1))

2085.2 - (2* 1729.8 / sqrt(n_obs))
161.1 - (2* 125.8 / sqrt(n_obs))

complete,mKcal,sdKcal,mProt,sdProt,mGlu,sdGlu,mTrig,sdTrig
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
no,1785.0,806.6,74.5,35.4,75.7,16.6,145.3,90.6
yes,2085.2,1729.8,78.0,55.6,82.2,27.4,161.1,125.8


In [53]:
moms_complete %>% group_by(complete) %>% 
    summarise(mCho = round(mean(cho, na.rm = T), 1), sdCho = 2*round(sd(cho, na.rm = T), 1),
              mLipids = round(mean(lipids, na.rm = T), 1), sdLipids = 2*round(sd(lipids, na.rm = T), 1), 
              mHdl = round(mean(hdl, na.rm = T), 1), sdHdl = 2*round(sd(hdl, na.rm = T), 1),
              mLdl = round(mean(ldl, na.rm = T), 1), sdLdl = 2*round(sd(ldl, na.rm = T), 1),
              mAlbum = round(mean(album, na.rm = T), 1), sdAlbum = 2*round(sd(album, na.rm = T), 1))

complete,mCho,sdCho,mLipids,sdLipids,mHdl,sdHdl,mLdl,sdLdl,mAlbum,sdAlbum
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
no,393.4,886.6,46.1,32.8,41.4,22.8,156.1,83.4,4.5,0.6
yes,329.2,311.4,52.4,55.8,44.8,29.0,163.3,76.6,4.5,0.6


In [54]:
moms_complete %>% group_by(complete)

X,id,group,momAge,sex,momHeight,momWeight,kcal,prot,cho,lipids,glu,choles,trig,hdl,ldl,album,BMI,complete
<int>,<int>,<fct>,<int>,<fct>,<dbl>,<dbl>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1,1,teen,17,girl,1.48,54.4,4204,103,759,86,115.0,205.0,304.9,23.7,120.4,4.44,24.84,yes
2,2,teen,16,boy,1.52,74.7,1586,27,242,34,107.8,196.0,193.0,34.5,122.9,4.42,32.33,yes
3,3,teen,17,boy,1.55,56.4,2052,88,281,67,87.2,346.0,285.0,23.0,266.0,4.79,23.48,no
4,4,teen,16,boy,1.60,65.9,,,,,95.7,273.0,143.2,39.9,204.4,4.47,25.74,no
5,5,teen,16,,1.51,56.5,1621,63,270,38,73.9,266.0,123.0,37.7,203.7,4.39,24.78,yes
6,6,teen,16,boy,1.57,71.2,1932,100,311,28,100.7,224.0,289.0,21.9,144.3,4.49,28.89,yes
7,7,teen,16,boy,1.67,64.9,2284,87,343,65,72.0,243.0,153.4,30.0,182.3,4.78,23.27,yes
8,8,teen,16,boy,1.50,64.5,1160,38,214,20,85.7,253.0,151.1,29.1,193.6,4.42,28.67,yes
9,9,teen,17,girl,1.65,65.5,1241,69,146,42,68.3,255.0,173.0,31.3,189.1,4.55,24.06,no
10,10,teen,18,boy,1.64,63.5,,,,,69.0,159.0,113.0,41.3,95.1,4.67,23.61,no


## Weigth and Head Circumference for babies with complete and incomplete data

In [55]:
weightLongComplete <- weightLong %>% mutate(complete = ifelse(id %in% id_incomplete, 'no', 'yes'))
headCLongComplete <- headCLong %>% mutate(complete = ifelse(id %in% id_incomplete, 'no', 'yes'))

head(weightLongComplete)
head(headCLongComplete)

X,id,group,sex,weekNames,weight,week,complete
<int>,<int>,<fct>,<fct>,<fct>,<dbl>,<int>,<chr>
1,1,teen,girl,BABYWeigth1g,3492.7,1,yes
2,2,teen,boy,BABYWeigth1g,3392.9,1,yes
3,3,teen,boy,BABYWeigth1g,2993.8,1,no
4,4,teen,boy,BABYWeigth1g,3692.3,1,no
5,5,teen,,BABYWeigth1g,3592.5,1,yes
6,6,teen,boy,BABYWeigth1g,3093.5,1,yes


X,id,group,sex,weekNames,head,week,complete
<int>,<int>,<fct>,<fct>,<fct>,<int>,<int>,<chr>
1,1,teen,girl,HeadCircumcm1,34,1,yes
2,2,teen,boy,HeadCircumcm1,35,1,yes
3,3,teen,boy,HeadCircumcm1,35,1,no
4,4,teen,boy,HeadCircumcm1,37,1,no
5,5,teen,,HeadCircumcm1,34,1,yes
6,6,teen,boy,HeadCircumcm1,35,1,yes


In [56]:
weightLongComplete %>% filter(!is.na(sex), week == 1) %>% group_by(complete) %>%
                    summarise(weightMean = round(mean(weight, na.rm = T), 2), 
                              weightSd = round(sd(weight, na.rm = T), 2))

complete,weightMean,weightSd
<chr>,<dbl>,<dbl>
no,3366.23,700.09
yes,3148.01,421.78


In [57]:
headCLongComplete %>% filter(!is.na(sex), week == 1) %>% group_by(complete) %>%
                    summarise(headMean = round(mean(head, na.rm = T), 2),
                              headSd = round(sd(head, na.rm = T), 2))

complete,headMean,headSd
<chr>,<dbl>,<dbl>
no,33.85,1.46
yes,33.65,0.88


### Sex distribution for patients with missing data and without in total and by week:

In [58]:
weekSexNA %>% spread(sex, total) %>% group_by(complete) %>% 
                summarise(boys = sum(boy), girls = sum(girl), proportion = round(boys / (boys + girls), 2))

complete,boys,girls,proportion
<chr>,<int>,<int>,<dbl>
no,35,26,0.57
yes,105,78,0.57


In [59]:
weekSexNA %>% spread(sex, total) %>% filter(week == 1) %>% select(-week)
weekSexNA %>% spread(sex, total) %>% filter(week == 2) %>% select(-week)
weekSexNA %>% spread(sex, total) %>% filter(week == 8) %>% select(-week)
weekSexNA %>% spread(sex, total) %>% filter(week == 16) %>% select(-week)

Adding missing grouping variables: `week`


week,complete,boy,girl
<int>,<chr>,<int>,<int>
1,yes,35,26


Adding missing grouping variables: `week`


week,complete,boy,girl
<int>,<chr>,<int>,<int>
2,no,8,6
2,yes,27,20


Adding missing grouping variables: `week`


week,complete,boy,girl
<int>,<chr>,<int>,<int>
8,no,12,11
8,yes,23,15


Adding missing grouping variables: `week`


week,complete,boy,girl
<int>,<chr>,<int>,<int>
16,no,15,9
16,yes,20,17


In [60]:
weekSexPlot <- weekSexNA %>% spread(sex, total) %>% mutate(proportion = round(boy/(boy + girl), 2)) %>%
                    select(complete, proportion, week) %>% spread(complete, proportion)

weekSexPlot

week,no,yes
<int>,<dbl>,<dbl>
1,,0.57
2,0.57,0.57
8,0.52,0.61
16,0.62,0.54


In [61]:
# Score statistic:
# p0 - p1 / sqrt(p(1 - p)(1/n0 + 1/n1))

scoreTestStat <- function(p0, p1, n0, n1, n0_S, n1_S){
    "
    Compare two binomials Bin(n0, p0) and Bin(n1, p1).
    
    Under H0: p0 = p1, TS is approx N(0, 1).

    inputs:
        pi = probability of success for variable i.
        ni = number of trials for variable i.
        ni_S = number of succesful trials for variable i.
    "

    p <- (n0_S + n1_S) / (n0 + n1)
    TS <- (p0 - p1) / sqrt( p*(1 - p)*(1/n0 + 1/n1))
    
    return(TS)
}

In [62]:
#week 8:
scoreTestStat(p0 =0.52, p1 = 0.61, n0 = 23, n1 = 38, n0_S = 12, n1_S = 23)
#week 16:
scoreTestStat(p0 =0.62, p1 = 0.54, n0 = 24, n1 = 37, n0_S = 15, n1_S = 20)

In [63]:
1 - pnorm(q = 0.67, mean = 0, sd = 1) + pnorm(q = -0.67, mean = 0, sd = 1)
1 - pnorm(q = 0.62, mean = 0, sd = 1) + pnorm(q = -0.62, mean = 0, sd = 1)

In [64]:
# week 8:
fisher.test(rbind(c(12, 23-12), c(23, 38-23)))
# week 16:
fisher.test(rbind(c(15, 24-15), c(20, 37-20)))


	Fisher's Exact Test for Count Data

data:  rbind(c(12, 23 - 12), c(23, 38 - 23))
p-value = 0.598
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 0.2209799 2.3049276
sample estimates:
odds ratio 
 0.7155006 



	Fisher's Exact Test for Count Data

data:  rbind(c(15, 24 - 15), c(20, 37 - 20))
p-value = 0.6008
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 0.441436 4.660701
sample estimates:
odds ratio 
  1.408597 


### The number of observations per week is bigger than 37

In [65]:
AA_NE_Long %>% filter(!is.na(sex), !is.na(level), AA == 'GLU') %>% group_by(week) %>% summarise(n_obs = n())

week,n_obs
<int>,<int>
1,61
2,47
8,38
16,37


# Some Extra tests

In [66]:
pAge <- t.test(moms$momAge[-id_incomplete], moms$momAge[id_incomplete])$p.value
pHeight <- t.test(moms$momHeight[-id_incomplete], moms$momHeight[id_incomplete])$p.value
pWeight <- t.test(moms$momWeight[-id_incomplete], moms$momWeight[id_incomplete])$p.value

In [67]:
pCho <- t.test(moms$cho[-id_incomplete], moms$cho[id_incomplete])$p.value
pProt <- t.test(moms$prot[-id_incomplete], moms$prot[id_incomplete])$p.value
pLipids <- t.test(moms$lipids[-id_incomplete], moms$lipids[id_incomplete])$p.value

In [68]:
wboysC <- filter(weightLongComplete, week == 1, !(id %in% id_incomplete))
wboysM <- filter(weightLongComplete, week == 1, id %in% id_incomplete)

hboysC <- filter(headCLongComplete, week == 1, !(id %in% id_incomplete))
hboysM <- filter(headCLongComplete, week == 1, id %in% id_incomplete)

In [69]:
pWeightB <- t.test(wboysC$weight, wboysM$weight)$p.value
pHeadB <- t.test(hboysC$head, hboysM$head)$p.value

In [70]:
p.adjust(c(pAge, pHeight, pWeight, pCho, pProt, pLipids, pWeightB, pHeadB), "fdr")

In [71]:
girlsW1 <- weightLongComplete %>% filter(week == 1, !is.na(sex), sex == 'girl')
boysW1 <- weightLongComplete %>% filter(week == 1, !is.na(sex), sex == 'boy')

girlsW16 <- weightLongComplete %>% filter(week == 16, !is.na(sex), sex == 'girl')
boysW16 <- weightLongComplete %>% filter(week == 16, !is.na(sex), sex == 'boy')

In [72]:
girlsH1 <- headCLongComplete %>% filter(week == 1, !is.na(sex), sex == 'girl')
boysH1 <- headCLongComplete %>% filter(week == 1, !is.na(sex), sex == 'boy')

girlsH16 <- headCLongComplete %>% filter(!is.na(sex), !is.na(head)) %>% filter(week == 16, sex == 'girl')
boysH16 <- headCLongComplete %>% filter(!is.na(sex), !is.na(head)) %>% filter(week == 16, sex == 'boy')

In [73]:
pW1 <- t.test(girlsW1$weight, boysW1$weight)$p.value
pW16 <- t.test(girlsW16$weight, boysW16$weight)$p.value

In [74]:
pH1 <- t.test(girlsH1$head, boysH1$head)$p.value
pH16 <- t.test(girlsH16$head, boysH16$head)$p.value

In [75]:
p.adjust(c(pW1, pW16, pH1, pH16), "fdr")

In [76]:
p.adjust(c(pAge, pHeight, pWeight, pCho, pProt, pLipids, pWeightB, pHeadB, pW1, pW16, pH1, pH16), "fdr")