In [1]:
library(tidyverse)
library(broom)
library(car)
library(rms)

── [1mAttaching packages[22m ─────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.3     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.0     [32m✔[39m [34mdplyr  [39m 1.0.5
[32m✔[39m [34mtidyr  [39m 1.1.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Loading required package: carData


Attaching package: ‘car’


The following object is masked from ‘package:dplyr’:

    recode


The following object is masked from ‘package:purrr’:

    some


Loading required package: Hmisc

Loading required package: lattice

Loading requi

In [3]:
macro_nut_portions_info.csv <- read_csv('../data/macro_nut_portions_info.csv')
conditions_info <- read_csv('../data/conditions_info.csv') 


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
  id = [32mcol_double()[39m,
  carbohydrates = [32mcol_double()[39m,
  energy = [32mcol_double()[39m,
  fiber = [32mcol_double()[39m,
  lipids = [32mcol_double()[39m,
  protein = [32mcol_double()[39m,
  energy_macro = [32mcol_double()[39m,
  percent_carbohydrate = [32mcol_double()[39m,
  percent_protein = [32mcol_double()[39m,
  percent_lipids = [32mcol_double()[39m,
  beef_cat = [31mcol_character()[39m,
  dairy_cat = [31mcol_character()[39m,
  legumes_cat = [31mcol_character()[39m,
  white_meat_cat = [31mcol_character()[39m,
  beef = [32mcol_double()[39m,
  dairy = [32mcol_double()[39m,
  legumes = [32mcol_double()[39m,
  white_meat = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m─────────────────────────────────────────────────────────────────────────

In [4]:
macro_info <- conditions_info %>% 
    inner_join(macro_nut_portions_info.csv, by = 'id')

## Macronutrients and conditions

In [5]:
macro_info %>% 
    select(contains("percent_")) %>% 
    pivot_longer(cols = contains("percent_"), names_to = "source", values_to = "percent") %>% 
    group_by(source) %>% 
    summarise(m_macro = round(100*mean(percent), 1), sd_macro = round(100*sd(percent), 1))

source,m_macro,sd_macro
<chr>,<dbl>,<dbl>
percent_carbohydrate,54.7,7.8
percent_lipids,22.8,5.3
percent_protein,22.5,4.9


In [6]:
t_test_macro <- function(condition, macro){
    
    t_df <- macro_info %>% 
                mutate(overweight_obesity = ifelse(bmi >= 25, TRUE, FALSE)) %>% 
                na.omit() %>% 
                select(condition, macro) 

    t <- t.test(x = t_df %>% filter(get(condition) == TRUE), 
                y = t_df %>% filter(get(condition) == FALSE))
    
    return(t)
}

In [7]:
energy_met_s <- macro_info %>% 
    na.omit() %>% 
    select(met_s, energy) %>% 
    pivot_longer(cols = -met_s, names_to = 'source', values_to = 'percent') %>% 
    group_by(met_s, source) %>% 
    summarise(m_percent = mean(percent), sd_percent = sd(percent)) %>% 
    mutate(m_percent = round(m_percent, 0), sd_percent = round(sd_percent, 0), 
           dist = paste(m_percent,sd_percent, sep='')) %>% 
    select(met_s, source, dist) %>% 
    pivot_wider(names_from = met_s, values_from = dist) %>% 
    mutate(condition = 'met_s') %>% 
    bind_cols(p_val = round(t_test_macro("met_s", "energy")$p.val, 3))

`summarise()` has grouped output by 'met_s'. You can override using the `.groups` argument.

Note: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(condition)` instead of `condition` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
[90mThis message is displayed once per session.[39m

Note: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(macro)` instead of `macro` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
[90mThis message is displayed once per session.[39m



In [8]:
mets_macro <- macro_info %>% 
    na.omit() %>% 
    select(met_s, contains('percent')) %>% 
    pivot_longer(cols = -met_s, names_to = 'source', values_to = 'percent') %>% 
    mutate(source = str_remove(source, 'percent_')) %>% 
    group_by(met_s, source) %>% 
    summarise(m_percent = mean(percent), sd_percent = sd(percent)) %>% 
    mutate(m_percent = round(100*m_percent, 0), sd_percent = round(100*sd_percent, 0), 
           dist = paste(m_percent, '% ', sd_percent, '%', sep='')) %>% 
    select(met_s, source, dist) %>% 
    pivot_wider(names_from = met_s, values_from = dist) %>% 
    mutate(condition = 'met_s') %>% 
    bind_cols(tibble(p_val = c(t_test_macro("met_s", "carbohydrates")$p.val, 
                               t_test_macro("met_s", "lipids")$p.val, 
                               t_test_macro("met_s", "protein")$p.val))) %>% 
    mutate(p_val = round(p_val, 3))

`summarise()` has grouped output by 'met_s'. You can override using the `.groups` argument.



In [9]:
dm2_macro <- macro_info %>% 
    na.omit() %>% 
    select(d_diagnosed, contains('percent')) %>% 
    pivot_longer(cols = -d_diagnosed, names_to = 'source', values_to = 'percent') %>% 
    mutate(source = str_remove(source, 'percent_')) %>% 
    group_by(d_diagnosed, source) %>% 
    summarise(m_percent = mean(percent), sd_percent = sd(percent)) %>% 
    mutate(m_percent = round(100*m_percent, 0), sd_percent = round(100*sd_percent, 0), 
           dist = paste(m_percent, '% ', sd_percent, '%', sep='')) %>% 
    select(d_diagnosed, source, dist) %>% 
    pivot_wider(names_from = d_diagnosed, values_from = dist)%>% 
    mutate(condition = 'dm2') %>% 
    bind_cols(tibble(p_val = c(t_test_macro("d_diagnosed", "carbohydrates")$p.val, 
                               t_test_macro("d_diagnosed", "lipids")$p.val, 
                               t_test_macro("d_diagnosed", "protein")$p.val))) %>% 
    mutate(p_val = round(p_val, 3))

`summarise()` has grouped output by 'd_diagnosed'. You can override using the `.groups` argument.



In [10]:
energy_dm2 <- macro_info %>% 
    na.omit() %>% 
    select(d_diagnosed, energy) %>% 
    pivot_longer(cols = -d_diagnosed, names_to = 'source', values_to = 'percent') %>% 
    group_by(d_diagnosed, source) %>% 
    summarise(m_percent = mean(percent), sd_percent = sd(percent)) %>% 
    mutate(m_percent = round(m_percent, 0), sd_percent = round(sd_percent, 0), 
           dist = paste(m_percent, sd_percent, sep='')) %>% 
    select(d_diagnosed, source, dist) %>% 
    pivot_wider(names_from = d_diagnosed, values_from = dist) %>% 
    mutate(condition = 'd_diagnosed') %>% 
    bind_cols(p_val = round(t_test_macro("d_diagnosed", "energy")$p.val, 3))

`summarise()` has grouped output by 'd_diagnosed'. You can override using the `.groups` argument.



In [11]:
overob_macro <- macro_info %>% 
    mutate(overweight_obesity = ifelse(bmi >= 25, TRUE, FALSE)) %>% 
    na.omit() %>% 
    select(overweight_obesity, contains('percent')) %>% 
    pivot_longer(cols = -overweight_obesity, names_to = 'source', values_to = 'percent') %>% 
    mutate(source = str_remove(source, 'percent_')) %>% 
    group_by(overweight_obesity, source) %>% 
    summarise(m_percent = mean(percent), sd_percent = sd(percent)) %>% 
    mutate(m_percent = round(100*m_percent, 0), sd_percent = round(100*sd_percent, 0), 
           dist = paste(m_percent, '% ', sd_percent, '%', sep='')) %>% 
    select(overweight_obesity, source, dist) %>% 
    pivot_wider(names_from = overweight_obesity, values_from = dist) %>% 
    mutate(condition = 'overweight_obesity') %>% 
    bind_cols(tibble(p_val = c(t_test_macro("overweight_obesity", "carbohydrates")$p.val, 
                               t_test_macro("overweight_obesity", "lipids")$p.val, 
                               t_test_macro("overweight_obesity", "protein")$p.val))) %>% 
    mutate(p_val = round(p_val, 3))

`summarise()` has grouped output by 'overweight_obesity'. You can override using the `.groups` argument.



In [12]:
energy_overob <- macro_info %>% 
    mutate(overweight_obesity = ifelse(bmi >= 25, TRUE, FALSE)) %>% 
    na.omit() %>% 
    select(overweight_obesity, energy) %>% 
    pivot_longer(cols = -overweight_obesity, names_to = 'source', values_to = 'percent') %>% 
    group_by(overweight_obesity, source) %>% 
    summarise(m_percent = mean(percent), sd_percent = sd(percent)) %>% 
    mutate(m_percent = round(m_percent, 0), sd_percent = round(sd_percent, 0), 
           dist = paste(m_percent, sd_percent, sep='')) %>% 
    select(overweight_obesity, source, dist) %>% 
    pivot_wider(names_from = overweight_obesity, values_from = dist) %>% 
    mutate(condition = 'overweight_obesity') %>% 
    bind_cols(p_val = round(t_test_macro("overweight_obesity", "energy")$p.val, 3))

`summarise()` has grouped output by 'overweight_obesity'. You can override using the `.groups` argument.



In [13]:
bind_rows(mets_macro, dm2_macro, overob_macro) 

source,FALSE,TRUE,condition,p_val
<chr>,<chr>,<chr>,<chr>,<dbl>
carbohydrate,54% 8%,55% 8%,met_s,0.589
lipids,23% 5%,22% 5%,met_s,0.841
protein,22% 5%,23% 5%,met_s,0.394
carbohydrate,55% 8%,55% 8%,dm2,0.843
lipids,23% 5%,22% 6%,dm2,0.214
protein,22% 5%,23% 5%,dm2,0.537
carbohydrate,55% 8%,55% 8%,overweight_obesity,0.954
lipids,23% 5%,23% 5%,overweight_obesity,0.45
protein,22% 5%,23% 5%,overweight_obesity,0.427


In [14]:
bind_rows(energy_met_s, energy_dm2, energy_overob)

source,FALSE,TRUE,condition,p_val
<chr>,<chr>,<chr>,<chr>,<dbl>
energy,39442498,40132218,met_s,0.675
energy,39762398,39632204,d_diagnosed,0.965
energy,39372043,39852467,overweight_obesity,0.8
