In [31]:
# setwd("home/bogdan/Desktop/Amgen_code")
library(ggplot2)
library(reshape2)
library(dplyr)
library(tidyr)
library(car)
library(lmtest)
library(nortest)
library(multcomp)
library(emmeans)
# library(tidyverse)

In [2]:
x = read.delim("input.txt", header = T, sep="\t", stringsAsFactors = FALSE)
colnames(x)

In [3]:
# DATA EXPLORATION, in concordance with the longitudinal treatment evaluation

In [4]:
print("the subjects enrolled in the study :")
table(x$subject)
print("the number of subjects is :")
length(unique(x$subject))
print("the timepoints :")
table(x$timepoint)
print("the markers :")
table(x$marker)
print("the treatment groups :")
table(x$treatment_group)

[1] "the subjects enrolled in the study :"



 A  B  C  D  E  F  G  H  J  K  L  M  N  P  Q  R  S  T  U  V  W 
12 15 15 15 15  9 15  3  9 15  5 12 15 15 12 12 15 12 15  6 12 

[1] "the number of subjects is :"


[1] "the timepoints :"



 DAY1 DAY15 DAY22 DAY29  DAY8 
   56    51    48    45    54 

[1] "the markers :"



C4 C8 TG 
85 85 84 

[1] "the treatment groups :"



TA TB TC 
83 87 84 

In [5]:
# coding subject, timepoint, marker, treatment_group as FACTORS
x$subject <- factor(x$subject)
x$timepoint <- factor(x$timepoint)
x$marker <- factor(x$marker)
x$treatment_group <- factor(x$treatment_group)

# ensuring that 'timepoint' is a factor with the desired order
x$timepoint <- factor(x$timepoint, levels = c("DAY1", "DAY8", "DAY15", "DAY22", "DAY29"))

In [6]:
# reformatting the data by using reshape2 package :

x_wide <- reshape(
  x,
  idvar = c("subject", "marker", "treatment_group"),  # Variables to keep constant
  timevar = "timepoint",                              # Variable that will become columns
  direction = "wide",                                 # Convert from long to wide format
  v.names = "analyte_value"                           # Values to spread across the new columns
)

# Print the transformed data
colnames(x_wide)
head(x_wide, 2)
tail(x_wide, 2)

Unnamed: 0_level_0,subject,marker,treatment_group,analyte_value.DAY1,analyte_value.DAY15,analyte_value.DAY22,analyte_value.DAY29,analyte_value.DAY8
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<int>,<int>,<int>,<int>,<int>
1,A,C4,TA,169,308,,290,187
2,A,C8,TA,207,418,,479,156


Unnamed: 0_level_0,subject,marker,treatment_group,analyte_value.DAY1,analyte_value.DAY15,analyte_value.DAY22,analyte_value.DAY29,analyte_value.DAY8
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<int>,<int>,<int>,<int>,<int>
190,W,C8,TC,407,499,396,421,
191,W,TG,TC,36,338,221,99,


In [7]:
# 4. Please test for each marker whether Day 8 and Day 1 readouts are different at alpha = 0.05 under each treatment

In [8]:
# we select the relevant columns :
#                        subject, 
#                        marker,
#                        treatment_group,
#                        analyte_value.DAY1,
#                        analyte_value.DAY8


a <- x_wide %>% dplyr::select(subject, 
                        marker,
                        treatment_group,
                        analyte_value.DAY1,
                        analyte_value.DAY8)

print("the markers that are present :")
table(a$marker)

head(a, 2)
tail(a, 2)

[1] "the markers that are present :"



C4 C8 TG 
21 21 21 

Unnamed: 0_level_0,subject,marker,treatment_group,analyte_value.DAY1,analyte_value.DAY8
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<int>,<int>
1,A,C4,TA,169,187
2,A,C8,TA,207,156


Unnamed: 0_level_0,subject,marker,treatment_group,analyte_value.DAY1,analyte_value.DAY8
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<int>,<int>
190,W,C8,TC,407,
191,W,TG,TC,36,


In [9]:
# In order to determine the type of statistical test to use (T-test, Welch T-test, Wilcoxon signed rank test) 
# we check :
# NORMAL DISTRIBUTION of ANALYTE_VALUES
# HOMOSCEDASTICITY or HETEROSCEDASTICITY of ANALYTE_VALUES

In [10]:
# NORMAL DISTRIBUTION of ANALYTE_VALUES

In [11]:
# which test to use to assess normal distribution of the data ?

# Shapiro-Wilk : it is widely used and generally recommended for smaller sample sizes.

# Anderson-Darling Test : it is more robust in detecting deviations from normality in larger samples.

# !!! In our case, we can not apply AD test, because the sample size must be greater than 7.

In [12]:
a_ta <- a %>% filter(treatment_group == "TA")
# head(a_ta, 2)
# tail(a_ta, 2)

a_tc <- a %>% filter(treatment_group == "TC")
# head(a_tc, 2)
# tail(a_tc, 2)

a_tb <- a %>% filter(treatment_group == "TB")
print(a_tb)

   subject marker treatment_group analyte_value.DAY1 analyte_value.DAY8
1        C     C4              TB               2328               2534
2        C     C8              TB               6456               6384
3        C     TG              TB                 27                 71
4        D     C4              TB                117                304
5        D     C8              TB               1209               1711
6        D     TG              TB                 13                 29
7        E     C4              TB                162                254
8        E     C8              TB                466                462
9        E     TG              TB                  8                 50
10       G     C4              TB                436                657
11       G     C8              TB                245                522
12       G     TG              TB                132                270
13       K     C4              TB                767            

In [13]:
sw_normality_results_a_ta <- a_ta %>%
  group_by(marker) %>%
  summarize(
    shapiro_day1_p_value = shapiro.test(analyte_value.DAY1)$p.value,
    shapiro_day8_p_value = shapiro.test(analyte_value.DAY8)$p.value,
    day1_normal = ifelse(shapiro_day1_p_value >= 0.05, "Normal distrib", "Not Normal distrib"),
    day8_normal = ifelse(shapiro_day8_p_value >= 0.05, "Normal distrib", "Not Normal distrib"),
    .groups = 'drop'
  )

# View the results
print(sw_normality_results_a_ta)

[90m# A tibble: 3 × 5[39m
  marker shapiro_day1_p_value shapiro_day8_p_value day1_normal       day8_normal
  [3m[90m<fct>[39m[23m                 [3m[90m<dbl>[39m[23m                [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m             [3m[90m<chr>[39m[23m      
[90m1[39m C4                   0.022[4m6[24m               0.625  Not Normal distr… Normal dis…
[90m2[39m C8                   0.036[4m7[24m               0.052[4m3[24m Not Normal distr… Normal dis…
[90m3[39m TG                   0.768                0.030[4m7[24m Normal distrib    Not Normal…


In [14]:
sw_normality_results_a_tb <- a_tb %>%
  group_by(marker) %>%
  summarize(
    shapiro_day1_p_value = shapiro.test(analyte_value.DAY1)$p.value,
    shapiro_day8_p_value = shapiro.test(analyte_value.DAY8)$p.value,
    day1_normal = ifelse(shapiro_day1_p_value >= 0.05, "Normal distrib", "Not Normal distrib"),
    day8_normal = ifelse(shapiro_day8_p_value >= 0.05, "Normal distrib", "Not Normal distrib"),
    .groups = 'drop'
  )

# View the results
print(sw_normality_results_a_tb)

[90m# A tibble: 3 × 5[39m
  marker shapiro_day1_p_value shapiro_day8_p_value day1_normal       day8_normal
  [3m[90m<fct>[39m[23m                 [3m[90m<dbl>[39m[23m                [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m             [3m[90m<chr>[39m[23m      
[90m1[39m C4                  0.058[4m1[24m               0.072[4m9[24m  Normal distrib    Normal dis…
[90m2[39m C8                  0.004[4m1[24m[4m3[24m              0.009[4m0[24m[4m5[24m Not Normal distr… Not Normal…
[90m3[39m TG                  0.171                0.199   Normal distrib    Normal dis…


In [15]:
sw_normality_results_a_tc <- a_tc %>%
  group_by(marker) %>%
  summarize(
    shapiro_day1_p_value = shapiro.test(analyte_value.DAY1)$p.value,
    shapiro_day8_p_value = shapiro.test(analyte_value.DAY8)$p.value,
    day1_normal = ifelse(shapiro_day1_p_value >= 0.05, "Normal distrib", "Not Normal distrib"),
    day8_normal = ifelse(shapiro_day8_p_value >= 0.05, "Normal distrib", "Not Normal distrib"),
    .groups = 'drop'
  )

# View the results
print(sw_normality_results_a_tc)

[90m# A tibble: 3 × 5[39m
  marker shapiro_day1_p_value shapiro_day8_p_value day1_normal       day8_normal
  [3m[90m<fct>[39m[23m                 [3m[90m<dbl>[39m[23m                [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m             [3m[90m<chr>[39m[23m      
[90m1[39m C4                 0.059[4m2[24m                  0.919 Normal distrib    Normal dis…
[90m2[39m C8                 0.000[4m3[24m[4m1[24m[4m8[24m                0.883 Not Normal distr… Normal dis…
[90m3[39m TG                 0.900                   0.885 Normal distrib    Normal dis…


In [16]:
# CONCLUSIONS :
# with a few exceptions, these tests inform us that the analyte values data at Day 1 and Day 8 is NORMALLY DISTRIBUTED

In [17]:
# HOMOSCEDASTICITY or HETEROSCEDASTICITY of ANALYTE_VALUES

In [18]:
# Next, we check the equality between variances for each marker analyte_values at Day 1 and Day 8. 

# There are many statistical tests that we can use : 
# F-test (var.test)
# Levene test
# Bartlett's Test
# Fligner-Killeen Test

# I have noted that the results that we obtain with each of these tests are relatively similar.

In [19]:
# Treatment TA
# The p-value is greater than 0.05, it suggests no significant difference in variances for the group associated with each marker.

df = a_ta

df_long <- df %>%
  pivot_longer(cols = starts_with("analyte_value"), names_to = "day", values_to = "value") %>%
  mutate(day = ifelse(day == "analyte_value.DAY1", "Day1", "Day8"))

# Perform F-test for equal variances for each marker and check statistical significance
equal_variance_results <- df_long %>%
  group_by(marker) %>%
  summarize(
    p_value = var.test(value[day == "Day1"], value[day == "Day8"])$p.value,
    significance = ifelse(p_value <= 0.05, "Significant", "Not Significant")
  )

# Print the results
print("Equality in the variances between Markers between Day 1 and Day 8, under treatment TC")
print("F-test (var.test)")
equal_variance_results

# Levene's test for equal variances for each marker
equal_variance_results <- df_long %>%
  group_by(marker) %>%
  summarize(
    p_value = leveneTest(value ~ day)$"Pr(>F)"[1],
    significance = ifelse(p_value <= 0.05, "Significant", "Not Significant")
  )

# Print the results
print("Equality in the variances between Markers between Day 1 and Day 8, under treatment TC")
print("Levene's test")
equal_variance_results

# Bartlett's test for equal variances for each marker
equal_variance_results <- df_long %>%
  group_by(marker) %>%
  summarize(
    p_value = bartlett.test(value ~ day)$p.value,
    significance = ifelse(p_value <= 0.05, "Significant", "Not Significant")
  )

# Print the results
print("Equality in the variances between Markers between Day 1 and Day 8, under treatment TC")
print("Bartlett's test")
equal_variance_results

# Fligner-Killeen test for equal variances for each marker
equal_variance_results <- df_long %>%
  group_by(marker) %>%
  summarize(
    p_value = fligner.test(value ~ day)$p.value,
    significance = ifelse(p_value <= 0.05, "Significant", "Not Significant")
  )

# Print the results
print("Equality in the variances between Markers between Day 1 and Day 8, under treatment TC")
print("Fligner-Killeen")
equal_variance_results


[1] "Equality in the variances between Markers between Day 1 and Day 8, under treatment TC"
[1] "F-test (var.test)"


marker,p_value,significance
<fct>,<dbl>,<chr>
C4,0.5985968,Not Significant
C8,0.1154033,Not Significant
TG,3.769659e-06,Significant


[1m[22m[36mℹ[39m In argument: `p_value = leveneTest(value ~ day)$"Pr(>F)"[1]`.
[36mℹ[39m In group 1: `marker = C4`.
[33m![39m group coerced to factor.


[1] "Equality in the variances between Markers between Day 1 and Day 8, under treatment TC"
[1] "Levene's test"


marker,p_value,significance
<fct>,<dbl>,<chr>
C4,0.3372212,Not Significant
C8,0.4657139,Not Significant
TG,0.1311042,Not Significant


[1] "Equality in the variances between Markers between Day 1 and Day 8, under treatment TC"
[1] "Bartlett's test"


marker,p_value,significance
<fct>,<dbl>,<chr>
C4,0.5981315,Not Significant
C8,0.1154739,Not Significant
TG,4.61835e-06,Significant


[1] "Equality in the variances between Markers between Day 1 and Day 8, under treatment TC"
[1] "Fligner-Killeen"


marker,p_value,significance
<fct>,<dbl>,<chr>
C4,0.30342416,Not Significant
C8,0.759534,Not Significant
TG,0.08229296,Not Significant


In [20]:
# Treatment TB
# The p-value is greater than 0.05, it suggests no significant difference in variances for the group associated with each marker.

df = a_tb

df_long <- df %>%
  pivot_longer(cols = starts_with("analyte_value"), names_to = "day", values_to = "value") %>%
  mutate(day = ifelse(day == "analyte_value.DAY1", "Day1", "Day8"))

# Perform F-test for equal variances for each marker and check statistical significance
equal_variance_results <- df_long %>%
  group_by(marker) %>%
  summarize(
    p_value = var.test(value[day == "Day1"], value[day == "Day8"])$p.value,
    significance = ifelse(p_value <= 0.05, "Significant", "Not Significant")
  )

# Print the results
print("Equality in the variances between Markers between Day 1 and Day 8, under treatment TC")
print("F-test (var.test)")
equal_variance_results

# Levene's test for equal variances for each marker
equal_variance_results <- df_long %>%
  group_by(marker) %>%
  summarize(
    p_value = leveneTest(value ~ day)$"Pr(>F)"[1],
    significance = ifelse(p_value <= 0.05, "Significant", "Not Significant")
  )

# Print the results
print("Equality in the variances between Markers between Day 1 and Day 8, under treatment TC")
print("Levene's test")
equal_variance_results

# Bartlett's test for equal variances for each marker
equal_variance_results <- df_long %>%
  group_by(marker) %>%
  summarize(
    p_value = bartlett.test(value ~ day)$p.value,
    significance = ifelse(p_value <= 0.05, "Significant", "Not Significant")
  )

# Print the results
print("Equality in the variances between Markers between Day 1 and Day 8, under treatment TC")
print("Bartlett's test")
equal_variance_results

# Fligner-Killeen test for equal variances for each marker
equal_variance_results <- df_long %>%
  group_by(marker) %>%
  summarize(
    p_value = fligner.test(value ~ day)$p.value,
    significance = ifelse(p_value <= 0.05, "Significant", "Not Significant")
  )

# Print the results
print("Equality in the variances between Markers between Day 1 and Day 8, under treatment TC")
print("Fligner-Killeen")
equal_variance_results


[1] "Equality in the variances between Markers between Day 1 and Day 8, under treatment TC"
[1] "F-test (var.test)"


marker,p_value,significance
<fct>,<dbl>,<chr>
C4,0.75398551,Not Significant
C8,0.93517333,Not Significant
TG,0.01479721,Significant


[1m[22m[36mℹ[39m In argument: `p_value = leveneTest(value ~ day)$"Pr(>F)"[1]`.
[36mℹ[39m In group 1: `marker = C4`.
[33m![39m group coerced to factor.


[1] "Equality in the variances between Markers between Day 1 and Day 8, under treatment TC"
[1] "Levene's test"


marker,p_value,significance
<fct>,<dbl>,<chr>
C4,0.58223428,Not Significant
C8,0.71467355,Not Significant
TG,0.08516237,Not Significant


[1] "Equality in the variances between Markers between Day 1 and Day 8, under treatment TC"
[1] "Bartlett's test"


marker,p_value,significance
<fct>,<dbl>,<chr>
C4,0.70728236,Not Significant
C8,0.88584904,Not Significant
TG,0.01320828,Significant


[1] "Equality in the variances between Markers between Day 1 and Day 8, under treatment TC"
[1] "Fligner-Killeen"


marker,p_value,significance
<fct>,<dbl>,<chr>
C4,0.41780912,Not Significant
C8,0.60242503,Not Significant
TG,0.07072804,Not Significant


In [21]:
# Treatment TC
# The p-value is greater than 0.05, it suggests no significant difference in variances for the group associated with each marker.

df = a_tc

df_long <- df %>%
  pivot_longer(cols = starts_with("analyte_value"), names_to = "day", values_to = "value") %>%
  mutate(day = ifelse(day == "analyte_value.DAY1", "Day1", "Day8"))

# Perform F-test for equal variances for each marker and check statistical significance
equal_variance_results <- df_long %>%
  group_by(marker) %>%
  summarize(
    p_value = var.test(value[day == "Day1"], value[day == "Day8"])$p.value,
    significance = ifelse(p_value <= 0.05, "Significant", "Not Significant")
  )

# Print the results
print("Equality in the variances between Markers between Day 1 and Day 8, under treatment TC")
print("F-test (var.test)")
equal_variance_results

# Levene's test for equal variances for each marker
equal_variance_results <- df_long %>%
  group_by(marker) %>%
  summarize(
    p_value = leveneTest(value ~ day)$"Pr(>F)"[1],
    significance = ifelse(p_value <= 0.05, "Significant", "Not Significant")
  )

# Print the results
print("Equality in the variances between Markers between Day 1 and Day 8, under treatment TC")
print("Levene's test")
equal_variance_results

# Bartlett's test for equal variances for each marker
equal_variance_results <- df_long %>%
  group_by(marker) %>%
  summarize(
    p_value = bartlett.test(value ~ day)$p.value,
    significance = ifelse(p_value <= 0.05, "Significant", "Not Significant")
  )

# Print the results
print("Equality in the variances between Markers between Day 1 and Day 8, under treatment TC")
print("Bartlett's test")
equal_variance_results

# Fligner-Killeen test for equal variances for each marker
equal_variance_results <- df_long %>%
  group_by(marker) %>%
  summarize(
    p_value = fligner.test(value ~ day)$p.value,
    significance = ifelse(p_value <= 0.05, "Significant", "Not Significant")
  )

# Print the results
print("Equality in the variances between Markers between Day 1 and Day 8, under treatment TC")
print("Fligner-Killeen")
equal_variance_results


[1] "Equality in the variances between Markers between Day 1 and Day 8, under treatment TC"
[1] "F-test (var.test)"


marker,p_value,significance
<fct>,<dbl>,<chr>
C4,0.22127536,Not Significant
C8,0.42047836,Not Significant
TG,0.08528433,Not Significant


[1m[22m[36mℹ[39m In argument: `p_value = leveneTest(value ~ day)$"Pr(>F)"[1]`.
[36mℹ[39m In group 1: `marker = C4`.
[33m![39m group coerced to factor.


[1] "Equality in the variances between Markers between Day 1 and Day 8, under treatment TC"
[1] "Levene's test"


marker,p_value,significance
<fct>,<dbl>,<chr>
C4,0.1075258,Not Significant
C8,0.9123884,Not Significant
TG,0.1497567,Not Significant


[1] "Equality in the variances between Markers between Day 1 and Day 8, under treatment TC"
[1] "Bartlett's test"


marker,p_value,significance
<fct>,<dbl>,<chr>
C4,0.1851524,Not Significant
C8,0.3610777,Not Significant
TG,0.1093902,Not Significant


[1] "Equality in the variances between Markers between Day 1 and Day 8, under treatment TC"
[1] "Fligner-Killeen"


marker,p_value,significance
<fct>,<dbl>,<chr>
C4,0.1227799,Not Significant
C8,0.5546441,Not Significant
TG,0.1440726,Not Significant


In [22]:
# CONCLUSIONS :
# with a few exceptions, these tests tell us that the VARIANCES of the distributions 
# of the analyte values are equal between Day 1 and Day 8

In [23]:
# Given the gaussian distribution and the homoscedasticity of the analyte values for each marker under the treatment TA, TB, TC, 
# we could use a standard t.test var.equal = TRUE. Due to several exceptions, we will use also Welch T-test. 

In [24]:
# treatment TA

In [25]:
df = a_ta

# use T.test with var.equal = TRUE

results <- df %>%
  group_by(marker) %>%
  summarise(
    t_test = list(t.test(analyte_value.DAY8, analyte_value.DAY1, var.equal=TRUE)),
    .groups = 'drop'
  ) %>%
  mutate(
    p_value = sapply(t_test, function(x) x$p.value),
    significance_label = ifelse(p_value < 0.05, "Significant", "Not Significant")
  )
# Print the results
print("Differences in the Treatment TA :")
print("use T.test that assumes equal variances :")
print(results)
      
# use Welch T.test
      
results <- df %>%
  group_by(marker) %>%
  summarise(
    t_test = list(t.test(analyte_value.DAY8, analyte_value.DAY1)),
    .groups = 'drop'
  ) %>%
  mutate(
    p_value = sapply(t_test, function(x) x$p.value),
    significance_label = ifelse(p_value < 0.05, "Significant", "Not Significant")
  )
# Print the results
print("Differences in the Treatment TA :")
print("use Welsch T.test:")
print(results)

[1] "Differences in the Treatment TA :"
[1] "use T.test that assumes equal variances :"
[90m# A tibble: 3 × 4[39m
  marker t_test  p_value significance_label
  [3m[90m<fct>[39m[23m  [3m[90m<list>[39m[23m    [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m             
[90m1[39m C4     [90m<htest>[39m  0.145  Not Significant   
[90m2[39m C8     [90m<htest>[39m  0.314  Not Significant   
[90m3[39m TG     [90m<htest>[39m  0.071[4m4[24m Not Significant   
[1] "Differences in the Treatment TA :"
[1] "use Welsch T.test:"
[90m# A tibble: 3 × 4[39m
  marker t_test  p_value significance_label
  [3m[90m<fct>[39m[23m  [3m[90m<list>[39m[23m    [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m             
[90m1[39m C4     [90m<htest>[39m  0.147  Not Significant   
[90m2[39m C8     [90m<htest>[39m  0.321  Not Significant   
[90m3[39m TG     [90m<htest>[39m  0.073[4m0[24m Not Significant   


In [26]:
# treatment TB

In [27]:
df = a_tb 

# use T.test with var.equal = TRUE

results <- df %>%
  group_by(marker) %>%
  summarise(
    t_test = list(t.test(analyte_value.DAY8, analyte_value.DAY1, var.equal=TRUE)),
    .groups = 'drop'
  ) %>%
  mutate(
    p_value = sapply(t_test, function(x) x$p.value),
    significance_label = ifelse(p_value < 0.05, "Significant", "Not Significant")
  )
# Print the results
print("Differences in the Treatment TB :")
print("use T.test that assumes equal variances :")
print(results)
      
# use Welch T.test
      
results <- df %>%
  group_by(marker) %>%
  summarise(
    t_test = list(t.test(analyte_value.DAY8, analyte_value.DAY1)),
    .groups = 'drop'
  ) %>%
  mutate(
    p_value = sapply(t_test, function(x) x$p.value),
    significance_label = ifelse(p_value < 0.05, "Significant", "Not Significant")
  )
# Print the results
print("Differences in the Treatment TB :")
print("use Welsch T.test:")
print(results)

[1] "Differences in the Treatment TB :"
[1] "use T.test that assumes equal variances :"
[90m# A tibble: 3 × 4[39m
  marker t_test  p_value significance_label
  [3m[90m<fct>[39m[23m  [3m[90m<list>[39m[23m    [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m             
[90m1[39m C4     [90m<htest>[39m  0.483  Not Significant   
[90m2[39m C8     [90m<htest>[39m  0.678  Not Significant   
[90m3[39m TG     [90m<htest>[39m  0.096[4m6[24m Not Significant   
[1] "Differences in the Treatment TB :"
[1] "use Welsch T.test:"
[90m# A tibble: 3 × 4[39m
  marker t_test  p_value significance_label
  [3m[90m<fct>[39m[23m  [3m[90m<list>[39m[23m    [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m             
[90m1[39m C4     [90m<htest>[39m  0.469  Not Significant   
[90m2[39m C8     [90m<htest>[39m  0.675  Not Significant   
[90m3[39m TG     [90m<htest>[39m  0.068[4m2[24m Not Significant   


In [28]:
# treatment TC

In [29]:
df = a_tc

# use T.test with var.equal = TRUE

results <- df %>%
  group_by(marker) %>%
  summarise(
    t_test = list(t.test(analyte_value.DAY8, analyte_value.DAY1, var.equal=TRUE)),
    .groups = 'drop'
  ) %>%
  mutate(
    p_value = sapply(t_test, function(x) x$p.value),
    significance_label = ifelse(p_value < 0.05, "Significant", "Not Significant")
  )
# Print the results
print("Differences in the Treatment TC :")
print("use T.test that assumes equal variances :")
print(results)
      
# use Welch T.test
      
results <- df %>%
  group_by(marker) %>%
  summarise(
    t_test = list(t.test(analyte_value.DAY8, analyte_value.DAY1)),
    .groups = 'drop'
  ) %>%
  mutate(
    p_value = sapply(t_test, function(x) x$p.value),
    significance_label = ifelse(p_value < 0.05, "Significant", "Not Significant")
  )
# Print the results
print("Differences in the Treatment TC :")
print("use Welsch T.test:")
print(results)

[1] "Differences in the Treatment TC :"
[1] "use T.test that assumes equal variances :"
[90m# A tibble: 3 × 4[39m
  marker t_test  p_value significance_label
  [3m[90m<fct>[39m[23m  [3m[90m<list>[39m[23m    [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m             
[90m1[39m C4     [90m<htest>[39m  0.667  Not Significant   
[90m2[39m C8     [90m<htest>[39m  0.935  Not Significant   
[90m3[39m TG     [90m<htest>[39m  0.052[4m1[24m Not Significant   
[1] "Differences in the Treatment TC :"
[1] "use Welsch T.test:"
[90m# A tibble: 3 × 4[39m
  marker t_test  p_value significance_label
  [3m[90m<fct>[39m[23m  [3m[90m<list>[39m[23m    [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m             
[90m1[39m C4     [90m<htest>[39m   0.601 Not Significant   
[90m2[39m C8     [90m<htest>[39m   0.925 Not Significant   
[90m3[39m TG     [90m<htest>[39m   0.151 Not Significant   


In [30]:
# CONCLUSIONS : for each marker at Day 8 and Day 1 under each treatment, 
# we do not detect statistical significance difference of the analyte values between Day 1 and Day 8.