In [1]:
library(rdydisstools)
setwd('~/notebooks/dissertation')
loadpackages(c('doParallel', 'foreach', 'logging', 'glue', 'grid', 'gridExtra', 'dplyr', 'ggplot2', 'DescTools', 'careless'))
nDatasets <- 5000

In [2]:
registerDoParallel(72)
h1data <- foreach(i=1:nDatasets, .verbose=FALSE) %dopar% {
    # Initialize logging
    addHandler(writeToFile, file=glue::glue('~/notebooks/dissertation/logs/h1/sim{i}.log'), level='INFO')
    logstring <- glue::glue('dis.h1.{i}')
    
    # Specify path for sample
    datasetstring <- glue::glue("~/notebooks/dissertation/data/simsample{i}.RData")
    logging::loginfo(glue::glue('dataset loading from data/simsample{i}'), logger=logstring)
    
    #load sample dataset into memory
    load(datasetstring)
    logging::loginfo(glue::glue('dataframe has been loaded from: {datasetstring}'), logger=logstring)
    
    #Evaluate Hypothesis 1
    t(dispatchHypothesisEval(df, hypothesis="h1", lastColumn=100))
} %>% do.call("rbind.data.frame", .)

save(h1data, file='~/notebooks/dissertation/artifacts/h1/h1data.RData')

In [5]:
load("~/notebooks/dissertation/artifacts/h1/h1data.RData")

In [29]:
h1data %>% mutate(antiPass=case_when((ht_b < hc_b) & !(hasOverlap) ~ 1,
                                      TRUE ~ 0)) %>% 
           summarize(npassed=sum(hypTest), nAntiPassed=sum(antiPass), passed = (sum(hypTest)/5000), xbar_ht=mean(ht_b), xbar_hc=mean(hc_b))

npassed,nAntiPassed,passed,xbar_ht,xbar_hc
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1741,3,0.3482,0.2887405,0.1749188


In [23]:
t.test(h1data$ht_b, h1data$hc_b, paired=TRUE, alternative="greater")
cohensD(h1data$ht_b, h1data$hc_b, method='paired')
wilcox.test(h1data$ht_b, h1data$hc_b, alternative="greater")



	Paired t-test

data:  h1data$ht_b and h1data$hc_b
t = 86.586, df = 4999, p-value < 2.2e-16
alternative hypothesis: true difference in means is greater than 0
95 percent confidence interval:
 0.111659      Inf
sample estimates:
mean of the differences 
              0.1138217 



	Wilcoxon rank sum test with continuity correction

data:  h1data$ht_b and h1data$hc_b
W = 19501286, p-value < 2.2e-16
alternative hypothesis: true location shift is greater than 0
