In [1]:
library(rdydisstools)
setwd('~/notebooks/dissertation')
loadpackages(c('doParallel', 'foreach', 'logging', 'glue', 'grid', 'gridExtra', 'dplyr', 'ggplot2', 'DescTools', 'careless', 'lsr'))
nDatasets <- 5000

In [2]:
registerDoParallel(72)
h2data <- foreach(i=1:nDatasets, .verbose=FALSE) %dopar% {
    # Initialize logging
    addHandler(writeToFile, file=glue::glue('~/notebooks/dissertation/logs/h2/sim{i}-2.log'), level='INFO')
    logstring <- glue::glue('dis.h2.{i}-2')
    
    # Specify path for sample
    datasetstring <- glue::glue("~/notebooks/dissertation/data/simsample{i}.RData")
    logging::loginfo(glue::glue('dataset loading from data/simsample{i}'), logger=logstring)
    
    #load sample dataset into memory
    load(datasetstring)
    logging::loginfo(glue::glue('dataframe has been loaded from: {datasetstring}-2'), logger=logstring)
    
    #Evaluate Hypothesis 1
    t(dispatchHypothesisEval(df, hypothesis="h2", lastColumn=100))
} %>% do.call("rbind.data.frame", .)

save(h2data, file='~/notebooks/dissertation/artifacts/h2/h2data.RData')

In [4]:
load("~/notebooks/dissertation/artifacts/h2/h2data.RData")

In [5]:
h2data %>% mutate(antiPass=case_when((ht_b < hc_b) & !(hasOverlap) ~ 1,
                                      TRUE ~ 0)) %>% 
           summarize(npassed=sum(hypTest), nAntiPassed=sum(antiPass), passed = (sum(hypTest)/5000), xbar_ht=mean(ht_b), xbar_hc=mean(hc_b))

npassed,nAntiPassed,passed,xbar_ht,xbar_hc
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
294,3,0.0588,0.1415291,0.08444369


In [6]:
t.test(h2data$ht_b, h2data$hc_b, paired=TRUE, alternative="greater")
cohensD(h2data$ht_b, h2data$hc_b, method='paired')
wilcox.test(h2data$ht_b, h2data$hc_b, alternative="greater")


	Paired t-test

data:  h2data$ht_b and h2data$hc_b
t = 70.196, df = 4999, p-value < 2.2e-16
alternative hypothesis: true difference in means is greater than 0
95 percent confidence interval:
 0.05574748        Inf
sample estimates:
mean of the differences 
             0.05708537 



	Wilcoxon rank sum test with continuity correction

data:  h2data$ht_b and h2data$hc_b
W = 16772833, p-value < 2.2e-16
alternative hypothesis: true location shift is greater than 0
