# 1. Regression Code

### The Code Below Performs a Regression from X to Y

Copyright (c) 2010-2013  Jonas Peters  [peters@stat.math.ethz.ch]

```
train_*** <- function(X, y, pars)  (e.g. train_linear or train_gp)
```
```
INPUT:
  X         nxp matrix of training inputs (n data points, p dimensions)
  Y         vector of N training outputs (n data points)
  pars      list containing parameters of the regression method

OUTPUT:
  result    list with the result of the regression
  $model        list of learned model (e.g., weight vector)
  $Yfit         fitted outputs for training inputs according to the learned model
  $residuals    noise values (e.g., residuals in the additive noise case) 
```

###### Linear Regression

In [1]:
train_linear <- function(X,y,pars = list()) {
    mod <- lm(y ~ X)
    result <- list()
    result$Yfit = as.matrix(mod$fitted.values)
    result$residuals = as.matrix(mod$residuals)
    result$model = mod
    #for coefficients see list(mod$coef)
    return(result)
}

##### GP Regression

In [2]:
gp_regression <- function(X,y, pars=list()) {
    options=gpOptions("ftc")
    options$kern$comp=list("rbf","white")
    #options$learnScales=TRUE
    model<-gpCreate(dim(X)[2],1,X,y,options)
    y2<-gpOut(model,X)
    model$Yfit<-y2
    model$residuals<-y-y2
    return(model)
}

train_gp <- function(X,y,pars = list()) {
    library(gptk)
    mod <- gp_regression(as.matrix(X),as.matrix(y))
    result <- list()
    result$Yfit = mod$Yfit
    result$residuals = mod$residuals
    result$model = mod
    return(result)
}

##### Gam Regression

In [3]:
library(mgcv)
train_gam <- function(X,y,pars = list(numBasisFcts = 10)) {
    if(!("numBasisFcts" %in% names(pars) ))  { 
        pars$numBasisFcts = 10
    }
    
    p <- dim(as.matrix(X))
    if(p[1]/p[2] < 3*pars$numBasisFcts) {
        pars$numBasisFcts <- ceiling(p[1]/(3*p[2]))
        cat("changed number of basis functions to    ", pars$numBasisFcts, "    in order to have enough samples per basis function\n")
    }
    dat <- data.frame(as.matrix(y),as.matrix(X))
    coln <- rep("null",p[2]+1)
    for(i in 1:(p[2]+1)) {
        coln[i] <- paste("var",i,sep="")
    }
    colnames(dat) <- coln
    labs<-"var1 ~ "
    if(p[2] > 1) {
        for(i in 2:p[2]) {
            labs<-paste(labs,"s(var",i,",k = ",pars$numBasisFcts,") + ",sep="")
        }
    }
    labs<-paste(labs,"s(var",p[2]+1,",k = ",pars$numBasisFcts,")",sep="")
    mod_gam <- FALSE
    try(mod_gam <- gam(formula=formula(labs), data=dat),silent = TRUE)
    if(typeof(mod_gam) == "logical") {
        cat("There was some error with gam. The smoothing parameter is set to zero.\n")
        labs<-"var1 ~ "
        if(p[2] > 1) {
            for(i in 2:p[2]) {
                labs<-paste(labs,"s(var",i,",k = ",pars$numBasisFcts,",sp=0) + ",sep="")
            }
        }
        labs<-paste(labs,"s(var",p[2]+1,",k = ",pars$numBasisFcts,",sp=0)",sep="")
        mod_gam <- gam(formula=formula(labs), data=dat)
    }
    result <- list()
    result$Yfit <- as.matrix(mod_gam$fitted.values)
    result$residuals <- as.matrix(mod_gam$residuals)
    result$model <- mod_gam 
    result$df <- mod_gam$df.residual     
    result$edf <- mod_gam$edf     
    result$edf1 <- mod_gam$edf1     
    
    # for degree of freedom see mod_gam$df.residual
    # for aic see mod_gam$aic
    return(result)
}

Loading required package: nlme
This is mgcv 1.8-24. For overview type 'help("mgcv-package")'.


##### GAM Boost

In [4]:
library(mboost)
train_GAMboost <- function(X,y,pars = list()) #
{
    ## begin old version
    # EXPLANATION: surprisingly, it turned out that this cannot be applied to large p (private discussion with T. Hothorn in Sep 2013)
    # yy <- y
    # dat <- data.frame(cbind(yy,X))
    # gb <- gamboost(yy ~ .,data=dat, baselearner = "bbs")
    ## end old version
    
    ## begin new version
    dat <- as.data.frame(X)
    bl <- lapply(dat, bbs)
    gb <- mboost_fit(bl, y)
    ## end new version
    
    result <- list()
    result$Yfit <- gb$fitted()
    result$residuals <- gb$resid()
    result$model <- gb
    return(result)
}

Loading required package: parallel
Loading required package: stabs
This is mboost 2.9-1. See ‘package?mboost’ and ‘news(package  = "mboost")’
for a complete list of changes.



##### Pen GAM

In [5]:
train_penGAM <- function(X,y,pars = list(kCV = 3))
{
    # Meier, L., van de Geer, S. and Bühlmann, P. (2009). High-Dimensional Additive Modeling. Annals of Statistics 37, 3779-3821
    # code available from Lukas Meier [meier@stat.math.ethz.ch]
    # !! It is not so easy to find good values for cross-validation !!
    war <- library(penGAM, logical.return=TRUE)
    if(!war)
    {
        error("penGAM regression cannot be used because the package (based on Meier, L. et al. High-Dimensional Additive Modeling. 2009) cannot be loaded. Code is available from Lukas Meier [meier@stat.math.ethz.ch]")
    }
    if(is.matrix(X))
    {
        dimX <- dim(X)[1]
    } else if(is.vector(X))
    {
        dimX <- length(X)
        X <- as.matrix(X)
    }
    
    # Cross-Validation
    
    allSets <- getSetsForKFoldCV(dimX, pars$kCV)
    trainSets <- allSets$trainSets
    testSets <- allSets$testSets 
    
    lambda.pen <- 0.955^seq(1,120,by=30)
    #lambda.pen  <- 0.9^(1:20) ## 0.955^(1:110) 0.955^(1:110) ## Special!!!
    #lambda.pen  <- c(0.5,0.2,0.1) ## 0.955^(1:110) 0.955^(1:110) ## Special!!!
    #lambda.pen  <- 0.95 ## 0.955^(1:110) 0.955^(1:110) ## Special!!!
    #lambda.pen = c(1, 0.9, 0.8, 0.5, 0.3, 0.1),
    
    lambda.curv <- c(2^(4:(-4)), 0)
    #lambda.curv <- c(2^(6:(-6)), 0)
    #lambda.curv = c(6, 4, 2, 1), 
    
    SS <- matrix(0,length(lambda.pen), length(lambda.curv))
    for(i in 1:pars$kCV)
    {
        kkk <- round(sqrt(dimX))
        fit.GAM <- penGAM(as.matrix(X[trainSets[[i]],]), as.matrix(y[trainSets[[i]]]), 
                          lambda.pen,
                          lambda.curv,
                          knots = kkk, 
                          #########################
                          # is this a good idea??????
                          #########################
                          model = LinReg(), control = grpl.control(trace = 0))
        pred <- predict(fit.GAM, newdata = as.matrix(X[testSets[[i]],]))
        for(pp in 1:length(lambda.pen))
        {
            for(cc in 1:length(lambda.curv))
            {
                SS[pp,cc] <- SS[pp,cc] + sum((pred[pp,cc,] - y[testSets[[i]]])^2)
            }
        }
    }
    
    #choose best lambdas
    opt <- arrayInd(which.min(SS),dim(SS))
    lambda.pen.opt <- lambda.pen[opt[1]]
    lambda.curv.opt <- lambda.curv[opt[2]]
    cat("Optimal value for lambda.pen: ", lambda.pen.opt, "\n")
    cat("Optimal value for lambda.curv: ", lambda.curv.opt, "\n")
    
    
    # fit a gam model    
    fit.GAM <- penGAM(as.matrix(X), as.matrix(y), 
                      lambda.pen.opt,
                      lambda.curv.opt,
                      knots = round(sqrt(dim(X)[1])),
                      model = LinReg(), control = grpl.control(trace = 0))
    
    result <- list()
    result$Yfit <- predict(fit.GAM,X)
    result$residuals <- y - result$Yfit
    result$model <- fit.GAM
    return(result)
}

##### Lasso

In [6]:
war <- library(glmnet, logical.return=TRUE)
if(!war)
{
    cat("The package glmnet is not installed. This only means that lasso regression cannot be performed. However, this has no effect of the standard GAM version.\n")
}
train_lasso <- function(X,y,pars = list())
{
    cvres <- cv.glmnet(X,y)
    mod <- glmnet(X,y,lambda = cvres$lambda.1se)
    result <- list()
    result$Yfit <- predict(mod,X)
    result$residuals <- y - result$Yfit
    result$model <- mod
    return(result)
}

“there is no package called ‘glmnet’”

The package glmnet is not installed. This only means that lasso regression cannot be performed. However, this has no effect of the standard GAM version.


##### Logistic Regression

In [None]:
library(glm)
logistic_regression <- function()


##### Train Model

In [7]:
train_model <- function(f,X,y,pars = list())
{
    result <- f(X,y,pars)
}

# 2. Independence Test

##### The Code Below Performs an Independence Test using the Hilbert-Schmidt Independence Critertion
Copyright (c) 2010 - 2013  Jonas Peters  [peters@stat.math.ethz.ch]

outputs the test statistic (N*HSIC) and the critical value (according to alpha). If the test statistic is larger than the critical value, H_0 (X and Y are independent) is rejected.

In [8]:
library(kernlab)
indtestHsic <- function(x,y,alpha=0.05, pars = list(method = "IncChol")) {    
    if(is.matrix(x)==FALSE){
        x<-as.matrix(x)}
    if(is.matrix(y)==FALSE){
        y<-as.matrix(y)}
    len <- dim(x)[1]
    
    # compute distance matrices
    xnorm<-as.matrix(dist(x,method="euclidean",diag=TRUE,upper=TRUE))
    xnorm<-xnorm^2
    ynorm<-as.matrix(dist(y,method="euclidean",diag=TRUE,upper=TRUE))
    ynorm<-ynorm^2
    
    # choose median heuristic for bandwidth
    if(len>1000) {
        sam <- sample(1:len,1000)
        xhilf<-xnorm[sam,sam]
        yhilf<-ynorm[sam,sam]
    } else {
        xhilf<-xnorm
        yhilf<-ynorm
    }
    
    sigmax<-sqrt(0.5*median(xhilf[lower.tri(xhilf,diag=FALSE)]))
    sigmay<-sqrt(0.5*median(yhilf[lower.tri(yhilf,diag=FALSE)]))
    
    
    if(pars$method == "Exact" || pars$method == "ExactFastTrace") {
        ###
        # Compute GramMat
        ###
        ptm <- proc.time()
        KX <- exp(-xnorm/(2*sigmax^2))
        KY <- exp(-ynorm/(2*sigmay^2))
        timeGramMat <- (proc.time() - ptm)[1]
        
        ###
        # Compute HSIC
        ###
        if(pars$method == "Exact") {
            ptm <- proc.time()
            H<-diag(1,len)-1/len*matrix(1,len,len)
            HSIC <- 1/(len^2)*sum(diag(KX%*%H%*%KY%*%H))
            timeHSIC <- (proc.time() - ptm)[1]
        }
        if(pars$method == "ExactFastTrace") {
            ptm <- proc.time()
            H<-diag(1,len)-1/len*matrix(1,len,len)
            HSIC <- 1/(len^2) * sum((KX - 1/len*(KX%*%rep(1,len))%*%t(rep(1,len)))*t(KY - 1/len*(KY%*%rep(1,len))%*%t(rep(1,len))))
            timeHSIC <- (proc.time() - ptm)[1]
        }
        
        ###
        # Compute Gamma Approximation
        ###
        ptm <- proc.time()
        mux <- (sum(KX)-len)/(len*(len-1))
        muy <- (sum(KY)-len)/(len*(len-1))
        
        mean_h0 <- 1/len*(1+mux*muy-mux-muy)
        var_h0 <- (2*(len-4)*(len-5))/(len*(len-1)*(len-2)*(len-3)) * 1/((len-1)^2)*sum((KX - 1/len*(KX%*%rep(1,len))%*%t(rep(1,len)))*t(KX - 1/len*(KX%*%rep(1,len))%*%t(rep(1,len)))) * 1/((len-1)^2)*sum((KY - 1/len*(KY%*%rep(1,len))%*%t(rep(1,len)))*t(KY - 1/len*(KY%*%rep(1,len))%*%t(rep(1,len))))
        timeGamma <- (proc.time() - ptm)[1]
        
    }
    
    if(pars$method == "IncChol" || pars$method == "IncCholFastTrace") {
        ###
        # Compute GramMat
        ###
        ## incomplete cholesky decomposition
        ptm <- proc.time()
        LX <- inchol(x, kernel="rbfdot", kpar=list(sigma=1/(2*sigmax^2)), tol = 0.0001, maxiter = 300)
        LX <- matrix(LX,nrow=dim(LX)[1], ncol=dim(LX)[2])
        LY <- inchol(y, kernel="rbfdot", kpar=list(sigma=1/(2*sigmay^2)), tol = 0.0001, maxiter = 300)
        LY <- matrix(LY,nrow=dim(LY)[1], ncol=dim(LY)[2])
        LXc <- LX-1/len*(as.matrix(rep(1,len))%*%colSums(LX))
        LYc <- LY-1/len*(as.matrix(rep(1,len))%*%colSums(LY))
        timeGramMat <- (proc.time() - ptm)[1]
        
        ###
        # Compute HSIC
        ###
        if(pars$method == "IncChol") {
            ptm <- proc.time()
            HSIC <- 1/(len^2)*sum(diag((t(LX)%*%LYc)%*%(t(LY)%*%LXc)))
            timeHSIC <- (proc.time() - ptm)[1]
        }
        if(pars$method == "IncCholFastTrace") {
            ptm <- proc.time()
            HSIC <- 1/(len^2)*sum( (t(LX)%*%LYc) * t((t(LY)%*%LXc)))
            timeHSIC <- (proc.time() - ptm)[1]
        }
        
        ###
        # Compute Gamma Approximation
        ###
        ptm <- proc.time()
        mux <- (crossprod(colSums(LX))-len)/(len*(len-1))
        muy <- (crossprod(colSums(LY))-len)/(len*(len-1))
        
        mean_h0 <- 1/len*(1+mux*muy-mux-muy)
        var_h0 <- (2*(len-4)*(len-5))/(len*(len-1)*(len-2)*(len-3))*1/((len-1)^2)*sum(diag((t(LX)%*%LXc)%*%(t(LX)%*%LXc)))*1/((len-1)^2)*sum(diag((t(LY)%*%LYc)%*%(t(LY)%*%LYc)))
        timeGamma <- (proc.time() - ptm)[1]
    }        
    
    a <- (mean_h0^2)/var_h0
    b <- len*var_h0/mean_h0
    critical_value <- qgamma(1-alpha,shape=a,scale=b)
    p_value <- pgamma(len*HSIC,shape=a,scale=b, lower.tail=FALSE)
    resu <- list(statistic = len*HSIC, crit.value = critical_value, p.value = p_value, time = c(timeGramMat,timeHSIC,timeGamma))
    return(resu)
}


# 3. Additive Noise Model Code


In [30]:
test <- function(x, y, reg_model = train_linear, ind_test = indtestHsic, cut_off = 10, verbose = FALSE) {
    xToY <- train_linear(x, y)
    yToX <- reg_model(y, x)
    xToY.P <- ind_test(xToY$residuals, x)$p.value
    yToX.P <- ind_test(yToX$residuals, y)$p.value
    if (xToY.P > 10 * yToX.P) {
        result <- "X"
    } else if (xToY.P * 10 < yToX.P) {
        result <- "Y"
    } else {
        result <- "NA"
    }

    if (verbose) {
        message("P value for forward is ", xToY.P, ", P value for backword is ", yToX.P, " result is ", ifelse(is.na(result), "Inconclusive", result))
    }
        
    c(result, xToY.P, yToX.P)
}

# 4. Function to Test a Pair

In [31]:
process_file = function(filepath) {
    read.delim(filepath, header = FALSE, sep="", stringsAsFactors = FALSE)    
}

In [32]:
test_pair <- function(file_path) {
    data <- process_file(file_path)
    result <- data.frame(FileName = character(),
                         TestParameter = character(),
                         RegressionModel = character(),
                         Collection = factor(),
                         ForwardPValue = double(),
                         BackwardPValue = double(),
                         Result = factor())

    message("Start testing file: ", file_path)
    message("/n")
    result <- test(
        data[["V1"]],
        data[["V2"]]
    )

    result
}


# 5. Now, test all of the pairs

##### Pair 1

In [33]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0001.txt"
results_1 = test_pair(file_path)
print(results_1)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0001.txt
/n


[1] "X"                    "6.37634844718238e-17" "2.73490976614775e-25"


##### Pair 2

In [34]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0002.txt"
results_2 = test_pair(file_path)
print(results_2)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0002.txt
/n


[1] "NA"                   "8.97362197664129e-17" "2.39595626120772e-17"


##### Pair 3

In [35]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0003.txt"
results_3 = test_pair(file_path)
print(results_3)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0003.txt
/n


[1] "X"                    "9.53978927992768e-08" "2.04386366515222e-19"


##### Pair 4

In [36]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0004.txt"
results_4 = test_pair(file_path)
print(results_4)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0004.txt
/n


[1] "X"                    "0.0182466355584996"   "3.77426466862528e-06"


##### Pair 5

In [37]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0005.txt"
results_5 = test_pair(file_path)
print(results_5)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0005.txt
/n


[1] "Y"                    "0"                    "1.02635586301467e-59"


##### Pair 6

In [38]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0006.txt"
results_6 = test_pair(file_path)
print(results_6)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0006.txt
/n


[1] "Y"                     "0"                     "1.05200174346854e-124"


##### Pair 7

In [39]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0007.txt"
results_7 = test_pair(file_path)
print(results_7)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0007.txt
/n


[1] "Y"                   "0"                   "2.0237658753334e-78"


##### Pair 8

In [40]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0008.txt"
results_8 = test_pair(file_path)
print(results_8)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0008.txt
/n


[1] "Y"                     "3.40658262807539e-320" "3.79807337205672e-44" 


##### Pair 9

In [41]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0009.txt"
results_9 = test_pair(file_path)
print(results_9)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0009.txt
/n


[1] "Y"                     "0"                     "1.68400141568654e-106"


##### Pair 10

In [42]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0010.txt"
results_10 = test_pair(file_path)
print(results_10)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0010.txt
/n


[1] "Y"                    "0"                    "1.50243743890298e-95"


##### Pair 11

In [43]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0011.txt"
results_11 = test_pair(file_path)
print(results_11)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0011.txt
/n


[1] "Y"                     "0"                     "9.56854787705103e-118"


##### Pair 12

In [44]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0012.txt"
results_12 = test_pair(file_path)
print(results_12)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0012.txt
/n


[1] "Y"                     "1.45260311607393e-294" "5.97325267051758e-188"


##### Pair 13

In [45]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0013.txt"
results_13 = test_pair(file_path)
print(results_13)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0013.txt
/n


[1] "X"                    "1.38622298742076e-23" "4.3499765635473e-55" 


##### Pair 14

In [46]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0014.txt"
results_14 = test_pair(file_path)
print(results_14)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0014.txt
/n


[1] "X"                    "3.06317465573772e-31" "1.35991516341853e-47"


##### Pair 15

In [47]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0015.txt"
results_15 = test_pair(file_path)
print(results_15)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0015.txt
/n


[1] "X"                    "1.96603213812734e-22" "3.86183978578115e-32"


##### Pair 16


In [48]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0016.txt"
results_16 = test_pair(file_path)
print(results_16)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0016.txt
/n


[1] "X"                    "3.13395707433834e-05" "1.81505912952658e-37"


##### Pair 17

In [53]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0017.txt"
results_17 = test_pair(file_path)
print(results_17)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0017.txt
/n
“NAs produced by integer overflow”

ERROR: Error in while (residue > tol && counter < maxiter) {: missing value where TRUE/FALSE needed


##### Pair 18

In [50]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0018.txt"
results_18 = test_pair(file_path)
print(results_18)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0018.txt
/n


[1] "X"                    "1.39904990486191e-50" "1.23007030992263e-54"


##### Pair 19

In [51]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0019.txt"
results_19 = test_pair(file_path)
print(results_19)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0019.txt
/n


[1] "X"                    "0.281230815317771"    "1.17196872553625e-16"


##### Pair 20

In [52]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0020.txt"
results_20 = test_pair(file_path)
print(results_20)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0020.txt
/n


[1] "NA"                   "6.40820892499059e-22" "9.05040194501355e-23"


##### Pair 21

In [54]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0021.txt"
results_21 = test_pair(file_path)
print(results_21)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0021.txt
/n


[1] "X"                    "5.07967942846438e-10" "8.91317987145589e-23"


##### Pair 22

In [55]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0022.txt"
results_22 = test_pair(file_path)
print(results_22)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0022.txt
/n


[1] "Y"                    "4.7117384952669e-13"  "4.79768519786228e-05"


##### Pair 23

In [56]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0023.txt"
results_23 = test_pair(file_path)
print(results_23)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0023.txt
/n


[1] "NA"                   "9.89603924501682e-17" "1.72109075188294e-16"


##### Pair 24

In [57]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0024.txt"
results_24 = test_pair(file_path)
print(results_24)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0024.txt
/n


[1] "NA"                  "0.00066727323124985" "0.00277748938688196"


##### Pair 25

In [58]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0025.txt"
results_25 = test_pair(file_path)
print(results_25)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0025.txt
/n


[1] "X"                    "0.00321773351971538"  "1.06707386939045e-10"


##### Pair 26

In [59]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0026.txt"
results_26 = test_pair(file_path)
print(results_26)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0026.txt
/n


[1] "X"                    "3.91369196514289e-07" "9.90686814311803e-10"


##### Pair 27

In [60]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0027.txt"
results_27 = test_pair(file_path)
print(results_27)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0027.txt
/n


[1] "NA"                   "3.06057480904574e-09" "6.64714133286598e-10"


##### Pair 28

In [61]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0028.txt"
results_28 = test_pair(file_path)
print(results_28)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0028.txt
/n


[1] "Y"                    "8.00370170766553e-34" "9.53335919385412e-31"


##### Pair 29

In [62]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0029.txt"
results_29 = test_pair(file_path)
print(results_29)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0029.txt
/n


[1] "X"                    "0.00820911343301002"  "2.41704862922942e-27"


##### Pair 30

In [63]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0030.txt"
results_30 = test_pair(file_path)
print(results_30)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0030.txt
/n


[1] "NA"                   "6.17210042480851e-05" "0.000110063433065037"


##### Pair 31

In [64]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0031.txt"
results_31 = test_pair(file_path)
print(results_31)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0031.txt
/n


[1] "X"                    "0.00292499303575737"  "3.79324094026209e-08"


##### Pair 32

In [65]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0032.txt"
results_32 = test_pair(file_path)
print(results_32)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0032.txt
/n


[1] "X"                     "4.94328461605578e-75"  "1.96390218941051e-182"


##### Pair 33

In [381]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0033.txt"
results_33 = test_pair(file_path)
print(results_33)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0033.txt
/n
Using Model: train_linear


[1] "TRUE"                 "0.598686933110165"    "1.45935412409211e-07"


##### Pair 34

In [382]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0034.txt"
results_34 = test_pair(file_path)
print(results_34)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0034.txt
/n
Using Model: train_linear


[1] "NA"                 "0.027788353539999"  "0.0110526998878855"


##### Pair 35

In [383]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0035.txt"
results_35 = test_pair(file_path)
print(results_35)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0035.txt
/n
Using Model: train_linear


[1] "FALSE"                "1.20535554983618e-13" "0.00119921112646913" 


##### Pair 36

In [384]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0036.txt"
results_36 = test_pair(file_path)
print(results_36)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0036.txt
/n
Using Model: train_linear


[1] "FALSE"                "1.52778891369515e-05" "0.000495492798224535"


##### Pair 37

In [385]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0037.txt"
results_37 = test_pair(file_path)
print(results_37)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0037.txt
/n
Using Model: train_linear


[1] "FALSE"                "5.90867269009495e-27" "5.77115909036308e-05"


##### Pair 38

In [386]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0038.txt"
results_38 = test_pair(file_path)
print(results_38)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0038.txt
/n
Using Model: train_linear


[1] "NA"                   "8.47697955798957e-08" "1.81217091840299e-07"


##### Pair 39

In [387]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0039.txt"
results_39 = test_pair(file_path)
print(results_39)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0039.txt
/n
Using Model: train_linear


[1] "TRUE"                 "0.295730701377701"    "9.23602354210878e-10"


##### Pair 40

In [388]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0040.txt"
results_40 = test_pair(file_path)
print(results_40)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0040.txt
/n
Using Model: train_linear


[1] "TRUE"                 "0.00920820539421438"  "3.58645381465542e-26"


##### Pair 41

In [389]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0041.txt"
results_41 = test_pair(file_path)
print(results_41)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0041.txt
/n
Using Model: train_linear


[1] "TRUE"                 "6.43269278606615e-05" "3.53094050383435e-15"


##### Pair 42

In [391]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0042.txt"
results_42 = test_pair(file_path)
print(results_42)































































































































































“NAs introduced by coercion”Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0042.txt
/n
Using Model: train_linear


ERROR: Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...): 0 (non-NA) cases


##### Pair 43

In [66]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0043.txt"
results_43 = test_pair(file_path)
print(results_43)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0043.txt
/n


[1] "NA" "0"  "0" 


##### Pair 44

In [67]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0044.txt"
results_44 = test_pair(file_path)
print(results_44)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0044.txt
/n


[1] "Y"                     "3.17228050700222e-305" "7.54451262091458e-184"


##### Pair 45

In [68]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0045.txt"
results_45 = test_pair(file_path)
print(results_45)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0045.txt
/n


[1] "NA" "0"  "0" 


##### Pair 46

In [69]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0046.txt"
results_46 = test_pair(file_path)
print(results_46)

Start testing file: /Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0046.txt
/n


[1] "NA" "0"  "0" 


##### Pair 47

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0047.txt"
results_47 = test_pair(file_path)
print(results_47)

##### Pair 48

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0048.txt"
results_48 = test_pair(file_path)
print(results_48)

##### Pair 49

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0049.txt"
results_49 = test_pair(file_path)
print(results_49)

##### Pair 50

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0050.txt"
results_50 = test_pair(file_path)
print(results_50)

##### Pair 51

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0051.txt"
results_51 = test_pair(file_path)
print(results_51)

##### Pair 52

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0052.txt"
results_52 = test_pair(file_path)
print(results_52)

##### Pair 53

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0053.txt"
results_53 = test_pair(file_path)
print(results_53)

##### Pair 54

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0054.txt"
results_54 = test_pair(file_path)
print(results_54)

##### Pair 55

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0055.txt"
results_55 = test_pair(file_path)
print(results_55)

##### Pair 56

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0056.txt"
results_56 = test_pair(file_path)
print(results_56)

##### Pair 57

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0057.txt"
results_57 = test_pair(file_path)
print(results_57)

##### Pair 58

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0058.txt"
results_58 = test_pair(file_path)
print(results_58)

##### Pair 59

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0059.txt"
results_59 = test_pair(file_path)
print(results_59)

##### Pair 60

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0060.txt"
results_60 = test_pair(file_path)
print(results_60)

##### Pair 61

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0061.txt"
results_61 = test_pair(file_path)
print(results_61)

##### Pair 62

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0062.txt"
results_62 = test_pair(file_path)
print(results_62)

##### Pair 63

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0063.txt"
results_63 = test_pair(file_path)
print(results_63)

##### Pair 64

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0064.txt"
results_64 = test_pair(file_path)
print(results_64)

##### Pair 65

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0065.txt"
results_65 = test_pair(file_path)
print(results_65)

##### Pair 66

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0066.txt"
results_66 = test_pair(file_path)
print(results_66)

##### Pair 67

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0067.txt"
results_67 = test_pair(file_path)
print(results_67)

##### Pair 68

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0068.txt"
results_68 = test_pair(file_path)
print(results_68)

##### Pair 69

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0069.txt"
results_69 = test_pair(file_path)
print(results_69)

##### Pair 70

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0070.txt"
results_70 = test_pair(file_path)
print(results_70)

##### Pair 71

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0071.txt"
results_71 = test_pair(file_path)
print(results_71)

##### Pair 72

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0072.txt"
results_72 = test_pair(file_path)
print(results_72)

##### Pair 73

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0073.txt"
results_73 = test_pair(file_path)
print(results_73)

##### Pair 74

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0074.txt"
results_74 = test_pair(file_path)
print(results_74)

##### Pair 75

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0075.txt"
results_75 = test_pair(file_path)
print(results_75)

##### Pair 76

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0076.txt"
results_76 = test_pair(file_path)
print(results_76)

##### Pair 77

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0077.txt"
results_77 = test_pair(file_path)
print(results_77)

##### Pair 78

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0078.txt"
results_78 = test_pair(file_path)
print(results_78)

##### Pair 79

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0079.txt"
results_79 = test_pair(file_path)
print(results_79)

##### Pair 80

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0080.txt"
results_80 = test_pair(file_path)
print(results_80)

##### Pair 81

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0081.txt"
results_81 = test_pair(file_path)
print(results_81)

##### Pair 82

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0082.txt"
results_82 = test_pair(file_path)
print(results_82)

##### Pair 83

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0083.txt"
results_83 = test_pair(file_path)
print(results_83)

##### Pair 84

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0084.txt"
results_84 = test_pair(file_path)
print(results_84)

##### Pair 85

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0085.txt"
results_85 = test_pair(file_path)
print(results_85)

##### Pair 86

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0086.txt"
results_86 = test_pair(file_path)
print(results_86)

##### Pair 87

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0087.txt"
results_87 = test_pair(file_path)
print(results_87)

##### Pair 88

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0088.txt"
results_88 = test_pair(file_path)
print(results_88)

##### Pair 89

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0089.txt"
results_89 = test_pair(file_path)
print(results_89)

##### Pair 90

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0090.txt"
results_90 = test_pair(file_path)
print(results_90)

##### Pair 91

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0091.txt"
results_91 = test_pair(file_path)
print(results_91)

##### Pair 92

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0092.txt"
results_92 = test_pair(file_path)
print(results_92)

##### Pair 93

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0093.txt"
results_93 = test_pair(file_path)
print(results_93)

##### Pair 94

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0094.txt"
results_94 = test_pair(file_path)
print(results_94)

##### Pair 95

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0095.txt"
results_95 = test_pair(file_path)
print(results_95)

##### Pair 96

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0096.txt"
results_96 = test_pair(file_path)
print(results_96)

##### Pair 97

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0097.txt"
results_97 = test_pair(file_path)
print(results_97)

##### Pair 98

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0098.txt"
results_98 = test_pair(file_path)
print(results_98)

##### Pair 99

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0099.txt"
results_99 = test_pair(file_path)
print(results_99)

##### Pair 100

In [None]:
file_path = "/Users/mkokkines/Documents/cause_effect_analysis/pairs/pair0100.txt"
results_100 = test_pair(file_path)
print(results_100)