# GxE problem

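- I updated the simulation function so that sigma is always positive: the residual standard deviation is now the exponential of a linear function of `E`.
- The models below are estimated on `yt`, the outcome generated by the simulation function below.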

In [None]:
library(brms)
library(data.table)
library(texreg)

In [3]:
# Domingue's simulation function (adjusted)
#
# Simulate one gene-by-environment data set, one row per element of `E`.
# The outcome `yt` is drawn with a residual standard deviation that is the
# exponential of a linear function of `E`, so it is always positive.
#
# Arguments:
#   E        numeric vector of environment values.
#   i        not used in the body; kept so existing calls keep working.
#   b0, b1   scaling branch: baseline and E-dependent scale factors.
#            In the non-scaling branch b1 is the coefficient of G.
#   b2, b3   non-scaling branch only: coefficients of E and of G*E.
#   h        scaling branch only: weight on G; the weight on the error
#            term is e = sqrt(1 - h^2).
#   a0, a1   intercept and (scaling branch only) coefficient of E.
#   sigma    standard deviation of `eps`; it only affects column `y`.
#   scaling  TRUE for the scaling model, FALSE for the non-scaling model.
#   s0, s1   non-scaling branch only: intercept and E-slope of the log
#            residual sd of `yt`. The defaults reproduce the previously
#            hard-coded exp(0.1 + 0.4*E).
#
# Returns a data.frame with columns E, y, yt and g (the simulated G).
simData <- function(E, i = 1, b0 = .8, b1 = .2, b2 = 0, b3 = .05,
                    h = sqrt(.6), a0 = 0, a1 = .5, sigma = 1,
                    scaling = TRUE, s0 = 0.1, s1 = 0.4) {
    N <- length(E)
    G <- rnorm(N, 0, 1)
    # NOTE(review): eps is drawn with mean 1, not 0, so `y` carries a
    # non-zero intercept. `yt` does not use eps. Confirm this is intended.
    eps <- rnorm(N, 1, sigma)
    if (scaling) {
        e <- sqrt(1 - h^2)
        # ystar and y are kept for reference; the models use yt
        ystar <- h * G + e * eps
        y <- a0 + a1 * E + (b0 + b1 * E) * ystar
        # log residual sd is b0*e + b1*e*E, i.e. (b0 + b1*E)*e
        fsigma <- exp(b0 * e + b1 * e * E)
        # final y values: same mean structure as y, heteroskedastic noise
        yt <- a0 + a1 * E + b0 * h * G + b1 * h * E * G + rnorm(N, 0, fsigma)
    } else {
        # NOTE(review): y has no a0 term here although yt does; left as is.
        y <- b1 * G + b2 * E + b3 * G * E + eps
        # residual sd depends on E only, not on the mean coefficients
        fsigma <- exp(s0 + s1 * E)
        yt <- a0 + b1 * G + b2 * E + b3 * G * E + rnorm(N, 0, fsigma)
    }
    data.frame(E = E, y = y, yt = yt, g = G)
}


## Scaling model

In [16]:
# Fix the RNG seed so the simulated data, and everything estimated from it,
# can be regenerated exactly. The seed value itself is arbitrary.
set.seed(20190101)
# 5000 standard-normal environment values; E is reused for the non-scaling
# simulation further down
E <- rnorm(5000, 0, 1)
# data set generated under the scaling model
dts <- data.table(simData(E, scaling = TRUE))
summary(dts)



       E                   y                 yt                 g            
 Min.   :-3.301867   Min.   :-2.2517   Min.   :-6.94959   Min.   :-3.507243  
 1st Qu.:-0.673069   1st Qu.:-0.2315   1st Qu.:-1.29079   1st Qu.:-0.676297  
 Median :-0.015479   Median : 0.4216   Median :-0.06325   Median : 0.002465  
 Mean   :-0.008537   Mean   : 0.5119   Mean   :-0.01304   Mean   : 0.010831  
 3rd Qu.: 0.670350   3rd Qu.: 1.1453   3rd Qu.: 1.16263   3rd Qu.: 0.700029  
 Max.   : 3.536246   Max.   : 5.3418   Max.   : 7.34203   Max.   : 4.181852  

In [17]:
# NOTE(review): cnames is not referenced in any of the visible cells
cnames <- c("ystar", "y")
# OLS fits with the g-by-E interaction: m1 uses y, m2 uses yt
m1 <- lm(y ~ g + E + g * E, data = dts)
m2 <- lm(yt ~ g + E + g * E, data = dts)
# print both fits side by side as a text table
cat(screenreg(list(m1, m2)))


             Model 1      Model 2    
-------------------------------------
(Intercept)     0.51 ***    -0.02    
               (0.01)       (0.02)   
g               0.62 ***     0.63 ***
               (0.01)       (0.02)   
E               0.63 ***     0.50 ***
               (0.01)       (0.02)   
g:E             0.15 ***     0.14 ***
               (0.01)       (0.02)   
-------------------------------------
R^2             0.75         0.19    
Adj. R^2        0.75         0.19    
Num. obs.    5000         5000       
*** p < 0.001; ** p < 0.01; * p < 0.05



# Bayesian distributional model


## Scaling model

In [None]:
# Bayesian distributional model: the mean of yt depends on g, E and g:E,
# and the residual sd depends on E through a log link
f <- bf(
    yt ~ g + E + g * E,
    sigma ~ 1 + E
)
m3 <- brm(
    f,
    data = dts,
    family = brmsfamily("gaussian", link_sigma = "log")
)


In [20]:
# able to get the sigma coefficients of the simulation:
# simData (scaling) draws yt with log-sd = b0*e + b1*e*E, e = sqrt(1 - h^2).
# At the defaults (b0 = .8, b1 = .2, h = sqrt(.6)) that is about
# 0.51 + 0.13*E, which is what sigma_Intercept and sigma_E should recover.
cat(screenreg(m3))


                 Model 1      
------------------------------
Intercept           -0.02     
                 [-0.06; 0.02]
sigma_Intercept      0.51 *   
                 [ 0.49; 0.52]
g                    0.63 *   
                 [ 0.59; 0.67]
E                    0.50 *   
                 [ 0.46; 0.54]
g:E                  0.14 *   
                 [ 0.10; 0.18]
sigma_E              0.13 *   
                 [ 0.11; 0.15]
------------------------------
R^2                  0.19     
Num. obs.         5000        
loo IC           19274.58     
WAIC             19274.57     
* 0 outside the confidence interval.


In [19]:

# Under the scaling simulation the mean coefficients are g = b0*h and
# g:E = b1*h, and the log-sd coefficients are sigma_Intercept = b0*e and
# sigma_E = b1*e, so g * sigma_E should equal g:E * sigma_Intercept.
# I cannot reject this null hypothesis: the 95% interval contains 0.
hyp <- hypothesis(
    m3,
    "g * sigma_E = g:E * sigma_Intercept",
    alpha = 0.05
)
hyp


Hypothesis Tests for class b:
                Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (g*sigma_E)-(g:E*... = 0     0.01      0.01    -0.01     0.04         NA
  Post.Prob Star
1        NA     
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.

## Non-scaling model

In [9]:
# data set generated under the non-scaling model, using the same
# environment vector E as the scaling run
dt <- data.table(simData(E, scaling = FALSE))
summary(dt)

       E                   y                 yt                 g            
 Min.   :-3.195115   Min.   :-3.2102   Min.   :-7.98533   Min.   :-4.456101  
 1st Qu.:-0.687378   1st Qu.: 0.3093   1st Qu.:-0.73812   1st Qu.:-0.671414  
 Median : 0.021276   Median : 0.9959   Median : 0.01515   Median :-0.012346  
 Mean   : 0.006253   Mean   : 0.9877   Mean   :-0.01035   Mean   : 0.004088  
 3rd Qu.: 0.683919   3rd Qu.: 1.6650   3rd Qu.: 0.73109   3rd Qu.: 0.690277  
 Max.   : 3.338312   Max.   : 4.6601   Max.   : 7.20045   Max.   : 4.046617  

In [None]:
# same distributional model as for the scaling data, fitted to the
# non-scaling data set
f <- bf(
    yt ~ g + E + g * E,
    sigma ~ 1 + E
)
m4 <- brm(
    f,
    data = dt,
    family = brmsfamily("gaussian", link_sigma = "log")
)

In [11]:
# the non-scaling simulation draws yt with log-sd = 0.1 + 0.4*E, so
# sigma_Intercept and sigma_E should come out near 0.1 and 0.4
cat(screenreg(m4))


                 Model 1      
------------------------------
Intercept           -0.01     
                 [-0.04; 0.02]
sigma_Intercept      0.10 *   
                 [ 0.09; 0.12]
g                    0.18 *   
                 [ 0.15; 0.20]
E                    0.00     
                 [-0.02; 0.02]
g:E                  0.02     
                 [-0.00; 0.05]
sigma_E              0.41 *   
                 [ 0.39; 0.42]
------------------------------
R^2                  0.02     
Num. obs.         5000        
loo IC           15265.79     
WAIC             15265.78     
* 0 outside the confidence interval.


In [15]:
# In the non-scaling simulation the residual sd does not depend on the mean
# coefficients, so g * sigma_E = g:E * sigma_Intercept is not expected to hold.
# I reject the null hypothesis: the 95% interval excludes 0.
hyp <- hypothesis(
    m4,
    "g * sigma_E = g:E * sigma_Intercept",
    alpha = 0.05
)
hyp

Hypothesis Tests for class b:
                Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
1 (g*sigma_E)-(g:E*... = 0     0.07      0.01     0.06     0.08         NA
  Post.Prob Star
1        NA    *
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.