# Efectos Fijos

Vamos a calcular la regresión con efectos fijos sobre los datos que ya teníamos.

Ver [Basic Econometrics](http://www.amazon.com/Basic-Econometrics-Damodar-Gujarati/dp/0071333452/ref=mt_paperback?_encoding=UTF8&me=)


Primero, includes y demás configuraciones básicas

In [20]:
source("src/notebook_base.R")


# Build a table of fixed-effects regressions, one row per response variable.
#
# For every response variable the model `response ~ explanatory_variable` is
# fitted with plm's "within" estimator, indexed by the "real_session" and
# "task" columns, and its coefficient is tested with an HC0 covariance matrix
# clustered by group.
#
# Arguments:
#   data_set             Data handed to plm(); it is indexed by the columns
#                        "real_session" and "task" and must contain the
#                        response and explanatory columns.
#   explanatory_variable Right-hand side of the formula, as a string.
#   response_variables   Names of the response variables; defaults to the
#                        `social_vars` object defined outside this function.
#
# Returns a data frame with one row per response variable and the columns
# "Estimate", "Std. Error", "t value" and "Significance".
get_regression_table <- function(data_set, explanatory_variable = "entrainment",
                                 response_variables = social_vars) {
  column_names <- c("Estimate", "Std. Error", "t value", "Significance")

  fit_one <- function(sv) {
    # Relationship between the social variable and the entrainment measure
    # (which may be the absolute value) that the regression tests.
    formula_reg <- as.formula(paste(sv, explanatory_variable, sep = " ~ "))
    regresion <- plm(formula_reg, data = data_set,
                     model = "within",
                     #effect = "individual",
                     index = c("real_session", "task"))

    # Alternative kept for reference: degrees-of-freedom adjustment that makes
    # the test match STATA
    # (http://www.richard-bluhm.com/clustered-ses-in-r-and-stata-2/)
    #G <- length(unique(data_set$real_session))
    #N <- length(data_set$real_session)
    #dfa <- (G/(G - 1)) * ((N - 1)/regresion$df.residual)
    #salida <- coeftest(regresion, vcov = dfa * vcovHC(regresion, type="HC0", cluster="group", adjust = TRUE))
    # Source: http://stats.stackexchange.com/questions/10017/standard-error-clustering-in-r-either-manually-or-in-plm

    salida <- coeftest(regresion,
                       vcov = vcovHC(regresion, type = "HC0", cluster = "group"))

    # The table has exactly one row per response variable, so the test output
    # must flatten to the four values named in column_names.
    valores <- as.numeric(salida)
    if (length(valores) != length(column_names)) {
      stop("Expected a single coefficient for '", sv, " ~ ",
           explanatory_variable, "' but the test returned ", length(valores),
           " values", call. = FALSE)
    }
    valores
  }

  # Fit everything first and assemble the table once, instead of growing a
  # data frame with rbind() on every iteration.
  filas <- lapply(response_variables, fit_one)
  tabla_regresiones <- as.data.frame(
    matrix(as.numeric(unlist(filas)), ncol = length(column_names), byrow = TRUE)
  )
  rownames(tabla_regresiones) <- response_variables
  colnames(tabla_regresiones) <- column_names
  tabla_regresiones
}


## Primer caso: $sv \sim entrainment$

Vamos a armar las tablas de regresiones. En primer lugar, veamos la variable social en función del entrainment

In [21]:
# For every name in ap_vars (defined outside this cell): print the name, load
# its data with load_csv() (helper defined elsewhere; presumably reads the
# per-variable CSV -- verify) and print the regression table that uses
# "entrainment" as the explanatory variable.
# Note: ap_var and df are top-level variables and remain set after the loop.
for (ap_var in ap_vars) {
  print(ap_var)
  df <- load_csv(ap_var)
  print(get_regression_table(df, "entrainment"))
}

[1] "ENG_MAX"
                                     Estimate Std. Error t value Significance
contributes_to_successful_completion -0.04740    0.15988 -0.2965      0.76721
making_self_clear                     0.21060    0.12562  1.6765      0.09532
engaged_in_game                       0.07969    0.11153  0.7146      0.47577
planning_what_to_say                  0.09865    0.17721  0.5567      0.57841
gives_encouragement                   0.02704    0.21067  0.1284      0.89800
difficult_for_partner_to_speak       -0.06148    0.10421 -0.5900      0.55590
bored_with_game                       0.04751    0.13791  0.3445      0.73085
dislikes_partner                     -0.14544    0.07851 -1.8526      0.06553
[1] "ENG_MEAN"
                                     Estimate Std. Error t value Significance
contributes_to_successful_completion -0.29503    0.17459 -1.6898      0.09274
making_self_clear                    -0.21365    0.15599 -1.3696      0.17246
engaged_in_game                    

## Segundo caso (y más importante): $sv \sim abs(entrainment)$

In [22]:
# Result containers keyed by variable name. Only tablas_regresiones_abs is
# filled in this cell; tablas_regresiones is created empty and not touched here.
tablas_regresiones <- list()
tablas_regresiones_abs <- list()

# For every name in ap_vars: load its data, compute the regression table with
# "abs_entrainment" as the explanatory variable, store it and print it.
for (ap_var in ap_vars) {
  print(ap_var)
  df <- load_csv(ap_var)
  regression <- get_regression_table(df, "abs_entrainment")
  
  tablas_regresiones_abs[[ap_var]] <- regression
  # Kept per variable so the regression tables can be rendered later
  print(tablas_regresiones_abs[[ap_var]])
}

[1] "ENG_MAX"
                                     Estimate Std. Error t value Significance
contributes_to_successful_completion  0.04965     0.4262  0.1165    9.074e-01
making_self_clear                     1.65805     0.3864  4.2909    2.866e-05
engaged_in_game                       0.33073     0.2576  1.2840    2.008e-01
planning_what_to_say                  0.50050     0.5327  0.9395    3.487e-01
gives_encouragement                   0.42644     0.3792  1.1246    2.622e-01
difficult_for_partner_to_speak       -0.71995     0.2858 -2.5190    1.262e-02
bored_with_game                       0.21632     0.2560  0.8450    3.992e-01
dislikes_partner                     -0.43176     0.3443 -1.2541    2.114e-01
[1] "ENG_MEAN"
                                     Estimate Std. Error t value Significance
contributes_to_successful_completion  0.62614     0.3593 1.74269      0.08305
making_self_clear                     0.97748     0.5985 1.63317      0.10413
engaged_in_game                    

In [97]:
# Estoy haciendo una cosa horrible. Mañana le prendo unas velitas al dios de la programación

# Significance flag at the 5% level.
#
# `r` is a regression table (or any object that can be indexed with the name
# "Significance"). The result is the element-wise comparison of
# r["Significance"] against 0.05; single-bracket indexing is used on purpose,
# so the result keeps whatever structure that selection has.
significant <- function(r) {
    r["Significance"] < 0.05
}

# Sign flag for the estimated coefficient.
#
# `r` is a regression table (or any object that can be indexed with the name
# "Estimate"). The result is the element-wise comparison of r["Estimate"]
# against 0; an estimate of exactly 0 is not positive.
positive <- function(r) {
    r["Estimate"] > 0
}

# Build a table with the direction of every significant effect.
#
# For each name in ap_vars the "abs_entrainment" regression table is
# recomputed and reduced to a single column:
#    1  significant with a positive estimate
#   -1  significant with a non-positive estimate
#   NA  not significant (or significance / estimate missing)
#
# Relies on social_vars, ap_vars, load_csv(), get_regression_table(),
# significant() and positive() being defined in the calling environment.
#
# Returns a data frame with one row per entry of social_vars and one plain
# numeric column per entry of ap_vars.
build_sign_table <- function() {
    signs <- data.frame(row.names = social_vars)

    for (ap_var in ap_vars) {
        reg <- get_regression_table(load_csv(ap_var), "abs_entrainment")

        # The helpers index with single brackets and hand back a one-column
        # structure; flatten it so each column of `signs` is an ordinary
        # vector rather than an embedded matrix.
        is_significant <- as.vector(significant(reg))
        is_positive <- as.vector(positive(reg))

        # Direction of the effect where significant, NA everywhere else.
        signs[[ap_var]] <- ifelse(is_significant,
                                  ifelse(is_positive, 1, -1),
                                  NA_real_)
    }

    signs
}


# Compute the sign table once and keep it in a top-level variable so it can be
# reused by later cells.
sign_table <- build_sign_table()

# Show the table in the notebook output. The commented-out line below is the
# LaTeX rendering via xtable, left disabled.
print(sign_table)
#print(xtable(sign_table, digits=0))

                                     ENG_MAX ENG_MEAN F0_MEAN F0_MAX
contributes_to_successful_completion      NA       NA       1     NA
making_self_clear                          1       NA       1     NA
engaged_in_game                           NA       NA       1     NA
planning_what_to_say                      NA       NA      NA     NA
gives_encouragement                       NA       NA      NA     NA
difficult_for_partner_to_speak            -1       NA      NA     NA
bored_with_game                           NA       NA      -1     NA
dislikes_partner                          NA       NA      NA     NA
                                     NOISE_TO_HARMONICS_RATIO PHONEMES_AVG
contributes_to_successful_completion                        1           NA
making_self_clear                                           1           NA
engaged_in_game                                            NA           NA
planning_what_to_say                                       NA           NA
give

# Altamente significativos

Los más significativos son:

1. ENG_MAX
2. ENG_MEAN
3. F0_MEAN
4. NOISE_TO_HARMONICS_RATIO

In [3]:
# Render the stored abs_entrainment regression tables of four selected
# variables as LaTeX with xtable. Requires tablas_regresiones_abs to have been
# filled by an earlier cell.
# `digits` has five entries for a four-column table: xtable takes one extra
# leading entry for the row-name column.
# NOTE(review): the captured output shows a "Pr(>|t|)" header rather than
# "Significance", so it appears to come from a run with different column
# names -- confirm before reusing the output.
for (ap_var in c("ENG_MAX", "ENG_MEAN", "F0_MEAN", "NOISE_TO_HARMONICS_RATIO")){
    df <- tablas_regresiones_abs[[ap_var]]
    print(ap_var)
    print(xtable(df, digits= c(0,4,4,4,4)))
    #print(df)
}

[1] "ENG_MAX"
% latex table generated in R 3.2.2 by xtable 1.8-0 package
% Mon Jan 18 02:11:44 2016
\begin{table}[ht]
\centering
\begin{tabular}{rrrrr}
  \hline
 & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\ 
  \hline
contributes\_to\_successful\_completion & 0.0497 & 0.4262 & 0.1165 & 0.9074 \\ 
  making\_self\_clear & 1.6581 & 0.3864 & 4.2909 & 0.0000 \\ 
  engaged\_in\_game & 0.3307 & 0.2576 & 1.2840 & 0.2008 \\ 
  planning\_what\_to\_say & 0.5005 & 0.5327 & 0.9395 & 0.3487 \\ 
  gives\_encouragement & 0.4264 & 0.3792 & 1.1246 & 0.2622 \\ 
  difficult\_for\_partner\_to\_speak & -0.7200 & 0.2858 & -2.5190 & 0.0126 \\ 
  bored\_with\_game & 0.2163 & 0.2560 & 0.8450 & 0.3992 \\ 
  dislikes\_partner & -0.4318 & 0.3443 & -1.2541 & 0.2114 \\ 
   \hline
\end{tabular}
\end{table}
[1] "ENG_MEAN"
% latex table generated in R 3.2.2 by xtable 1.8-0 package
% Mon Jan 18 02:11:44 2016
\begin{table}[ht]
\centering
\begin{tabular}{rrrrr}
  \hline
 & Estimate & Std. Error & t value & Pr($>$$