# Efectos Fijos

Vamos a calcular la regresión con efectos fijos sobre los datos que ya teníamos.

Ver [Basic Econometrics](http://www.amazon.com/Basic-Econometrics-Damodar-Gujarati/dp/0071333452/ref=mt_paperback?_encoding=UTF8&me=)


Primero, includes y demás configuraciones básicas

In [1]:
source("src/notebook_base.R")


get_regression_table <- function(data_set, explanatory_variable = "entrainment") {
  # Builds a table with one fixed-effects regression per row.
  #
  # For every name in the global `social_vars`, fits
  # `<social variable> ~ <explanatory_variable>` with plm() using the
  # "within" model and the panel index (real_session, task), then tests the
  # coefficients with an HC0 covariance matrix clustered by group.
  #
  # Args:
  #   data_set: data handed to plm(); it has to contain the social variables,
  #     the explanatory variable and the "real_session" and "task" columns.
  #   explanatory_variable: right-hand side of the formula, as a string
  #     (e.g. "entrainment" or "abs_entrainment").
  #
  # Returns:
  #   A data.frame with one row per social variable (row names taken from
  #   `social_vars`) and the columns "Estimate", "Std. Error", "t value" and
  #   "Significance", holding the four values reported by coeftest() in that
  #   order (the last one is the p-value).

  fit_one <- function(sv) {
    # Relationship between the social variable and the entrainment measure
    # (possibly its absolute value) that the regression tests.
    formula_reg <- as.formula(paste(sv, explanatory_variable, sep = " ~ "))
    regresion <- plm(formula_reg, data = data_set,
                     model = "within",
                     #effect = "individual",
                     index = c("real_session", "task"))

    # To make the test match STATA (http://www.richard-bluhm.com/clustered-ses-in-r-and-stata-2/)
    #G <- length(unique(data_set$real_session))
    #N <- length(data_set$real_session)
    #dfa <- (G/(G - 1)) * ((N - 1)/regresion$df.residual)
    #salida <- coeftest(regresion, vcov = dfa * vcovHC(regresion, type="HC0", cluster="group", adjust = TRUE))
    # Source: http://stats.stackexchange.com/questions/10017/standard-error-clustering-in-r-either-manually-or-in-plm

    salida <- coeftest(regresion,
                       vcov = vcovHC(regresion, type = "HC0", cluster = "group"))
    as.numeric(salida)
  }

  # vapply() collects the results in one pass instead of growing a data frame
  # with rbind(), and fails right away if a regression does not yield exactly
  # four numbers (e.g. when the right-hand side has more than one term).
  stats <- vapply(social_vars, fit_one, numeric(4), USE.NAMES = FALSE)

  # vapply() returns one column per social variable; transpose to one row each.
  tabla_regresiones <- as.data.frame(t(stats))
  rownames(tabla_regresiones) <- social_vars
  colnames(tabla_regresiones) <- c("Estimate", "Std. Error", "t value", "Significance")
  tabla_regresiones
}


Loading required package: plm
Loading required package: Formula
Loading required package: lmtest
Loading required package: zoo

Attaching package: ‘zoo’

The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric

Loading required package: sandwich
Loading required package: sqldf
Loading required package: gsubfn
Loading required package: proto
Loading required package: RSQLite
Loading required package: DBI


## Primer caso: $sv \sim entrainment$

Vamos a armar las tablas de regresiones. En primer lugar, veamos la variable social en función del entrainment

In [2]:
# Print, for each variable in ap_vars, its name followed by the table of
# regressions of the social variables on "entrainment".
for (ap_var in ap_vars) {
  print(ap_var)
  df <- load_csv(ap_var)
  print(get_regression_table(data_set = df,
                             explanatory_variable = "entrainment"))
}

[1] "ENG_MAX"
                                     Estimate Std. Error t value Significance
contributes_to_successful_completion -0.08375    0.15450 -0.5421      0.58843
making_self_clear                     0.23214    0.12531  1.8525      0.06555
engaged_in_game                       0.02925    0.08437  0.3466      0.72927
planning_what_to_say                  0.08509    0.15460  0.5504      0.58271
gives_encouragement                   0.06846    0.20469  0.3344      0.73843
difficult_for_partner_to_speak       -0.11129    0.11771 -0.9454      0.34566
bored_with_game                       0.04776    0.11831  0.4037      0.68690
dislikes_partner                     -0.07072    0.08402 -0.8417      0.40105
[1] "ENG_MEAN"
                                     Estimate Std. Error t value Significance
contributes_to_successful_completion -0.26591     0.1738 -1.5301       0.1277
making_self_clear                    -0.15143     0.1495 -1.0130       0.3124
engaged_in_game                    

## Segundo caso (y más importante): $sv \sim \mathrm{abs}(entrainment)$

In [3]:
# Containers for the regression tables, keyed by the names in ap_vars.
tablas_regresiones <- list()
tablas_regresiones_abs <- list()

# Regress every social variable on "abs_entrainment", once per ap_var.
for (ap_var in ap_vars) {
  print(ap_var)
  df <- load_csv(ap_var)
  regression <- get_regression_table(data_set = df,
                                     explanatory_variable = "abs_entrainment")

  # Keep the table: it is reused further down to build the regression tables.
  tablas_regresiones_abs[[ap_var]] <- regression
  print(regression)
}

[1] "ENG_MAX"
                                     Estimate Std. Error t value Significance
contributes_to_successful_completion  0.07697     0.4197  0.1834    0.8546870
making_self_clear                     1.39914     0.3971  3.5232    0.0005371
engaged_in_game                       0.16702     0.2492  0.6702    0.5035367
planning_what_to_say                  0.21340     0.4609  0.4630    0.6439243
gives_encouragement                   0.51606     0.3330  1.5496    0.1229378
difficult_for_partner_to_speak       -0.56032     0.2822 -1.9858    0.0485345
bored_with_game                       0.34943     0.2488  1.4043    0.1618930
dislikes_partner                     -0.53267     0.3658 -1.4560    0.1470749
[1] "ENG_MEAN"
                                      Estimate Std. Error  t value Significance
contributes_to_successful_completion  0.721912     0.3643  1.98162      0.04900
making_self_clear                     0.815517     0.5061  1.61136      0.10881
engaged_in_game              

In [4]:
# Estoy haciendo una cosa horrible. Mañana le prendo unas velitas al dios de la programación

significant <- function(r) {
    # Flags the entries of r's "Significance" element that are below 0.05.
    #
    # r is anything that can be indexed by the name "Significance", e.g. a
    # regression table as returned by get_regression_table().
    # The result keeps the shape that `r["Significance"]` has.
    p_values <- r["Significance"]
    p_values < 0.05
}

positive <- function(r) {
    # Flags the entries of r's "Estimate" element that are strictly positive.
    #
    # r is anything that can be indexed by the name "Estimate", e.g. a
    # regression table as returned by get_regression_table().
    # The result keeps the shape that `r["Estimate"]` has.
    estimates <- r["Estimate"]
    estimates > 0
}

build_sign_table <- function() {
    # Summarises the sign of every significant "abs_entrainment" effect.
    #
    # For each name in the global `ap_vars`, loads its data with load_csv(),
    # runs get_regression_table(..., "abs_entrainment") and encodes each
    # social variable as:
    #    1  significant() and positive() both hold,
    #   -1  significant() holds and positive() does not,
    #   NA  not significant (or the underlying values are NA).
    #
    # Returns:
    #   A data.frame with one row per entry of `social_vars` and one numeric
    #   column per entry of `ap_vars`.
    signs <- data.frame(row.names = social_vars)

    for (ap_var in ap_vars) {
        reg <- get_regression_table(load_csv(ap_var), "abs_entrainment")

        # significant()/positive() return one-column logical matrices when
        # given a data frame; flatten them so that the table stores plain
        # vectors instead of matrix columns.
        is_significant <- as.vector(significant(reg))
        is_positive <- as.vector(positive(reg))

        # NA_real_ keeps the column numeric even when nothing is significant.
        signs[[ap_var]] <- ifelse(is_significant,
                                  ifelse(is_positive, 1, -1),
                                  NA_real_)
    }

    signs
}


# Build the table of significant effect signs (one column per entry of
# ap_vars) and print it.
sign_table <- build_sign_table()

print(sign_table)
# Disabled: LaTeX export of the same table through xtable.
#print(xtable(sign_table, digits=0))

                                     ENG_MAX ENG_MEAN F0_MEAN F0_MAX
contributes_to_successful_completion      NA        1       1      1
making_self_clear                          1       NA       1     NA
engaged_in_game                           NA       NA       1     NA
planning_what_to_say                      NA       NA      NA     NA
gives_encouragement                       NA       NA      NA     NA
difficult_for_partner_to_speak            -1       NA      NA     NA
bored_with_game                           NA       NA      NA     NA
dislikes_partner                          NA       NA      NA     NA
                                     NOISE_TO_HARMONICS_RATIO PHONEMES_AVG
contributes_to_successful_completion                       NA           NA
making_self_clear                                           1           NA
engaged_in_game                                            NA           NA
planning_what_to_say                                       NA           NA
give

# Altamente significativos

Los más significativos son:

1. ENG_MAX
2. ENG_MEAN
3. F0_MEAN
4. NOISE_TO_HARMONICS_RATIO

In [5]:
# Emit the LaTeX version (via xtable, four decimals per numeric column) of the
# abs_entrainment regression table of each selected variable.
highly_significant_vars <- c("ENG_MAX", "ENG_MEAN", "F0_MEAN",
                             "NOISE_TO_HARMONICS_RATIO")
for (ap_var in highly_significant_vars) {
    df <- tablas_regresiones_abs[[ap_var]]
    print(ap_var)
    latex_table <- xtable(df, digits = c(0, 4, 4, 4, 4))
    print(latex_table)
    #print(df)
}

[1] "ENG_MAX"
% latex table generated in R 3.2.3 by xtable 1.8-0 package
% Mon Feb 22 12:44:28 2016
\begin{table}[ht]
\centering
\begin{tabular}{rrrrr}
  \hline
 & Estimate & Std. Error & t value & Significance \\ 
  \hline
contributes\_to\_successful\_completion & 0.0770 & 0.4197 & 0.1834 & 0.8547 \\ 
  making\_self\_clear & 1.3991 & 0.3971 & 3.5232 & 0.0005 \\ 
  engaged\_in\_game & 0.1670 & 0.2492 & 0.6702 & 0.5035 \\ 
  planning\_what\_to\_say & 0.2134 & 0.4609 & 0.4630 & 0.6439 \\ 
  gives\_encouragement & 0.5161 & 0.3330 & 1.5496 & 0.1229 \\ 
  difficult\_for\_partner\_to\_speak & -0.5603 & 0.2822 & -1.9858 & 0.0485 \\ 
  bored\_with\_game & 0.3494 & 0.2488 & 1.4043 & 0.1619 \\ 
  dislikes\_partner & -0.5327 & 0.3658 & -1.4560 & 0.1471 \\ 
   \hline
\end{tabular}
\end{table}
[1] "ENG_MEAN"
% latex table generated in R 3.2.3 by xtable 1.8-0 package
% Mon Feb 22 12:44:28 2016
\begin{table}[ht]
\centering
\begin{tabular}{rrrrr}
  \hline
 & Estimate & Std. Error & t value & Significa