# Efectos Fijos

Vamos a calcular la regresión con efectos fijos sobre los datos que ya teníamos.

Ver [Basic Econometrics](http://www.amazon.com/Basic-Econometrics-Damodar-Gujarati/dp/0071333452/ref=mt_paperback?_encoding=UTF8&me=)


Primero, includes y demás configuraciones básicas

In [1]:
source("src/notebook_base.R")


# Build a table of fixed-effects regressions, one row per social variable.
#
# For every name in the global `social_vars` (not defined in this cell;
# presumably provided by src/notebook_base.R -- confirm), fits
# `<social variable> ~ <explanatory_variable>` with plm's "within" model,
# indexed by ("real_session", "task"), and tests the coefficient with an HC0
# covariance matrix clustered by group.
#
# Args:
#   data_set: data frame passed to plm(); it must contain the social
#     variables, the explanatory variable and the "real_session" / "task"
#     index columns.
#   explanatory_variable: name of the regressor column (the notebook uses
#     "entrainment" and "abs_entrainment").
#
# Returns:
#   A data frame with one row per entry of `social_vars` (used as row names)
#   and the columns "Estimate", "Std. Error", "t value" and "Significance",
#   filled with the values returned by coeftest() in that order (the last one
#   being coeftest's p-value column).
get_regression_table <- function(data_set, explanatory_variable = "entrainment") {
  # Fit one regression and return its coeftest() output as a flat numeric
  # vector (4 values when the model has a single coefficient).
  fit_one <- function(sv) {
    # Relation between the social variable and the (possibly absolute)
    # entrainment that the regression is going to test.
    formula_reg <- as.formula(paste(sv, explanatory_variable, sep = " ~ "))
    regresion <- plm(formula_reg, data = data_set,
                     model = "within",
                     #effect = "individual",
                     index = c("real_session", "task"))

    # To make the test match STATA, a small-sample correction could be used
    # instead (http://www.richard-bluhm.com/clustered-ses-in-r-and-stata-2/):
    #G <- length(unique(data_set$real_session))
    #N <- length(data_set$real_session)
    #dfa <- (G/(G - 1)) * ((N - 1)/regresion$df.residual)
    #salida <- coeftest(regresion, vcov = dfa * vcovHC(regresion, type="HC0", cluster="group", adjust = TRUE))
    # Source: http://stats.stackexchange.com/questions/10017/standard-error-clustering-in-r-either-manually-or-in-plm

    salida <- coeftest(regresion,
                       vcov = vcovHC(regresion, type = "HC0", cluster = "group"))
    as.numeric(salida)
  }

  # Collect every row first and bind once, instead of growing a data frame
  # with rbind() on each iteration.
  filas <- lapply(social_vars, fit_one)
  tabla_regresiones <- as.data.frame(do.call(rbind, filas))

  rownames(tabla_regresiones) <- social_vars
  colnames(tabla_regresiones) <- c("Estimate", "Std. Error", "t value", "Significance")
  tabla_regresiones
}


Loading required package: plm
Loading required package: Formula
Loading required package: lmtest
Loading required package: zoo

Attaching package: ‘zoo’

The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric

Loading required package: sandwich
Loading required package: sqldf
Loading required package: gsubfn
Loading required package: proto
Loading required package: RSQLite
Loading required package: DBI


## Primer caso: $sv \sim entrainment$

Vamos a armar las tablas de regresiones. En primer lugar, veamos la variable social en función del entrainment

In [2]:
# For each acoustic/prosodic variable: print its name, load its data set and
# print the regression table of every social variable against "entrainment".
for (ap_var in ap_vars) {
  print(ap_var)

  df <- load_csv(ap_var)
  print(
    get_regression_table(data_set = df, explanatory_variable = "entrainment")
  )
}

[1] "ENG_MAX"
                                     Estimate Std. Error t value Significance
contributes_to_successful_completion -0.04191    0.16081 -0.2606       0.7947
making_self_clear                     0.20587    0.12558  1.6393       0.1028
engaged_in_game                       0.08240    0.10963  0.7516       0.4533
planning_what_to_say                  0.12102    0.17771  0.6810       0.4967
gives_encouragement                   0.02604    0.20956  0.1243       0.9012
difficult_for_partner_to_speak       -0.05213    0.10592 -0.4922       0.6232
bored_with_game                       0.04031    0.13681  0.2946       0.7686
dislikes_partner                     -0.12385    0.07994 -1.5492       0.1230
[1] "ENG_MEAN"
                                     Estimate Std. Error t value Significance
contributes_to_successful_completion -0.27645    0.17625 -1.5685      0.11847
making_self_clear                    -0.19134    0.15783 -1.2123      0.22694
engaged_in_game                    

## Segundo caso (y más importante): $sv \sim |entrainment|$

In [3]:
# Containers for the regression tables, keyed by acoustic/prosodic variable.
# NOTE(review): `tablas_regresiones` is not filled in the visible code; it is
# kept because later cells may rely on it.
tablas_regresiones <- list()
tablas_regresiones_abs <- list()

# Regress every social variable against "abs_entrainment", keep each table
# (it is reused later to build the LaTeX tables) and print it.
for (ap_var in ap_vars) {
  print(ap_var)

  df <- load_csv(ap_var)
  regression <- get_regression_table(df, explanatory_variable = "abs_entrainment")
  tablas_regresiones_abs[[ap_var]] <- regression

  print(regression)
}

[1] "ENG_MAX"
                                     Estimate Std. Error t value Significance
contributes_to_successful_completion  0.07195     0.4258  0.1690    8.660e-01
making_self_clear                     1.69142     0.3820  4.4274    1.628e-05
engaged_in_game                       0.34564     0.2528  1.3673    1.732e-01
planning_what_to_say                  0.56547     0.5208  1.0859    2.790e-01
gives_encouragement                   0.47387     0.3744  1.2655    2.073e-01
difficult_for_partner_to_speak       -0.69250     0.2863 -2.4185    1.655e-02
bored_with_game                       0.21104     0.2543  0.8298    4.077e-01
dislikes_partner                     -0.42541     0.3438 -1.2373    2.175e-01
[1] "ENG_MEAN"
                                      Estimate Std. Error   t value
contributes_to_successful_completion  0.655190     0.3610  1.814712
making_self_clear                     0.946955     0.6080  1.557502
engaged_in_game                       0.709115     0.3847  1.8431

In [4]:
# Estoy haciendo una cosa horrible. Mañana le prendo unas velitas al dios de la programación

# Flag the entries of a regression table that are statistically significant.
#
# Args:
#   r: object with a "Significance" column/entry, e.g. the data frame
#     returned by get_regression_table().
#   alpha: significance threshold; defaults to 0.05, the value that used to
#     be hard-coded here.
#
# Returns:
#   The result of `r["Significance"] < alpha`; for a data frame this is a
#   one-column logical matrix with the same row names as `r`.
significant <- function(r, alpha = 0.05) {
  r["Significance"] < alpha
}

# Flag the entries of a regression table whose estimate is strictly positive.
#
# Args:
#   r: object with an "Estimate" column/entry, e.g. the data frame returned
#     by get_regression_table().
#
# Returns:
#   The result of `r["Estimate"] > 0`; for a data frame this is a one-column
#   logical matrix with the same row names as `r`.
positive <- function(r) {
  estimates <- r["Estimate"]
  estimates > 0
}

# Summarise, for every acoustic/prosodic variable, the sign of the
# significant "abs_entrainment" coefficients.
#
# Relies on the globals `social_vars` and `ap_vars` and on load_csv(), none of
# which are defined in this cell (presumably they come from
# src/notebook_base.R -- confirm). Each data set is reloaded and every
# regression is fitted again through get_regression_table().
#
# Returns:
#   A data frame with one row per entry of `social_vars` and one numeric
#   column per entry of `ap_vars`, holding
#      1  when the coefficient is significant and its estimate is > 0,
#     -1  when the coefficient is significant and its estimate is not > 0,
#     NA  when the coefficient is not significant (or a value is missing).
build_sign_table <- function() {
  signs <- data.frame(row.names = social_vars)

  for (ap_var in ap_vars) {
    df <- load_csv(ap_var)
    reg <- get_regression_table(df, "abs_entrainment")

    # The helpers return one-column matrices for a data frame; flatten them so
    # each result is stored as an ordinary column named after `ap_var`.
    is_significant <- as.vector(significant(reg))
    is_positive <- as.vector(positive(reg))

    # One nested ifelse() replaces the former sum of two ifelse() calls on
    # multiplied logicals; it yields the same 1 / -1 / NA values.
    signs[[ap_var]] <- as.numeric(
      ifelse(is_significant, ifelse(is_positive, 1, -1), NA_real_)
    )
  }

  signs
}


# Build the table of significant coefficient signs (this reloads every data
# set and fits all the regressions again) and keep it in `sign_table`.
sign_table <- build_sign_table()

# Show the table in the notebook output.
print(sign_table)
# Disabled alternative: print the same table through xtable.
#print(xtable(sign_table, digits=0))

                                     ENG_MAX ENG_MEAN F0_MEAN F0_MAX
contributes_to_successful_completion      NA       NA       1     NA
making_self_clear                          1       NA      NA     NA
engaged_in_game                           NA       NA       1     NA
planning_what_to_say                      NA       NA      NA     NA
gives_encouragement                       NA       NA      NA     NA
difficult_for_partner_to_speak            -1        1      NA     NA
bored_with_game                           NA       NA      -1     NA
dislikes_partner                          NA       NA      NA     NA
                                     NOISE_TO_HARMONICS_RATIO PHONEMES_AVG
contributes_to_successful_completion                        1           NA
making_self_clear                                           1           NA
engaged_in_game                                            NA           NA
planning_what_to_say                                       NA           NA
give

# Altamente significativos

Los más significativos son:

1. ENG_MAX
2. ENG_MEAN
3. F0_MEAN
4. NOISE_TO_HARMONICS_RATIO

In [5]:
# Print the stored "abs_entrainment" regression table of each selected
# variable through xtable, showing 4 decimals in every numeric column.
for (ap_var in c("ENG_MAX", "ENG_MEAN", "F0_MEAN", "NOISE_TO_HARMONICS_RATIO")) {
  print(ap_var)

  df <- tablas_regresiones_abs[[ap_var]]
  # First entry of `digits` is for the row-name column, the rest for the data.
  print(xtable(df, digits = c(0, rep(4, 4))))
  #print(df)
}

[1] "ENG_MAX"
% latex table generated in R 3.2.3 by xtable 1.8-0 package
% Mon Feb 22 09:40:16 2016
\begin{table}[ht]
\centering
\begin{tabular}{rrrrr}
  \hline
 & Estimate & Std. Error & t value & Significance \\ 
  \hline
contributes\_to\_successful\_completion & 0.0720 & 0.4258 & 0.1690 & 0.8660 \\ 
  making\_self\_clear & 1.6914 & 0.3820 & 4.4274 & 0.0000 \\ 
  engaged\_in\_game & 0.3456 & 0.2528 & 1.3673 & 0.1732 \\ 
  planning\_what\_to\_say & 0.5655 & 0.5208 & 1.0859 & 0.2790 \\ 
  gives\_encouragement & 0.4739 & 0.3744 & 1.2655 & 0.2073 \\ 
  difficult\_for\_partner\_to\_speak & -0.6925 & 0.2863 & -2.4185 & 0.0166 \\ 
  bored\_with\_game & 0.2110 & 0.2543 & 0.8298 & 0.4077 \\ 
  dislikes\_partner & -0.4254 & 0.3438 & -1.2373 & 0.2175 \\ 
   \hline
\end{tabular}
\end{table}
[1] "ENG_MEAN"
% latex table generated in R 3.2.3 by xtable 1.8-0 package
% Mon Feb 22 09:40:16 2016
\begin{table}[ht]
\centering
\begin{tabular}{rrrrr}
  \hline
 & Estimate & Std. Error & t value & Significa