## Efectos Fijos

Vamos a calcular la regresión con efectos fijos sobre los datos que ya teníamos.

Ver [Basic Econometrics](http://www.amazon.com/Basic-Econometrics-Damodar-Gujarati/dp/0071333452/ref=mt_paperback?_encoding=UTF8&me=)


Primero, includes y demás configuraciones básicas

In [1]:
# Notebook setup: reset the workspace, set global options and attach the
# packages used by the cells below.
# NOTE(review): rm(list = ls()) wipes every object in the global environment;
# it is kept to preserve the notebook's behaviour, but restarting the kernel
# is the safer way to get a clean session.
rm(list = ls())
options(digits = 4)
options(repos = "http://cran.us.r-project.org")
library(xtable)

# Install each dependency if it is missing and then attach it. Calling
# install.packages() alone after a failed require() leaves the package
# unattached, so the first run on a clean machine would fail later on.
for (pkg in c("plm", "lmtest", "sandwich", "sqldf")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
rm(pkg)
    

# Names of the social-variable columns; each one is used as the response of
# a separate regression.
social_vars <- c(
  "contributes_to_successful_completion",
  "making_self_clear",
  "engaged_in_game",
  "planning_what_to_say",
  "gives_encouragement",
  "difficult_for_partner_to_speak",
  "bored_with_game",
  "dislikes_partner"
)

# Names of the variables to analyse; each one maps to a file
# tables/<name>.csv read by load_csv().
ap_vars <- c(
  "ENG_MAX",
  "ENG_MEAN",
  "F0_MEAN",
  "F0_MAX",
  "NOISE_TO_HARMONICS_RATIO",
  "PHONEMES_AVG",
  "PHONEMES_COUNT",
  "SOUND_VOICED_LOCAL_SHIMMER",
  "SYLLABES_AVG",
  "SYLLABES_COUNT",
  "VCD2TOT_FRAMES"
)


#' Load the CSV of one variable and add the derived entrainment columns.
#'
#' Reads `tables/<ap_var>.csv` (comma separated, with a header row, first
#' column used as row names) and appends three columns:
#' `real_session` (factor built from `session` and `speaker` joined by "_"),
#' `abs_entrainment` (absolute value of `entrainment`) and
#' `entrainment_neg` (TRUE where `entrainment` is negative).
#'
#' @param ap_var Name of the variable; selects the file `tables/<ap_var>.csv`,
#'   resolved against the current working directory.
#' @return The data frame read from the file plus the three derived columns.
load_csv <- function(ap_var) {
  path <- file.path("tables", paste0(ap_var, ".csv"))

  # header = TRUE spelled out: the shorthand `T` is an ordinary variable that
  # can be reassigned.
  data_set <- read.table(path, sep = ",", header = TRUE, row.names = 1)

  data_set$real_session <- factor(
    paste(data_set$session, data_set$speaker, sep = "_")
  )

  data_set$abs_entrainment <- abs(data_set$entrainment)
  # The comparison already yields a logical vector (NA where entrainment is
  # NA), so no ifelse() wrapper is needed.
  data_set$entrainment_neg <- data_set$entrainment < 0

  data_set
}

Loading required package: plm
Loading required package: Formula
Loading required package: lmtest
Loading required package: zoo

Attaching package: ‘zoo’

The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric

Loading required package: sandwich
Loading required package: sqldf
Loading required package: gsubfn
Loading required package: proto
Loading required package: RSQLite
Loading required package: DBI


Vamos a armar las tablas de regresiones...

In [2]:


#' Build a table of fixed-effects regressions, one row per social variable.
#'
#' For every name in the global `social_vars`, fits
#' `<social variable> ~ <social_var>` with `plm` (within model, panel index
#' `real_session` / `task`) and keeps the coefficient test computed with an
#' HC0 covariance matrix clustered by group.
#'
#' @param data_set Data frame holding the social-variable columns, the
#'   regressor column and the `real_session` and `task` index columns.
#' @param social_var Name (string) of the right-hand-side regressor, e.g.
#'   "abs_entrainment". Despite the parameter name, this is the explanatory
#'   variable; the responses come from the global `social_vars`.
#' @return Data frame with one row per entry of `social_vars` and the columns
#'   "Estimate", "Std. Error", "t value" and "Pr(>|t|)".
arma_tabla_regs <- function(data_set, social_var) {
  columnas <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)")

  # Collect one numeric row per regression and bind them once at the end
  # instead of growing a data frame inside the loop.
  filas <- vector("list", length(social_vars))

  for (i in seq_along(social_vars)) {
    sv <- social_vars[[i]]

    # Relation tested by the regression: social variable ~ entrainment
    # measure (which may be the absolute value).
    formula_reg <- as.formula(paste(sv, social_var, sep = " ~ "))
    regresion <- plm(formula_reg, data = data_set,
                     model = "within",
                     #effect = "individual",
                     index = c("real_session", "task"))

    # Alternative kept for reference: degrees-of-freedom correction so the
    # test matches STATA
    # (http://www.richard-bluhm.com/clustered-ses-in-r-and-stata-2/)
    #G <- length(unique(data_set$real_session))
    #N <- length(data_set$real_session)
    #dfa <- (G/(G - 1)) * ((N - 1)/regresion$df.residual)
    #salida <- coeftest(regresion, vcov = dfa * vcovHC(regresion, type="HC0", cluster="group", adjust = TRUE))
    # Source: http://stats.stackexchange.com/questions/10017/standard-error-clustering-in-r-either-manually-or-in-plm

    salida <- coeftest(
      regresion,
      vcov = vcovHC(regresion, type = "HC0", cluster = "group")
    )

    fila <- as.numeric(salida)
    # The table layout assumes exactly one coefficient per regression.
    if (length(fila) != length(columnas)) {
      stop("Expected a single coefficient for '", sv, " ~ ", social_var,
           "' but the coefficient test returned ", length(fila), " values.",
           call. = FALSE)
    }
    filas[[i]] <- fila
  }

  tabla_regresiones <- as.data.frame(do.call(rbind, filas))
  rownames(tabla_regresiones) <- social_vars
  colnames(tabla_regresiones) <- columnas
  tabla_regresiones
}


# Main loop that computes the regression tables.
# Both lists are meant to be keyed by variable name; only
# tablas_regresiones_abs is filled in this loop.
tablas_regresiones <- list()
tablas_regresiones_abs <- list()

for (ap_var in ap_vars) {
  print(ap_var)
  # Read tables/<ap_var>.csv together with its derived entrainment columns.
  df <- load_csv(ap_var)
  # Regress every social variable on the absolute entrainment value.
  tablas_regresiones_abs[[ap_var]] <- arma_tabla_regs(df, "abs_entrainment")
  print(tablas_regresiones_abs[[ap_var]])
}

[1] "ENG_MAX"
                                     Estimate Std. Error t value  Pr(>|t|)
contributes_to_successful_completion  0.04965     0.4262  0.1165 9.074e-01
making_self_clear                     1.65805     0.3864  4.2909 2.866e-05
engaged_in_game                       0.33073     0.2576  1.2840 2.008e-01
planning_what_to_say                  0.50050     0.5327  0.9395 3.487e-01
gives_encouragement                   0.42644     0.3792  1.1246 2.622e-01
difficult_for_partner_to_speak       -0.71995     0.2858 -2.5190 1.262e-02
bored_with_game                       0.21632     0.2560  0.8450 3.992e-01
dislikes_partner                     -0.43176     0.3443 -1.2541 2.114e-01
[1] "ENG_MEAN"
                                     Estimate Std. Error t value Pr(>|t|)
contributes_to_successful_completion  0.62614     0.3593 1.74269  0.08305
making_self_clear                     0.97748     0.5985 1.63317  0.10413
engaged_in_game                       0.69087     0.4105 1.68299  0.09406


# Altamente significativos

Los más significativos son:

1. ENG_MAX
2. ENG_MEAN
3. F0_MEAN
4. NOISE_TO_HARMONICS_RATIO

In [3]:
# Print the stored regression table of each selected variable as a LaTeX
# table via xtable.
for (ap_var in c("ENG_MAX", "ENG_MEAN", "F0_MEAN", "NOISE_TO_HARMONICS_RATIO")){
    df <- tablas_regresiones_abs[[ap_var]]
    print(ap_var)
    # digits: first entry applies to the row-name column, the remaining four
    # to the data columns.
    print(xtable(df, digits= c(0,4,4,4,4)))
    #print(df)
}

[1] "ENG_MAX"
% latex table generated in R 3.2.2 by xtable 1.8-0 package
% Mon Jan 18 02:11:44 2016
\begin{table}[ht]
\centering
\begin{tabular}{rrrrr}
  \hline
 & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\ 
  \hline
contributes\_to\_successful\_completion & 0.0497 & 0.4262 & 0.1165 & 0.9074 \\ 
  making\_self\_clear & 1.6581 & 0.3864 & 4.2909 & 0.0000 \\ 
  engaged\_in\_game & 0.3307 & 0.2576 & 1.2840 & 0.2008 \\ 
  planning\_what\_to\_say & 0.5005 & 0.5327 & 0.9395 & 0.3487 \\ 
  gives\_encouragement & 0.4264 & 0.3792 & 1.1246 & 0.2622 \\ 
  difficult\_for\_partner\_to\_speak & -0.7200 & 0.2858 & -2.5190 & 0.0126 \\ 
  bored\_with\_game & 0.2163 & 0.2560 & 0.8450 & 0.3992 \\ 
  dislikes\_partner & -0.4318 & 0.3443 & -1.2541 & 0.2114 \\ 
   \hline
\end{tabular}
\end{table}
[1] "ENG_MEAN"
% latex table generated in R 3.2.2 by xtable 1.8-0 package
% Mon Jan 18 02:11:44 2016
\begin{table}[ht]
\centering
\begin{tabular}{rrrrr}
  \hline
 & Estimate & Std. Error & t value & Pr($>$$