In [2]:
library(haven)
library(stargazer)
library(dplyr)
library(infer)
library(ggplot2)
library(broom)
library(knitr)
library(kableExtra)



Please cite as: 


 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.

 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 



Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




ERROR: Error in library(kableExtra): there is no package called ‘kableExtra’


In [4]:
data <- read_dta("./cen_ind_2021_pumf_v2.dta")

In [9]:
colnames(data)

In [10]:
#removes NA rows
data <- data[data$CFInc != 88, ]

data <- data[data$agegrp != 88, ]

data <- data[data$genstat != 88, ]

data <- data[data$hdgree != 88, ]
data <- data[data$hdgree != 99, ]

data <- data[data$lfact != 88, ]
data <- data[data$lfact != 99, ]



In [17]:
data <- na.omit(data)


In [3]:
# simple regression 1 - CFInc (Income) based on Gentstat (First Gen / Second Gen Canadian)


reg_1 <-  lm(CFInc ~ genstat, data = data)

stargazer(reg_1, type = "text")



                         Dependent variable:     
                    -----------------------------
                                CFInc            
-------------------------------------------------
genstat                       0.411***           
                               (0.006)           
                                                 
Constant                      20.798***          
                               (0.021)           
                                                 
-------------------------------------------------
Observations                   980,868           
R2                              0.004            
Adjusted R2                     0.004            
Residual Std. Error      8.944 (df = 980866)     
F Statistic         4,277.340*** (df = 1; 980866)
Note:                 *p<0.1; **p<0.05; ***p<0.01


In [4]:
# simple regression 2 - adding covariates for Gender and agegrp 

reg_2 <-  lm(CFInc ~ genstat + Gender + agegrp, data = data)

stargazer(reg_2, type = "text")



                         Dependent variable:     
                    -----------------------------
                                CFInc            
-------------------------------------------------
genstat                       0.395***           
                               (0.006)           
                                                 
Gender                        0.481***           
                               (0.018)           
                                                 
agegrp                        -0.142***          
                               (0.001)           
                                                 
Constant                      21.790***          
                               (0.037)           
                                                 
-------------------------------------------------
Observations                   980,868           
R2                              0.018            
Adjusted R2                     0.018            

In [5]:
# simple regression 3 - adding covariates for hdgree (education level) and lfact (labour force participation)

reg_3 <-  lm(CFInc ~ genstat + Gender + agegrp + hdgree + lfact, data = data)

stargazer(reg_3, type = "text")



                         Dependent variable:      
                    ------------------------------
                                CFInc             
--------------------------------------------------
genstat                        0.430***           
                               (0.006)            
                                                  
Gender                         0.371***           
                               (0.017)            
                                                  
agegrp                        -0.094***           
                               (0.001)            
                                                  
hdgree                         0.274***           
                               (0.001)            
                                                  
lfact                         -0.271***           
                               (0.001)            
                                                  
Constant                      

In [6]:
# simple regression 4 - by pr (province)

reg_4 <-  lm(CFInc ~ genstat + Gender + agegrp + hdgree + lfact + pr, data = data)

stargazer(reg_4, type = "text")




                         Dependent variable:      
                    ------------------------------
                                CFInc             
--------------------------------------------------
genstat                        0.461***           
                               (0.006)            
                                                  
Gender                         0.371***           
                               (0.017)            
                                                  
agegrp                        -0.094***           
                               (0.001)            
                                                  
hdgree                         0.274***           
                               (0.001)            
                                                  
lfact                         -0.270***           
                               (0.001)            
                                                  
pr                            

In [9]:
# checking for interaction effects 

reg_interaction <- lm(CFInc ~ genstat * hdgree + pr + Gender + agegrp + genstat * lfact, data = data)


stargazer(reg_interaction, type = "text")



                         Dependent variable:      
                    ------------------------------
                                CFInc             
--------------------------------------------------
genstat                        0.389***           
                               (0.007)            
                                                  
hdgree                         0.283***           
                               (0.002)            
                                                  
lfact                         -0.295***           
                               (0.002)            
                                                  
pr                             0.026***           
                               (0.001)            
                                                  
Gender                         0.368***           
                               (0.017)            
                                                  
agegrp                        

In [38]:
# Basic regressions
model1 <- lm(CFInc ~ genstat, data = data)
model2 <- lm(CFInc ~ genstat + Gender, data = data)
model3 <- lm(CFInc ~ genstat + Gender + agegrp, data = data)
model4 <- lm(CFInc ~ genstat + Gender + agegrp + hdgree, data = data)
model5 <- lm(CFInc ~ genstat * hdgree + pr + Gender + agegrp + genstat * lfact, data = data)

# Stargazer Table Output
stargazer(model1.1, model2.1, model3.1, model4.1, 
          type = "html", 
          title = "Degree on Generational Status",
          dep.var.labels = "Dependent Variable: CFInc",
          out = "income_table4.html")




<table style="text-align:center"><caption><strong>Degree on Generational Status</strong></caption>
<tr><td colspan="5" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="4"><em>Dependent variable:</em></td></tr>
<tr><td></td><td colspan="4" style="border-bottom: 1px solid black"></td></tr>
<tr><td style="text-align:left"></td><td colspan="4">Dependent Variable: CFInc</td></tr>
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td><td>(4)</td></tr>
<tr><td colspan="5" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">genstat</td><td>-0.022<sup>***</sup></td><td>-0.022<sup>***</sup></td><td>-0.022<sup>***</sup></td><td>-0.022<sup>***</sup></td></tr>
<tr><td style="text-align:left"></td><td>(0.0003)</td><td>(0.0003)</td><td>(0.0003)</td><td>(0.0003)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Gender</td><td></td><td

In [19]:
#indivuals age 12-20
data <- data %>%
  mutate(agegrp_dummy = ifelse(agegrp %in% c("5", "6", "7"), 1, 0))

#dummy for bachelors degree to simplify 
data <- data %>%
  mutate(bachelors_degree = ifelse(hdgree %in% c(9), 1, 0))

In [37]:
#degree status on genstat 
# should be careful, meaning of hdgree is problematic 
model1.1 <- lm(bachelors_degree ~ genstat, data = data)
model2.1 <- lm(bachelors_degree ~ genstat + Gender, data = data)
model3.1 <- lm(bachelors_degree ~ genstat + Gender + agegrp, data = data)
model4.1 <- lm(bachelors_degree ~ genstat + Gender + agegrp + lfact*agegrp_dummy, data = data)

# Stargazer Table Output
stargazer(model1.1, model2.1, model3.1, model4.1, 
          type = "html", 
          title = "Degree on Generational Status",
          dep.var.labels = "Dependent Variable: bachelors_degree",
          out = "degree_table4.html")




<table style="text-align:center"><caption><strong>Degree on Generational Status</strong></caption>
<tr><td colspan="5" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="4"><em>Dependent variable:</em></td></tr>
<tr><td></td><td colspan="4" style="border-bottom: 1px solid black"></td></tr>
<tr><td style="text-align:left"></td><td colspan="4">Dependent Variable: bachelors</td></tr>
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td><td>(4)</td></tr>
<tr><td colspan="5" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">genstat</td><td>-0.022<sup>***</sup></td><td>-0.022<sup>***</sup></td><td>-0.022<sup>***</sup></td><td>-0.022<sup>***</sup></td></tr>
<tr><td style="text-align:left"></td><td>(0.0003)</td><td>(0.0003)</td><td>(0.0003)</td><td>(0.0003)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Gender</td><td></td

In [18]:
extra1=lm(bachelors_degree ~ lfact*agegrp_dummy, data = data)

In [19]:
summary(extra1)


Call:
lm(formula = bachelors_degree ~ lfact * agegrp_dummy, data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.19887 -0.19887 -0.17293  0.01294  0.99869 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         2.010e-01  4.183e-04  480.56   <2e-16 ***
lfact              -2.161e-03  1.095e-05 -197.29   <2e-16 ***
agegrp_dummy       -1.995e-01  1.691e-03 -117.96   <2e-16 ***
lfact:agegrp_dummy  2.145e-03  2.836e-05   75.65   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.3433 on 980864 degrees of freedom
Multiple R-squared:  0.05411,	Adjusted R-squared:  0.0541 
F-statistic: 1.87e+04 on 3 and 980864 DF,  p-value: < 2.2e-16
