In [1]:
import platform, stata_setup

try:
    stata_setup.config("C:/Program Files/Stata18/", "se")

except:
    print('Stata18 is not installed!')
    
    if platform.system()=='Windows':
        stata_setup.config("C:/Program Files/Stata17/", "mp")  
    elif platform.system()=='Darwin':
        stata_setup.config("/Applications/Stata/", "mp")
    else: 
        stata_setup.config("/usr/local/stata17/", "mp")

Stata18 is not installed!

  ___  ____  ____  ____  ____ ®
 /__    /   ____/   /   ____/      17.0
___/   /   /___/   /   /___/       MP—Parallel Edition

 Statistics and Data Science       Copyright 1985-2021 StataCorp LLC
                                   StataCorp
                                   4905 Lakeway Drive
                                   College Station, Texas 77845 USA
                                   800-STATA-PC        https://www.stata.com
                                   979-696-4600        stata@stata.com

Stata license: Single-user 4-core  perpetual
Serial number: 501706303466
  Licensed to: David Tomas Jacho-Chavez
               Emory University

Notes:
      1. Unicode is supported; see help unicode_advice.
      2. More than 2 billion observations are allowed; see help obs_advice.
      3. Maximum number of variables is set to 5,000 but can be increased;
          see help set_maxvar.


# Load data

In [2]:
%%stata 
clear all

use "../data/data", clear

global save_tables "../tables/"

set seed 123456


. clear all

. 
. use "../data/data", clear

. 
. global save_tables "../tables/"

. 
. set seed 123456

. 


______
# Generate some variables

In [3]:
%%stata 

# Generata some variables
gen pages_females = log_num_pages*only_females
gen pages_males = log_num_pages*only_males
gen pages_prop = log_num_pages*prop_women
gen authors_females = log_num_authors*only_females
gen authors_males = log_num_authors*only_males
gen authors_prop = log_num_authors*prop_women
gen words_females = log_num_words*only_females
gen words_males = log_num_words*only_males
gen words_prop = log_num_words*prop_women

gen female=.
replace female=1 if only_females==1
replace female=0 if only_males==1

quietly tabulate year, generate(y_)
quietly tabulate cluster, generate(c_)




. 
. # Generata some variables
Unknown #command
. gen pages_females = log_num_pages*only_females

. gen pages_males = log_num_pages*only_males

. gen pages_prop = log_num_pages*prop_women

. gen authors_females = log_num_authors*only_females

. gen authors_males = log_num_authors*only_males

. gen authors_prop = log_num_authors*prop_women

. gen words_females = log_num_words*only_females

. gen words_males = log_num_words*only_males

. gen words_prop = log_num_words*prop_women

. 
. gen female=.
(4,988 missing values generated)

. replace female=1 if only_females==1
(246 real changes made)

. replace female=0 if only_males==1
(3,689 real changes made)

. 
. quietly tabulate year, generate(y_)

. quietly tabulate cluster, generate(c_)

. 
. 


In [4]:
%%stata

# Generate interaction variables: Jel Code imputend/Observed and sex
local journals  ecm jpe qje res  //aer based category

local jel_imp a_imp b_imp c_imp  e_imp f_imp g_imp h_imp i_imp j_imp k_imp /// 
		l_imp m_imp n_imp o_imp p_imp q_imp r_imp y_imp z_imp // d based case

local jel_obs a_obs b_obs c_obs  e_obs f_obs g_obs h_obs i_obs j_obs k_obs /// 
		l_obs m_obs n_obs o_obs p_obs q_obs r_obs y_obs z_obs // d based case

local sex only_females only_males prop_women

foreach y of local jel_imp {
	foreach s of local sex{
		
		gen `y'_`s'= `y'*`s'
	}    
}

foreach y of local jel_obs {
	foreach s of local sex{
		
		gen `y'_`s'= `y'*`s'
	}    
}

local jel_female1 a_imp_only_females b_imp_only_females c_imp_only_females d_imp_only_females /// 
				e_imp_only_females f_imp_only_females g_imp_only_females h_imp_only_females /// 
				i_imp_only_females j_imp_only_females k_imp_only_females l_imp_only_females /// 
				m_imp_only_females n_imp_only_females o_imp_only_females p_imp_only_females ///
				q_imp_only_females r_imp_only_females y_imp_only_females z_imp_only_females

local jel_prop1 a_imp_prop_women b_imp_prop_women c_imp_prop_women d_imp_prop_women /// 
				e_imp_prop_women f_imp_prop_women g_imp_prop_women h_imp_prop_women /// 
				i_imp_prop_women j_imp_prop_women k_imp_prop_women l_imp_prop_women /// 
				m_imp_prop_women n_imp_prop_women o_imp_prop_women p_imp_prop_women /// 
				q_imp_prop_women r_imp_prop_women y_imp_prop_women z_imp_prop_women

local jel_male1 a_imp_only_males b_imp_only_males c_imp_only_males d_imp_only_males /// 
				e_imp_only_males f_imp_only_males g_imp_only_males h_imp_only_males /// 
				i_imp_only_males j_imp_only_males k_imp_only_males l_imp_only_males /// 
				m_imp_only_males n_imp_only_males o_imp_only_males p_imp_only_males /// 
				q_imp_only_males r_imp_only_males y_imp_only_males z_imp_only_males 

local jel_female2 a_obs_only_females b_obs_only_females c_obs_only_females d_obs_only_females ///
				e_obs_only_females f_obs_only_females g_obs_only_females h_obs_only_females /// 
				i_obs_only_females j_obs_only_females k_obs_only_females l_obs_only_females /// 
				m_obs_only_females n_obs_only_females o_obs_only_females p_obs_only_females /// 
				q_obs_only_females r_obs_only_females y_obs_only_females z_obs_only_females

local jel_prop2 a_obs_prop_women b_obs_prop_women c_obs_prop_women d_obs_prop_women /// 
				e_obs_prop_women f_obs_prop_women g_obs_prop_women h_obs_prop_women /// 
				i_obs_prop_women j_obs_prop_women k_obs_prop_women l_obs_prop_women /// 
				m_obs_prop_women n_obs_prop_women o_obs_prop_women p_obs_prop_women /// 
				q_obs_prop_women r_obs_prop_women y_obs_prop_women z_obs_prop_women

local jel_male2 a_obs_only_males b_obs_only_males c_obs_only_males d_obs_only_males /// 
				e_obs_only_males f_obs_only_males g_obs_only_males h_obs_only_males /// 
				i_obs_only_males j_obs_only_males k_obs_only_males l_obs_only_males /// 
				m_obs_only_males n_obs_only_males o_obs_only_males p_obs_only_males ///
				q_obs_only_males r_obs_only_males y_obs_only_males z_obs_only_males




. 
. # Generate interaction variables: Jel Code imputend/Observed and sex
Unknown #command
. local journals  ecm jpe qje res  //aer based category

. 
. local jel_imp a_imp b_imp c_imp  e_imp f_imp g_imp h_imp i_imp j_imp k_imp //
> / 
>                 l_imp m_imp n_imp o_imp p_imp q_imp r_imp y_imp z_imp // d ba
> sed case

. 
. local jel_obs a_obs b_obs c_obs  e_obs f_obs g_obs h_obs i_obs j_obs k_obs //
> / 
>                 l_obs m_obs n_obs o_obs p_obs q_obs r_obs y_obs z_obs // d ba
> sed case

. 
. local sex only_females only_males prop_women

. 
. foreach y of local jel_imp {
  2.         foreach s of local sex{
  3.                 
.                 gen `y'_`s'= `y'*`s'
  4.         }    
  5. }

. 
. foreach y of local jel_obs {
  2.         foreach s of local sex{
  3.                 
.                 gen `y'_`s'= `y'*`s'
  4.         }    
  5. }

. 
. local jel_female1 a_imp_only_females b_imp_only_females c_imp_only_females d_
> imp_only_females /// 
>              

In [5]:
%%stata

# Relabel variables - This is necessary to get table of summarize statistics

label variable log_flesch_kincaid_grade_level "log(F-K grade)"
label variable log_dale_chall "log(Dale-Chall)"
label variable only_females "\hspace{3mm}Female"
label variable only_males "\hspace{3mm}Male"
label variable both_genders "\hspace{3mm}Both genders" 
label variable num_authors "\hspace{3mm}Number of authors"
label variable log_num_authors "\hspace{3mm}log(Number authors)"
label variable num_words "\hspace{3mm}Number of words"
label variable log_num_words "\hspace{3mm}log(Number of words)"
label variable num_pages "\hspace{3mm}Number of pages"
label variable log_num_pages "\hspace{3mm}log(Number pages)"
label variable num_sentences "\hspace{3mm}Number of sentences"
label variable num_syllables "\hspace{3mm}Number of syllables"
label variable log_num_sentences "\hspace{3mm}log(Number of sentences)"
label variable log_num_syllables "\hspace{3mm}log(Number of syllables)"
label variable prop_women "\hspace{3mm}Share of women" 
label variable jel_flag "\hspace{3mm}Papers with  jel code"
label variable isolated "\hspace{3mm}Papers not connected"
label variable aer "\hspace{3mm}American Economic Review"
label variable ecm "\hspace{3mm}Econometrica"
label variable jpe "\hspace{3mm}Journal of Political Economy"
label variable qje "\hspace{3mm}The Quarterly Journal of Economics"
label variable res "\hspace{3mm}Review Economic Studies"

label variable pages_females "\hspace{3mm}log(Number of pages) $\times$ Female"
label variable pages_males "\hspace{3mm}log(Number of pages) $\times$ Male"
label variable pages_prop "\hspace{3mm}log(Number of pages) $\times$ Share of females"
label variable authors_females "\hspace{3mm}log(Number of authors) $\times$ Female"
label variable authors_males "\hspace{3mm}log(Number of authors) $\times$ Male"
label variable authors_prop "\hspace{3mm}log(Number of authors) $\time$ Share of females"
label variable words_females "\hspace{3mm}log(Number of words) $\times$ Female"
label variable words_males "\hspace{3mm}log(Number of words) $\times$ Male"
label variable words_prop "\hspace{3mm}log(Number of words) $\time$ Share of females"




. 
. # Relabel variables - This is necessary to get table of summarize statistics
Unknown #command
. 
. label variable log_flesch_kincaid_grade_level "log(F-K grade)"

. label variable log_dale_chall "log(Dale-Chall)"

. label variable only_females "\hspace{3mm}Female"

. label variable only_males "\hspace{3mm}Male"

. label variable both_genders "\hspace{3mm}Both genders" 

. label variable num_authors "\hspace{3mm}Number of authors"

. label variable log_num_authors "\hspace{3mm}log(Number authors)"

. label variable num_words "\hspace{3mm}Number of words"

. label variable log_num_words "\hspace{3mm}log(Number of words)"

. label variable num_pages "\hspace{3mm}Number of pages"

. label variable log_num_pages "\hspace{3mm}log(Number pages)"

. label variable num_sentences "\hspace{3mm}Number of sentences"

. label variable num_syllables "\hspace{3mm}Number of syllables"

. label variable log_num_sentences "\hspace{3mm}log(Number of sentences)"

. label variable log_num_syllables "\

In [6]:
%%stata

rename log_flesch_kincaid_grade_level FKG
rename log_dale_chall DCH


. 
. rename log_flesch_kincaid_grade_level FKG

. rename log_dale_chall DCH

. 


_____
# Table 4: Double-Selection Lasso Linear Estimation Results, JEL codes observed
### Supplemental Materials | page 5

In [7]:
%%stata

quietly dsregress FKG  log_num_authors  log_num_pages both_genders only_females if jel_flag==1, ///
			controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster cluster) rseed(42)
			
		estadd scalar w1 = e(N) 
		estadd scalar w2 = e(k_controls) 
		estadd scalar w3 = e(k_controls_sel)
		estadd scalar w4 = e(chi2)
		est store m1

quietly dsregress FKG  	log_num_authors  log_num_pages both_genders prop_women if jel_flag==1, ///
			controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster cluster) rseed(42)
			
		estadd scalar w1 = e(N) 
		estadd scalar w2 = e(k_controls) 
		estadd scalar w3 = e(k_controls_sel)
		estadd scalar w4 = e(chi2)
		est store m2

quietly dsregress FKG  log_num_authors  log_num_pages both_genders only_males  if jel_flag==1, ///
			controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster cluster) rseed(42)
			
		estadd scalar w1 = e(N) 
		estadd scalar w2 = e(k_controls) 
		estadd scalar w3 = e(k_controls_sel)
		estadd scalar w4 = e(chi2)
		est store m3
			

esttab m1 m2 m3  using "$save_tables\dgregress_FKG_JO.tex",  replace b(4) se(4) star(* 0.10 ** 0.05) r2   ///
 		stats(N k_controls k_controls_sel chi2, label( "Observations" "Number potential controls" ///
 		"Number controls selected" "$\chi^2(4)$") fmt( 0 0 0 %9.3f)) label



. 
. quietly dsregress FKG  log_num_authors  log_num_pages both_genders only_femal
> es if jel_flag==1, ///
>                         controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  je
> l_flag ) vce(cluster cluster) rseed(42)

.                         
.                 estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  3126

.                 estadd scalar w2 = e(k_controls) 

added scalar:
                 e(w2) =  257

.                 estadd scalar w3 = e(k_controls_sel)

added scalar:
                 e(w3) =  20

.                 estadd scalar w4 = e(chi2)

added scalar:
                 e(w4) =  50.031322

.                 est store m1

. 
. quietly dsregress FKG   log_num_authors  log_num_pages both_genders prop_wome
> n if jel_flag==1, ///
>                         controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  je
> l_flag ) vce(cluster cluster) rseed(42)

.                         
.                 estadd scalar w1 = e(N) 

added scalar:
         

In [8]:
%%stata
estimates table m1 m2 m3, b(%7.4f) se(%7.4f)  stats(N)


--------------------------------------------
    Variable |   m1        m2        m3     
-------------+------------------------------
log_num_au~s | -0.0049   -0.0055   -0.0047  
             |  0.0073    0.0071    0.0072  
log_num_pa~s |  0.0209    0.0206    0.0209  
             |  0.0047    0.0047    0.0047  
both_genders | -0.0122   -0.0017    0.0137  
             |  0.0053    0.0067    0.0099  
only_females | -0.0263                      
             |  0.0072                      
  prop_women |           -0.0245            
             |            0.0066            
  only_males |                      0.0260  
             |                      0.0069  
-------------+------------------------------
           N |    3126      3126      3126  
--------------------------------------------
                                Legend: b/se


______
# Table 5: Double-Selection Lasso Linear Estimation Results with Hyperparameters Chosen by Cross-validation & Adaptive Lasso, JEL codes imputed
### Supplemental Materials | page 6

In [9]:
%%stata
	
quietly dsregress FKG log_num_authors  log_num_pages both_genders only_females  , ///
	controls(`journals' `jel_imp' y_2-y_20  c_2-c_215 jel_flag ) vce(cluster cluster) ///
		selection(cv) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m1
		
quietly dsregress FKG log_num_authors  log_num_pages both_genders only_females   , ///
	controls(`journals' `jel_imp' y_2-y_20  c_2-c_215 jel_flag ) vce(cluster cluster) ///
		selection(adaptive) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m2
		
quietly dsregress FKG log_num_authors  log_num_pages both_genders prop_women  , ///
	controls(`journals' `jel_imp' y_2-y_20  c_2-c_215 jel_flag ) vce(cluster cluster) ///
			selection(cv) rseed(42)
			
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m3
		
quietly dsregress FKG log_num_authors  log_num_pages both_genders prop_women  , ///
	controls(`journals' `jel_imp' y_2-y_20  c_2-c_215 jel_flag ) vce(cluster cluster) ///
		selection(adaptive) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m4

quietly dsregress FKG log_num_authors  log_num_pages both_genders only_males  , ///
	controls(`journals' `jel_imp' y_2-y_20  c_2-c_215 jel_flag ) vce(cluster cluster) ///
		selection(cv) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m5
			
quietly dsregress FKG log_num_authors  log_num_pages both_genders only_males  , ///
	controls(`journals' `jel_imp' y_2-y_20  c_2-c_215 jel_flag ) vce(cluster cluster) ///
		selection(adaptive) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m6

esttab m1 m2 m3 m4 m5 m6  using "$save_tables\dsregress_sel_FKG_JI.tex",  replace b(4) se(4) star(* 0.10 ** 0.05) r2   ///
stats(N k_controls k_controls_sel chi2 selection, label( "Observations" "Number potential controls" ///
"Number controls selected" "$\chi^2(4)$" "Selection") fmt( 0 0 0 %9.3f 0)) label 



.         
. quietly dsregress FKG log_num_authors  log_num_pages both_genders only_female
> s  , ///
>         controls(`journals' `jel_imp' y_2-y_20  c_2-c_215 jel_flag ) vce(clus
> ter cluster) ///
>                 selection(cv) rseed(42)

.                 
. estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  4988

. estadd scalar w2 = e(k_controls) 

added scalar:
                 e(w2) =  257

. estadd scalar w3 = e(k_controls_sel)

added scalar:
                 e(w3) =  37

. estadd scalar w4 = e(chi2)

added scalar:
                 e(w4) =  16.65665

. est store m1

.                 
. quietly dsregress FKG log_num_authors  log_num_pages both_genders only_female
> s   , ///
>         controls(`journals' `jel_imp' y_2-y_20  c_2-c_215 jel_flag ) vce(clus
> ter cluster) ///
>                 selection(adaptive) rseed(42)

.                 
. estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  4988

. estadd scalar w2 = e(k_controls) 

added scala

In [10]:
%%stata
estimates table m1 m2 m3 m4 m5 m6, b(%7.4f) se(%7.4f)  stats(N)


--------------------------------------------------------------------------
    Variable |   m1        m2        m3        m4        m5        m6     
-------------+------------------------------------------------------------
log_num_au~s | -0.0053   -0.0056   -0.0054   -0.0056   -0.0053   -0.0056  
             |  0.0042    0.0043    0.0042    0.0043    0.0042    0.0043  
log_num_pa~s |  0.0162    0.0146    0.0162    0.0148    0.0162    0.0147  
             |  0.0053    0.0049    0.0053    0.0050    0.0053    0.0050  
both_genders | -0.0070   -0.0070    0.0005    0.0007    0.0128    0.0134  
             |  0.0046    0.0049    0.0043    0.0043    0.0062    0.0064  
only_females | -0.0197   -0.0198                                          
             |  0.0058    0.0060                                          
  prop_women |                     -0.0172   -0.0177                      
             |                      0.0058    0.0065                      
  only_males |          

_____
# Table 6: Double-Selection Lasso Linear, Estimation Results with Hyperparameters Chosen by Cross-validation & Adaptive Lasso, JEL codes observed
### Supplemental Materials | page 7

In [11]:
%%stata	
	
quietly dsregress FKG log_num_authors  log_num_pages both_genders only_females  if jel_flag==1, ///
	controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  ) vce(cluster cluster) ///
		selection(cv) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m1
		
quietly dsregress FKG log_num_authors  log_num_pages both_genders only_females  if jel_flag==1, ///
	controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster cluster) ///
		selection(adaptive) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m2
		
quietly dsregress FKG log_num_authors  log_num_pages both_genders prop_women  if jel_flag==1, ///
	controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  ) vce(cluster cluster) ///
		selection(cv) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m3
		
quietly dsregress FKG log_num_authors  log_num_pages both_genders prop_women   if jel_flag==1, ///
	controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster cluster) ///
		selection(adaptive) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m4

quietly dsregress FKG log_num_authors  log_num_pages both_genders only_males  if jel_flag==1, ///
	controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  ) vce(cluster cluster) ///
		selection(cv) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m5
		
quietly dsregress FKG log_num_authors  log_num_pages both_genders only_males if jel_flag==1, ////
	controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster cluster) ///
		selection(adaptive) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m6

esttab m1 m2 m3 m4 m5 m6  using "$save_tables\dsregress_sel_FKG_JO.tex",  replace b(4) se(4) star(* 0.10 ** 0.05) r2   ///
stats(N k_controls k_controls_sel chi2 selection, label( "Observations" "Number potential controls" ///
"Number controls selected" "$\chi^2(4)$" "Selection") fmt( 0 0 0 %9.3f 0)) label 


.         
. quietly dsregress FKG log_num_authors  log_num_pages both_genders only_female
> s  if jel_flag==1, ///
>         controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  ) vce(cluster clus
> ter) ///
>                 selection(cv) rseed(42)

.                 
. estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  3126

. estadd scalar w2 = e(k_controls) 

added scalar:
                 e(w2) =  256

. estadd scalar w3 = e(k_controls_sel)

added scalar:
                 e(w3) =  40

. estadd scalar w4 = e(chi2)

added scalar:
                 e(w4) =  35.369824

. est store m1

.                 
. quietly dsregress FKG log_num_authors  log_num_pages both_genders only_female
> s  if jel_flag==1, ///
>         controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster clust
> er) ///
>                 selection(adaptive) rseed(42)

.                 
. estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  3126

. estadd scalar w2 = e(k_controls) 



In [12]:
%%stata
estimates table m1 m2 m3 m4 m5 m6, b(%7.4f) se(%7.4f)  stats(N)


--------------------------------------------------------------------------
    Variable |   m1        m2        m3        m4        m5        m6     
-------------+------------------------------------------------------------
log_num_au~s | -0.0057   -0.0053   -0.0062   -0.0057   -0.0057   -0.0052  
             |  0.0067    0.0071    0.0066    0.0069    0.0067    0.0070  
log_num_pa~s |  0.0192    0.0197    0.0192    0.0197    0.0192    0.0198  
             |  0.0063    0.0069    0.0063    0.0071    0.0063    0.0071  
both_genders | -0.0107   -0.0098   -0.0006   -0.0005    0.0140    0.0129  
             |  0.0052    0.0050    0.0069    0.0065    0.0110    0.0102  
only_females | -0.0247   -0.0225                                          
             |  0.0079    0.0074                                          
  prop_women |                     -0.0236   -0.0216                      
             |                      0.0074    0.0069                      
  only_males |          

_____
# Table 7: Cross-Fit Partialing-Out Lasso Linear Estimation Results, JEL codes imputed
### Supplemental Materials | page 8

In [13]:
%%stata

quietly xporegress FKG log_num_authors  log_num_pages both_genders only_females  , ///
    controls(`journals' `jel_imp' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster cluster) rseed(42)
    
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m1
    
quietly xporegress FKG log_num_authors  log_num_pages both_genders prop_women   , ///
    controls(`journals' `jel_imp' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster cluster) rseed(42)
    
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m2

quietly xporegress FKG log_num_authors  log_num_pages both_genders only_males  , ///
    controls(`journals' `jel_imp' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster cluster) rseed(42)
    
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m3
    

esttab m1 m2 m3  using "$save_tables\xporegress_FKG_JI.tex",  replace b(4) se(4) star(* 0.10 ** 0.05) r2   ///
stats(N k_controls k_controls_sel chi2, label( "Observations" "Number potential controls" ///
"Number controls selected" "$\chi^2(4)$") fmt( 0 0 0 %9.3f)) label


. 
. quietly xporegress FKG log_num_authors  log_num_pages both_genders only_femal
> es  , ///
>     controls(`journals' `jel_imp' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster
>  cluster) rseed(42)

.     
. estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  4988

. estadd scalar w2 = e(k_controls) 

added scalar:
                 e(w2) =  257

. estadd scalar w3 = e(k_controls_sel)

added scalar:
                 e(w3) =  68

. estadd scalar w4 = e(chi2)

added scalar:
                 e(w4) =  11.20676

. est store m1

.     
. quietly xporegress FKG log_num_authors  log_num_pages both_genders prop_women
>    , ///
>     controls(`journals' `jel_imp' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster
>  cluster) rseed(42)

.     
. estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  4988

. estadd scalar w2 = e(k_controls) 

added scalar:
                 e(w2) =  257

. estadd scalar w3 = e(k_controls_sel)

added scalar:
                 e(w3) =  69

. estadd s

In [14]:
%%stata
estimates table m1 m2 m3, b(%7.4f) se(%7.4f)  stats(N)


--------------------------------------------
    Variable |   m1        m2        m3     
-------------+------------------------------
log_num_au~s | -0.0059   -0.0066   -0.0062  
             |  0.0070    0.0070    0.0070  
log_num_pa~s |  0.0171    0.0171    0.0174  
             |  0.0081    0.0081    0.0081  
both_genders | -0.0077    0.0018    0.0164  
             |  0.0055    0.0063    0.0102  
only_females | -0.0217                      
             |  0.0088                      
  prop_women |           -0.0225            
             |            0.0088            
  only_males |                      0.0244  
             |                      0.0089  
-------------+------------------------------
           N |    4988      4988      4988  
--------------------------------------------
                                Legend: b/se


_____
# Table 8: Cross-Fit Partialing-Out Lasso Linear Estimation Results, JEL codes observed
### Supplemental Materials | page 9

In [15]:
%%stata

quietly xporegress FKG log_num_authors  log_num_pages both_genders only_females   if jel_flag==1, ///
    controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster cluster)  rseed(42)
    
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m1
    
quietly xporegress FKG log_num_authors  log_num_pages both_genders prop_women   if jel_flag==1, ///
    controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster cluster)  rseed(42)
    
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m2

quietly xporegress FKG log_num_authors  log_num_pages both_genders only_males  if jel_flag==1, ///
    controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster cluster)  rseed(42)
    
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m3
    

esttab m1 m2 m3  using "$save_tables\xporegress_FKG_JO.tex",  replace b(4) se(4) star(* 0.10 ** 0.05) r2   ///
stats(N k_controls k_controls_sel chi2, label( "Observations" "Number potential controls" ///
"Number controls selected" "$\chi^2(4)$") fmt( 0 0 0 %9.3f)) label
		


. 
. quietly xporegress FKG log_num_authors  log_num_pages both_genders only_femal
> es   if jel_flag==1, ///
>     controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster cluster) 
>  rseed(42)

.     
. estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  3126

. estadd scalar w2 = e(k_controls) 

added scalar:
                 e(w2) =  256

. estadd scalar w3 = e(k_controls_sel)

added scalar:
                 e(w3) =  62

. estadd scalar w4 = e(chi2)

added scalar:
                 e(w4) =  11.171207

. est store m1

.     
. quietly xporegress FKG log_num_authors  log_num_pages both_genders prop_women
>    if jel_flag==1, ///
>     controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster cluster) 
>  rseed(42)

.     
. estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  3126

. estadd scalar w2 = e(k_controls) 

added scalar:
                 e(w2) =  256

. estadd scalar w3 = e(k_controls_sel)

added scalar:
                 e(w3) =  61

In [16]:
%%stata
estimates table m1 m2 m3, b(%7.4f) se(%7.4f)  stats(N)


--------------------------------------------
    Variable |   m1        m2        m3     
-------------+------------------------------
log_num_au~s | -0.0065   -0.0072   -0.0066  
             |  0.0068    0.0069    0.0068  
log_num_pa~s |  0.0184    0.0184    0.0186  
             |  0.0099    0.0099    0.0099  
both_genders | -0.0126   -0.0014    0.0141  
             |  0.0066    0.0074    0.0120  
only_females | -0.0268                      
             |  0.0117                      
  prop_women |           -0.0262            
             |            0.0110            
  only_males |                      0.0265  
             |                      0.0117  
-------------+------------------------------
           N |    3126      3126      3126  
--------------------------------------------
                                Legend: b/se


____
# Table 9: Double-Selection Lasso Linear Estimation Results, JEL codes imputed
### Supplemental Materials | page 11

In [17]:
%%stata

quietly dsregress DCH  log_num_authors  log_num_pages both_genders only_females, ///
			controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster cluster) rseed(42)
			
		estadd scalar w1 = e(N) 
		estadd scalar w2 = e(k_controls) 
		estadd scalar w3 = e(k_controls_sel)
		estadd scalar w4 = e(chi2)
		est store m1

quietly dsregress DCH  	log_num_authors  log_num_pages both_genders prop_women, ///
			controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster cluster) rseed(42)
			
		estadd scalar w1 = e(N) 
		estadd scalar w2 = e(k_controls) 
		estadd scalar w3 = e(k_controls_sel)
		estadd scalar w4 = e(chi2)
		est store m2

quietly dsregress DCH  log_num_authors  log_num_pages both_genders only_males, ///
			controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster cluster) rseed(42)
			
		estadd scalar w1 = e(N) 
		estadd scalar w2 = e(k_controls) 
		estadd scalar w3 = e(k_controls_sel)
		estadd scalar w4 = e(chi2)
		est store m3
			

esttab m1 m2 m3  using "$save_tables\dgregress_DCH_JI.tex",  replace b(4) se(4) star(* 0.10 ** 0.05) r2   ///
 		stats(N k_controls k_controls_sel chi2, label( "Observations" "Number potential controls" ///
 		"Number controls selected" "$\chi^2(4)$") fmt( 0 0 0 %9.3f)) label



. 
. quietly dsregress DCH  log_num_authors  log_num_pages both_genders only_femal
> es, ///
>                         controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  je
> l_flag ) vce(cluster cluster) rseed(42)

.                         
.                 estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  4988

.                 estadd scalar w2 = e(k_controls) 

added scalar:
                 e(w2) =  257

.                 estadd scalar w3 = e(k_controls_sel)

added scalar:
                 e(w3) =  19

.                 estadd scalar w4 = e(chi2)

added scalar:
                 e(w4) =  185.79331

.                 est store m1

. 
. quietly dsregress DCH   log_num_authors  log_num_pages both_genders prop_wome
> n, ///
>                         controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  je
> l_flag ) vce(cluster cluster) rseed(42)

.                         
.                 estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  4988

.      

In [18]:
%%stata
estimates table m1 m2 m3, b(%7.4f) se(%7.4f)  stats(N)


--------------------------------------------
    Variable |   m1        m2        m3     
-------------+------------------------------
log_num_au~s |  0.0001   -0.0000    0.0001  
             |  0.0015    0.0015    0.0015  
log_num_pa~s |  0.0277    0.0277    0.0278  
             |  0.0031    0.0031    0.0031  
both_genders | -0.0066   -0.0032    0.0017  
             |  0.0014    0.0017    0.0030  
only_females | -0.0087                      
             |  0.0027                      
  prop_women |           -0.0079            
             |            0.0025            
  only_males |                      0.0084  
             |                      0.0026  
-------------+------------------------------
           N |    4988      4988      4988  
--------------------------------------------
                                Legend: b/se


_____
# Table 10: Double-Selection Lasso Linear Estimation Results, JEL codes observed
### Supplemental Materials | page 12

In [19]:
%%stata

quietly dsregress DCH    log_num_authors  log_num_pages both_genders only_females if jel_flag==1, ///
			controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster cluster) rseed(42)
			
		estadd scalar w1 = e(N) 
		estadd scalar w2 = e(k_controls) 
		estadd scalar w3 = e(k_controls_sel)
		estadd scalar w4 = e(chi2)
		est store m1

quietly dsregress DCH    	log_num_authors  log_num_pages both_genders prop_women if jel_flag==1, ///
			controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster cluster) rseed(42)
			
		estadd scalar w1 = e(N) 
		estadd scalar w2 = e(k_controls) 
		estadd scalar w3 = e(k_controls_sel)
		estadd scalar w4 = e(chi2)
		est store m2

quietly dsregress DCH    log_num_authors  log_num_pages both_genders only_males  if jel_flag==1, ///
			controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster cluster) rseed(42)
			
		estadd scalar w1 = e(N) 
		estadd scalar w2 = e(k_controls) 
		estadd scalar w3 = e(k_controls_sel)
		estadd scalar w4 = e(chi2)
		est store m3
			

esttab m1 m2 m3  using "$save_tables\dgregress_DCH_JO.tex",  replace b(4) se(4) star(* 0.10 ** 0.05) r2   ///
 		stats(N k_controls k_controls_sel chi2, label( "Observations" "Number potential controls" ///
 		"Number controls selected" "$\chi^2(4)$") fmt( 0 0 0 %9.3f)) label



. 
. quietly dsregress DCH    log_num_authors  log_num_pages both_genders only_fem
> ales if jel_flag==1, ///
>                         controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  je
> l_flag ) vce(cluster cluster) rseed(42)

.                         
.                 estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  3126

.                 estadd scalar w2 = e(k_controls) 

added scalar:
                 e(w2) =  257

.                 estadd scalar w3 = e(k_controls_sel)

added scalar:
                 e(w3) =  22

.                 estadd scalar w4 = e(chi2)

added scalar:
                 e(w4) =  187.59068

.                 est store m1

. 
. quietly dsregress DCH           log_num_authors  log_num_pages both_genders p
> rop_women if jel_flag==1, ///
>                         controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  je
> l_flag ) vce(cluster cluster) rseed(42)

.                         
.                 estadd scalar w1 = e(N) 

added scalar:

In [20]:
%%stata
estimates table m1 m2 m3, b(%7.4f) se(%7.4f)  stats(N)


--------------------------------------------
    Variable |   m1        m2        m3     
-------------+------------------------------
log_num_au~s | -0.0006   -0.0006   -0.0005  
             |  0.0016    0.0016    0.0016  
log_num_pa~s |  0.0299    0.0297    0.0298  
             |  0.0023    0.0023    0.0023  
both_genders | -0.0050   -0.0026    0.0027  
             |  0.0013    0.0021    0.0043  
only_females | -0.0076                      
             |  0.0044                      
  prop_women |           -0.0058            
             |            0.0041            
  only_males |                      0.0078  
             |                      0.0041  
-------------+------------------------------
           N |    3126      3126      3126  
--------------------------------------------
                                Legend: b/se


______
# Table 11: Double-Selection Lasso Linear Estimation Results with Hyperparameters Chosen by Cross-validation & Adaptive Lasso, JEL codes imputed
### Supplemental Materials | page 13

In [21]:
%%stata
	
quietly dsregress DCH log_num_authors  log_num_pages both_genders only_females  , ///
	controls(`journals' `jel_imp' y_2-y_20  c_2-c_215 jel_flag ) vce(cluster cluster) ///
		selection(cv) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m1
		
quietly dsregress DCH log_num_authors  log_num_pages both_genders only_females   , ///
	controls(`journals' `jel_imp' y_2-y_20  c_2-c_215 jel_flag ) vce(cluster cluster) ///
		selection(adaptive) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m2
		
quietly dsregress DCH log_num_authors  log_num_pages both_genders prop_women  , ///
	controls(`journals' `jel_imp' y_2-y_20  c_2-c_215 jel_flag ) vce(cluster cluster) ///
			selection(cv) rseed(42)
			
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m3
		
quietly dsregress DCH log_num_authors  log_num_pages both_genders prop_women  , ///
	controls(`journals' `jel_imp' y_2-y_20  c_2-c_215 jel_flag ) vce(cluster cluster) ///
		selection(adaptive) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m4

quietly dsregress DCH log_num_authors  log_num_pages both_genders only_males  , ///
	controls(`journals' `jel_imp' y_2-y_20  c_2-c_215 jel_flag ) vce(cluster cluster) ///
		selection(cv) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m5
			
quietly dsregress DCH log_num_authors  log_num_pages both_genders only_males  , ///
	controls(`journals' `jel_imp' y_2-y_20  c_2-c_215 jel_flag ) vce(cluster cluster) ///
		selection(adaptive) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m6

esttab m1 m2 m3 m4 m5 m6  using "$save_tables\dsregress_sel_DCH_JI.tex",  replace b(4) se(4) star(* 0.10 ** 0.05) r2   ///
stats(N k_controls k_controls_sel chi2 selection, label( "Observations" "Number potential controls" ///
"Number controls selected" "$\chi^2(4)$" "Selection") fmt( 0 0 0 %9.3f 0)) label


.         
. quietly dsregress DCH log_num_authors  log_num_pages both_genders only_female
> s  , ///
>         controls(`journals' `jel_imp' y_2-y_20  c_2-c_215 jel_flag ) vce(clus
> ter cluster) ///
>                 selection(cv) rseed(42)

.                 
. estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  4988

. estadd scalar w2 = e(k_controls) 

added scalar:
                 e(w2) =  257

. estadd scalar w3 = e(k_controls_sel)

added scalar:
                 e(w3) =  37

. estadd scalar w4 = e(chi2)

added scalar:
                 e(w4) =  80.768206

. est store m1

.                 
. quietly dsregress DCH log_num_authors  log_num_pages both_genders only_female
> s   , ///
>         controls(`journals' `jel_imp' y_2-y_20  c_2-c_215 jel_flag ) vce(clus
> ter cluster) ///
>                 selection(adaptive) rseed(42)

.                 
. estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  4988

. estadd scalar w2 = e(k_controls) 

added scal

In [22]:
%%stata
estimates table m1 m2 m3 m4 m5 m6, b(%7.4f) se(%7.4f)  stats(N)


--------------------------------------------------------------------------
    Variable |   m1        m2        m3        m4        m5        m6     
-------------+------------------------------------------------------------
log_num_au~s | -0.0017   -0.0020   -0.0017   -0.0021   -0.0017   -0.0020  
             |  0.0019    0.0018    0.0019    0.0018    0.0019    0.0018  
log_num_pa~s |  0.0267    0.0262    0.0267    0.0260    0.0267    0.0260  
             |  0.0039    0.0038    0.0039    0.0037    0.0039    0.0037  
both_genders | -0.0064   -0.0070   -0.0030   -0.0034    0.0025    0.0024  
             |  0.0014    0.0015    0.0014    0.0014    0.0024    0.0025  
only_females | -0.0090   -0.0098                                          
             |  0.0027    0.0027                                          
  prop_women |                     -0.0080   -0.0084                      
             |                      0.0027    0.0027                      
  only_males |          

____
# Table 12: Double-Selection Lasso Linear Estimation Results with Hyperparameters Chosen by Cross-validation & Adaptive Lasso, JEL codes observed
### Supplemental Materials | page 14

In [23]:
%%stata	
	
quietly dsregress DCH log_num_authors  log_num_pages both_genders only_females  if jel_flag==1, ///
	controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  ) vce(cluster cluster) ///
		selection(cv) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m1
		
quietly dsregress DCH log_num_authors  log_num_pages both_genders only_females  if jel_flag==1, ///
	controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster cluster) ///
		selection(adaptive) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m2
		
quietly dsregress DCH log_num_authors  log_num_pages both_genders prop_women  if jel_flag==1, ///
	controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  ) vce(cluster cluster) ///
		selection(cv) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m3
		
quietly dsregress DCH log_num_authors  log_num_pages both_genders prop_women   if jel_flag==1, ///
	controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster cluster) ///
		selection(adaptive) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m4

quietly dsregress DCH log_num_authors  log_num_pages both_genders only_males  if jel_flag==1, ///
	controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  ) vce(cluster cluster) ///
		selection(cv) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m5
		
quietly dsregress DCH log_num_authors  log_num_pages both_genders only_males if jel_flag==1, ////
	controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster cluster) ///
		selection(adaptive) rseed(42)
		
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m6

esttab m1 m2 m3 m4 m5 m6  using "$save_tables\dsregress_sel_DCH_JO.tex",  replace b(4) se(4) star(* 0.10 ** 0.05) r2   ///
stats(N k_controls k_controls_sel chi2 selection, label( "Observations" "Number potential controls" ///
"Number controls selected" "$\chi^2(4)$" "Selection") fmt( 0 0 0 %9.3f 0)) label


.         
. quietly dsregress DCH log_num_authors  log_num_pages both_genders only_female
> s  if jel_flag==1, ///
>         controls(`journals' `jel_obs' y_2-y_20  c_2-c_215  ) vce(cluster clus
> ter) ///
>                 selection(cv) rseed(42)

.                 
. estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  3126

. estadd scalar w2 = e(k_controls) 

added scalar:
                 e(w2) =  256

. estadd scalar w3 = e(k_controls_sel)

added scalar:
                 e(w3) =  40

. estadd scalar w4 = e(chi2)

added scalar:
                 e(w4) =  72.939691

. est store m1

.                 
. quietly dsregress DCH log_num_authors  log_num_pages both_genders only_female
> s  if jel_flag==1, ///
>         controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster clust
> er) ///
>                 selection(adaptive) rseed(42)

.                 
. estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  3126

. estadd scalar w2 = e(k_controls) 



In [24]:
%%stata
estimates table m1 m2 m3 m4 m5 m6, b(%7.4f) se(%7.4f)  stats(N)


--------------------------------------------------------------------------
    Variable |   m1        m2        m3        m4        m5        m6     
-------------+------------------------------------------------------------
log_num_au~s | -0.0020   -0.0019   -0.0020   -0.0021   -0.0020   -0.0021  
             |  0.0018    0.0017    0.0019    0.0018    0.0018    0.0017  
log_num_pa~s |  0.0270    0.0273    0.0270    0.0269    0.0270    0.0269  
             |  0.0041    0.0045    0.0041    0.0043    0.0041    0.0043  
both_genders | -0.0054   -0.0055   -0.0020   -0.0019    0.0040    0.0042  
             |  0.0015    0.0015    0.0019    0.0019    0.0033    0.0037  
only_females | -0.0095   -0.0101                                          
             |  0.0031    0.0033                                          
  prop_women |                     -0.0079   -0.0083                      
             |                      0.0030    0.0032                      
  only_males |          

_____
# Table 13: Cross-Fit Partialing-Out Lasso Linear Estimation Results, JEL codes imputed
### Supplemental Materials | page 15

In [25]:
%%stata

quietly xporegress DCH log_num_authors  log_num_pages both_genders only_females  , ///
    controls(`journals' `jel_imp' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster cluster) rseed(42)
    
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m1
    
quietly xporegress DCH log_num_authors  log_num_pages both_genders prop_women   , ///
    controls(`journals' `jel_imp' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster cluster) rseed(42)
    
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m2

quietly xporegress DCH log_num_authors  log_num_pages both_genders only_males  , ///
    controls(`journals' `jel_imp' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster cluster) rseed(42)
    
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m3
    

esttab m1 m2 m3  using "$save_tables\xporegress_DCH_JI.tex",  replace b(4) se(4) star(* 0.10 ** 0.05) r2   ///
stats(N k_controls k_controls_sel chi2, label( "Observations" "Number potential controls" ///
"Number controls selected" "$\chi^2(4)$") fmt( 0 0 0 %9.3f)) label
		


. 
. quietly xporegress DCH log_num_authors  log_num_pages both_genders only_femal
> es  , ///
>     controls(`journals' `jel_imp' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster
>  cluster) rseed(42)

.     
. estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  4988

. estadd scalar w2 = e(k_controls) 

added scalar:
                 e(w2) =  257

. estadd scalar w3 = e(k_controls_sel)

added scalar:
                 e(w3) =  71

. estadd scalar w4 = e(chi2)

added scalar:
                 e(w4) =  55.997855

. est store m1

.     
. quietly xporegress DCH log_num_authors  log_num_pages both_genders prop_women
>    , ///
>     controls(`journals' `jel_imp' y_2-y_20  c_2-c_215  jel_flag ) vce(cluster
>  cluster) rseed(42)

.     
. estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  4988

. estadd scalar w2 = e(k_controls) 

added scalar:
                 e(w2) =  257

. estadd scalar w3 = e(k_controls_sel)

added scalar:
                 e(w3) =  72

. estadd 

In [26]:
%%stata
estimates table m1 m2 m3, b(%7.4f) se(%7.4f)  stats(N)


--------------------------------------------
    Variable |   m1        m2        m3     
-------------+------------------------------
log_num_au~s | -0.0003   -0.0004   -0.0003  
             |  0.0030    0.0030    0.0030  
log_num_pa~s |  0.0279    0.0279    0.0280  
             |  0.0045    0.0045    0.0045  
both_genders | -0.0066   -0.0028    0.0034  
             |  0.0030    0.0031    0.0053  
only_females | -0.0099                      
             |  0.0054                      
  prop_women |           -0.0089            
             |            0.0051            
  only_males |                      0.0101  
             |                      0.0052  
-------------+------------------------------
           N |    4988      4988      4988  
--------------------------------------------
                                Legend: b/se


____
# Table 14: Cross-Fit Partialing-Out Lasso Linear Estimation Results, JEL codes observed
### Supplemental Materials | page 16

In [27]:
%%stata

quietly xporegress DCH log_num_authors  log_num_pages both_genders only_females   if jel_flag==1, ///
    controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster cluster)  rseed(42)
    
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m1
    
quietly xporegress DCH log_num_authors  log_num_pages both_genders prop_women   if jel_flag==1, ///
    controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster cluster)  rseed(42)
    
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m2

quietly xporegress DCH log_num_authors  log_num_pages both_genders only_males  if jel_flag==1, ///
    controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster cluster)  rseed(42)
    
estadd scalar w1 = e(N) 
estadd scalar w2 = e(k_controls) 
estadd scalar w3 = e(k_controls_sel)
estadd scalar w4 = e(chi2)
est store m3
    

esttab m1 m2 m3  using "$save_tables\xporegress_DCH_JO.tex",  replace b(4) se(4) star(* 0.10 ** 0.05) r2   ///
stats(N k_controls k_controls_sel chi2, label( "Observations" "Number potential controls" ///
"Number controls selected" "$\chi^2(4)$") fmt( 0 0 0 %9.3f)) label
	


. 
. quietly xporegress DCH log_num_authors  log_num_pages both_genders only_femal
> es   if jel_flag==1, ///
>     controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster cluster) 
>  rseed(42)

.     
. estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  3126

. estadd scalar w2 = e(k_controls) 

added scalar:
                 e(w2) =  256

. estadd scalar w3 = e(k_controls_sel)

added scalar:
                 e(w3) =  64

. estadd scalar w4 = e(chi2)

added scalar:
                 e(w4) =  29.61741

. est store m1

.     
. quietly xporegress DCH log_num_authors  log_num_pages both_genders prop_women
>    if jel_flag==1, ///
>     controls(`journals' `jel_obs' y_2-y_20  c_2-c_215 ) vce(cluster cluster) 
>  rseed(42)

.     
. estadd scalar w1 = e(N) 

added scalar:
                 e(w1) =  3126

. estadd scalar w2 = e(k_controls) 

added scalar:
                 e(w2) =  256

. estadd scalar w3 = e(k_controls_sel)

added scalar:
                 e(w3) =  63


In [28]:
%%stata
estimates table m1 m2 m3, b(%7.4f) se(%7.4f)  stats(N)


--------------------------------------------
    Variable |   m1        m2        m3     
-------------+------------------------------
log_num_au~s | -0.0025   -0.0025   -0.0024  
             |  0.0045    0.0046    0.0046  
log_num_pa~s |  0.0292    0.0292    0.0292  
             |  0.0064    0.0064    0.0064  
both_genders | -0.0058   -0.0030    0.0013  
             |  0.0043    0.0057    0.0093  
only_females | -0.0081                      
             |  0.0073                      
  prop_women |           -0.0064            
             |            0.0069            
  only_males |                      0.0070  
             |                      0.0074  
-------------+------------------------------
           N |    3126      3126      3126  
--------------------------------------------
                                Legend: b/se
