Skip to content

Commit

Permalink
bugfix with i.var#c.var
Browse files Browse the repository at this point in the history
Now we don't exclude base variables with cont. interactions
  • Loading branch information
sergiocorreia committed Mar 15, 2015
1 parent 11e3ab0 commit bda7e03
Show file tree
Hide file tree
Showing 10 changed files with 105 additions and 16 deletions.
Binary file modified misc/reghdfe.zip
Binary file not shown.
25 changes: 18 additions & 7 deletions package/reghdfe.ado
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
*! reghdfe 1.4.334 12mar2015
*! reghdfe 1.4.371 15mar2015
*! Sergio Correia (sergio.correia@duke.edu)
* (built from multiple source files using build.py)
program define reghdfe
Expand Down Expand Up @@ -1408,6 +1408,8 @@ syntax varlist(min=1 numeric fv ts) [if] [,setname(string)] [CACHE]
* EG: if we use i.foreign#i.rep78 , several categories will be redundant, seen as e.g. "0b.foreign" in -char list-
* We'll also exclude base categories that don't have the "bn" option (to have no base)

* However, if there is a cont. interaction, then we DO want the base categories!

* Loop for each var and then expand them into i.var -> 1.var.. and loop
* Why two loops? B/c I want to save each var expansion to allow for a cache

Expand Down Expand Up @@ -1442,14 +1444,17 @@ syntax varlist(min=1 numeric fv ts) [if] [,setname(string)] [CACHE]
if (substr("`var'", 1, 2)=="__") {
local color result
local parts : subinstr local fvchar "#" " ", all
local continteraction = strpos("`fvchar'", "c.")>0
foreach part of local parts {
*di as error "part=<`part'> cont=`continteraction' all=<`fvchar'>"
* "^[0-9]+b\." -> "b.*\."
if regexm("`part'", "b.*\.") | regexm("`part'", "o.*\.") {
if (regexm("`part'", "b.*\.") & !`continteraction') | regexm("`part'", "o.*\.") {
local color error
drop `var'
continue, break
}
}
if ("`color'"=="error") {
local color result
}


* Need to rename it, or else it gets dropped since its a tempvar
Expand Down Expand Up @@ -1525,7 +1530,14 @@ local vars `0'
if ("`newname'"=="") local newname `var'
}
else if (`is_temp' & "`name'"!="") {
local newname `prefix'`name' // BUGBUG
local newname `prefix'`name'

* Fix bug when the var is omitted:
local bugmatch = regexm("`newname'", "^o\.([0-9]+)b?\.(.+)$")
if (`bugmatch') {
local newname = regexs(1) + "o." + regexs(2) // EG: 1o.var
}

local prettyname `newname'
}
else {
Expand All @@ -1534,7 +1546,7 @@ local vars `0'
}
}

* di in red " var=<`var'> --> new=<`newname'> pretty=<`prettyname'>"
*di in red " var=<`var'> --> new=<`newname'> pretty=<`prettyname'>"
Assert ("`newname'"!="" & "`prettyname'"!=""), ///
msg("var=<`var'> --> new=<`newname'> pretty=<`prettyname'>")
local newnames `newnames' `newname'
Expand Down Expand Up @@ -2002,7 +2014,6 @@ program define Wrapper_ivregress, eclass
local wmatrix : subinstr local vceoption "vce(" "wmatrix("
local vceoption = cond(`vceunadjusted', "vce(unadjusted)", "")
}
di as error "<`vceunadjusted'>!!!"

* Note: the call to -ivregress- could be optimized.
* EG: -ivregress- calls ereturn post .. ESAMPLE(..) but we overwrite the esample and its SLOW
Expand Down
2 changes: 1 addition & 1 deletion package/reghdfe.pkg
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ d KW: instrumental variables
d
d Requires: Stata version 11.2
d
d Distribution-Date: 20150312
d Distribution-Date: 20150315
d

f reghdfe.ado
Expand Down
11 changes: 8 additions & 3 deletions source/_reghdfe/ExpandFactorVariables.ado
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ syntax varlist(min=1 numeric fv ts) [if] [,setname(string)] [CACHE]
* EG: if we use i.foreign#i.rep78 , several categories will be redundant, seen as e.g. "0b.foreign" in -char list-
* We'll also exclude base categories that don't have the "bn" option (to have no base)

* However, if there is a cont. interaction, then we DO want the base categories!

* Loop for each var and then expand them into i.var -> 1.var.. and loop
* Why two loops? B/c I want to save each var expansion to allow for a cache

Expand Down Expand Up @@ -46,14 +48,17 @@ syntax varlist(min=1 numeric fv ts) [if] [,setname(string)] [CACHE]
if (substr("`var'", 1, 2)=="__") {
local color result
local parts : subinstr local fvchar "#" " ", all
local continteraction = strpos("`fvchar'", "c.")>0
foreach part of local parts {
*di as error "part=<`part'> cont=`continteraction' all=<`fvchar'>"
* "^[0-9]+b\." -> "b.*\."
if regexm("`part'", "b.*\.") | regexm("`part'", "o.*\.") {
if (regexm("`part'", "b.*\.") & !`continteraction') | regexm("`part'", "o.*\.") {
local color error
drop `var'
continue, break
}
}
if ("`color'"=="error") {
local color result
}


* Need to rename it, or else it gets dropped since its a tempvar
Expand Down
11 changes: 9 additions & 2 deletions source/_reghdfe/FixVarnames.ado
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,14 @@ local vars `0'
if ("`newname'"=="") local newname `var'
}
else if (`is_temp' & "`name'"!="") {
local newname `prefix'`name' // BUGBUG
local newname `prefix'`name'

* Fix bug when the var is omitted:
local bugmatch = regexm("`newname'", "^o\.([0-9]+)b?\.(.+)$")
if (`bugmatch') {
local newname = regexs(1) + "o." + regexs(2) // EG: 1o.var
}

local prettyname `newname'
}
else {
Expand All @@ -42,7 +49,7 @@ local vars `0'
}
}

* di in red " var=<`var'> --> new=<`newname'> pretty=<`prettyname'>"
*di in red " var=<`var'> --> new=<`newname'> pretty=<`prettyname'>"
Assert ("`newname'"!="" & "`prettyname'"!=""), ///
msg("var=<`var'> --> new=<`newname'> pretty=<`prettyname'>")
local newnames `newnames' `newname'
Expand Down
1 change: 0 additions & 1 deletion source/_reghdfe/Wrapper_ivregress.ado
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ program define Wrapper_ivregress, eclass
local wmatrix : subinstr local vceoption "vce(" "wmatrix("
local vceoption = cond(`vceunadjusted', "vce(unadjusted)", "")
}
di as error "<`vceunadjusted'>!!!"

* Note: the call to -ivregress- could be optimized.
* EG: -ivregress- calls ereturn post .. ESAMPLE(..) but we overwrite the esample and its SLOW
Expand Down
2 changes: 1 addition & 1 deletion source/reghdfe.ado
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
*! reghdfe 1.4.334 12mar2015
*! reghdfe 1.4.371 15mar2015
*! Sergio Correia (sergio.correia@duke.edu)
* (built from multiple source files using build.py)

Expand Down
2 changes: 1 addition & 1 deletion source/reghdfe.pkg
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ d KW: instrumental variables
d
d Requires: Stata version 11.2
d
d Distribution-Date: 20150312
d Distribution-Date: 20150315
d

f reghdfe.ado
Expand Down
2 changes: 2 additions & 0 deletions test/super.do
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
run test-mwc
run test-ivreg2-nocons

run test-cont-interaction

run test-gmm
* Doesn't work: run test-cue
run test-liml
Expand Down
65 changes: 65 additions & 0 deletions test/test-cont-interaction.do
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
cd "D:/Github/reghdfe" // /source
cscript "reghdfe test bugfix ibn.x1#c.x2 do not drop base cat." adofile reghdfe

* Setup
discard
clear all
set more off
* cls

* Convenience: "Trim <size>" will trim e(b) and e(V)
capture program drop TrimMatrix
program define TrimMatrix, eclass
args size
assert `size'>0
matrix trim_b = e(b)
matrix trim_V = e(V)
matrix trim_b = trim_b[1, 1..`size']
matrix trim_V = trim_V[1..`size',1..`size']
ereturn matrix trim_b = trim_b
ereturn matrix trim_V = trim_V
end

* Create fake dataset
sysuse auto
*gen n = int(uniform()*10+3) // used for weights
*replace length = 0 if rep==3 // used for DoF adjustment of cont var
*replace length = 5 if rep==1
*gen byte one = 1
bys turn: gen t = _n
tsset turn t
drop if missing(rep)

local included_e scalar: N rmse tss rss r2 r2_a F df_r df_a df_m F_absorb ///
matrix: trim_b trim_V ///
macros: wexp wtype

* [TEST]
local lhs price
local rhs c.weight#ibn.rep
local absvars turn
fvunab tmp : `rhs'
*local K : list sizeof tmp

* 1. Run benchmark
areg `lhs' `rhs', absorb(`absvars')
local K = e(df_m) - 1
TrimMatrix `K'
storedresults save benchmark e()

* 2. Run reghdfe and compare
reghdfe `lhs' `rhs', absorb(`absvars') vce(unadjusted, suite(default))
TrimMatrix `K'
storedresults compare benchmark e(), tol(1e-12) include(`included_e')

storedresults drop benchmark

* 3. Ensure that it doesn't die with multi
reghdfe `lhs' `rhs', absorb(`absvars') nocons
reghdfe `lhs' `rhs' weight, absorb(`absvars') nocons
reghdfe `lhs' weight `rhs', absorb(`absvars') nocons
egen x = std(weight)
reghdfe `lhs' x c.x#i.rep weight c.weight#i.rep, absorb(`absvars') nocons

cd "D:/Github/reghdfe/test"
exit

0 comments on commit bda7e03

Please sign in to comment.