Skip to content

Commit

Permalink
gtools-0.6.9 (2017-06-27); minor enhancement and bug fix
Browse files Browse the repository at this point in the history
Enhancements

* Addressed the possible issue noted in issue
  #3 and the functions now
  use mata and extended macro functions as applicable.

Bug fixes

* `gegen varname = tag(varlist)` no longer tags missing values, as noted
  in issue #5
  • Loading branch information
mcaceresb committed Jul 27, 2017
1 parent 208505d commit 2c3a3fe
Show file tree
Hide file tree
Showing 10 changed files with 3,013 additions and 7,583 deletions.
5 changes: 5 additions & 0 deletions build/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@ Change Log

### Enhancements

* Addressed the possible issue noted in issue
https://github.com/mcaceresb/stata-gtools/issues/3 and the functions now
use mata and extended macro functions as applicable.
* `gegen varname = group(varlist)` no longer has holes, as noted in issue
https://github.com/mcaceresb/stata-gtools/issues/4
* `gegen varname = tag(varlist)` no longer tags missing values, as noted
in issue https://github.com/mcaceresb/stata-gtools/issues/5
* `gegen` and `gcollapse` fall back on `collapse` and `egen` in case there
is a collision. Future releases will implement an internal way to resolve
collisions. This is not a huge concern, as SpookyHash has no known
Expand Down
112 changes: 69 additions & 43 deletions build/gcollapse.ado
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ program gcollapse
mata: mata drop __gtools_dll
local path: env PATH
if inlist(substr(`"`path'"', length(`"`path'"'), 1), ";") {
local path = substr("`path'"', 1, length(`"`path'"') - 1)
mata: st_local("path", substr(`"`path'"', 1, `:length local path' - 1))
}
local __gtools_hashpath = subinstr("`__gtools_hashpath'", "/", "\", .)
local __gtools_hashpath: subinstr local __gtools_hashpath "/" "\", all
local newpath `"`path';`__gtools_hashpath'"'
local truncate 2048
if ( `:length local newpath' > `truncate' ) {
Expand Down Expand Up @@ -108,7 +108,6 @@ program gcollapse
di as err "option -oncollision()- must be 'fallback' or 'error'"
exit 198
}
di "debug-`oncollision'"

if ( ("`merge'" != "") & ("`if'" != "") ) {
di as err "combining -merge- with -if- is currently buggy; a fix is planned v0.5.1"
Expand All @@ -133,11 +132,11 @@ program gcollapse
`debug_checkhash' ///
//

local multi = "`r(muti)'"
local plugin_call = "`r(plugin_call)'"
local verbose = `r(verbose)'
local benchmark = `r(benchmark)'
local checkhash = `r(checkhash)'
local multi "`r(muti)'"
local plugin_call "`r(plugin_call)'"
local verbose `r(verbose)'
local benchmark `r(benchmark)'
local checkhash `r(checkhash)'

* While C can read macros, it's generally easier to read scalars
scalar __gtools_verbose = `verbose'
Expand Down Expand Up @@ -184,7 +183,7 @@ program gcollapse
* - check plugin: Use multi-threaded if correctly loaded; fall back
* on single-threaded otherwise.
parse_vars `anything' `if' `in', by(`by') `cw' smart(`smart') v(`verbose') `multi' `debug_force_hash'
local indexed = `r(indexed)'
local indexed `r(indexed)'
if ( `=_N' == 0 ) {
di as err "no observations"
exit 2000
Expand All @@ -208,11 +207,11 @@ program gcollapse
__gtools_uniq_stats(`__gtools_uniq_stats') ///
//

local dropme = "`r(dropme)'"
local keepvars = "`r(keepvars)'"
local added = "`r(added)'"
local memvars = "`r(memvars)'"
local check_recast = "`r(check_recast)'"
local dropme "`r(dropme)'"
local keepvars "`r(keepvars)'"
local added "`r(added)'"
local memvars "`r(memvars)'"
local check_recast "`r(check_recast)'"

* Get a list with all string by variables
local bystr ""
Expand Down Expand Up @@ -291,7 +290,7 @@ program gcollapse
mata: st_numscalar("__gtools_k_recast", cols(__gtools_recastvars))
if ( `=scalar(__gtools_k_recast)' > 0 ) {

local totry = `:list sizeof check_recast'
local totry: list sizeof check_recast
if ( ("`double'" == "") & (`:list sizeof check_recast' > 0) ) {
* Since recasting variables is really expensive, we will not recast
* variables where the summary stat is a sum and the result cannot be
Expand Down Expand Up @@ -409,7 +408,10 @@ program gcollapse
if ( _rc == 42000 ) {
di as err "There may be 128-bit hash collisions!"
di as err `"This is a bug. Please report to {browse "`website_url'":`website_disp'}"'
if ( "`oncollision'" == "fallback" ) collision_handler `0'
if ( "`oncollision'" == "fallback" ) {
cap noi collision_handler `0'
exit _rc
}
else exit 42000
}
else if ( _rc != 0 ) exit _rc
Expand Down Expand Up @@ -486,7 +488,10 @@ program gcollapse
if ( _rc == 42000 ) {
di as err "There may be 128-bit hash collisions!"
di as err `"This is a bug. Please report to {browse "`website_url'":`website_disp'}"'
if ( "`oncollision'" == "fallback" ) collision_handler `0'
if ( "`oncollision'" == "fallback" ) {
cap noi collision_handler `0'
exit _rc
}
else exit 42000
}
else if ( _rc != 0 ) exit _rc
Expand Down Expand Up @@ -522,7 +527,10 @@ program gcollapse
if ( _rc == 42000 ) {
di as err "There may be 128-bit hash collisions!"
di as err `"This is a bug. Please report to {browse "`website_url'":`website_disp'}"'
if ( "`oncollision'" == "fallback" ) collision_handler `0'
if ( "`oncollision'" == "fallback" ) {
cap noi collision_handler `0'
exit _rc
}
else exit 42000
}
else if ( _rc != 0 ) exit _rc
Expand Down Expand Up @@ -570,8 +578,8 @@ program gcollapse
if ( _rc != 0 ) exit _rc
}
else {
local nrow = `:di scalar(__gtools_J)'
local ncol = `:di scalar(__gtools_k_extra)'
local nrow = `=scalar(__gtools_J)'
local ncol = `=scalar(__gtools_k_extra)'
mata: __gtools_data = gtools_get_collapsed (`"`__gtools_file'"', `nrow', `ncol')
mata: st_store(., __gtools_iovars, __gtools_data)
}
Expand Down Expand Up @@ -605,7 +613,7 @@ program gcollapse
* the source).
local dropvars ""
if ( `indexed' ) local dropvars `dropvars' `bysmart'
local dropvars = trim("`dropvars'")
local dropvars `dropvars'
if ( "` dropvars'" != "" ) mata: st_dropvar((`:di subinstr(`""`dropvars'""', " ", `"", ""', .)'))
}

Expand Down Expand Up @@ -795,11 +803,11 @@ program parse_opts, rclass
local checkhash = 1
}

return local multi = "`muti'"
return local plugin_call = "`plugin_call'"
return local verbose = `verbose'
return local benchmark = `benchmark'
return local checkhash = `checkhash'
return local multi = "`muti'"
return local plugin_call = "`plugin_call'"
return local verbose = `verbose'
return local benchmark = `benchmark'
return local checkhash = `checkhash'
end

* Parse summary stats and by variables
Expand Down Expand Up @@ -914,11 +922,13 @@ program parse_vars, rclass
di as error "Invalid stat: (`quantile'; maybe you meant 'max'?)"
error 110
}
local __gtools_stats = subinstr(" `__gtools_stats' ", "`quantile'", regexs(1), .)
local __gtools_uniq_stats = subinstr(" `__gtools_uniq_stats' ", "`quantile'", regexs(1), .)
local __gtools_stats " `__gtools_stats' "
local __gtools_uniq_stats " `__gtools_uniq_stats' "
local __gtools_stats: subinstr local __gtools_stats "`quantile'" "`:di regexs(1)'", all
local __gtools_uniq_stats: subinstr local __gtools_uniq_stats "`quantile'" "`:di regexs(1)'", all
}
local __gtools_stats = trim("`__gtools_stats'")
local __gtools_uniq_stats = trim("`__gtools_uniq_stats'")
local __gtools_stats `__gtools_stats'
local __gtools_uniq_stats `__gtools_uniq_stats'

* Can't collapse grouping variables
* ---------------------------------
Expand Down Expand Up @@ -1120,10 +1130,14 @@ program parse_keep_drop, rclass
if ( "`merge'" == "" ) {
scalar __gtools_merge = 0

local __gtools_vars = subinstr(" `__gtools_vars' ", " ", " ", .)
local __gtools_uniq_vars = subinstr(" `__gtools_uniq_vars' ", " ", " ", .)
local __gtools_keepvars = subinstr(" `__gtools_keepvars' ", " ", " ", .)
local K = `:list sizeof __gtools_targets'
local __gtools_vars " `__gtools_vars' "
local __gtools_uniq_vars " `__gtools_uniq_vars' "
local __gtools_keepvars " `__gtools_keepvars' "

local __gtools_vars: subinstr local __gtools_vars " " " ", all
local __gtools_uniq_vars: subinstr local __gtools_uniq_vars " " " ", all
local __gtools_keepvars: subinstr local __gtools_keepvars " " " ", all
local K: list sizeof __gtools_targets
forvalues k = 1 / `K' {
local k_target: word `k' of `__gtools_targets'
local k_var: word `k' of `__gtools_vars'
Expand All @@ -1133,16 +1147,28 @@ program parse_keep_drop, rclass
if ( `:list k_var in __gtools_uniq_vars' & `r(ok_astarget)' ) {
local __gtools_uniq_vars: list __gtools_uniq_vars - k_var
if ( !`:list k_var in __gtools_targets' ) {
local __gtools_vars = trim(subinstr(" `__gtools_vars' ", " `k_var' ", " `k_target' ", .))
local __gtools_uniq_vars = trim(subinstr(" `__gtools_uniq_vars' ", " `k_var' ", " `k_target' ", .))
local __gtools_keepvars = trim(subinstr(" `__gtools_keepvars' ", " `k_var' ", " `k_target' ", .))
local __gtools_vars " `__gtools_vars' "
local __gtools_uniq_vars " `__gtools_uniq_vars' "
local __gtools_keepvars " `__gtools_keepvars' "
local __gtools_vars: subinstr local __gtools_vars " `k_var' " " `k_target' ", all
local __gtools_uniq_vars: subinstr local __gtools_uniq_vars " `k_var' " " `k_target' ", all
local __gtools_keepvars: subinstr local __gtools_keepvars " `k_var' " " `k_target' ", all
local __gtools_vars `__gtools_vars'
local __gtools_uniq_vars `__gtools_uniq_vars'
local __gtools_keepvars `__gtools_keepvars'
rename `k_var' `k_target'
}
}
}
local __gtools_vars = trim(subinstr(" `__gtools_vars' ", " ", " ", .))
local __gtools_uniq_vars = trim(subinstr(" `__gtools_uniq_vars' ", " ", " ", .))
local __gtools_keepvars = trim(subinstr(" `__gtools_keepvars' ", " ", " ", .))
local __gtools_vars " `__gtools_vars' "
local __gtools_uniq_vars " `__gtools_uniq_vars' "
local __gtools_keepvars " `__gtools_keepvars' "
local __gtools_vars: subinstr local __gtools_vars " " " ", all
local __gtools_uniq_vars: subinstr local __gtools_uniq_vars " " " ", all
local __gtools_keepvars: subinstr local __gtools_keepvars " " " ", all
local __gtools_vars `__gtools_vars'
local __gtools_uniq_vars `__gtools_uniq_vars'
local __gtools_keepvars `__gtools_keepvars'

* slow, but saves mem
if ( `indexed' ) local keepvars `by' `bysmart' `__gtools_keepvars'
Expand Down Expand Up @@ -1337,9 +1363,9 @@ if ( "`c(os)'" == "Windows" ) {
mata: mata drop __gtools_dll
local path: env PATH
if inlist(substr(`"`path'"', length(`"`path'"'), 1), ";") {
local path = substr("`path'"', 1, length(`"`path'"') - 1)
mata: st_local("path", substr(`"`path'"', 1, `:length local path' - 1))
}
local __gtools_hashpath = subinstr("`__gtools_hashpath'", "/", "\", .)
local __gtools_hashpath: subinstr local __gtools_hashpath "/" "\", all
local newpath `"`path';`__gtools_hashpath'"'
local truncate 2048
if ( `:length local newpath' > `truncate' ) {
Expand Down Expand Up @@ -1399,7 +1425,7 @@ program define ParseList
while strpos("`0'", " ") {
local 0: subinstr local 0 " " " "
}
local 0 = trim("`0'")
local 0 `0'

while (trim("`0'") != "") {
GetStat stat 0 : `0'
Expand Down
13 changes: 8 additions & 5 deletions build/gegen.ado
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,9 @@ program define gegen, byable(onecall)
mata: mata drop __gtools_dll
local path: env PATH
if inlist(substr(`"`path'"', length(`"`path'"'), 1), ";") {
local path = substr("`path'"', 1, length(`"`path'"') - 1)
mata: st_local("path", substr(`"`path'"', 1, `:length local path' - 1))
}
local __gtools_hashpath = subinstr("`__gtools_hashpath'", "/", "\", .)
local __gtools_hashpath: subinstr local __gtools_hashpath "/" "\", all
local newpath `"`path';`__gtools_hashpath'"'
local truncate 2048
if ( `:length local newpath' > `truncate' ) {
Expand Down Expand Up @@ -427,7 +427,10 @@ program define gegen, byable(onecall)
if ( _rc == 42000 ) {
di as err "There may be 128-bit hash collisions!"
di as err `"This is a bug. Please report to {browse "`website_url'":`website_disp'}"'
if ( "`oncollision'" == "fallback" ) collision_handler `00'
if ( "`oncollision'" == "fallback" ) {
cap noi collision_handler `00'
exit _rc
}
else exit 42000
}
else if ( _rc == 42001 ) {
Expand Down Expand Up @@ -601,9 +604,9 @@ if ( "`c(os)'" == "Windows" ) {
mata: mata drop __gtools_dll
local path: env PATH
if inlist(substr(`"`path'"', length(`"`path'"'), 1), ";") {
local path = substr("`path'"', 1, length(`"`path'"') - 1)
mata: st_local("path", substr(`"`path'"', 1, `:length local path' - 1))
}
local __gtools_hashpath = subinstr("`__gtools_hashpath'", "/", "\", .)
local __gtools_hashpath: subinstr local __gtools_hashpath "/" "\", all
local newpath `"`path';`__gtools_hashpath'"'
local truncate 2048
if ( `:length local newpath' > `truncate' ) {
Expand Down
4 changes: 2 additions & 2 deletions build/gtools_tests.do
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Program: gtools_tests.do
* Author: Mauricio Caceres Bravo <mauricio.caceres.bravo@gmail.com>
* Created: Tue May 16 07:23:02 EDT 2017
* Updated: Thu Jul 27 00:08:31 EDT 2017
* Updated: Thu Jul 27 00:54:24 EDT 2017
* Purpose: Unit tests for gtools
* Version: 0.6.9
* Manual: help gcollapse, help gegen
Expand Down Expand Up @@ -64,7 +64,7 @@ program main
di "Consistency checks (vs collapse, egen) $S_TIME $S_DATE"
di "-----------------------------------------------------------"

consistency_gcollapse, `noisily' oncollision(error) debug_checkhash
consistency_gcollapse, `noisily' oncollision(error)
consistency_gcollapse, `noisily' oncollision(error) forceio debug_io_read_method(0)
consistency_gcollapse, `noisily' oncollision(error) forceio debug_io_read_method(1)
consistency_gcollapse, `noisily' oncollision(error) debug_io_check(1) debug_io_threshold(0)
Expand Down
Loading

0 comments on commit 2c3a3fe

Please sign in to comment.