Skip to content

Commit

Permalink
Fix bug in fcollapse (quantile) as reported by Jake Mortenson
Browse files Browse the repository at this point in the history
  • Loading branch information
sergiocorreia committed May 6, 2022
1 parent 040eb64 commit f308660
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 17 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -276,3 +276,8 @@ Similarly, once the identifiers are created, sorting other variables by these id

Mata's `asarray()` has a key problem: it is very slow with hash collisions (which you see a lot in this use case). Thus, I avoid using `asarray()` and instead use `hash1()` to create a hash table with open addressing (see a comparison between both approaches [here](http://www.algolist.net/Data_structures/Hash_table/Open_addressing#open_addressing_vs_chaining)).



## Updates

- `2.49.0 06may2022`: fixed a bug in `fcollapse` with quantiles (the `p**`, `median`, and `iqr` stats). `ftools` computes these statistics using `moremata`, and had failed to update its function arguments as required by recent changes in `moremata`.
4 changes: 2 additions & 2 deletions src/fcollapse.ado
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
*! version 2.31.2 17dec2018
*! version 2.49.0 06may2022
program define fcollapse
cap noi Inner `0'
loc rc = c(rc)
Expand Down Expand Up @@ -81,7 +81,7 @@ program define Inner
}

// Check dependencies
cap qui mata: mata which _mm_quantile()
cap qui mata: mata which mm_quantile()
loc rc = c(rc)
if (`rc') {
di as error "SSC Package Moremata required (to compute quantiles)"
Expand Down
4 changes: 2 additions & 2 deletions src/fcollapse.sthlp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
2.27.1 23apr2018{smcl}
{* *! version 2.27.0 20apr2018}{...}
2.49.0 06may2022{smcl}
{* *! version 2.49.0 06may2022}{...}
{vieweralsosee "ftools" "help ftools"}{...}
{vieweralsosee "[R] collapse" "help collapse"}{...}
{vieweralsosee "[R] contract" "help contract"}{...}
Expand Down
20 changes: 10 additions & 10 deletions src/fcollapse_functions.mata
Original file line number Diff line number Diff line change
Expand Up @@ -197,27 +197,28 @@ mata set matastrict on
{
`Integer' i
`Vector' results, tmp_data, tmp_weights
`Boolean' has_fweight
results = J(F.num_levels, 1, .)
if (wtype == "") {
for (i = 1; i <= F.num_levels; i++) {
// SYNTAX: _mm_quantile(data, weights, quantiles, altdef)
// SYNTAX: mm_quantile(data, | w, P, altdef)
// SYNTAX: mm_quantile(data, | weights, P, def, fw?, ..)
tmp_data = panelsubmatrix(data, i, F.info)
tmp_data = select(tmp_data, tmp_data :< .)
if (rows(tmp_data) == 0) continue
results[i] = _mm_quantile(tmp_data, 1, P, 0)
results[i] = mm_quantile(tmp_data, 1, P, 2)
}
}
else {
has_fweight = wtype == "fweight"
for (i = 1; i <= F.num_levels; i++) {
tmp_data = panelsubmatrix(data, i, F.info)
tmp_weights = panelsubmatrix(weights, i, F.info)
tmp_weights = select(tmp_weights, tmp_data :< .)
tmp_data = select(tmp_data, tmp_data :< .)
if (rows(tmp_data) == 0) continue
results[i] = _mm_quantile(tmp_data, tmp_weights, P, 0)
results[i] = mm_quantile(tmp_data, tmp_weights, P, 2, has_fweight)
}
}
Expand All @@ -230,32 +231,31 @@ mata set matastrict on
`Integer' i
`Vector' results, tmp_data, tmp_weights, P
`RowVector' tmp_iqr
`Boolean' has_fweight
results = J(F.num_levels, 1, .)
P = (0.25\0.75)
if (wtype == "") {
for (i = 1; i <= F.num_levels; i++) {
// SYNTAX: _mm_quantile(data, weights, quantiles, altdef)
// SYNTAX: mm_quantile(data, | w, P, altdef)
// SYNTAX: mm_iqrange(X [, w, def, fw, wd])
tmp_data = panelsubmatrix(data, i, F.info)
tmp_data = select(tmp_data, tmp_data :< .)
if (rows(tmp_data) == 1) results[i] = 0
if (rows(tmp_data) <= 1) continue
tmp_iqr = _mm_quantile(tmp_data, 1, P, 0)
results[i] = tmp_iqr[2] - tmp_iqr[1]
results[i] = mm_iqrange(tmp_data, 1, 2)
}
}
else {
has_fweight = wtype == "fweight"
for (i = 1; i <= F.num_levels; i++) {
tmp_data = panelsubmatrix(data, i, F.info)
tmp_weights = panelsubmatrix(weights, i, F.info)
tmp_weights = select(tmp_weights, tmp_data :< .)
tmp_data = select(tmp_data, tmp_data :< .)
if (rows(tmp_data) == 1) results[i] = 0
if (rows(tmp_data) <= 1) continue
tmp_iqr = _mm_quantile(tmp_data, tmp_weights, P, 0)
results[i] = tmp_iqr[2] - tmp_iqr[1]
results[i] = mm_iqrange(tmp_data, tmp_weights, 2, has_fweight)
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/ftools.ado
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
*! version 2.48.0 29mar2021
*! version 2.49.0 06may2022
* This file is just used to compile ftools.mlib

program define ftools
Expand Down
2 changes: 1 addition & 1 deletion src/ftools.pkg
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ d
d Requires: Stata version 11.2
d (Stata 12 or older also require the boottest package from ssc)
d
d Distribution-Date: 20191311
d Distribution-Date: 20220506
d

f ftools.ado
Expand Down
2 changes: 1 addition & 1 deletion src/ftools.sthlp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{smcl}
{* *! version 2.39.0 03jun2020}{...}
{* *! version 2.49.0 06may2022}{...}
{vieweralsosee "fegen" "help fegen"}{...}
{vieweralsosee "fcollapse" "help fcollapse"}{...}
{vieweralsosee "join" "help join"}{...}
Expand Down

0 comments on commit f308660

Please sign in to comment.