/
calipmatch.ado
258 lines (196 loc) · 7.44 KB
/
calipmatch.ado
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
*! version 1.0.0 9may2017 Michael Stepner and Allan Garland, stepner@mit.edu
/* CC0 license information:
To the extent possible under law, the author has dedicated all copyright and related and neighboring rights
to this software to the public domain worldwide. This software is distributed without any warranty.
This code is licensed under the CC0 1.0 Universal license. The full legal text as well as a
human-readable summary can be accessed at http://creativecommons.org/publicdomain/zero/1.0/
*/
* Why did I include a formal license? Jeff Atwood gives good reasons: https://blog.codinghorror.com/pick-a-license-any-license/
* calipmatch: match case observations to control observations without replacement.
*
* A control can be matched to a case when it has the same values of every
* exactmatch() variable and its value of each calipermatch() variable lies within
* plus or minus the corresponding caliperwidth() of the value for the case.
*
* Options:
*   generate(name)      new variable receiving the match-group id; left missing
*                       for observations that are not part of any match
*   casevar(varname)    numeric indicator, 1 for cases and 0 for controls
*   maxmatches(#)       maximum number of controls matched to one case; must be >= 1
*   calipermatch(vars)  numeric variables to match within a caliper
*   caliperwidth(nums)  positive caliper widths, one per calipermatch() variable
*   exactmatch(vars)    optional byte, int or long variables to match exactly
*
* Stored results in r():
*   r(match_rate)     share of cases with at least one matched control
*   r(cases_matched)  number of cases with at least one matched control
*   r(cases_total)    number of cases in the sample
*   r(matches)        column matrix, rows named 0 to maxmatches, holding the number
*                     of cases that received that many controls
*
* Exit codes raised here: 122 or 123 (counts of caliper variables and widths differ),
* 198 (invalid maxmatches, exactmatch type or casevar values), 2000 (empty sample),
* 2001 (sample has no cases or no controls).
program define calipmatch, sortpreserve rclass
    version 13.0
    syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(integer) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)]

    * Verify that at least one control may be matched per case.
    * The Mata matching routine sizes its tally of successes with maxmatches rows and
    * indexes it by the number of matches found, so zero or negative values cannot work.
    if (`maxmatches'<1) {
        di as error "maxmatches() must be a positive integer."
        exit 198
    }

    * Verify there are same number of caliper vars as caliper widths
    local caliper_var_count : word count `calipermatch'
    local caliper_width_count : word count `caliperwidth'
    if (`caliper_var_count'!=`caliper_width_count') {
        di as error "must specify the same number of caliper widths as caliper matching variables."
        if (`caliper_var_count'<`caliper_width_count') exit 123
        else exit 122
    }

    * Verify that all exact matching variables have integer data types
    if ("`exactmatch'"!="") {
        foreach var of varlist `exactmatch' {
            cap confirm byte variable `var', exact
            if _rc==0 continue
            cap confirm int variable `var', exact
            if _rc==0 continue
            cap confirm long variable `var', exact
            if _rc==0 continue

            * Not an integer type: explain how to fix it, then stop
            di as error "Exact matching variables must have data type {it:byte}, {it:int}, or {it:long}."
            cap confirm numeric variable `var', exact
            if _rc==0 di as error "Use the {help recast} command or caliper matching for variable: `var'."
            else di as error "Use the {help destring} command or another method to change the datatype for variable: `var'."
            exit 198
        }
    }

    * Verify that we can create the new variable specified
    confirm new variable `generate', exact

    * Mark the sample with necessary vars non-missing
    marksample touse
    markout `touse' `casevar' `calipermatch' `exactmatch'

    * Verify that case/control var is always 0 or 1 in sample
    cap assert `casevar'==0 | `casevar'==1 if `touse'==1
    if _rc==9 {
        di as error "casevar() must always be 0 or 1 in the sample."
        exit 198
    }
    * Re-raise any other error from the assert (for example a Break)
    error _rc

    * Sort into groups for caliper matching, randomizing order of cases and controls.
    * After this sort the in-sample observations are the last rows of the data and,
    * within each exact-match group, controls (0) come before cases (1).
    tempvar rand
    gen float `rand'=runiform()
    sort `touse' `exactmatch' `casevar' `rand'

    * Count the number of total obs and cases in sample
    qui count if `touse'==1
    local insample_total = r(N)
    if (`insample_total'==0) {
        di as error "no observations in sample"
        exit 2000
    }
    qui count if `casevar'==1 in `=_N-`insample_total'+1'/`=_N'
    local cases_total = r(N)
    if (`insample_total'==`cases_total') {
        di as error "no control observations in sample"
        exit 2001
    }
    if (`cases_total'==0) {
        di as error "no case observations in sample"
        exit 2001
    }

    * Find group boundaries; also sets r(no_matches) to 1 when no group holds
    * both cases and controls
    mata: boundaries=find_group_boundaries("`exactmatch'", "`casevar'", `=_N-`insample_total'+1', `=_N')

    * Perform matching within each group
    qui gen long `generate'=.
    tempname case_matches
    if r(no_matches)==0 {
        mata: _calipmatch(boundaries,"`generate'",`maxmatches',"`calipermatch'","`caliperwidth'")
        qui compress `generate'

        * r(matchsuccess) counts cases with 1 to maxmatches controls; prepend the
        * number of cases with no controls so that row 1 corresponds to 0 matches
        matrix `case_matches'=r(matchsuccess)
        matrix `case_matches' = (`cases_total' - `case_matches''* J(rowsof(`case_matches'),1,1)) \ `case_matches'
    }
    else {
        * Nothing could be matched: every case has 0 controls
        matrix `case_matches'=`cases_total' \ J(`maxmatches', 1, 0)
    }

    * Print report on match rate
    local cases_matched = `cases_total'-`case_matches'[1,1]
    local match_rate_print = string(`cases_matched'/`cases_total'*100,"%9.1f")
    di `"`match_rate_print'% match rate."'
    di `"`=string(`cases_matched',"%16.0fc")' out of `=string(`cases_total',"%16.0fc")' cases matched."'
    di ""
    di "Successful matches for each case"
    di "--------------------------------"
    forvalues m=0/`maxmatches' {
        local count=`case_matches'[`m'+1,1]
        local percent=string(`count'/`cases_total'*100,"%9.1f")
        local rownames `rownames' `m'
        di "`m' matched control obs: `count' (`percent'%)"
    }

    * Return match success rate
    matrix rownames `case_matches' = `rownames'
    matrix colnames `case_matches' = "count"
    return clear
    return scalar match_rate = `cases_matched'/`cases_total'
    return scalar cases_matched = `cases_matched'
    return scalar cases_total = `cases_total'
    return matrix matches = `case_matches'
end
* File-level setup for the Mata functions called by the calipmatch program.
* Use the same version as the program itself.
version 13.0
* With matastrict on, Mata requires every variable used in a function to be declared.
set matastrict on
* Enter Mata; the function definitions that follow run until the closing end.
mata:
// _calipmatch(): assign match-group ids to cases and their matched controls.
//
//   boundaries  one row per group: (first control, last control, first case, last case),
//               given as observation numbers
//   genvar      name of the existing variable that receives the match-group id
//   maxmatch    maximum number of controls to attach to one case
//   calipvars   space-separated names of the caliper matching variables
//   calipwidth  space-separated caliper widths, one per variable in calipvars
//
// Cases are processed in data order within each group. Each case scans the group's
// controls in data order and takes the first still-unmatched controls that fall inside
// the caliper on every variable, stopping after maxmatch of them.
// Result: r(matchsuccess), a maxmatch x 1 matrix whose row k counts the cases that
// received exactly k controls. All other r() results are cleared.
void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxmatch, string scalar calipvars, string scalar calipwidth) {
    real scalar idvar, lastid, g, c, k, nfound
    real rowvector xvars, widths, casevals, ctrlvals, lower, upper
    real colvector tally

    idvar  = st_varindex(genvar)
    xvars  = st_varindex(tokens(calipvars))
    widths = strtoreal(tokens(calipwidth))
    lastid = 0
    tally  = J(maxmatch, 1, 0)

    for (g=1; g<=rows(boundaries); g++) {
        for (c=boundaries[g,3]; c<=boundaries[g,4]; c++) {
            // Tentatively open a new match group for this case
            lastid = lastid + 1
            nfound = 0
            _st_store(c, idvar, lastid)

            // Inclusive caliper band around the values of the case
            casevals = st_data(c, xvars)
            lower = casevals - widths
            upper = casevals + widths

            for (k=boundaries[g,1]; k<=boundaries[g,2]; k++) {
                // Only controls without a match-group id are still available
                if (_st_data(k, idvar)==.) {
                    ctrlvals = st_data(k, xvars)
                    // Non-colon >= and <= yield one scalar for the whole vector,
                    // so the control must be inside the band on every variable
                    if (ctrlvals>=lower & ctrlvals<=upper) {
                        nfound = nfound + 1
                        _st_store(k, idvar, lastid)
                    }
                    if (nfound==maxmatch) break
                }
            }

            if (nfound>0) {
                tally[nfound,1] = tally[nfound,1] + 1
            }
            else {
                // No control found: release the id and leave the case unmatched
                lastid = lastid - 1
                _st_store(c, idvar, .)
            }
        }
    }

    stata("return clear")
    st_matrix("r(matchsuccess)", tally)
}
// find_group_boundaries(): locate the control and case ranges of every matchable group.
//
//   grpvars   space-separated names of the exact matching variables (may be empty)
//   casevar   name of the case/control indicator variable
//   startobs  first observation to scan
//   endobs    last observation to scan
//
// Observations startobs..endobs are scanned in data order. A new group starts wherever
// the values of grpvars change; within a group, the point where casevar changes splits
// the group into a leading range and a trailing range. The calling program sorts by
// grpvars and then casevar, so the leading range holds controls and the trailing range
// holds cases. Groups in which casevar never changes are skipped.
//
// Returns one row per retained group:
//   (first obs of leading range, last obs of leading range,
//    first obs of trailing range, last obs of trailing range)
// Also clears r() and sets r(no_matches) to 0 when at least one group is retained and
// to 1 otherwise; in the latter case an empty 0 x 4 matrix is returned.
real matrix find_group_boundaries(string scalar grpvars, string scalar casevar, real scalar startobs, real scalar endobs) {
    real matrix boundaries
    real scalar nextcol, currow, casevarnum, obs
    real rowvector groupvars

    // The last row of boundaries is always the group currently being scanned.
    // nextcol is 2 until a casevar change has been seen in that group, then 4.
    boundaries = (startobs, ., ., .)
    nextcol = 2
    currow = 1

    groupvars = st_varindex(tokens(grpvars))
    casevarnum = st_varindex(casevar)

    for (obs=startobs+1; obs<=endobs; obs++) {
        if (st_data(obs, groupvars)!=st_data(obs-1, groupvars)) {
            if (nextcol==4) {
                // Previous group had both ranges: close it and open a new row
                boundaries[currow,4] = obs-1
                boundaries = boundaries\(obs, ., ., .)
                nextcol = 2
                currow = currow+1
            }
            else { // only one value of casevar in prev group --> skip group
                boundaries[currow,1] = obs
            }
        }
        else if (_st_data(obs, casevarnum)!=_st_data(obs-1, casevarnum)) {
            boundaries[currow,2] = obs-1
            boundaries[currow,3] = obs
            nextcol = 4
        }
    }

    stata("return clear")
    st_numscalar("r(no_matches)", 0)

    // Final group has both ranges: close it and return every row
    if (nextcol==4) {
        boundaries[currow,4] = endobs
        return(boundaries)
    }

    // Final group is incomplete: drop its row if earlier groups were retained
    if (currow>1) return(boundaries[1..rows(boundaries)-1, .])

    // No group can be matched: flag it and return an explicit empty result
    // rather than reaching the end of a non-void function without a return
    st_numscalar("r(no_matches)", 1)
    return(J(0, 4, .))
}
// Leave Mata; this closes the block opened by the mata: command.
end