-
Notifications
You must be signed in to change notification settings - Fork 0
/
dec_17_test1.do
executable file
·295 lines (244 loc) · 10.2 KB
/
dec_17_test1.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
// Regress Orange and Riverside together; regress LA, VT, SB together.
// An updated version of dec_17_test.do.
set more off
capture log close
log using dec_17_test1, replace
clear
clear matrix
// NOTE(review): set mem only matters on older Stata releases; newer releases
// manage memory automatically -- confirm which version this is run under.
set mem 1g
use residential_six_final_multi.dta, clear
sum impsqft new_impsqft
//===================== begin Multi ===========================
// Data processing: zero is treated as "not recorded" and recoded to missing.
replace sale_price = . if sale_price == 0
replace impsqft = . if impsqft == 0
replace new_impsqft = . if new_impsqft == 0
replace totvalue07 = . if totvalue07 == 0
// Sale years 0, 1899 and 1900 are likewise recoded to missing.
replace saleyr = . if saleyr == 1899 | saleyr == 0 | saleyr == 1900
// Backup copy: saleyr is temporarily overwritten with 2000 before each
// out-of-sample predict further down and is restored from this variable.
gen saleyr_bk = saleyr
// Reduce computation by taking a random sample.
drop if mz ==0
/*
gen random = runiform()
keep if random < 0.1
*/
// Fix the random-number seed so the 1% draw below (and every regression,
// scalar and zone aggregate computed from it) is the same on every run.
// The seed value itself is arbitrary.
set seed 1217
// Keep a 1% random sample of observations within each model zone (mz).
sample 1, by (mz)
rename impsqft floor_area1
rename new_impsqft floor_area2
// Fall back to the original floor area where the updated one is missing.
replace floor_area2 = floor_area1 if missing(floor_area2)
// Log value per unit of floor area, for each value source (sale price,
// totvalue07) and each floor-area source (1 = impsqft, 2 = new_impsqft).
gen lgvsq1 = ln( sale_price / floor_area1)
gen lgvsq2 = ln( sale_price / floor_area2)
gen lgvsq_tot_1 = ln( totvalue07 / floor_area1)
gen lgvsq_tot_2 = ln( totvalue07 / floor_area2)
gen pom = hom / (hom + hrm) //percentage of multi housing units that are owner occupied
gen gamma = 0.6 // A-2 : gamma = Rrs / Ros
// Distinct tract ids, stored in local lv_tract for the tract-mean loops below.
levelsof tractid, local(lv_tract)
gen zeta = 1.5 // adjustment for owners over-reporting their housing values
gen thita = 0.8 // 80% of floor area of multi-unit housing is rentable
// Numeric city code serves as the panel (fixed-effect) identifier for xtreg.
encode city , gen(citynum)
xtset citynum
// ======================== 1st round, use original impsqft data ==============================
// Clear any leftovers from a previous pass; each drop is captured on its own
// so a variable that does not exist yet is simply skipped.
foreach stale in lgvsq lgvsq2000 lgvsq2000_tot floor_area {
    capture drop `stale'
}
// Working copies of the round-1 (impsqft-based) inputs.
generate lgvsq = lgvsq1
generate lgvsq_tot = lgvsq_tot_1
generate floor_area = floor_area1
generate vm = .
generate ros = rrs / gamma
// Placeholders that the regression, imputation and rent-to-value steps fill in.
foreach blank in vrm rom rvm v2000 v2000_tot lgvsq2000 lgvsq2000_tot {
    generate `blank' = .
}
//=================== regression (Orange & Riverside)======================
// insample flags the model zones used for the joint regression:
// mz 50-66 and mz 81-95 (the Orange and Riverside ranges used for the
// xo_residential / xr_residential scalars further down).
gen insample = (mz >= 50 & mz <=66) | (mz >= 81 & mz <=95)
// ---- Regression and imputation using SALE_PRICE ----
// Fixed-effects regression (panel variable set by xtset) of log value per
// unit floor area on location covariates, sale-year and land-use dummies.
xtreg lgvsq fsub cbd fwy ocean i.saleyr i.lu_08 if insample & !missing(saleyr), fe
levelsof saleyr if e(sample), local(lvsaleyr)
// Shift every estimation-sample observation to year-2000 terms by swapping
// its own sale-year effect for the year-2000 effect. capture swallows any
// error from the replace (e.g. a coefficient that cannot be referenced), in
// which case lgvsq2000 simply stays missing for that year.
foreach lv of local lvsaleyr {
    capture replace lgvsq2000 = lgvsq - _b[`lv'.saleyr] + _b[2000.saleyr] if saleyr == `lv' & e(sample)
}
// Observations outside the estimation sample get a model prediction
// evaluated at sale year 2000; saleyr is overwritten only for the predict
// call and restored from the backup immediately afterwards.
replace saleyr = 2000 if e(sample) == 0 & insample
predict lgvsq_p if insample
replace saleyr = saleyr_bk
replace lgvsq2000 = lgvsq_p if e(sample) == 0 & insample
drop lgvsq_p
// ---- Regression and imputation using TOTVALUE07 ----
xtreg lgvsq_tot fsub cbd fwy ocean i.saleyr i.lu_08 if insample & !missing(saleyr), fe
levelsof saleyr if e(sample), local(lvsaleyr)
foreach lv of local lvsaleyr {
    capture replace lgvsq2000_tot = lgvsq_tot - _b[`lv'.saleyr] + _b[2000.saleyr] if saleyr == `lv' & e(sample)
}
replace saleyr = 2000 if e(sample)==0 & insample
predict lgvsq_p if insample
// Restore the true sale years, exactly as after the SALE_PRICE predict.
// Without this, the temporary 2000 values stay in saleyr and feed into the
// next regression that uses these observations.
replace saleyr = saleyr_bk
replace lgvsq2000_tot = lgvsq_p if e(sample) == 0 & insample
drop lgvsq_p
// Compare the sale-price-based and TOTVALUE07-based year-2000 values,
// separately for the Orange range (mz 50-66) and the Riverside range
// (mz 81-95). For each range the ratio total(v2000) / total(v2000_tot) is
// stored in a scalar: xo_residential, then xr_residential.
local zone_o "mz >= 50 & mz <=66"
local zone_r "mz >= 81 & mz <=95"
foreach county in o r {
    replace insample = (`zone_`county'')
    replace v2000 = exp(lgvsq2000) * floor_area if insample
    replace v2000_tot = exp(lgvsq2000_tot) * floor_area if insample
    // tp and tv hold the same range-wide total on every in-range row.
    egen tp = total(v2000) if insample
    egen tv = total(v2000_tot) if insample
    generate vp = tp / tv
    // vp is constant across the in-range rows, so its mean is the ratio.
    summarize vp, meanonly
    scalar x`county'_residential = r(mean)
    drop tp tv vp
}
//========== LA, VT, SB ===========
// Joint TOTVALUE07 regression for mz 1-46, 47-49 and 67-80.
replace insample = (mz >= 1 & mz <=46) | (mz >= 47 & mz <=49) | (mz >= 67 & mz <=80)
xtreg lgvsq_tot fsub cbd fwy ocean i.saleyr i.lu_08 if insample & !missing(saleyr), fe
levelsof saleyr if e(sample), local(lvsaleyr)
// Re-base estimation-sample observations to year 2000; capture swallows
// any error from the replace, leaving the value missing for that year.
foreach lv of local lvsaleyr {
    capture replace lgvsq2000_tot = lgvsq_tot - _b[`lv'.saleyr] + _b[2000.saleyr] if saleyr == `lv' & e(sample)
}
// Predict at sale year 2000 for observations outside the estimation sample.
replace saleyr = 2000 if e(sample)==0 & insample
predict lgvsq_p if insample
// Restore the true sale years; the 2000 override is only meant for the
// predict call and must not leak into later regressions or the saved data.
replace saleyr = saleyr_bk
replace lgvsq2000_tot = lgvsq_p if e(sample) == 0 & insample
drop lgvsq_p
// Convert the TOTVALUE07-based value with the Orange scalar for mz 1-49 and
// the Riverside scalar for mz 67-80.
// NOTE(review): the scalars are computed earlier in this file as
// total(v2000) / total(v2000_tot), i.e. sale-based over TOTVALUE07-based.
// Dividing a TOTVALUE07-based value by that ratio moves it away from the
// sale-price basis -- confirm whether multiplication was intended.
replace insample = (mz >= 1 & mz <=46) | (mz >= 47 & mz <=49)
replace v2000 = exp(lgvsq2000_tot) * floor_area / scalar(xo_residential) if insample
replace insample = (mz >= 67 & mz <=80)
replace v2000 = exp(lgvsq2000_tot) * floor_area / scalar(xr_residential) if insample
//================================================= rent to value ratio ==========================
// vm: tract-level mean of the imputed year-2000 values, one tract at a time
// (tract ids come from the levelsof call made during data processing).
foreach tract of local lv_tract {
    summarize v2000 if tractid == `tract', meanonly
    replace vm = r(mean) if tractid == `tract'
}
// Share of units that are not owner occupied, and the two sides of the
// rent-to-value ratio, spelled out once as macros.
local renters (1 - pom)
local rent_side (rom * pom + rrm * `renters')
local value_side (vom * pom + vrm * `renters')
replace vrm = (zeta * vm - vom * pom) / `renters'
replace rom = vom * ros / vos
replace rvm = `rent_side' / `value_side'
// Rents in the Census are monthly, hence the factor 12.
generate r2000 = v2000 * rvm * 12
//====================================== aggregate to model zone level ====================================
// Rows that can contribute to the value (v) and rent (r) aggregates.
generate tsample_v = !missing(floor_area) & !missing(v2000)
generate tsample_r = !missing(floor_area) & !missing(r2000)
// Zone totals of value / rent and of the matching floor area. Rows outside
// the respective sample receive missing in these totals.
sort mz
foreach kind in v r {
    by mz: egen `kind'_mz = total(`kind'2000) if tsample_`kind'
}
foreach kind in v r {
    by mz: egen fa_mz_`kind' = total(floor_area) if tsample_`kind'
}
// Value and rent per unit of floor area by zone; rent is then scaled up by
// the rentable share of floor area (thita).
generate vsq_mz = v_mz / fa_mz_v
generate rsq_mz = r_mz / fa_mz_r
replace rsq_mz = rsq_mz / thita
drop tsample_v tsample_r v_mz r_mz fa_mz_v fa_mz_r
save dec18_five_residential_multi.dta, replace
// Suffix 1 marks the round-1 (original impsqft) results.
rename vsq_mz vsq_mz1
rename rsq_mz rsq_mz1
// ======================== 2nd round, use updated floor area data from Yizhen Gu ==============================
// Remove the round-1 working variables. The single-variable drops are
// captured one by one; the two grouped drops are kept grouped.
foreach stale in lgvsq lgvsq_tot lgvsq2000 lgvsq2000_tot floor_area {
    capture drop `stale'
}
capture drop vm ros vrm rom rvm
capture drop v2000 r2000 v2000_tot
// Working copies of the round-2 (new_impsqft-based) inputs.
generate lgvsq = lgvsq2
generate lgvsq_tot = lgvsq_tot_2
generate floor_area = floor_area2
generate vm = .
generate ros = rrs / gamma
// Placeholders that the regression, imputation and rent-to-value steps fill in.
foreach blank in vrm rom rvm v2000 r2000 v2000_tot lgvsq2000 lgvsq2000_tot {
    generate `blank' = .
}
//=================== regression (Orange & Riverside)======================
// Start from the true sale years. Earlier steps overwrite saleyr with 2000
// before predicting; if any of those overrides is still in place, this
// regression would treat those sales as year-2000 sales and its
// !missing(saleyr) filter would let through rows whose year is unknown.
replace saleyr = saleyr_bk
// mz 50-66 and mz 81-95 (the Orange and Riverside ranges).
replace insample = (mz >= 50 & mz <=66) | (mz >= 81 & mz <=95)
// ---- Regression and imputation using SALE_PRICE ----
xtreg lgvsq fsub cbd fwy ocean i.saleyr i.lu_08 if insample & !missing(saleyr), fe
levelsof saleyr if e(sample), local(lvsaleyr)
// Re-base estimation-sample observations to year 2000 by swapping the
// sale-year effect; capture swallows any error from the replace, leaving
// lgvsq2000 missing for that year.
foreach lv of local lvsaleyr {
    capture replace lgvsq2000 = lgvsq - _b[`lv'.saleyr] + _b[2000.saleyr] if saleyr == `lv' & e(sample)
}
// Out-of-sample observations: predict at sale year 2000, then restore.
replace saleyr = 2000 if e(sample) == 0 & insample
predict lgvsq_p if insample
replace saleyr = saleyr_bk
replace lgvsq2000 = lgvsq_p if e(sample) == 0 & insample
drop lgvsq_p
// ---- Regression and imputation using TOTVALUE07 ----
xtreg lgvsq_tot fsub cbd fwy ocean i.saleyr i.lu_08 if insample & !missing(saleyr), fe
levelsof saleyr if e(sample), local(lvsaleyr)
foreach lv of local lvsaleyr {
    capture replace lgvsq2000_tot = lgvsq_tot - _b[`lv'.saleyr] + _b[2000.saleyr] if saleyr == `lv' & e(sample)
}
replace saleyr = 2000 if e(sample)==0 & insample
predict lgvsq_p if insample
// Restore the true sale years, exactly as after the SALE_PRICE predict.
replace saleyr = saleyr_bk
replace lgvsq2000_tot = lgvsq_p if e(sample) == 0 & insample
drop lgvsq_p
// Compare the sale-price-based and TOTVALUE07-based year-2000 values,
// separately for the Orange range (mz 50-66) and the Riverside range
// (mz 81-95). For each range the ratio total(v2000) / total(v2000_tot) is
// stored in a scalar: xo_residential, then xr_residential.
local zone_o "mz >= 50 & mz <=66"
local zone_r "mz >= 81 & mz <=95"
foreach county in o r {
    replace insample = (`zone_`county'')
    replace v2000 = exp(lgvsq2000) * floor_area if insample
    replace v2000_tot = exp(lgvsq2000_tot) * floor_area if insample
    // tp and tv hold the same range-wide total on every in-range row.
    egen tp = total(v2000) if insample
    egen tv = total(v2000_tot) if insample
    generate vp = tp / tv
    // vp is constant across the in-range rows, so its mean is the ratio.
    summarize vp, meanonly
    scalar x`county'_residential = r(mean)
    drop tp tv vp
}
//========== LA, VT, SB ===========
// Joint TOTVALUE07 regression for mz 1-46, 47-49 and 67-80.
replace insample = (mz >= 1 & mz <=46) | (mz >= 47 & mz <=49) | (mz >= 67 & mz <=80)
xtreg lgvsq_tot fsub cbd fwy ocean i.saleyr i.lu_08 if insample & !missing(saleyr), fe
levelsof saleyr if e(sample), local(lvsaleyr)
// Re-base estimation-sample observations to year 2000; capture swallows
// any error from the replace, leaving the value missing for that year.
foreach lv of local lvsaleyr {
    capture replace lgvsq2000_tot = lgvsq_tot - _b[`lv'.saleyr] + _b[2000.saleyr] if saleyr == `lv' & e(sample)
}
// Predict at sale year 2000 for observations outside the estimation sample.
replace saleyr = 2000 if e(sample)==0 & insample
predict lgvsq_p if insample
// Restore the true sale years so the dataset saved afterwards does not
// carry the temporary 2000 override.
replace saleyr = saleyr_bk
replace lgvsq2000_tot = lgvsq_p if e(sample) == 0 & insample
drop lgvsq_p
// Convert the TOTVALUE07-based value with the Orange scalar for mz 1-49 and
// the Riverside scalar for mz 67-80.
// NOTE(review): the scalars are computed earlier in this file as
// total(v2000) / total(v2000_tot), i.e. sale-based over TOTVALUE07-based.
// Dividing a TOTVALUE07-based value by that ratio moves it away from the
// sale-price basis -- confirm whether multiplication was intended.
replace insample = (mz >= 1 & mz <=46) | (mz >= 47 & mz <=49)
replace v2000 = exp(lgvsq2000_tot) * floor_area / scalar(xo_residential) if insample
replace insample = (mz >= 67 & mz <=80)
replace v2000 = exp(lgvsq2000_tot) * floor_area / scalar(xr_residential) if insample
//================================================= rent to value ratio ==========================
// vm: tract-level mean of the imputed year-2000 values, one tract at a time
// (tract ids come from the levelsof call made during data processing).
foreach tract of local lv_tract {
    summarize v2000 if tractid == `tract', meanonly
    replace vm = r(mean) if tractid == `tract'
}
// Share of units that are not owner occupied, and the two sides of the
// rent-to-value ratio, spelled out once as macros.
local renters (1 - pom)
local rent_side (rom * pom + rrm * `renters')
local value_side (vom * pom + vrm * `renters')
replace vrm = (zeta * vm - vom * pom) / `renters'
replace rom = vom * ros / vos
replace rvm = `rent_side' / `value_side'
// Rents in the Census are monthly, hence the factor 12.
replace r2000 = v2000 * rvm * 12
//====================================== aggregate to model zone level ====================================
// Rows that can contribute to the value (v) and rent (r) aggregates.
generate tsample_v = !missing(floor_area) & !missing(v2000)
generate tsample_r = !missing(floor_area) & !missing(r2000)
// Zone totals of value / rent and of the matching floor area. Rows outside
// the respective sample receive missing in these totals.
sort mz
foreach kind in v r {
    by mz: egen `kind'_mz = total(`kind'2000) if tsample_`kind'
}
foreach kind in v r {
    by mz: egen fa_mz_`kind' = total(floor_area) if tsample_`kind'
}
// Value and rent per unit of floor area by zone; rent is then scaled up by
// the rentable share of floor area (thita).
generate vsq_mz = v_mz / fa_mz_v
generate rsq_mz = r_mz / fa_mz_r
replace rsq_mz = rsq_mz / thita
drop tsample_v tsample_r v_mz r_mz fa_mz_v fa_mz_r
// Suffix 2 marks the round-2 (updated floor area) results. The parcel-level
// file is written once, after the renames; an earlier save to the same file
// just before them was immediately overwritten and has been removed.
rename vsq_mz vsq_mz2
rename rsq_mz rsq_mz2
save dec18_five_residential_multi.dta, replace
// Reduce to one row per model zone for the CSV export.
// The zone aggregates are missing on rows that were outside the aggregation
// sample, so de-duplicating on the value pair could keep several rows for
// one zone, or keep a row whose rent columns are missing. Instead, copy each
// zone's nonmissing value onto all of its rows (missing sorts last, so the
// first row after sorting holds it whenever one exists) and keep one row
// per mz. Zones with no usable data are exported with missing values.
foreach zonevar of varlist vsq_mz1 vsq_mz2 rsq_mz1 rsq_mz2 {
    bysort mz (`zonevar'): replace `zonevar' = `zonevar'[1]
}
duplicates drop mz, force
outsheet vsq_mz1 vsq_mz2 rsq_mz1 rsq_mz2 mz name using mz_residential_multi_dec18.csv, comma replace
// Close the log opened at the top of the script so it is complete on disk.
log close