Skip to content
Permalink
Browse files

Merge pull request #1233 from josef-pkt/kernels_bug_1208

sandbox kernels bugs uniform kernel and confint closes #1208
  • Loading branch information...
josef-pkt committed Dec 18, 2013
2 parents c0a62a0 + 3892ef9 commit 62a7c7e7c853f7f89146a838e6af57de1e3bb99c
@@ -0,0 +1,41 @@
ship,service,accident,lnservice,x_epa,s_epa,se_epa,s_epan2,se_epan2,s_bi,se_bi,s_cos,se_cos,s_gau,se_gau,s_par,se_par,s_rec,se_rec,s_tri,se_tri
1,127,0,4.8441871,3.8066625,.12329346,.14769943,0,0,0,,0,,.31789228,.4052318,0,0,0,0,0,0
1,63,0,4.1431347,3.9837171,.30532974,.3303055,0,0,0,0,0,,.41252919,.42109712,0,0,0,0,0,0
1,1095,3,6.9985096,4.1607717,.4772784,.40478135,0,0,0,0,0,0,.52838333,.43606074,0,0,0,0,0,0
1,1095,4,6.9985096,4.3378263,.61282275,.44758809,0,0,0,0,0,0,.66734481,.45049239,0,0,0,0,0,0
1,1512,6,7.3211886,4.5148809,.80280046,.47485868,0,0,0,0,0,0,.83052757,.46479833,0,0,0,0,0,0
1,3353,18,8.1176107,4.6919355,1.0210594,.49654713,.03706897,.09236707,.0074708,.05803329,0,0,1.0180961,.47929628,.00043803,.01972916,.125,.13028878,.02555028,.08658423
1,,,,4.8689901,1.2441905,.49923789,.10698662,.14421609,.05711468,.12184728,0,0,1.229218,.49413223,.01397375,.08767939,.375,.20365216,.08187182,.13847185
1,2244,11,7.7160153,5.0460447,1.5644613,.51395645,.2555246,.1899172,.16885066,.17972138,0,0,1.4621654,.50929012,.07298214,.16104218,.375,.20365216,.2133559,.18540563
2,44882,39,10.711792,5.2230993,1.8424735,.5198217,.65594898,.32723303,.40192126,.2890999,.0710275,.31763581,1.7145553,.52469647,.21816525,.24403735,1.25,.35538055,.53410384,.32639086
2,17176,29,9.7512683,5.4001539,2.0737941,.52816297,.99872335,.59744092,.79424007,.46997954,.23735253,.34395122,1.9836917,.54036516,.48683035,.34223342,1.2,.67885023,.86017097,.55800251
2,28609,58,10.261477,5.5772085,2.3894807,.53787442,1.2640475,.69937166,1.2004473,.71128886,.5048966,.30632371,2.2669573,.5565033,.90661691,.5758976,1.1818182,.6239406,1.1443251,.68690512
2,20370,53,9.9218185,5.7542631,2.699925,.55139629,1.6381974,.73260634,1.5530396,.78276673,1.3001163,.31095639,2.5622095,.57352395,1.5053893,.82142495,2,.71726207,1.5784271,.74771943
2,7064,12,8.8627667,5.9313178,3.002866,.57346272,2.0201789,.77831474,1.9716411,.81327887,2.8162974,1.226396,2.8681543,.59196672,2.1398785,.9260037,2.1666667,.78337127,2.066329,.80936863
2,13099,44,9.4802912,6.1083724,3.3466309,.60300519,2.3937704,.74851451,2.3934974,.86449113,3.1926703,1.3202449,3.1846931,.61238059,2.5756388,1.0828446,2.9411765,.71383498,2.4929507,.82300954
2,,,,6.285427,3.6446535,.62661919,2.9643522,.78699236,2.889543,.87761263,2.4193017,1.0956887,3.5132647,.63524734,2.7781945,1.0924244,3.125,.76355747,2.8671936,.8379938
2,7117,18,8.8702416,6.4624816,3.9139935,.64897564,3.4327948,.81041468,3.3693418,.89571115,2.5602516,1.0064842,3.8572215,.66100741,3.0590496,1.0317798,3.2941176,.71385854,3.3151737,.84782678
3,1179,1,7.0724219,6.6395362,4.1691041,.66908117,3.8040168,.86832264,3.7325837,.91037091,3.3404117,1.2808264,4.2222958,.69019316,3.5318061,1.0112434,3.5882353,.7438199,3.7463715,.90365001
3,552,1,6.313548,6.8165908,4.5750253,.68819345,4.2162402,.8637004,4.0628918,.91508898,4.1919688,1.239588,4.6172215,.72360499,4.0858157,1.0109114,4.6666667,.81728135,4.1712169,.88953834
3,781,0,6.6605751,6.9936454,4.9590387,.71212367,4.6200341,.88433507,4.5578482,.93666243,4.734061,1.3656841,5.0545687,.76243461,4.5991536,1.0830329,4.8235294,.86176159,4.5935322,.92215582
3,676,1,6.5161931,7.1707,5.3171402,.73568314,5.1443835,.94285029,5.146322,1.0136781,5.0695209,1.4684254,5.5518247,.80827875,5.1049637,1.1584437,5.4705882,.86431651,5.1522148,.98876585
3,783,6,6.6631327,7.3477546,5.8518296,.80936679,5.9570816,1.0121809,5.869871,1.0893313,5.5644763,1.5825716,6.1326807,.86305962,5.7554599,1.2308547,6.0666667,.960576,5.9031017,1.0528421
3,1948,2,7.5745585,7.5248092,6.5467223,.89781811,6.6851413,1.0735022,6.7320744,1.1767386,6.6235468,1.5821764,6.8283326,.92891809,6.6689378,1.3860984,6.4285714,.98966745,6.6430143,1.1449776
3,,,,7.7018638,7.4652016,.97790307,7.4187558,1.2041802,7.7077993,1.3088584,7.6290218,1.8955101,7.6783363,1.0081701,7.9153409,1.5771601,7,1.1438753,7.6103718,1.2863877
3,274,1,5.6131281,7.8789184,8.7744321,1.0876045,8.2385678,1.4807375,8.9003175,1.5371139,10.267983,1.0761361,8.7301677,1.1034479,9.574323,1.7242064,8.2307692,1.1864501,8.608801,1.5508764
4,251,0,5.5254529,8.055973,10.10718,1.1820417,10.156171,1.7431284,10.423112,1.7077034,15.698618,2.0223725,10.036203,1.2181548,11.796516,1.6800922,9.5,1.6110674,10.634889,1.8029673
4,105,0,4.6539604,8.2330276,11.878725,1.2731189,11.401627,1.509397,12.24627,1.4673791,,,11.646691,1.357218,14.396004,1.8972035,10.375,1.5971147,11.974029,1.5502406
4,288,0,5.6629605,8.4100822,13.813034,1.3843952,12.862858,1.451617,14.266549,1.718067,17.675596,,13.598009,1.5277058,15.847903,2.2735634,11,1.2911926,13.384157,1.5365556
4,192,0,5.2574954,8.5871368,16.138063,1.6293195,16.768068,3.4120536,16.063861,3.3815827,14.949336,,15.89769,1.7383417,15.550994,2.7209926,17,2.6971107,16.475888,3.500466
4,349,2,5.8550719,8.7641914,18.704726,1.9339696,20.279878,3.3885785,18.116286,2.4045809,14.976542,7.756e-06,18.511042,1.9969536,15.776174,2.7247086,24.2,5.6427742,18.864818,3.1309913
4,1208,11,7.0967214,8.941246,21.454671,2.2765158,23.176469,5.1846819,21.147695,4.9939902,15.016853,,21.356577,2.3061881,17.744601,3.5298145,29,4.4732851,21.420318,5.0398337
4,,,,9.1183007,24.227244,2.6267109,27.735332,5.1798876,25.593453,4.6681497,19.453089,,24.315601,2.659853,23.054296,3.2635001,31.2,5.6100834,26.538277,4.9479733
4,2051,4,7.6260828,9.2953553,27.501067,3.13848,31.009946,5.0669349,30.335859,5.5939608,40.18724,3.1107716,27.254535,3.0428081,31.343609,5.9851096,35.666667,4.2258687,31.132646,5.2766521
5,45,0,3.8066625,9.4724099,29.865417,3.480465,35.280242,4.7762274,35.77169,5.631128,39.87102,,30.051145,3.4350708,37.903318,8.2136836,35.666667,4.2258687,36.110331,5.3526138
5,,,,9.6494645,32.366704,3.8884098,39.295489,5.0504157,41.0729,6.0118254,39.349167,1.107e-06,32.61407,3.8177,41.107756,7.7374106,35.666667,4.2258687,39.712318,5.6418531
5,789,7,6.6707663,9.8265191,35.170375,4.4347685,43.943554,5.6605077,44.758047,6.8787882,41.454683,9.4239941,34.890045,4.1771912,43.653343,7.9574455,36.142857,3.9463267,43.787637,6.1605763
5,437,7,6.0799332,10.003574,36.866635,4.6921764,45.338485,6.4698356,45.904662,6.681533,48.035083,,36.860218,4.5068711,46.854317,7.1532436,44.6,6.4022513,45.882313,6.5299339
5,1157,5,7.0535857,10.180628,37.901392,4.6895847,45.632833,6.2523373,46.75837,6.1857605,55.415952,,38.530771,4.8058845,49.774608,5.8191326,44.6,6.4022513,46.878849,6.2036373
5,2161,12,7.6783264,10.357683,39.130413,4.2530464,46.064455,6.1045296,47.441308,5.432277,54.583244,,39.922825,5.0771783,50.221868,4.3469597,44.6,6.4022513,47.268841,6.0161708
5,,,,10.534737,40.012261,4.3810509,46.7329,3.4000914,47.671381,3.7043449,46.067596,,41.064426,5.3255602,47.596159,4.2464822,44.75,3.2324493,46.923605,3.5550717
5,542,1,6.295266,10.711792,41.109675,4.5732279,47.725125,3.8355989,46.849267,4.3269949,39.448327,,41.98531,5.5562919,43.85307,5.0459727,44.75,3.2324493,46.219712,4.171591
@@ -0,0 +1,154 @@
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 14 17:23:25 2013
Author: Josef Perktold
"""

import os
import numpy as np
from statsmodels.sandbox.nonparametric import kernels

from numpy.testing import assert_allclose, assert_array_less

DEBUG = 0

curdir = os.path.dirname(os.path.abspath(__file__))
fname = 'results/results_kernel_regression.csv'
results = np.recfromcsv(os.path.join(curdir, fname))

y = results['accident']
x = np.log(results['service'])
use_mask = ~np.isnan(x)
x = x[use_mask]
y = y[use_mask]
xg = np.linspace(x.min(), x.max(), 40) # grid points default in Stata

#kern_name = 'gau'
#kern = kernels.Gaussian()
#kern_name = 'epan2'
#kern = kernels.Epanechnikov()
#kern_name = 'rec'
#kern = kernels.Uniform() # ours looks awful
#kern_name = 'tri'
#kern = kernels.Triangular()
#kern_name = 'cos'
#kern = kernels.Cosine() #doesn't match up, nan in Stata results ?
#kern_name = 'bi'
#kern = kernels.Biweight()

class CheckKernelMixin(object):

se_rtol = 0.7
upp_rtol = 0.1
low_rtol = 0.2
low_atol = 0.3

def test_smoothconf(self):
kern_name = self.kern_name
kern = self.kern
#fittedg = np.array([kernels.Epanechnikov().smoothconf(x, y, xi) for xi in xg])
fittedg = np.array([kern.smoothconf(x, y, xi) for xi in xg])
# attach for inspection from outside of test run
self.fittedg = fittedg

res_fitted = results['s_' + kern_name]
res_se = results['se_' + kern_name]
crit = 1.9599639845400545 # norm.isf(0.05 / 2)
# implied standard deviation from conf_int
se = (fittedg[:, 2] - fittedg[:, 1]) / crit
fitted = fittedg[:, 1]

# check both rtol & atol
assert_allclose(fitted, res_fitted, rtol=5e-7, atol=1e-20)
assert_allclose(fitted, res_fitted, rtol=0, atol=1e-6)

# TODO: check we are using a different algorithm for se
# The following are very rough checks

self.se = se
self.res_se = res_se
se_valid = np.isfinite(res_se)
if np.any(~se_valid):
print 'nan in stata result', self.__class__.__name__
assert_allclose(se[se_valid], res_se[se_valid], rtol=self.se_rtol, atol=0.2)
# check that most values are closer
mask = np.abs(se - res_se) > (0.2 + 0.2 * res_se)
if not hasattr(self, 'se_n_diff'):
se_n_diff = 40 * 0.125
else:
se_n_diff = self.se_n_diff
assert_array_less(mask.sum(), se_n_diff + 1) # at most 5 large diffs

if DEBUG:
# raises: RuntimeWarning: invalid value encountered in divide
print fitted / res_fitted - 1
print se / res_se - 1
# Stata only displays ci, doesn't save it
res_upp = res_fitted + crit * res_se
res_low = res_fitted - crit * res_se
self.res_fittedg = np.column_stack((res_low, res_fitted, res_upp))
if DEBUG:
print fittedg[:, 2] / res_upp - 1
print fittedg[:, 2] - res_upp
print fittedg[:, 0] - res_low
print np.max(np.abs(fittedg[:, 2] / res_upp - 1))
assert_allclose(fittedg[se_valid, 2], res_upp[se_valid],
rtol=self.upp_rtol, atol=0.2)
assert_allclose(fittedg[se_valid, 0], res_low[se_valid],
rtol=self.low_rtol, atol=self.low_atol)

#assert_allclose(fitted, res_fitted, rtol=0, atol=1e-6)

def t_est_smoothconf_data(self):
kern = self.kern
crit = 1.9599639845400545 # norm.isf(0.05 / 2)
# no reference results saved to csv yet
fitted_x = np.array([kern.smoothconf(x, y, xi) for xi in x])
if DEBUG:
print (fitted_x[:, 2] - fitted_x[:, 1]) / crit

class TestEpan(CheckKernelMixin):
kern_name = 'epan2'
kern = kernels.Epanechnikov()

class TestGau(CheckKernelMixin):
kern_name = 'gau'
kern = kernels.Gaussian()

class TestUniform(CheckKernelMixin):
kern_name = 'rec'
kern = kernels.Uniform()
se_rtol = 0.8
se_n_diff = 8
upp_rtol = 0.4
low_rtol = 0.2
low_atol = 0.8

class TestTriangular(CheckKernelMixin):
kern_name = 'tri'
kern = kernels.Triangular()
se_n_diff = 10
upp_rtol = 0.15
low_rtol = 0.3

class T_estCosine(CheckKernelMixin):
# Stata results for Cosine look strange, has nans
kern_name = 'cos'
kern = kernels.Cosine2()

class TestBiweight(CheckKernelMixin):
kern_name = 'bi'
kern = kernels.Biweight()
se_n_diff = 9
low_rtol = 0.3

if __name__ == '__main__':
tt = TestEpan()
tt = TestGau()
tt = TestBiweight()
tt.test_smoothconf()
diff_rel = tt.fittedg / tt.res_fittedg - 1
diff_abs = tt.fittedg - tt.res_fittedg
mask = np.abs(tt.fittedg - tt.res_fittedg) > (0.3 + 0.1 * tt.res_fittedg)
@@ -291,23 +291,27 @@ def smoothvar(self, xs, ys, x):
else:
return np.nan

def smoothconf(self, xs, ys, x):
def smoothconf(self, xs, ys, x, alpha=0.05):
"""Returns the kernel smoothing estimate with confidence 1sigma bounds
"""
xs, ys = self.in_domain(xs, ys, x)

if len(xs) > 0:
fittedvals = np.array([self.smooth(xs, ys, xx) for xx in xs])
#fittedvals = self.smooth(xs, ys, x) # x or xs in Haerdle
sqresid = square(
subtract(ys, fittedvals)
)
w = np.sum(self((xs-x)/self.h))
#var = sqresid.sum() / (len(sqresid) - 0) # nonlocal var ? JP just trying
v = np.sum([rr*self((xx-x)/self.h) for xx, rr in zip(xs, sqresid)])
var = v / w
sd = np.sqrt(var)
K = self.L2Norm
yhat = self.smooth(xs, ys, x)
err = sd * K / np.sqrt(w * self.h * self.norm_const)
from scipy import stats
crit = stats.norm.isf(alpha / 2)
err = crit * sd * np.sqrt(K) / np.sqrt(w * self.h * self.norm_const)
return (yhat - err, yhat, yhat + err)
else:
return (np.nan, np.nan, np.nan)
@@ -365,7 +369,7 @@ def __call__(self, x):

class Uniform(CustomKernel):
def __init__(self, h=1.0):
CustomKernel.__init__(self, shape=lambda x: 0.5, h=h,
CustomKernel.__init__(self, shape=lambda x: 0.5 * np.ones(x.shape), h=h,
domain=[-1.0, 1.0], norm = 1.0)
self._L2Norm = 0.5
self._kernel_var = 1. / 3
@@ -429,7 +433,7 @@ def smoothvar(self, xs, ys, x):
else:
return np.nan

def smoothconf(self, xs, ys, x):
def smoothconf_(self, xs, ys, x):
"""Returns the kernel smoothing estimate with confidence 1sigma bounds
"""
xs, ys = self.in_domain(xs, ys, x)

0 comments on commit 62a7c7e

Please sign in to comment.
You can’t perform that action at this time.