Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sandbox kernels bugs uniform kernel and confint #1233

Merged
merged 5 commits into from Dec 18, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -0,0 +1,41 @@
ship,service,accident,lnservice,x_epa,s_epa,se_epa,s_epan2,se_epan2,s_bi,se_bi,s_cos,se_cos,s_gau,se_gau,s_par,se_par,s_rec,se_rec,s_tri,se_tri
1,127,0,4.8441871,3.8066625,.12329346,.14769943,0,0,0,,0,,.31789228,.4052318,0,0,0,0,0,0
1,63,0,4.1431347,3.9837171,.30532974,.3303055,0,0,0,0,0,,.41252919,.42109712,0,0,0,0,0,0
1,1095,3,6.9985096,4.1607717,.4772784,.40478135,0,0,0,0,0,0,.52838333,.43606074,0,0,0,0,0,0
1,1095,4,6.9985096,4.3378263,.61282275,.44758809,0,0,0,0,0,0,.66734481,.45049239,0,0,0,0,0,0
1,1512,6,7.3211886,4.5148809,.80280046,.47485868,0,0,0,0,0,0,.83052757,.46479833,0,0,0,0,0,0
1,3353,18,8.1176107,4.6919355,1.0210594,.49654713,.03706897,.09236707,.0074708,.05803329,0,0,1.0180961,.47929628,.00043803,.01972916,.125,.13028878,.02555028,.08658423
1,,,,4.8689901,1.2441905,.49923789,.10698662,.14421609,.05711468,.12184728,0,0,1.229218,.49413223,.01397375,.08767939,.375,.20365216,.08187182,.13847185
1,2244,11,7.7160153,5.0460447,1.5644613,.51395645,.2555246,.1899172,.16885066,.17972138,0,0,1.4621654,.50929012,.07298214,.16104218,.375,.20365216,.2133559,.18540563
2,44882,39,10.711792,5.2230993,1.8424735,.5198217,.65594898,.32723303,.40192126,.2890999,.0710275,.31763581,1.7145553,.52469647,.21816525,.24403735,1.25,.35538055,.53410384,.32639086
2,17176,29,9.7512683,5.4001539,2.0737941,.52816297,.99872335,.59744092,.79424007,.46997954,.23735253,.34395122,1.9836917,.54036516,.48683035,.34223342,1.2,.67885023,.86017097,.55800251
2,28609,58,10.261477,5.5772085,2.3894807,.53787442,1.2640475,.69937166,1.2004473,.71128886,.5048966,.30632371,2.2669573,.5565033,.90661691,.5758976,1.1818182,.6239406,1.1443251,.68690512
2,20370,53,9.9218185,5.7542631,2.699925,.55139629,1.6381974,.73260634,1.5530396,.78276673,1.3001163,.31095639,2.5622095,.57352395,1.5053893,.82142495,2,.71726207,1.5784271,.74771943
2,7064,12,8.8627667,5.9313178,3.002866,.57346272,2.0201789,.77831474,1.9716411,.81327887,2.8162974,1.226396,2.8681543,.59196672,2.1398785,.9260037,2.1666667,.78337127,2.066329,.80936863
2,13099,44,9.4802912,6.1083724,3.3466309,.60300519,2.3937704,.74851451,2.3934974,.86449113,3.1926703,1.3202449,3.1846931,.61238059,2.5756388,1.0828446,2.9411765,.71383498,2.4929507,.82300954
2,,,,6.285427,3.6446535,.62661919,2.9643522,.78699236,2.889543,.87761263,2.4193017,1.0956887,3.5132647,.63524734,2.7781945,1.0924244,3.125,.76355747,2.8671936,.8379938
2,7117,18,8.8702416,6.4624816,3.9139935,.64897564,3.4327948,.81041468,3.3693418,.89571115,2.5602516,1.0064842,3.8572215,.66100741,3.0590496,1.0317798,3.2941176,.71385854,3.3151737,.84782678
3,1179,1,7.0724219,6.6395362,4.1691041,.66908117,3.8040168,.86832264,3.7325837,.91037091,3.3404117,1.2808264,4.2222958,.69019316,3.5318061,1.0112434,3.5882353,.7438199,3.7463715,.90365001
3,552,1,6.313548,6.8165908,4.5750253,.68819345,4.2162402,.8637004,4.0628918,.91508898,4.1919688,1.239588,4.6172215,.72360499,4.0858157,1.0109114,4.6666667,.81728135,4.1712169,.88953834
3,781,0,6.6605751,6.9936454,4.9590387,.71212367,4.6200341,.88433507,4.5578482,.93666243,4.734061,1.3656841,5.0545687,.76243461,4.5991536,1.0830329,4.8235294,.86176159,4.5935322,.92215582
3,676,1,6.5161931,7.1707,5.3171402,.73568314,5.1443835,.94285029,5.146322,1.0136781,5.0695209,1.4684254,5.5518247,.80827875,5.1049637,1.1584437,5.4705882,.86431651,5.1522148,.98876585
3,783,6,6.6631327,7.3477546,5.8518296,.80936679,5.9570816,1.0121809,5.869871,1.0893313,5.5644763,1.5825716,6.1326807,.86305962,5.7554599,1.2308547,6.0666667,.960576,5.9031017,1.0528421
3,1948,2,7.5745585,7.5248092,6.5467223,.89781811,6.6851413,1.0735022,6.7320744,1.1767386,6.6235468,1.5821764,6.8283326,.92891809,6.6689378,1.3860984,6.4285714,.98966745,6.6430143,1.1449776
3,,,,7.7018638,7.4652016,.97790307,7.4187558,1.2041802,7.7077993,1.3088584,7.6290218,1.8955101,7.6783363,1.0081701,7.9153409,1.5771601,7,1.1438753,7.6103718,1.2863877
3,274,1,5.6131281,7.8789184,8.7744321,1.0876045,8.2385678,1.4807375,8.9003175,1.5371139,10.267983,1.0761361,8.7301677,1.1034479,9.574323,1.7242064,8.2307692,1.1864501,8.608801,1.5508764
4,251,0,5.5254529,8.055973,10.10718,1.1820417,10.156171,1.7431284,10.423112,1.7077034,15.698618,2.0223725,10.036203,1.2181548,11.796516,1.6800922,9.5,1.6110674,10.634889,1.8029673
4,105,0,4.6539604,8.2330276,11.878725,1.2731189,11.401627,1.509397,12.24627,1.4673791,,,11.646691,1.357218,14.396004,1.8972035,10.375,1.5971147,11.974029,1.5502406
4,288,0,5.6629605,8.4100822,13.813034,1.3843952,12.862858,1.451617,14.266549,1.718067,17.675596,,13.598009,1.5277058,15.847903,2.2735634,11,1.2911926,13.384157,1.5365556
4,192,0,5.2574954,8.5871368,16.138063,1.6293195,16.768068,3.4120536,16.063861,3.3815827,14.949336,,15.89769,1.7383417,15.550994,2.7209926,17,2.6971107,16.475888,3.500466
4,349,2,5.8550719,8.7641914,18.704726,1.9339696,20.279878,3.3885785,18.116286,2.4045809,14.976542,7.756e-06,18.511042,1.9969536,15.776174,2.7247086,24.2,5.6427742,18.864818,3.1309913
4,1208,11,7.0967214,8.941246,21.454671,2.2765158,23.176469,5.1846819,21.147695,4.9939902,15.016853,,21.356577,2.3061881,17.744601,3.5298145,29,4.4732851,21.420318,5.0398337
4,,,,9.1183007,24.227244,2.6267109,27.735332,5.1798876,25.593453,4.6681497,19.453089,,24.315601,2.659853,23.054296,3.2635001,31.2,5.6100834,26.538277,4.9479733
4,2051,4,7.6260828,9.2953553,27.501067,3.13848,31.009946,5.0669349,30.335859,5.5939608,40.18724,3.1107716,27.254535,3.0428081,31.343609,5.9851096,35.666667,4.2258687,31.132646,5.2766521
5,45,0,3.8066625,9.4724099,29.865417,3.480465,35.280242,4.7762274,35.77169,5.631128,39.87102,,30.051145,3.4350708,37.903318,8.2136836,35.666667,4.2258687,36.110331,5.3526138
5,,,,9.6494645,32.366704,3.8884098,39.295489,5.0504157,41.0729,6.0118254,39.349167,1.107e-06,32.61407,3.8177,41.107756,7.7374106,35.666667,4.2258687,39.712318,5.6418531
5,789,7,6.6707663,9.8265191,35.170375,4.4347685,43.943554,5.6605077,44.758047,6.8787882,41.454683,9.4239941,34.890045,4.1771912,43.653343,7.9574455,36.142857,3.9463267,43.787637,6.1605763
5,437,7,6.0799332,10.003574,36.866635,4.6921764,45.338485,6.4698356,45.904662,6.681533,48.035083,,36.860218,4.5068711,46.854317,7.1532436,44.6,6.4022513,45.882313,6.5299339
5,1157,5,7.0535857,10.180628,37.901392,4.6895847,45.632833,6.2523373,46.75837,6.1857605,55.415952,,38.530771,4.8058845,49.774608,5.8191326,44.6,6.4022513,46.878849,6.2036373
5,2161,12,7.6783264,10.357683,39.130413,4.2530464,46.064455,6.1045296,47.441308,5.432277,54.583244,,39.922825,5.0771783,50.221868,4.3469597,44.6,6.4022513,47.268841,6.0161708
5,,,,10.534737,40.012261,4.3810509,46.7329,3.4000914,47.671381,3.7043449,46.067596,,41.064426,5.3255602,47.596159,4.2464822,44.75,3.2324493,46.923605,3.5550717
5,542,1,6.295266,10.711792,41.109675,4.5732279,47.725125,3.8355989,46.849267,4.3269949,39.448327,,41.98531,5.5562919,43.85307,5.0459727,44.75,3.2324493,46.219712,4.171591
154 changes: 154 additions & 0 deletions statsmodels/nonparametric/tests/test_kernels.py
@@ -0,0 +1,154 @@
# -*- coding: utf-8 -*-
"""

Created on Sat Dec 14 17:23:25 2013

Author: Josef Perktold
"""

import os
import numpy as np
from statsmodels.sandbox.nonparametric import kernels

from numpy.testing import assert_allclose, assert_array_less

DEBUG = 0

curdir = os.path.dirname(os.path.abspath(__file__))
fname = 'results/results_kernel_regression.csv'
# Stata lpoly reference results; columns 's_<kern>'/'se_<kern>' hold the
# smoothed values and standard errors for each kernel's Stata name.
# NOTE(review): np.recfromcsv is deprecated/removed in recent numpy — confirm
# the numpy version this test targets.
results = np.recfromcsv(os.path.join(curdir, fname))

y = results['accident']
x = np.log(results['service'])
# rows with missing 'service' produce nan after log; drop them from x and y
use_mask = ~np.isnan(x)
x = x[use_mask]
y = y[use_mask]
xg = np.linspace(x.min(), x.max(), 40)  # grid points default in Stata

#kern_name = 'gau'
#kern = kernels.Gaussian()
#kern_name = 'epan2'
#kern = kernels.Epanechnikov()
#kern_name = 'rec'
#kern = kernels.Uniform() # ours looks awful
#kern_name = 'tri'
#kern = kernels.Triangular()
#kern_name = 'cos'
#kern = kernels.Cosine() #doesn't match up, nan in Stata results ?
#kern_name = 'bi'
#kern = kernels.Biweight()

class CheckKernelMixin(object):
    """Mixin comparing ``smoothconf`` of a sandbox kernel to Stata lpoly.

    Subclasses set ``kern_name`` (the column suffix in the results csv) and
    ``kern`` (a kernel instance), and may override the tolerance attributes
    below.  Relies on the module-level ``x``, ``y``, ``xg`` and ``results``.
    """

    # rough default tolerances; Stata uses a different standard-error
    # algorithm, so the se/confidence-band checks are loose by design
    se_rtol = 0.7
    upp_rtol = 0.1
    low_rtol = 0.2
    low_atol = 0.3

    def test_smoothconf(self):
        """Check fitted values tightly and confidence bands loosely."""
        kern_name = self.kern_name
        kern = self.kern
        fittedg = np.array([kern.smoothconf(x, y, xi) for xi in xg])
        # attach for inspection from outside of test run
        self.fittedg = fittedg

        res_fitted = results['s_' + kern_name]
        res_se = results['se_' + kern_name]
        crit = 1.9599639845400545  # norm.isf(0.05 / 2)
        # implied standard deviation from conf_int
        se = (fittedg[:, 2] - fittedg[:, 1]) / crit
        fitted = fittedg[:, 1]

        # check both rtol & atol
        assert_allclose(fitted, res_fitted, rtol=5e-7, atol=1e-20)
        assert_allclose(fitted, res_fitted, rtol=0, atol=1e-6)

        # TODO: check we are using a different algorithm for se
        # The following are very rough checks

        self.se = se
        self.res_se = res_se
        se_valid = np.isfinite(res_se)
        if np.any(~se_valid):
            # was a py2 print statement; keep the same stdout text
            print('nan in stata result ' + self.__class__.__name__)
        assert_allclose(se[se_valid], res_se[se_valid],
                        rtol=self.se_rtol, atol=0.2)
        # check that most values are closer; default allows 40 * 0.125 = 5
        # large differences unless the subclass overrides se_n_diff
        mask = np.abs(se - res_se) > (0.2 + 0.2 * res_se)
        se_n_diff = getattr(self, 'se_n_diff', 40 * 0.125)
        assert_array_less(mask.sum(), se_n_diff + 1)  # at most 5 large diffs

        if DEBUG:
            # raises: RuntimeWarning: invalid value encountered in divide
            print(fitted / res_fitted - 1)
            print(se / res_se - 1)
        # Stata only displays ci, doesn't save it
        res_upp = res_fitted + crit * res_se
        res_low = res_fitted - crit * res_se
        self.res_fittedg = np.column_stack((res_low, res_fitted, res_upp))
        if DEBUG:
            print(fittedg[:, 2] / res_upp - 1)
            print(fittedg[:, 2] - res_upp)
            print(fittedg[:, 0] - res_low)
            print(np.max(np.abs(fittedg[:, 2] / res_upp - 1)))
        assert_allclose(fittedg[se_valid, 2], res_upp[se_valid],
                        rtol=self.upp_rtol, atol=0.2)
        assert_allclose(fittedg[se_valid, 0], res_low[se_valid],
                        rtol=self.low_rtol, atol=self.low_atol)

    def t_est_smoothconf_data(self):
        # disabled test (name deliberately mangled): no reference results
        # saved to csv yet, evaluates smoothconf at the data points instead
        # of the Stata grid
        kern = self.kern
        crit = 1.9599639845400545  # norm.isf(0.05 / 2)
        fitted_x = np.array([kern.smoothconf(x, y, xi) for xi in x])
        if DEBUG:
            print((fitted_x[:, 2] - fitted_x[:, 1]) / crit)

class TestEpan(CheckKernelMixin):
    # Epanechnikov kernel; Stata lpoly kernel name 'epan2'
    kern_name = 'epan2'
    kern = kernels.Epanechnikov()

class TestGau(CheckKernelMixin):
    # Gaussian kernel; Stata lpoly kernel name 'gau'
    kern_name = 'gau'
    kern = kernels.Gaussian()

class TestUniform(CheckKernelMixin):
    # Uniform (rectangular) kernel; Stata lpoly kernel name 'rec'.
    # Tolerances are widened relative to the mixin defaults because the
    # se estimates differ most from Stata for this kernel.
    kern_name = 'rec'
    kern = kernels.Uniform()
    se_rtol = 0.8
    se_n_diff = 8
    upp_rtol = 0.4
    low_rtol = 0.2
    low_atol = 0.8

class TestTriangular(CheckKernelMixin):
    # Triangular kernel; Stata lpoly kernel name 'tri'.
    # Allows more large se differences and looser band tolerances.
    kern_name = 'tri'
    kern = kernels.Triangular()
    se_n_diff = 10
    upp_rtol = 0.15
    low_rtol = 0.3

class T_estCosine(CheckKernelMixin):
    # Disabled (class name deliberately mangled so the test runner skips it):
    # Stata results for Cosine look strange, has nans
    kern_name = 'cos'
    kern = kernels.Cosine2()

class TestBiweight(CheckKernelMixin):
    # Biweight (quartic) kernel; Stata lpoly kernel name 'bi'
    kern_name = 'bi'
    kern = kernels.Biweight()
    se_n_diff = 9
    low_rtol = 0.3

if __name__ == '__main__':
    # ad hoc interactive runner: only the last assignment to tt is
    # exercised; the earlier ones look like leftovers kept for quickly
    # switching which kernel to inspect
    tt = TestEpan()
    tt = TestGau()
    tt = TestBiweight()
    tt.test_smoothconf()
    # inspection helpers comparing fitted bands against the Stata reference
    diff_rel = tt.fittedg / tt.res_fittedg - 1
    diff_abs = tt.fittedg - tt.res_fittedg
    mask = np.abs(tt.fittedg - tt.res_fittedg) > (0.3 + 0.1 * tt.res_fittedg)
12 changes: 8 additions & 4 deletions statsmodels/sandbox/nonparametric/kernels.py
Expand Up @@ -291,23 +291,27 @@ def smoothvar(self, xs, ys, x):
else:
return np.nan

def smoothconf(self, xs, ys, x):
def smoothconf(self, xs, ys, x, alpha=0.05):
"""Returns the kernel smoothing estimate with confidence 1sigma bounds
"""
xs, ys = self.in_domain(xs, ys, x)

if len(xs) > 0:
fittedvals = np.array([self.smooth(xs, ys, xx) for xx in xs])
#fittedvals = self.smooth(xs, ys, x) # x or xs in Haerdle
sqresid = square(
subtract(ys, fittedvals)
)
w = np.sum(self((xs-x)/self.h))
#var = sqresid.sum() / (len(sqresid) - 0) # nonlocal var ? JP just trying
v = np.sum([rr*self((xx-x)/self.h) for xx, rr in zip(xs, sqresid)])
var = v / w
sd = np.sqrt(var)
K = self.L2Norm
yhat = self.smooth(xs, ys, x)
err = sd * K / np.sqrt(w * self.h * self.norm_const)
from scipy import stats
crit = stats.norm.isf(alpha / 2)
err = crit * sd * np.sqrt(K) / np.sqrt(w * self.h * self.norm_const)
return (yhat - err, yhat, yhat + err)
else:
return (np.nan, np.nan, np.nan)
Expand Down Expand Up @@ -365,7 +369,7 @@ def __call__(self, x):

class Uniform(CustomKernel):
def __init__(self, h=1.0):
CustomKernel.__init__(self, shape=lambda x: 0.5, h=h,
CustomKernel.__init__(self, shape=lambda x: 0.5 * np.ones(x.shape), h=h,
domain=[-1.0, 1.0], norm = 1.0)
self._L2Norm = 0.5
self._kernel_var = 1. / 3
Expand Down Expand Up @@ -429,7 +433,7 @@ def smoothvar(self, xs, ys, x):
else:
return np.nan

def smoothconf(self, xs, ys, x):
def smoothconf_(self, xs, ys, x):
"""Returns the kernel smoothing estimate with confidence 1sigma bounds
"""
xs, ys = self.in_domain(xs, ys, x)
Expand Down