In [None]:
%matplotlib inline
import numpy as np
from scipy import stats
import statsmodels.api as sm
import matplotlib.pyplot as plt
from statsmodels.distributions.mixture_rvs import MixtureDistribution, mv_mixture_rvs
from statsmodels.kernel_methods import bandwidths, fast_linbin, kde_1d, kde_utils

## Multivariate, with unbounded dimensions

In [None]:
import statsmodels.sandbox.distributions.mv_normal as mvd
# Two axis-aligned (diagonal-covariance) Gaussian components for the mixture.
mu1, cov1 = np.array([2, -2.5]), np.diag([1, .2])
mu2, cov2 = np.array([-2, 1]), np.diag([.5, 1.1])
mvn1, mvn2 = mvd.MVNormal(mu1, cov1), mvd.MVNormal(mu2, cov2)

In [None]:
# Seed NumPy's global generator so the sample, and every figure derived from
# it, is identical after Restart & Run All (assumes the statsmodels samplers
# draw from np.random -- TODO confirm).
np.random.seed(0)
rvs = mv_mixture_rvs([.6, .4], 1000, [mvn1, mvn2], 2)

def dist_pdf(x):
    """Analytical density of the mixture the sample was drawn from.

    ``x`` is handed unchanged to ``mvn1.pdf`` and ``mvn2.pdf``; the weights
    0.6 / 0.4 are the mixture probabilities given to ``mv_mixture_rvs`` above.
    """
    return 0.6*mvn1.pdf(x) + 0.4*mvn2.pdf(x)

rvs.shape

In [None]:
# Window [[x_min, x_max], [y_min, y_max]]: passed to the binning below and
# reused as the axis limits of every figure in this section.
bounds = [[-5,6],[-5,6]]

In [None]:
# Bin the sample inside `bounds` (128 is presumably the number of bins per
# axis -- TODO confirm against fast_bin_nd).
grid, values = fast_linbin.fast_bin_nd(rvs, bounds, 128)
# Divide the binned values by grid.start_volume times the sample count
# (presumably turning counts into a density -- confirm what start_volume is).
values /= grid.start_volume * rvs.shape[0]
grid

In [None]:
# Left panel: binned sample. Right panel: analytical density on the same mesh.
fig = plt.figure(figsize=(24, 8))
m2 = grid.full('C')

ax = fig.add_subplot(121)
m = ax.pcolormesh(m2[0], m2[1], values, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Binning of the data')
plt.colorbar(m, ax=ax)

ax = fig.add_subplot(122)
real_pdf = dist_pdf(grid.linear()).reshape(grid.shape)
m = ax.pcolormesh(m2[0], m2[1], real_pdf, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Real distribution')
plt.colorbar(m, ax=ax)

In [None]:
# Build a KDE on the sample with default settings, fit it, and evaluate the
# fitted model on the grid it chooses itself.
kde = sm.kernel_methods.KDE(rvs)
mod = kde.fit()
xs, ys = mod.grid()
# Show the bandwidth held by the fitted model.
mod.bandwidth

In [None]:
# Report the shape and bounds of the evaluation grid, then integrate the
# estimate over it (sanity check: expected to be close to 1 if the estimate is
# a normalised density on these bounds).
print(xs.shape, xs.bounds)
xs.integrate(ys)

In [None]:
# Left panel: KDE on its own grid. Right panel: signed error against the
# analytical density.
fig = plt.figure(figsize=(24, 8))
m2 = xs.full('C')

ax = fig.add_subplot(121)
m = ax.pcolormesh(m2[0], m2[1], ys, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Estimated KDE')
plt.colorbar(m, ax=ax)

ax = fig.add_subplot(122)
real_pdf = dist_pdf(xs.linear()).reshape(xs.shape)
m = ax.pcolormesh(m2[0], m2[1], ys - real_pdf, shading='gouraud', cmap=plt.cm.coolwarm)
# Symmetric colour limits keep zero error at the centre of the diverging map.
cmin, cmax = m.get_clim()
crange = max(abs(cmin), abs(cmax))
m.set_clim(vmin=-crange, vmax=crange)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Error on the KDE')
plt.colorbar(m, ax=ax)

In [None]:
# Evaluate the fitted model at the single point (-2, 1), which is the mean
# (mu2) of the second mixture component.
mod.pdf([[-2, 1]])

In [None]:
# 64 x 64 evaluation mesh on [-6, 6] x [-6, 6].
xs3 = kde_utils.Grid([np.r_[-6:6:64j], np.r_[-6:6:64j]])
# Flatten to one 2-column row per mesh node for point-wise evaluation
# (assumes xs3.full() puts the coordinate axis last -- TODO confirm).
xxs3 = xs3.full().reshape((np.prod(xs3.shape),2))

In [None]:
# Point-wise evaluation of the fitted model at every flattened mesh node.
ys3 = mod.pdf(xxs3)

In [None]:
# Give the flat point-wise result the mesh's shape. ndarray.reshape is used
# because assigning to .shape in place is discouraged by NumPy.
ys3 = ys3.reshape(xs3.shape)
# Integrate the point-wise estimate over the mesh.
xs3.integrate(ys3)

In [None]:
# Single panel: the point-wise KDE evaluated on the 64 x 64 mesh.
fig = plt.figure(figsize=(24, 8))
m3 = xs3.full('C')
ax = fig.add_subplot(121)
m = ax.pcolormesh(m3[0], m3[1], ys3, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Estimated KDE')
plt.colorbar(m, ax=ax)


## Multivariate, with one reflective, one cyclic dimension

In [None]:
import statsmodels.sandbox.distributions.mv_normal as mvd
# Mixture components for the reflected / cyclic example (diagonal covariances).
mu1, cov1 = np.array([0.3, 1.5]), np.diag([.3, .2])
mu2, cov2 = np.array([3, 2.5]), np.diag([.5, 0.6])
mvn1, mvn2 = mvd.MVNormal(mu1, cov1), mvd.MVNormal(mu2, cov2)

In [None]:
# Seed NumPy's global generator so the sample is reproducible after Restart &
# Run All (assumes the statsmodels samplers draw from np.random -- TODO confirm).
np.random.seed(0)
rvs = mv_mixture_rvs([.4, .6], 10000, [mvn1, mvn2], 2)
# Fold the sample into the target domain: reflect axis 0 at 0 and wrap axis 1
# onto [0, 3).
rvs[:,0] = abs(rvs[:,0])
rvs[:,1] = rvs[:,1] % 3

def dist_pdf_(x):
    """Density of the unfolded mixture (weights 0.4 / 0.6), before reflection and wrapping."""
    return 0.4*mvn1.pdf(x) + 0.6*mvn2.pdf(x)

def dist_pdf(x):
    """Density of the folded sample at the rows of ``x`` (2 columns).

    Folding maps every unfolded point ``(+/-x0, x1 + 3k)`` onto ``(x0, x1)``, so
    the folded density is the sum of the unfolded density over both signs of
    axis 0 and over the periodic images of axis 1.  Both the direct and the
    reflected image are wrapped; only the nearest periodic images
    (k = -1, 0, 1) are summed, farther ones being treated as negligible.
    """
    period = np.c_[np.zeros(len(x)), 3*np.ones(len(x))]
    ref_x = np.c_[-x[:,0], x[:,1]]
    return sum(dist_pdf_(image + k*period)
               for image in (x, ref_x)
               for k in (-1, 0, 1))

rvs.shape

In [None]:
# Window [[x_min, x_max], [y_min, y_max]] for binning and plotting; the axis-1
# range [0, 3] matches the modulo-3 wrap applied to the sample.
bounds = [[0,5],[0,3]]

In [None]:
# Bin the folded sample inside `bounds` (128 is presumably the number of bins
# per axis -- TODO confirm against fast_bin_nd).
grid, values = fast_linbin.fast_bin_nd(rvs, bounds, 128)
# Divide by grid.start_volume times the sample count (presumably converting
# counts to a density -- confirm what start_volume is).
values /= grid.start_volume * rvs.shape[0]
grid

In [None]:
# Left panel: binned folded sample. Right panel: analytical folded density.
fig = plt.figure(figsize=(24, 8))
m2 = grid.full('C')

ax = fig.add_subplot(121)
m = ax.pcolormesh(m2[0], m2[1], values, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Binning of the data')
plt.colorbar(m, ax=ax)

ax = fig.add_subplot(122)
real_pdf = dist_pdf(grid.linear()).reshape(grid.shape)
m = ax.pcolormesh(m2[0], m2[1], real_pdf, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Real distribution')
plt.colorbar(m, ax=ax)

In [None]:
# KDE object on the folded sample; its domain and per-axis methods are
# configured in the next cell, before fitting.
k = sm.kernel_methods.KDE(rvs)

In [None]:
# Domain: axis 0 is [0, +inf), axis 1 is [0, 3].
k.lower = [0, 0]
k.upper = [np.inf, 3]
# Per-axis methods chosen to match how the sample was folded:
# Reflection1D for axis 0 (abs), Cyclic1D for axis 1 (modulo 3).
k.method.methods[0] = kde_1d.Reflection1D()
k.method.methods[1] = kde_1d.Cyclic1D()

In [None]:
# Fit with the configured domain/methods and evaluate on the model's own grid.
est = k.fit()
xs, ys = est.grid()
# Show the bandwidth held by the fitted model.
est.bandwidth

In [None]:
# Left panel: grid-evaluated KDE. Right panel: signed error against the
# analytical folded density.
fig = plt.figure(figsize=(24, 8))
m2 = xs.full('C')

ax = fig.add_subplot(121)
m = ax.pcolormesh(m2[0], m2[1], ys, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Grid estimated KDE')
plt.colorbar(m, ax=ax)

ax = fig.add_subplot(122)
real_pdf = dist_pdf(xs.linear()).reshape(xs.shape)
m = ax.pcolormesh(m2[0], m2[1], ys - real_pdf, shading='gouraud', cmap=plt.cm.coolwarm)
# Symmetric colour limits keep zero error at the centre of the diverging map.
cmin, cmax = m.get_clim()
crange = max(abs(cmin), abs(cmax))
m.set_clim(vmin=-crange, vmax=crange)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Error on the KDE')
plt.colorbar(m, ax=ax)

In [None]:
# 64 x 64 evaluation mesh covering [0, 5] x [0, 3] (the plotting window).
xs3 = kde_utils.Grid([np.r_[0:5:64j], np.r_[0:3:64j]])
# Flatten to one 2-column row per mesh node for point-wise evaluation
# (assumes xs3.full() puts the coordinate axis last -- TODO confirm).
xxs3 = xs3.full().reshape((np.prod(xs3.shape),2))

In [None]:
# Point-wise evaluation of the fitted model at every flattened mesh node.
ys3 = est.pdf(xxs3)

In [None]:
# Give the flat point-wise result the mesh's shape. ndarray.reshape is used
# because assigning to .shape in place is discouraged by NumPy.
ys3 = ys3.reshape(xs3.shape)
# Integrate the point-wise estimate over the mesh.
xs3.integrate(ys3)

In [None]:
# Single panel: the point-wise KDE evaluated on the 64 x 64 mesh.
fig = plt.figure(figsize=(24, 8))
m3 = xs3.full('C')
ax = fig.add_subplot(121)
m = ax.pcolormesh(m3[0], m3[1], ys3, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Point-wise estimated KDE')
plt.colorbar(m, ax=ax)

## Multivariate, with one bounded, one log-transform variable

In [None]:
import statsmodels.sandbox.distributions.mv_normal as mvd

# Mixture components for the bounded / log-transformed example
# (diagonal covariances; axis 1 is defined in log space).
mu1, cov1 = np.array([0.3, -1]), np.diag([.3, .2])
mu2, cov2 = np.array([3, -2.5]), np.diag([.5, 1])
mvn1, mvn2 = mvd.MVNormal(mu1, cov1), mvd.MVNormal(mu2, cov2)

In [None]:
# Seed NumPy's global generator so the sample is reproducible after Restart &
# Run All (assumes the statsmodels samplers draw from np.random -- TODO confirm).
np.random.seed(0)
rvs = mv_mixture_rvs([.7, .3], 15000, [mvn1, mvn2], 2)
# Crop: keep only the draws with a non-negative first coordinate ...
rvs = rvs[rvs[:,0]>=0,:]
# ... and exponentiate the second coordinate, so that it is log-distributed.
rvs[:,1] = np.exp(rvs[:,1])

def pdf_base_(x):
    """Density of the uncropped mixture (weights 0.7 / 0.3) in the original (x0, log x1) space."""
    return 0.7*mvn1.pdf(x) + 0.3*mvn2.pdf(x)

# Probability mass removed by the crop.  Only axis 0 is cropped, and both
# covariances are diagonal, so this is the *marginal* P(X0 < 0) of each
# component -- not the joint cdf at (0, 0), which would also (wrongly) require
# the log-space second coordinate to be <= 0.
cropped = (0.7*stats.norm.cdf(0, loc=mu1[0], scale=np.sqrt(cov1[0, 0]))
           + 0.3*stats.norm.cdf(0, loc=mu2[0], scale=np.sqrt(cov2[0, 0])))

def dist_pdf(x):
    """Density of the cropped, exponentiated sample at the rows of ``x`` (2 columns).

    The second column is mapped back to log space, the base mixture is
    evaluated there, ``kde_1d.transform_distribution`` applies the change of
    variable using ``LogTransform.Dinv`` (presumably the Jacobian term --
    TODO confirm), and the result is renormalised by the mass that survived
    the crop.
    """
    real_x = np.c_[x[:,0], np.log(x[:,1])]
    ys = pdf_base_(real_x)
    out = np.empty_like(ys)
    kde_1d.transform_distribution(real_x[:,1], ys, kde_1d.LogTransform.Dinv, out)
    return out / (1-cropped)

rvs.shape

In [None]:
# Window [[x_min, x_max], [y_min, y_max]] used for binning and as plot limits.
bounds = [[0, 5], [0, 1.2]]

In [None]:
# Bin the sample inside `bounds` (128 is presumably the number of bins per
# axis -- TODO confirm against fast_bin_nd).
grid, values = fast_linbin.fast_bin_nd(rvs, bounds, 128)
# Divide by grid.start_volume times the sample count (presumably converting
# counts to a density -- confirm what start_volume is).
values /= grid.start_volume * rvs.shape[0]
grid

In [None]:
# Left panel: binned sample. Right panel: analytical density on the same mesh.
fig = plt.figure(figsize=(24, 8))
m2 = grid.full('C')

ax = fig.add_subplot(121)
m = ax.pcolormesh(m2[0], m2[1], values, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Binning of the data')
plt.colorbar(m, ax=ax)

ax = fig.add_subplot(122)
real_pdf = dist_pdf(grid.linear()).reshape(grid.shape)
m = ax.pcolormesh(m2[0], m2[1], real_pdf, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Real distribution')
plt.colorbar(m, ax=ax)

In [None]:
# Sanity check: the analytical density was reshaped to grid.shape, so the two
# shapes displayed here should be equal.
grid.shape, real_pdf.shape

In [None]:
# Integrate the analytical density over the binning grid. It only covers the
# `bounds` window, so any mass outside the window is not counted.
grid.integrate(real_pdf)

In [None]:
# KDE object on the cropped / exponentiated sample; per-axis methods and the
# lower bounds are configured in the following cells, before fitting.
k = sm.kernel_methods.KDE(rvs)

In [None]:
# Per-axis methods chosen to match how the sample was built:
# axis 0 was cropped at 0 -> Renormalization;
# axis 1 was exponentiated -> 1D method wrapped in a log transform.
k.method.methods[0] = kde_1d.Renormalization()
k.method.methods[1] = kde_1d.Transform1D(kde_1d.LogTransform)

In [None]:
# Both axes are bounded below by 0 (cropped axis 0, exponentiated axis 1).
k.lower = [0, 0]

In [None]:
# Fit with the configured methods and evaluate on the model's own grid
# (N=512 is presumably the number of grid points per axis -- TODO confirm).
est = k.fit()
xs, ys = est.grid(N=512)
# Show the bandwidth held by the fitted model.
est.bandwidth

In [None]:
# Left panel: grid-evaluated KDE. Right panel: signed error against the
# analytical density.
fig = plt.figure(figsize=(24, 8))
m2 = xs.full('C')

ax = fig.add_subplot(121)
m = ax.pcolormesh(m2[0], m2[1], ys, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Grid estimated KDE')
plt.colorbar(m, ax=ax)

ax = fig.add_subplot(122)
real_pdf = dist_pdf(xs.linear()).reshape(xs.shape)
m = ax.pcolormesh(m2[0], m2[1], ys - real_pdf, shading='gouraud', cmap=plt.cm.coolwarm)
# Symmetric colour limits keep zero error at the centre of the diverging map.
cmin, cmax = m.get_clim()
crange = max(abs(cmin), abs(cmax))
m.set_clim(vmin=-crange, vmax=crange)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Error on the KDE')
plt.colorbar(m, ax=ax)

In [None]:
# Replace the bandwidth setting with a cross-validation selector built on
# bandwidths.CVIMSE (use_grid / folding semantics are defined by
# bandwidths.CrossValidation -- see its documentation) and fit again.
k.bandwidth = bandwidths.CrossValidation(bandwidths.CVIMSE, use_grid=True, folding=10)
est2 = k.fit()

In [None]:
# Evaluate the cross-validated model on its grid (overwrites xs / ys from the
# previous fit) and show the bandwidth it holds.
xs, ys = est2.grid(N=512)
est2.bandwidth

In [None]:
# Same two-panel view as before, now for the cross-validated fit: estimate on
# the left, signed error against the analytical density on the right.
fig = plt.figure(figsize=(24, 8))
m2 = xs.full('C')

ax = fig.add_subplot(121)
m = ax.pcolormesh(m2[0], m2[1], ys, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Grid estimated KDE')
plt.colorbar(m, ax=ax)

ax = fig.add_subplot(122)
real_pdf = dist_pdf(xs.linear()).reshape(xs.shape)
m = ax.pcolormesh(m2[0], m2[1], ys - real_pdf, shading='gouraud', cmap=plt.cm.coolwarm)
# Symmetric colour limits keep zero error at the centre of the diverging map.
cmin, cmax = m.get_clim()
crange = max(abs(cmin), abs(cmax))
m.set_clim(vmin=-crange, vmax=crange)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Error on the KDE')
plt.colorbar(m, ax=ax)

In [None]:
# 64 x 64 evaluation mesh covering [0, 5] x [0, 1.2] (the plotting window).
xs3 = kde_utils.Grid([np.r_[0:5:64j], np.r_[0:1.2:64j]])
# Flatten to one 2-column row per mesh node for point-wise evaluation
# (assumes xs3.full() puts the coordinate axis last -- TODO confirm).
xxs3 = xs3.full().reshape((np.prod(xs3.shape),2))

In [None]:
# Point-wise evaluation of the cross-validated model at every mesh node.
ys3 = est2.pdf(xxs3)

In [None]:
# Give the flat point-wise result the mesh's shape. ndarray.reshape is used
# because assigning to .shape in place is discouraged by NumPy.
ys3 = ys3.reshape(xs3.shape)

In [None]:
# Single panel: the point-wise KDE evaluated on the 64 x 64 mesh.
fig = plt.figure(figsize=(24, 8))
m3 = xs3.full('C')
ax = fig.add_subplot(121)
m = ax.pcolormesh(m3[0], m3[1], ys3, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Point-wise estimated KDE')
plt.colorbar(m, ax=ax)

## Multivariate, with one ordered, one unbounded dimension

In [None]:
from scipy.stats import poisson, norm

# Axis 0 is discrete (Poisson, mean 2); axis 1 is a standard normal.
x_dist = poisson(2)
y_dist = norm()

def dist_pdf(x):
    """Joint density of the two independent axes at the rows of ``x`` (2 columns).

    Column 0 goes through the Poisson pmf and column 1 through the normal pdf;
    the product is the joint density because the axes are sampled independently.
    """
    return x_dist.pmf(x[:,0]) * y_dist.pdf(x[:,1])

# scipy's frozen distributions draw from NumPy's global generator by default,
# so seeding it makes the sample reproducible after Restart & Run All.
np.random.seed(0)
rvs = np.c_[x_dist.rvs(20000), y_dist.rvs(20000)]

In [None]:
# Window [[x_min, x_max], [y_min, y_max]] used for binning and as plot limits.
bounds = [[0, 10], [-5, 5]]

In [None]:
# Bin the sample inside `bounds`. bin_type='DB' presumably selects one bin
# type per axis (first letter for the Poisson axis, second for the normal
# axis) -- TODO confirm against fast_bin_nd.
grid, values = fast_linbin.fast_bin_nd(rvs, bounds, 128, bin_type='DB')
# Divide by grid.start_volume times the sample count (presumably converting
# counts to a density -- confirm what start_volume is).
values /= grid.start_volume * rvs.shape[0]
grid

In [None]:
# Left panel: binned sample. Right panel: analytical density on the same mesh.
fig = plt.figure(figsize=(24, 8))
m2 = grid.full('C')

ax = fig.add_subplot(121)
m = ax.pcolormesh(m2[0], m2[1], values, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Binning of the data')
plt.colorbar(m, ax=ax)

ax = fig.add_subplot(122)
real_pdf = dist_pdf(grid.linear()).reshape(grid.shape)
m = ax.pcolormesh(m2[0], m2[1], real_pdf, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Real distribution')
plt.colorbar(m, ax=ax)

In [None]:
# axis_type='OC' presumably declares axis 0 as ordered and axis 1 as
# continuous, matching the Poisson / normal sample -- TODO confirm.
kde = sm.kernel_methods.KDE(rvs, axis_type='OC')

# Left disabled here: the axis-1 method is overridden in a later cell instead.
#kde.method.methods[1] = kde_1d.KDE1DMethod()
mod = kde.fit()
# Override the fitted bandwidth on axis 0 with 0.1, keeping the fitted value
# for axis 1.
mod.bandwidth = (0.1, mod.bandwidth[1])
xs, ys = mod.grid()
mod.bandwidth

In [None]:
# Integrate the estimate over its grid (sanity check: expected to be close to
# 1 if the estimate is normalised on these bounds).
xs.integrate(ys)

In [None]:
# Left panel: KDE on its own grid. Right panel: signed error against the
# analytical density.
fig = plt.figure(figsize=(24, 8))
m2 = xs.full('C')

ax = fig.add_subplot(121)
m = ax.pcolormesh(m2[0], m2[1], ys, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Estimated KDE')
plt.colorbar(m, ax=ax)

ax = fig.add_subplot(122)
real_pdf = dist_pdf(xs.linear()).reshape(xs.shape)
m = ax.pcolormesh(m2[0], m2[1], ys - real_pdf, shading='gouraud', cmap=plt.cm.coolwarm)
# Symmetric colour limits keep zero error at the centre of the diverging map.
cmin, cmax = m.get_clim()
crange = max(abs(cmin), abs(cmax))
m.set_clim(vmin=-crange, vmax=crange)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Error on the KDE')
plt.colorbar(m, ax=ax)

In [None]:
# Replace the bandwidth setting with a cross-validation selector built on
# bandwidths.CVIMSE (use_grid / folding semantics are defined by
# bandwidths.CrossValidation -- see its documentation), fit again, and show
# the bandwidth of the new model.
kde.bandwidth = bandwidths.CrossValidation(bandwidths.CVIMSE, use_grid=True, folding=10)
mod2 = kde.fit()
mod2.bandwidth

In [None]:
# Evaluate the cross-validated model on its own grid (overwrites xs / ys from
# the previous fit).
xs, ys = mod2.grid()

In [None]:
# Same two-panel view as before, now for the cross-validated fit: estimate on
# the left, signed error against the analytical density on the right.
fig = plt.figure(figsize=(24, 8))
m2 = xs.full('C')

ax = fig.add_subplot(121)
m = ax.pcolormesh(m2[0], m2[1], ys, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Estimated KDE')
plt.colorbar(m, ax=ax)

ax = fig.add_subplot(122)
real_pdf = dist_pdf(xs.linear()).reshape(xs.shape)
m = ax.pcolormesh(m2[0], m2[1], ys - real_pdf, shading='gouraud', cmap=plt.cm.coolwarm)
# Symmetric colour limits keep zero error at the centre of the diverging map.
cmin, cmax = m.get_clim()
crange = max(abs(cmin), abs(cmax))
m.set_clim(vmin=-crange, vmax=crange)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Error on the KDE')
plt.colorbar(m, ax=ax)

In [None]:
# Evaluation mesh: the integers 0..9 on axis 0 (np.r_[0:10:1] excludes 10) and
# 512 evenly spaced points on [-5, 5] on axis 1.
xs3 = kde_utils.Grid([np.r_[0:10:1], np.r_[-5:5:512j]])
# Flatten to one 2-column row per mesh node for point-wise evaluation
# (assumes xs3.full() puts the coordinate axis last -- TODO confirm).
xxs3 = xs3.full().reshape((np.prod(xs3.shape),2))

In [None]:
# Point-wise evaluation of the cross-validated model at every mesh node.
ys3 = mod2.pdf(xxs3)

In [None]:
# Give the flat point-wise result the mesh's shape. ndarray.reshape is used
# because assigning to .shape in place is discouraged by NumPy.
ys3 = ys3.reshape(xs3.shape)
# Integrate the point-wise estimate over the mesh.
xs3.integrate(ys3)

In [None]:
# Single panel: the point-wise KDE evaluated on the integer x 512-point mesh.
fig = plt.figure(figsize=(24, 8))
m3 = xs3.full('C')
ax = fig.add_subplot(121)
m = ax.pcolormesh(m3[0], m3[1], ys3, shading='gouraud', cmap=plt.cm.YlGnBu_r)
ax.set(xlim=bounds[0], ylim=bounds[1], title='Point-wise estimated KDE')
plt.colorbar(m, ax=ax)

In [None]:
# Swap the axis-1 method for kde_1d.KDE1DMethod(), reuse the bandwidth found
# by the cross-validated model instead of searching again, and refit.
kde.method.methods[1] = kde_1d.KDE1DMethod()
kde.bandwidth = mod2.bandwidth
mod3 = kde.fit()

In [None]:
# Point-wise evaluation of the refitted model on the same flattened mesh,
# reshaped to the mesh's shape. ndarray.reshape is used because assigning to
# .shape in place is discouraged by NumPy.
ys4 = mod3.pdf(xxs3).reshape(xs3.shape)

In [None]:
# Integrate the refitted model's point-wise estimate over the mesh, for
# comparison with the integral obtained from the cross-validated model.
xs3.integrate(ys4)