In [16]:
from scipy.spatial import ConvexHull, Delaunay
from scipy.stats import dirichlet

import pandas as pd
import numpy as np
from numpy.linalg import det




from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

In [20]:
diabetes = load_diabetes()

In [21]:
X = diabetes["data"]
y = diabetes["target"]

In [196]:
X_train = X[:380]
y_train = y[:380]
X_test  = X[380:]
y_test  = y[380:]

In [3]:
def get_ConvexHull(X):
    """Returns points that form convex hull of X"""
    
    hull = ConvexHull(X)
    
    return X[hull.vertices, :]
    

In [15]:
def sampleConvexHull(X):
    """Returns sample from convex hull by taking convex combination points X
    Input:
        X ~ vertices of convex hull (hypothetically)
        
    Ouput:
        x_hat : convex combination of uniformly sampled weights
    
    """
    
    d = X.shape[0]
    wghts = np.random.rand(d)
    wghts /= wghts.sum()
    
    return wghts.dot(X)

In [154]:
def kernelRFdist(X, xstar, rf):
    """For new data point get kernel distance with each training data point from a random forrest"""
    
    n = X.shape[0]
    kernel_rf = np.zeros(n)
    for tree in rf.estimators_:
        
        leaves_tr = tree.apply(X)
        leaf_str = tree.apply(xstar.reshape(1, -1))
        kernel = 1*(leaf_str == leaves_tr[None,:])
        kernel_rf = kernel_rf + kernel
        
    return kernel_rf/rf.n_estimators

In [166]:
def boostrap_subset(X, M, size=1000):
    """Returns boostrap of X with subset of columns of size M"""
    n, p = X.shape
    b_ids = np.random.choice(np.arange(n), size=size)
    
    p_ids = np.random.choice(np.arange(p), size=M, replace=False)
    
    return X[b_ids, p_ids]

In [200]:
def local_linear(X,y, xstar, kern):
    """Fits local linear model with kernl"""
    
    n,p = X.shape
    beta = np.zeros(p)
    gamma_n = np.zeros((p,p))
    sigma_n = np.zeros(p)
    for i in range(n):
        gamma_n = gamma_n + (X[i,:][:,None] @ X[i,:][None,:] )* kern[i]
        sigma_n = sigma_n +  X[i, :]*y[i] * kern[i]
    
    beta = np.linalg.inv(gamma_n) @ sigma_n
    
    return xstar.dot(beta)

In [201]:
rf = RandomForestRegressor()

In [202]:
rf.fit(X_train, y_train)

RandomForestRegressor()

In [203]:
kr = kernelRFdist(X_train, X_test[0,:], rf)

In [204]:
local_linear(X_train, y_train, X_test[0,:], kern=kr.ravel())

144.4253008299617

In [205]:
y_test[0]

52.0

In [121]:
X_test[0,:].reshape(1, -1).shape

(1, 10)

In [122]:
X_train.shape

(380, 10)

In [133]:
rf.estimators_[0].apply(X_train).shape

(380,)

In [159]:
np.random.choice(X_test, 200)

ValueError: a must be 1-dimensional

In [None]:
X_

In [161]:
bids = np.random.choice(np.arange(380), size=1000)

In [163]:
X_train[bids, :].shape

(1000, 10)