In [None]:
# Kalman filter example demo in Python

# A Python implementation of the example given in pages 11-15 of "An
# Introduction to the Kalman Filter" by Greg Welch and Gary Bishop,
# University of North Carolina at Chapel Hill, Department of Computer
# Science, TR 95-041,
# https://www.cs.unc.edu/~welch/media/pdf/kalman_intro.pdf

# by Andrew D. Straw

import numpy as np
import matplotlib.pyplot as plt

plt.rcParams['figure.figsize'] = (10, 8)

# initial parameters
n_iter = 50
sz = (n_iter,) # size of array
x = -0.37727 # truth value (typo in example at top of p. 13 calls this z)

# Draw the observations from a dedicated, seeded generator so the figures are
# the same on every run; the global NumPy random state is left untouched.
SEED = 0
rng = np.random.RandomState(SEED)
z = rng.normal(x,0.1,size=sz) # observations (normal about x, sigma=0.1)

Q = 1e-5 # process variance

# allocate space for arrays
xhat=np.zeros(sz)      # a posteriori estimate of x
P=np.zeros(sz)         # a posteriori error estimate
xhatminus=np.zeros(sz) # a priori estimate of x
Pminus=np.zeros(sz)    # a priori error estimate
K=np.zeros(sz)         # gain or blending factor

R = 0.1**2 # estimate of measurement variance, change to see effect

# initial guesses
xhat[0] = 0.0
P[0] = 1.0

# Step 0 only holds the initial guesses, so the recursion starts at k = 1.
for k in range(1,n_iter):
    # time update: the state is modelled as constant, so the prediction is the
    # previous estimate and only the error estimate grows, by Q
    xhatminus[k] = xhat[k-1]
    Pminus[k] = P[k-1]+Q

    # measurement update: blend the prediction with measurement z[k] using gain K[k]
    K[k] = Pminus[k]/( Pminus[k]+R )
    xhat[k] = xhatminus[k]+K[k]*(z[k]-xhatminus[k])
    P[k] = (1-K[k])*Pminus[k]

# Figure 1: measurements, filter estimate and true value per iteration.
plt.figure()
plt.plot(z,'k+',label='noisy measurements')
plt.plot(xhat,'b-',label='a posteriori estimate')
plt.axhline(x,color='g',label='truth value')
plt.legend()
plt.title('Estimate vs. iteration step', fontweight='bold')
plt.xlabel('Iteration')
plt.ylabel('Voltage')

# Figure 2: a priori error estimate per iteration.
plt.figure()
valid_iter = range(1,n_iter) # Pminus not valid at step 0
plt.plot(valid_iter,Pminus[valid_iter],label='a priori error estimate')
# Raw string: the mathtext backslashes must reach matplotlib unchanged, and
# "\i", "\m" and "\ " are not valid Python escape sequences.
plt.title(r'Estimated $\it{\mathbf{a \ priori}}$ error vs. iteration step', fontweight='bold')
plt.xlabel('Iteration')
plt.ylabel('$(Voltage)^2$')
plt.setp(plt.gca(),'ylim',[0,.01])
plt.show()



In [3]:
! pip install requests

Collecting requests
[?25l  Downloading https://files.pythonhosted.org/packages/7d/e3/20f3d364d6c8e5d2353c72a67778eb189176f08e873c9900e10c0287b84b/requests-2.21.0-py2.py3-none-any.whl (57kB)
[K    100% |████████████████████████████████| 61kB 5.1MB/s 
[?25hCollecting chardet<3.1.0,>=3.0.2 (from requests)
[?25l  Downloading https://files.pythonhosted.org/packages/bc/a9/01ffebfb562e4274b6487b4bb1ddec7ca55ec7510b22e4c51f14098443b8/chardet-3.0.4-py2.py3-none-any.whl (133kB)
[K    100% |████████████████████████████████| 143kB 18.4MB/s 
[?25hCollecting urllib3<1.25,>=1.21.1 (from requests)
[?25l  Downloading https://files.pythonhosted.org/packages/df/1c/59cca3abf96f991f2ec3131a4ffe72ae3d9ea1f5894abe8a9c5e3c77cfee/urllib3-1.24.2-py2.py3-none-any.whl (131kB)
[K    100% |████████████████████████████████| 133kB 26.5MB/s 
[?25hCollecting idna<2.9,>=2.5 (from requests)
[?25l  Downloading https://files.pythonhosted.org/packages/14/2c/cd551d81dbe15200be1cf41cd03869a46fe7226e7450af7a6545bfc47

In [14]:
import cudf, nvcategory, requests
from librmm_cffi import librmm
import numpy as np
from io import StringIO
from cuml import NearestNeighbors as cumlKNN
def cat_to_series(col):
    """Return a ``cudf.Series`` of int32 values derived from the strings in ``col``.

    A column whose dtype is not ``'object'`` is cast to ``str`` first. An int32
    device buffer with ``col.data.size()`` elements is allocated through librmm,
    and the nvcategory built from the column's strings writes its values into
    that buffer through the raw device pointer.

    NOTE(review): presumably the written values are the per-row category
    codes -- confirm against the nvcategory documentation.
    """
    strings_col = col.astype('str') if col.dtype != 'object' else col

    n_values = strings_col.data.size()
    codes = librmm.device_array(n_values, dtype=np.int32)

    category = nvcategory.from_strings(strings_col.data)
    category.values(devptr=codes.device_ctypes_pointer.value)
    return cudf.Series(codes)

# Download the "tips" CSV and parse it into a cudf DataFrame.
url="https://github.com/plotly/datasets/raw/master/tips.csv"
# Bound the wait and fail loudly on an HTTP error status; otherwise an error
# page would be handed to the CSV parser as if it were the dataset.
response = requests.get(url, timeout=30)
response.raise_for_status()
content = response.content.decode('utf-8')
tips_cudf = cudf.read_csv(StringIO(content))
# Replace each string-valued column by the integer series from cat_to_series
# so that every column passed to the KNN model is numeric.
string_cols = ['sex','smoker','day','time']
for cat_col in string_cols:
    tips_cudf[cat_col] = cat_to_series(tips_cudf[cat_col])
# Fit on the full frame, then query the same rows for their nearest neighbours.
knn_cuml = cumlKNN()
knn_cuml.fit(tips_cudf)
n_neighbors = 10
# kneighbors returns (distances, indices): D_cuml holds the distances to the
# n_neighbors nearest rows and I_cuml the row indices of those neighbours.
D_cuml,I_cuml = knn_cuml.kneighbors(tips_cudf,n_neighbors)

In [11]:
# Display the column labels of the tips frame.
tips_cudf.columns

Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size'], dtype='object')

'0.6.1+1.g9ca9325'

In [None]:
import cudf, nvcategory, requests
from librmm_cffi import librmm
import numpy as np
from io import StringIO
from cuml import NearestNeighbors as cumlKNN
def cat_to_series(col):
    """Return a ``cudf.Series`` of int32 values derived from the strings in ``col``.

    A column whose dtype is not ``'object'`` is cast to ``str`` first. An int32
    device buffer with ``col.data.size()`` elements is allocated through librmm,
    and the nvcategory built from the column's strings writes its values into
    that buffer through the raw device pointer.

    NOTE(review): presumably the written values are the per-row category
    codes -- confirm against the nvcategory documentation.
    """
    strings_col = col.astype('str') if col.dtype != 'object' else col

    n_values = strings_col.data.size()
    codes = librmm.device_array(n_values, dtype=np.int32)

    category = nvcategory.from_strings(strings_col.data)
    category.values(devptr=codes.device_ctypes_pointer.value)
    return cudf.Series(codes)

# Download the "tips" CSV and parse it into a cudf DataFrame.
url="https://github.com/plotly/datasets/raw/master/tips.csv"
# Bound the wait and fail loudly on an HTTP error status; otherwise an error
# page would be handed to the CSV parser as if it were the dataset.
response = requests.get(url, timeout=30)
response.raise_for_status()
content = response.content.decode('utf-8')
tips_cudf = cudf.read_csv(StringIO(content))
# Replace each string-valued column by the integer series from cat_to_series
# so that every column passed to the KNN model is numeric.
string_cols = ['sex','smoker','day','time']
for cat_col in string_cols:
    tips_cudf[cat_col] = cat_to_series(tips_cudf[cat_col])
# Fit on the full frame, then query the same rows for their nearest neighbours.
knn_cuml = cumlKNN()
knn_cuml.fit(tips_cudf)
n_neighbors = 10
# kneighbors returns (distances, indices): D_cuml holds the distances to the
# n_neighbors nearest rows and I_cuml the row indices of those neighbours
# (indices into the fitted data, not class labels).
D_cuml,I_cuml = knn_cuml.kneighbors(tips_cudf,n_neighbors)