### TODO
Add Regularization
Selection of Centers via Orthogonal Least Squares
Plot the errors based on different parameter settings
Gradient Descent optimization
Different values of SIGMA
Different kernel functions
Multiple Dimensions
Pure Tensorflow (no dataframes)

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.cluster import KMeans
#from sklearn.metrics.pairwise import euclidean_distances
from sklearn.model_selection import train_test_split
import sys

import matplotlib.pyplot as plt

# --- Experiment configuration -------------------------------------------
# Width parameter of the radial basis function (it is squared in the kernel).
SIGMA = 2.0
# Number of RBF nodes, i.e. the number of centers / design-matrix columns.
NUM_NODES = 8
# Fraction of the rows held out as the test set.
TEST_PCT = 0.2
# Feature column(s) used as the network input.
COLS = ['A']
# Column holding the value to be predicted.
TARGET_COL = 'Target'
# How centers are chosen: "km" runs k-means, anything else samples rows
# at random (conventionally "random").
CLUSTER_METHOD = 'random'

In [None]:
# Read the input CSV into a DataFrame. The directory holding the file
# differs between the Windows machine and every other platform.
if sys.platform.startswith('win'):
    data_loc = 'D:/Data/Loyalty Vision/'
else:
    data_loc = "/home/tom/data/"
filenm = "rbf_data.csv"
csv_path = data_loc + filenm
df = pd.read_csv(csv_path, delimiter=',')

In [None]:
# Hold out TEST_PCT of the rows for testing; the rest is used for training.
df_train, df_test = train_test_split(df, test_size=TEST_PCT)

# Features stay as DataFrames restricted to the configured columns.
train_x, test_x = df_train[COLS], df_test[COLS]

# The training target is wrapped in a one-column DataFrame (its 2-D shape is
# what the y_ placeholder is sized from); the test target is left as a Series.
train_y = pd.DataFrame(df_train[TARGET_COL])
test_y = df_test[TARGET_COL]

n_train, n_test = len(df_train), len(df_test)
print('Rec Counts  Training: {:,} Test  {:,}'.format(n_train, n_test))

In [None]:
# Every RBF node is anchored at a center in feature space; the node's
# activation for a sample depends on the sample's distance to that center.
# `centers` ends up as a (NUM_NODES, number-of-feature-columns) array.
if CLUSTER_METHOD == 'km':
    # Use the k-means cluster centroids of the training features as centers.
    print('Clustering via k-means')
    centers = KMeans(n_clusters=NUM_NODES).fit(train_x).cluster_centers_
else:
    # Any other setting falls back to picking NUM_NODES training rows at random.
    print('Clustering via random sample')
    # Take all feature columns of the sampled rows instead of a hard-coded
    # 'A' column, so the centers follow whatever COLS selects. For a single
    # feature column this yields the same (NUM_NODES, 1) array as before.
    centers = train_x.sample(NUM_NODES).values.reshape(NUM_NODES, -1)

In [None]:
# Set up the design matrix
# Placeholders: `c` is one node's center, `x` a batch of input rows, `y_` the
# matching targets and `DM` a design matrix with one column per node.
c   = tf.placeholder("float", shape=[train_x.shape[1]])
x   = tf.placeholder("float", shape=[None,train_x.shape[1]])
y_  = tf.placeholder("float", shape=[None,train_y.shape[1]])
DM  = tf.placeholder("float", shape=[None,NUM_NODES])

# "rbf" is the radial basis function exp(-(x - c)^2 / SIGMA^2). Every node
# applies the same function and differs only in its center "c".
# Plain arithmetic operators are used instead of tf.sub / tf.div, which were
# renamed in TensorFlow 1.0; the operators work on old and new versions alike.
rbf = tf.exp(-((x - c) ** 2) / SIGMA ** 2)

# Evaluate the RBF once per center and stack the results side by side: each
# sample is processed by each node, so N samples and NUM_NODES nodes give an
# N x NUM_NODES array. One session serves all nodes, and no variable
# initialisation is needed because the graph contains only placeholders and ops.
with tf.Session() as sess:
    node_outputs = [
        # With a single feature column each run yields an (N, 1) array;
        # flatten it so it becomes one column of the design matrix.
        sess.run(rbf, feed_dict={x: train_x, c: center}).flatten()
        for center in centers
    ]

# Columns are stacked directly, so no dummy zero column has to be inserted
# first and deleted afterwards.
rbf_array = np.column_stack(node_outputs)

#### Optimize the weights
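The weights are found in closed form by solving the normal equations with a series of matrix manipulations: $w = (\Phi^\top \Phi)^{-1} \Phi^\top y$, where $\Phi$ is the design matrix `DM`.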

In [None]:
# Closed-form least-squares weights: (DM^T DM)^-1 DM^T y_.
# Gram matrix of the design matrix, DM^T DM.
gram = tf.matmul(DM, DM, transpose_a=True)
# Inverse of the Gram matrix times DM^T.
projection = tf.matmul(tf.matrix_inverse(gram), DM, transpose_b=True)
# Applying that to the targets gives one weight per node.
weights = tf.matmul(projection, y_)

#### Run the optimization job

In [None]:
# Evaluate the closed-form weight expression on the training design matrix
# and the training targets. The deprecated tf.initialize_all_variables() call
# is dropped: the graph has no variables, so there is nothing to initialise.
with tf.Session() as sess:
    opt_weights = sess.run(weights, feed_dict={DM: rbf_array, y_: train_y})

#### Generate fitted values

In [None]:
# Now that the weights are optimized, run the test batch through the network.
# The fitted values are the test design matrix multiplied by the weights.
final = tf.matmul(DM, weights)

# One session covers both steps; no variable initialisation is needed because
# the graph contains only placeholders and ops.
with tf.Session() as sess:
    # First, build a new design matrix: the test_x values run through the rbf,
    # one column per center. The columns are stacked directly instead of being
    # inserted next to a dummy zero column that is deleted afterwards.
    rbf_array = np.column_stack([
        sess.run(rbf, feed_dict={x: test_x, c: center}).flatten()
        for center in centers
    ])

    # Feed the already-solved weights in place of the `weights` tensor so the
    # training targets do not have to be supplied again.
    fitted_y = sess.run(final, feed_dict={DM: rbf_array, weights: opt_weights})

In [None]:
# Root-mean-square error of the fitted values on the test set.
# The targets are reshaped into a column so they line up element-wise with
# fitted_y; -1 lets NumPy infer the row count rather than assuming the test
# set has exactly 100 rows.
actual_y = test_y.values.reshape(-1, 1)
rmse = np.sqrt(np.mean((fitted_y - actual_y) ** 2))
print('RMSE: {:.2f}'.format(rmse))

In [None]:
# Scatter the actual test targets and the fitted values against the input.
with plt.style.context('seaborn-whitegrid'):
    ax = plt.gca()
    # Actual values first, fitted values drawn on top in red.
    ax.scatter(test_x, test_y, label='Actual')
    ax.scatter(test_x, fitted_y, label='Fitted', c='red')
    ax.set_xlabel("X value")
    ax.set_ylabel("Y value")
    ax.legend(loc='upper left')
    plt.show()