## Using a hybrid of Random Forest and Neural Network 
Random Forest as Black Box and Neural Network as the White Box and vice versa

In [1]:
%matplotlib inline
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import sys
sys.path.insert(0, '../utils/')
from generate_random import generate_random
sys.path.insert(0, '../Neural Nets/')
from black_box import blackBox

In [3]:
from sklearn.ensemble import RandomForestRegressor

In [4]:
df = pd.read_csv('../data/heart.csv')
labels = pd.get_dummies(df.pop('chd'))
df['famhist'] = df['famhist'].map({"Absent":0,"Present":1})
data = df.values
print data.shape
print labels.shape

(462, 9)
(462, 2)


In [5]:
## Training the black box
rfc = RandomForestRegressor()
rfc.fit(data, labels)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [6]:
## Generating random data
data_size = 50000
random_data = generate_random(data_size).values
print random_data.shape

(50000, 9)


In [7]:
## Prediction of black box on the random data
predictions = rfc.predict(random_data)
print predictions.shape

(50000, 2)


In [8]:
## Constructing the WHITE BOX
## Defining the parameters
params = {}
params['batch_size'] = 100
params['learning_rate'] = 1e-4
params['n_epochs'] = 10
NUM_FEATURES = 9
NUM_CLASSES = 2
NUM_NEURONES = 1024

## Assembling the graph
print 'Assembling the graph....'
model = blackBox(params,"black_box")
image,label = model._create_placeholder(num_input=NUM_FEATURES,num_output=NUM_CLASSES,layer_name="placeholder_bb")
hidden_1 = model._create_hidden_layer(image,NUM_NEURONES,"hidden_1_bb")
hidden_2 = model._create_hidden_layer(hidden_1,NUM_NEURONES,"hidden_2_bb")
softmax = model._create_softmax(hidden_2,NUM_CLASSES,"softmax_bb")
model._create_loss(label,softmax,"loss_bb")
model._create_optimizer("optimizer_bb")
model._accuracy(label,softmax,"accuracy_bb")
model._create_summaries("summary_bb")

Assembling the graph....


In [9]:
print 'Training the model...'
model.train((random_data,predictions),file_name="hybrid")

Training the model...
Restoring checkpoint...
INFO:tensorflow:Restoring parameters from ../checkpoints/hybrid/hybrid


100%|██████████| 500/500 [00:13<00:00, 36.43it/s]
  1%|          | 4/500 [00:00<00:14, 35.12it/s]

Epoch: 1	Loss: 321.115026534	Accuracy: 0.796599998355


100%|██████████| 500/500 [00:13<00:00, 36.84it/s]
  1%|          | 4/500 [00:00<00:14, 34.71it/s]

Epoch: 2	Loss: 317.148293734	Accuracy: 0.817019998252


100%|██████████| 500/500 [00:13<00:00, 36.76it/s]
  1%|          | 4/500 [00:00<00:14, 35.17it/s]

Epoch: 3	Loss: 316.177618325	Accuracy: 0.822599999785


100%|██████████| 500/500 [00:14<00:00, 35.05it/s]
  1%|          | 4/500 [00:00<00:13, 37.22it/s]

Epoch: 4	Loss: 315.959846199	Accuracy: 0.826179998994


100%|██████████| 500/500 [00:13<00:00, 37.73it/s]


Saving Checkpoint...


  1%|          | 4/500 [00:00<00:13, 37.31it/s]

Epoch: 5	Loss: 315.306266129	Accuracy: 0.831119999051


100%|██████████| 500/500 [00:13<00:00, 38.38it/s]
  1%|          | 4/500 [00:00<00:12, 38.40it/s]

Epoch: 6	Loss: 314.872749746	Accuracy: 0.832459999144


100%|██████████| 500/500 [00:13<00:00, 37.41it/s]
  1%|          | 4/500 [00:00<00:12, 38.47it/s]

Epoch: 7	Loss: 314.561488569	Accuracy: 0.834459999084


100%|██████████| 500/500 [00:13<00:00, 37.85it/s]
  1%|          | 4/500 [00:00<00:12, 38.92it/s]

Epoch: 8	Loss: 313.839312971	Accuracy: 0.837019998789


100%|██████████| 500/500 [00:14<00:00, 35.31it/s]
  1%|          | 4/500 [00:00<00:13, 36.34it/s]

Epoch: 9	Loss: 313.607651711	Accuracy: 0.838639999986


100%|██████████| 500/500 [00:14<00:00, 37.17it/s]


Saving Checkpoint...
Epoch: 10	Loss: 313.111818552	Accuracy: 0.842820000291


In [10]:
output = model.predict(data,file_name="hybrid")

Restoring checkpoint...
INFO:tensorflow:Restoring parameters from ../checkpoints/hybrid/hybrid


In [11]:
print output.shape

(462, 2)


In [12]:
print np.mean(np.equal(np.argmax(output, 1),np.argmax(labels.values, 1)))

0.619047619048


In [13]:
print rfc.score(data, labels.values)

0.830478890728


** Results didn't improve much on using a hybrid approach towards the cloning problem **