## Implementing Neural Networks for the Cloning Procedure

In [3]:
# Standard Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from utils import generate_pure_random, generate_constrained_random
from experiment import Random
import pickle as pkl
from collections import Counter

# Keras Library for Neural Network
from keras.layers import Dense
from keras.models import Sequential

Using TensorFlow backend.


In [4]:
def create_model(num_features, num_output):
    """Assemble the feed-forward classifier used throughout this notebook.

    The network is three fully connected layers stacked in order:
    128 ReLU units (fed by ``num_features`` inputs), 256 ReLU units, and a
    softmax layer with ``num_output`` units.

    Parameters
    ----------
    num_features : int
        Forwarded as ``input_dim`` of the first layer.
    num_output : int
        Number of units in the final softmax layer.

    Returns
    -------
    keras.models.Sequential
        The assembled model. No ``compile`` call is made here.
    """
    layer_stack = [
        Dense(128, input_dim=num_features, activation='relu'),
        Dense(256, activation='relu'),
        Dense(num_output, activation='softmax'),
    ]
    return Sequential(layer_stack)

def run_network(model, X, y, feature_names, verbose=False, num_epochs=10,
                batch_size=100, data_size=1000):
    """Train the black-box network, then label a synthetic dataset with it.

    Parameters
    ----------
    model : Keras model
        Uncompiled model; it is compiled and fitted in place.
    X : array-like
        Training features.
    y : array-like
        Training targets with one column per class (classes are read back
        with ``np.argmax(y, 1)``).
    feature_names : sequence
        Forwarded to ``generate_constrained_random``.
    verbose : bool or int, optional
        Forwarded to ``model.fit``.
    num_epochs : int, optional
        Number of training epochs.
    batch_size : int, optional
        Mini-batch size for ``model.fit`` (default 100).
    data_size : int, optional
        Forwarded as ``data_size`` to ``generate_constrained_random``
        (default 1000) -- presumably the number of synthetic rows; confirm
        in ``utils``.

    Returns
    -------
    (X_random, y_random)
        ``X_random`` is the ``.values`` array of the generated samples and
        ``y_random`` is ``model.predict(X_random)``.
    """
    # Targets have one column per class and models from create_model end in
    # a softmax layer, so categorical cross-entropy is the matching loss and
    # stays correct when there are more than two classes.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(X, y, epochs=num_epochs, batch_size=batch_size, verbose=verbose)

    # Accuracy on the training data itself: arg-max class of the targets
    # versus arg-max class of the predictions.
    train_accuracy = np.mean(np.equal(np.argmax(y, 1), np.argmax(model.predict(X), 1)))
    # Single-argument print: same text under Python 2, valid under Python 3.
    print('Black Box accuracy:  %s' % train_accuracy)

    X_random = generate_constrained_random(X, feature_names, data_size=data_size).values
    y_random = model.predict(X_random)
    return X_random, y_random

def check_performance(model, X_random, y_random, X, y, verbose=False, num_epochs=10):
    """Fit a whitebox on black-box-labelled samples and score it on real data.

    The model kind is decided by duck typing rather than by catching
    exceptions, so a genuine failure during training or prediction is
    raised to the caller instead of being hidden by a fallback path.

    * A model exposing ``compile`` is treated as a Keras model: it is
      compiled, fitted on ``(X_random, y_random)`` and scored as the
      fraction of rows where the arg-max of ``model.predict(X)`` matches
      the arg-max of ``y``.
    * Any other model is treated as a scikit-learn style estimator: it is
      fitted with ``fit(X_random, y_random)`` and scored with
      ``score(X, y)``.

    Parameters
    ----------
    model : object
        Keras-style model or scikit-learn style estimator (see above).
    X_random, y_random : array-like
        Training samples and the labels assigned to them.
    X, y : array-like
        Evaluation data. For the Keras path ``y`` needs one column per
        class; for the estimator path it is passed to ``score`` unchanged.
    verbose : bool or int, optional
        Forwarded to ``model.fit`` on the Keras path only.
    num_epochs : int, optional
        Number of epochs on the Keras path only.

    Returns
    -------
    float
        Accuracy on ``(X, y)`` for the Keras path, or whatever
        ``model.score`` returns for the estimator path.
    """
    if hasattr(model, 'compile'):
        # One column per class + softmax output (models from create_model)
        # -> categorical cross-entropy, valid for any number of classes.
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        model.fit(X_random, y_random, epochs=num_epochs, batch_size=10, verbose=verbose)
        preds = model.predict(X)
        return np.mean(np.equal(np.argmax(y, 1), np.argmax(preds, 1)))

    # No ``compile`` attribute: use the fit/score estimator protocol.
    model.fit(X_random, y_random)
    return model.score(X, y)

In [3]:
## Iris Dataset
data = datasets.load_iris()
feature_names = data.feature_names

# Keep only classes 0 and 1 so the task is binary. The mask is built once
# and applied to both the features and the labels.
binary_mask = (data.target == 0) | (data.target == 1)
X = data.data[binary_mask, :]
y = pd.get_dummies(data.target[binary_mask]).values
print('%s %s' % (X.shape, y.shape))

# Black box: layer sizes come from the data instead of being hard-coded,
# matching how the other dataset cells build their models.
model = create_model(X.shape[1], y.shape[1])
X_random, y_random = run_network(model, X, y, feature_names, num_epochs=100)
print('%s %s' % (X_random.shape, y_random.shape))

# Whitebox: same architecture, trained only on the black-box-labelled
# samples and scored on the original data.
whitebox = create_model(X_random.shape[1], y_random.shape[1])
score = check_performance(whitebox, X_random, y_random, X, y, verbose=True, num_epochs=10)
print('Whitebox score on original dataset:  %s' % score)

(100, 4) (100, 2)
Black Box accuracy:  1.0
(1000, 4) (1000, 2)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Whitebox score on original dataset:  1.0


In [6]:
## Make Moons Dataset
# A fixed random_state makes the generated sample identical on every run.
data = datasets.make_moons(random_state=0)
X, y = data
y = pd.get_dummies(y).values
feature_names = ['feature1', 'feature2']
print('%s %s' % (X.shape, y.shape))

# Black box: layer sizes come from the data instead of being hard-coded,
# matching how the other dataset cells build their models.
model = create_model(X.shape[1], y.shape[1])
X_random, y_random = run_network(model, X, y, feature_names, num_epochs=100)
print('%s %s' % (X_random.shape, y_random.shape))

# Whitebox: same architecture, trained only on the black-box-labelled
# samples and scored on the original data.
whitebox = create_model(X_random.shape[1], y_random.shape[1])
score = check_performance(whitebox, X_random, y_random, X, y, verbose=True, num_epochs=10)
print('Whitebox score on original dataset:  %s' % score)

(100, 2) (100, 2)
Black Box accuracy:  0.95
(1000, 2) (1000, 2)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Whitebox score on original dataset:  0.95


In [7]:
## Breast Cancer Dataset
data = datasets.load_breast_cancer()
feature_names = data.feature_names
X, y = data.data, pd.get_dummies(data.target).values
print(' '.join(map(str, (X.shape, y.shape))))

# Black box trained on the real data, then used to label random samples.
# NOTE(review): the recorded output shows the same accuracy for black box
# and whitebox; worth checking whether both predict a single class -- confirm.
model = create_model(num_features=X.shape[1], num_output=y.shape[1])
X_random, y_random = run_network(model, X, y, feature_names, num_epochs=10)
print(' '.join(map(str, (X_random.shape, y_random.shape))))

# Whitebox trained only on the black-box-labelled samples.
whitebox = create_model(num_features=X_random.shape[1], num_output=y_random.shape[1])
score = check_performance(
    whitebox, X_random, y_random, X, y,
    verbose=True, num_epochs=10,
)
print(' '.join(['Whitebox score on original dataset: ', str(score)]))

(569, 30) (569, 2)
Black Box accuracy:  0.627416520211
(1000, 30) (1000, 2)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Whitebox score on original dataset:  0.627416520211


In [17]:
## Heart Dataset
data = pd.read_csv('../data/heart.csv')
# Encode the categorical family-history column as 0/1.
data['famhist'] = data['famhist'].map({"Absent": 0, "Present": 1})
# pop() removes the target column, so what is left in `data` is features only.
y = pd.get_dummies(data.pop("chd")).values
X, feature_names = data.values, data.columns
print(' '.join(map(str, (X.shape, y.shape))))

# Black box trained on the real data, then used to label random samples.
model = create_model(num_features=X.shape[1], num_output=y.shape[1])
X_random, y_random = run_network(model, X, y, feature_names, num_epochs=100)
print(' '.join(map(str, (X_random.shape, y_random.shape))))

# Whitebox trained only on the black-box-labelled samples.
whitebox = create_model(num_features=X_random.shape[1], num_output=y_random.shape[1])
score = check_performance(
    whitebox, X_random, y_random, X, y,
    verbose=True, num_epochs=10,
)
print(' '.join(['Whitebox score on original dataset: ', str(score)]))

(462, 9) (462, 2)
Black Box accuracy:  0.692640692641
(1000, 9) (1000, 2)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Whitebox score on original dataset:  0.666666666667


In [18]:
## Using other statistical models as the whitebox with above neural network as black box
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

# Collapse each row to its arg-max class index. The results go into new
# names so that `y` / `y_random` keep their one-column-per-class form and
# this cell can be re-run without failing on already-flattened arrays.
y_labels = np.argmax(y, 1)
y_random_labels = np.argmax(y_random, 1)

models = [LogisticRegression(), KNeighborsClassifier(), RandomForestClassifier()]
# The loop variable is deliberately not called `model`, so the trained
# black-box network bound to `model` is not overwritten.
for whitebox_model in models:
    score = check_performance(whitebox_model, X_random, y_random_labels, X, y_labels, verbose=False)
    print('Whitebox score on original dataset:  %s' % score)

Whitebox score on original dataset:  0.664502164502
Whitebox score on original dataset:  0.651515151515
Whitebox score on original dataset:  0.649350649351


In [7]:
## Adult Census dataset
# Features and labels are read from two separate pickle files.
with open('../../../Dataset/Census/train.pkl', 'rb') as fp:
    data = pkl.load(fp)
with open('../../../Dataset/Census/label.pkl', 'rb') as fp:
    label = pkl.load(fp)

X, feature_names = data.values, data.columns
y = pd.get_dummies(label).values
print(' '.join(map(str, (X.shape, y.shape))))

# Black box trained on the real data, then used to label random samples.
model = create_model(num_features=X.shape[1], num_output=y.shape[1])
X_random, y_random = run_network(model, X, y, feature_names, num_epochs=100)
print(' '.join(map(str, (X_random.shape, y_random.shape))))

# Whitebox trained only on the black-box-labelled samples.
whitebox = create_model(num_features=X_random.shape[1], num_output=y_random.shape[1])
score = check_performance(
    whitebox, X_random, y_random, X, y,
    verbose=False, num_epochs=100,
)
print(' '.join(['Whitebox score on original dataset: ', str(score)]))

(32561, 107) (32561, 2)
Black Box accuracy:  0.759190442554
(1000, 107) (1000, 2)
Whitebox score on original dataset:  0.759190442554


In [None]:
## Using other statistical models as the whitebox with above neural network as black box
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

# Collapse each row to its arg-max class index. The results go into new
# names so that `y` / `y_random` keep their one-column-per-class form and
# this cell can be re-run without failing on already-flattened arrays.
y_labels = np.argmax(y, 1)
y_random_labels = np.argmax(y_random, 1)
# Class balance of the labels the black box assigned to the random samples.
print(Counter(y_random_labels))

models = [LogisticRegression(), KNeighborsClassifier(), RandomForestClassifier()]
# The loop variable is deliberately not called `model`, so the trained
# black-box network bound to `model` is not overwritten.
for whitebox_model in models:
    score = check_performance(whitebox_model, X_random, y_random_labels, X, y_labels, verbose=False)
    print('Whitebox score on original dataset:  %s' % score)

In [10]:
## Mushroom Dataset
# Features and labels are read from two separate pickle files.
with open('../../../Dataset/Mushroom/train.pkl', 'rb') as fp:
    data = pkl.load(fp)
with open('../../../Dataset/Mushroom/label.pkl', 'rb') as fp:
    label = pkl.load(fp)

X, feature_names = data.values, data.columns
y = pd.get_dummies(label).values
print(' '.join(map(str, (X.shape, y.shape))))

# Black box trained on the real data, then used to label random samples.
model = create_model(num_features=X.shape[1], num_output=y.shape[1])
X_random, y_random = run_network(model, X, y, feature_names, num_epochs=10)
print(' '.join(map(str, (X_random.shape, y_random.shape))))

# Whitebox trained only on the black-box-labelled samples.
whitebox = create_model(num_features=X_random.shape[1], num_output=y_random.shape[1])
score = check_performance(
    whitebox, X_random, y_random, X, y,
    verbose=False, num_epochs=300,
)
print(' '.join(['Whitebox score on original dataset: ', str(score)]))

(8124, 112) (8124, 2)
Black Box accuracy:  1.0
(1000, 112) (1000, 2)
Whitebox score on original dataset:  0.612875430822


In [6]:
## Bridges Dataset
# Features and labels are read from two separate pickle files.
with open('../../../Dataset/Bridges/train.pkl', 'rb') as fp:
    data = pkl.load(fp)
with open('../../../Dataset/Bridges/label.pkl', 'rb') as fp:
    label = pkl.load(fp)

X, feature_names = data.values, data.columns
y = pd.get_dummies(label).values
print(' '.join(map(str, (X.shape, y.shape))))

# Black box trained on the real data, then used to label random samples.
model = create_model(num_features=X.shape[1], num_output=y.shape[1])
X_random, y_random = run_network(model, X, y, feature_names, num_epochs=10)
print(' '.join(map(str, (X_random.shape, y_random.shape))))

# Whitebox trained only on the black-box-labelled samples.
whitebox = create_model(num_features=X_random.shape[1], num_output=y_random.shape[1])
score = check_performance(
    whitebox, X_random, y_random, X, y,
    verbose=False, num_epochs=300,
)
print(' '.join(['Whitebox score on original dataset: ', str(score)]))

(106, 29) (106, 2)
Black Box accuracy:  0.905660377358
(1000, 29) (1000, 2)
Whitebox score on original dataset:  0.905660377358
