In [None]:
# import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import keras
import lime
from lime import lime_tabular
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split

In [None]:
# Load the Iris CSV; column names are supplied explicitly via `names`
dataset = pd.read_csv(
    '../dataset/iris/iris.data',
    names=['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'class'],
)

In [None]:
# Preview every 20th row of the dataset
dataset.iloc[::20]

In [None]:
# Collect the distinct class labels, then print them
class_names = dataset['class'].unique()
print('Dataset classes:')
print(class_names)

In [None]:
# Count the rows per class and plot the class balance as a bar chart
barplot_data = (
    dataset.groupby('class', as_index=False)
    .count()[['class', 'SepalLength']]
    .rename(columns={'SepalLength': 'count'})
)
sns.barplot(data=barplot_data, x='class', y='count')
plt.show()

In [None]:
# Descriptive statistics of the dataset
dataset.describe()

In [None]:
# General information about the dataset
dataset.info()

In [None]:
# Pairwise plots of the dataset columns, coloured by class
sns.pairplot(data=dataset, hue='class')
plt.show()

In [None]:
# Example values listed in the order: SepalLength, SepalWidth, PetalLength, PetalWidth.
# NOTE(review): this tuple is only displayed; it is not assigned or used in the
# cells visible here — confirm whether it is still needed.
8, 3, 6.5, 2.5

In [None]:
# Replace each class name with its integer code, in place
label_codes = {
    'Iris-setosa': 0,
    'Iris-versicolor': 1,
    'Iris-virginica': 2,
}
for species, code in label_codes.items():
    dataset.loc[dataset['class'] == species, 'class'] = code

# Preview every 20th row to check the new labels
dataset[::20]

In [None]:
# Separate the first four columns (features, X) from the fifth column (label, y);
# both stay pandas objects here
X = dataset.iloc[:, :4]
y = dataset.iloc[:, 4]

# Report the shape of each part
for caption, part in (("Shape of X", X), ("Shape of y", y)):
    print(caption, part.shape)

In [None]:
# Show every 50th feature row and the labels at the same positions
print('Examples of X:')
print(X.iloc[::50])

print('Examples of y:')
print(y.iloc[::50])

In [None]:
# Normalize the four feature columns (axis=0, i.e. per column) and re-attach
# the class labels.
# NOTE(review): the normalization is computed over all rows, before the
# train/test split performed later — confirm this is intended.
feature_columns = dataset.columns[:4]
normalized_dataset = pd.DataFrame(
    normalize(dataset[feature_columns], axis=0),
    columns=feature_columns,
)
normalized_dataset['class'] = dataset['class']
print('Examples of normalized_dataset:')
normalized_dataset.iloc[::50]

In [None]:
# Create the training set (80 %) and the test set (20 %); no separate
# validation set is produced here.
# A fixed random_state makes the shuffle reproducible, so the reported
# accuracy and the test row picked for the explanation do not change from
# one kernel restart to the next.
total_length = len(normalized_dataset)
train, test = train_test_split(
    normalized_dataset,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

# Sanity check: the split must neither drop nor duplicate rows
assert len(train) + len(test) == total_length

# Features are the first four columns, the class label is the fifth
X_train = train.iloc[:, 0:4]
X_test = test.iloc[:, 0:4]
y_train = train.iloc[:, 4]
y_test = test.iloc[:, 4]

# Print train set and test set shapes
print("Length of train set x:", X_train.shape, "y:", y_train.shape)
print("Length of test set x:", X_test.shape, "y:", y_test.shape)

In [None]:
# Neural network module
from keras.models import Sequential 
from keras.layers import Dense, Activation, Dropout 
from keras.utils import np_utils
import tensorflow as tf

In [None]:
# Change the labels to one-hot vectors

# [0]--->[1 0 0]
# [1]--->[0 1 0]
# [2]--->[0 0 1]

# Encode from the `train` / `test` frames instead of from `y_train` / `y_test`
# themselves, so re-running this cell does not one-hot encode labels that are
# already encoded. The labels are cast to int explicitly because the 'class'
# column may not have an integer dtype after the string-to-number replacement.
# `keras.utils.to_categorical` is the public entry point for this helper.
y_train = keras.utils.to_categorical(train.iloc[:, 4].to_numpy(dtype=int), num_classes=3)
y_test = keras.utils.to_categorical(test.iloc[:, 4].to_numpy(dtype=int), num_classes=3)

# Print the one-hot label shapes of the train set and test set
print("Shape of y_train", y_train.shape)
print("Shape of y_test", y_test.shape)

In [None]:
# Fully connected classifier: 4 inputs -> 1000 -> 500 -> 300 ReLU units,
# dropout of 0.2, then a 3-unit softmax output
model = Sequential([
    Dense(1000, input_dim=4, activation='relu'),
    Dense(500, activation='relu'),
    Dense(300, activation='relu'),
    Dropout(0.2),
    Dense(3, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Print a summary of the model defined above
model.summary()

In [None]:
# Train on the training set: 10 epochs, batches of 20 samples, progress shown
model.fit(
    x=X_train,
    y=y_train,
    batch_size=20,
    epochs=10,
    verbose=True,
)

In [None]:
# Evaluate on the test set: compare the arg-max of the predicted scores with
# the arg-max of the one-hot ground truth and report the percentage of matches
prediction = model.predict(X_test)
predict_label = np.argmax(prediction, axis=1)
y_label = np.argmax(y_test, axis=1)
length = len(prediction)
hits = (y_label == predict_label).sum()
accuracy = hits / length * 100
print("Accuracy of the dataset", accuracy)

In [None]:
# Create the LIME explainer from the training features.
# random_state fixes LIME's internal sampling so that the explanation
# generated for a given instance is reproducible between runs.
explainer = lime_tabular.LimeTabularExplainer(
    training_data=np.array(X_train),
    feature_names=X_train.columns.values.tolist(),
    class_names=class_names,
    mode='classification',
    random_state=42,
)

In [None]:
# Position (within the test set) of the instance whose prediction is explained
i = 1

# Generate the explanation for all three classes.
# LIME documents `data_row` as a 1-D NumPy array, so the pandas row is
# converted explicitly instead of being passed as a Series.
exp = explainer.explain_instance(
    data_row=X_test.iloc[i].to_numpy(),
    top_labels=3,
    predict_fn=model.predict,
)

In [None]:
# Print some information about the explained instance
print('Features value: ')
print(X_test.iloc[i], end='\n\n')
print('Real label:', class_names[np.argmax(y_test[i])])

# Predict on an explicit (1, n_features) NumPy batch rather than a nested
# Python list, and pass the integer verbosity level 0 to silence the output.
instance_batch = X_test.iloc[[i]].to_numpy()
predicted_index = np.argmax(model.predict(instance_batch, verbose=0)[0])
print('Predicted label:', class_names[predicted_index])

# Show the explanation, including the feature-value table
exp.show_in_notebook(show_table=True)