In [2]:
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Dense,Activation,Flatten
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import math

In [3]:
# Silence every Python warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides deprecation warnings raised by
# the libraries used below; consider narrowing the filter to specific categories.
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Read the boson dataset CSV into a DataFrame and preview its first rows.
csv_path = "../input/boson-dataset/Datanew.csv"
df = pd.read_csv(csv_path)
df.head()

# Exploratory Data Analysis

In [5]:
# Print column names, non-null counts and dtypes of the loaded frame.
df.info()

In [6]:
# (number of rows, number of columns)
df.shape

In [7]:
# Number of columns that contain at least one missing value:
# any() flags each column that has a null, sum() counts the flagged columns.
df.isnull().any().sum()

In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) of the data
df.describe()

In [9]:
# Class balance: number of rows for each value of the target variable
df['Label'].value_counts()

In [10]:
# Per-class mean of every numeric column, to see which features differ between
# the classes. numeric_only=True keeps the aggregation working on pandas >= 2.0,
# where averaging a non-numeric column raises a TypeError instead of the column
# being silently dropped.
df.groupby('Label').mean(numeric_only=True)

In [11]:
# Bar chart of how many rows fall into each class of the target column.
sns.set_style('whitegrid')
sns.countplot(data=df, x='Label')

In [40]:
# Pie chart of the class balance of "Label".
# Wedge sizes and wedge names are both taken from the same value_counts()
# Series: value_counts() is ordered by frequency while unique() is ordered by
# first appearance, so pairing the two can attach the wrong name to a wedge.
label_counts = df["Label"].value_counts()

# Drawn in the right-hand slot of a 1x2 grid, as in the original layout.
ax2 = plt.subplot(1, 2, 2)
ax2.pie(label_counts,
        labels=list(label_counts.index),
        autopct='%1.2f%%',
        pctdistance=0.8,
        shadow=True,
        radius=1.3,
        textprops={'fontsize': 14}
       )
ax2.set_xlabel('Composition of "Label"', fontsize=15, labelpad=20)
plt.subplots_adjust(wspace=0.4)
plt.show()

In [12]:
# Names of the columns stored with the generic `object` dtype.
object_columns = df.select_dtypes(include='object').columns

In [13]:
# Display the object-dtype column names found above.
object_columns

#### Encoding the target label

In [14]:
# Replace the class values in "Label" with integer codes (0 .. n_classes-1)
# so the column can be used as a numeric training target.
from sklearn import preprocessing

label_encoder = preprocessing.LabelEncoder()
df['Label'] = label_encoder.fit_transform(df['Label'])

# End on the bare expression (not print) so the preview is rendered as a
# notebook table rather than plain text.
df.head()

In [15]:
# Re-check the dtypes now that "Label" has been integer-encoded.
df.info()

### Building a Model

In [16]:
# Feature matrix: every column except the target. Target vector: "Label".
# NOTE(review): all non-label columns become features, including 'Weight'
# (inspected later in this notebook) — confirm none of them leak the target
# or are mere row identifiers.
Y = df['Label']
X = df.drop(columns=['Label'])

In [17]:
# Display the feature matrix.
X

In [18]:
# Display the encoded target vector.
Y

In [19]:
### Split the data into training and test sets

In [20]:
# Hold out 10% of the rows as a test set; the fixed seed keeps the split repeatable.
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=2,
)

In [21]:
# Shapes of the full, training and test feature matrices, in that order.
print(X.shape, X_train.shape, X_test.shape)

In [22]:
### Feature selection

In [23]:
from sklearn.feature_selection import mutual_info_classif

# Mutual information between each training feature and the target.
# The estimator is randomized, so it is seeded (same seed as the train/test
# split) to make the scores, and any ranking derived from them, repeatable.
mutual_info = mutual_info_classif(X_train, Y_train, random_state=2)
mutual_info

In [24]:
# Label each score with its feature name and list them from most to least informative.
mutual_info = pd.Series(mutual_info, index=X_train.columns)
mutual_info.sort_values(ascending=False)

In [25]:
# Bar chart of the mutual-information score of every feature, highest first.
ranked_mutual_info = mutual_info.sort_values(ascending=False)
ranked_mutual_info.plot.bar(figsize=(20, 8))

In [26]:
from sklearn.feature_selection import SelectKBest

In [27]:
# Select the 5 features with the highest mutual information with the target.
# mutual_info_classif is randomized; binding random_state (same seed as the
# train/test split) makes the chosen columns the same on every run, which
# matters because later cells depend on exactly which five are selected.
from functools import partial

sel_five_cols = SelectKBest(partial(mutual_info_classif, random_state=2), k=5)
sel_five_cols.fit(X_train, Y_train)
X_train.columns[sel_five_cols.get_support()]

In [28]:
# Frequency of each distinct value; the call parentheses are required,
# otherwise the cell only shows the bound method object.
df['DER_mass_MMC'].value_counts()

In [36]:
# Frequency of each distinct value; the call parentheses are required,
# otherwise the cell only shows the bound method object.
df['DER_mass_transverse_met_lep'].value_counts()

In [37]:
# Frequency of each distinct value; the call parentheses are required,
# otherwise the cell only shows the bound method object.
df['DER_mass_vis'].value_counts()

In [38]:
# Frequency of each distinct value; the call parentheses are required,
# otherwise the cell only shows the bound method object.
df['PRI_tau_pt'].value_counts()

In [39]:
# Frequency of each distinct value; the call parentheses are required,
# otherwise the cell only shows the bound method object.
df['Weight'].value_counts()

In [29]:
# Reduce both splits to the five columns chosen by the fitted selector.
X_train_new, X_test_new = (
    sel_five_cols.transform(split) for split in (X_train, X_test)
)

In [30]:
### Standardize the data

In [31]:
from sklearn.preprocessing import StandardScaler

In [32]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information reaches the scaler.
scaler = StandardScaler().fit(X_train_new)
X_train_std = scaler.transform(X_train_new)
X_test_std = scaler.transform(X_test_new)

In [33]:
from keras import Sequential
from keras.layers import Dense

# Small fully connected binary classifier: 5 inputs -> 12 -> 8 -> 1.
# The sigmoid output is a single value in (0, 1).
model = Sequential([
    Dense(12, input_dim=5, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [34]:
import tensorflow as tf

# Compile the model for binary classification: cross-entropy loss, Adam
# optimizer, and accuracy / precision / recall tracked during training.
# `learning_rate` is the supported keyword; the legacy `lr` alias is deprecated
# and is rejected by newer Keras releases.
model.compile(
    loss=tf.keras.losses.binary_crossentropy,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.02),
    metrics=[
        tf.keras.metrics.BinaryAccuracy(name='accuracy'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall')
    ]
)

In [35]:
# Fit the Keras model on the standardized training features; 10% of the
# training rows are held back by Keras as a validation set each epoch.
history = model.fit(X_train_std, Y_train, epochs=100, validation_split=0.1, batch_size=10)

# Evaluate on the same standardized five-feature matrix the network was trained
# on (the raw frame X has a different width and scale, so it cannot be fed to
# the model). evaluate() returns the loss followed by the compiled metrics in
# order: accuracy, precision, recall. These are training-set figures.
train_loss, accuracy, precision, recall = model.evaluate(X_train_std, Y_train)
print('Accuracy: %.2f' % (accuracy*100))
print('Precision: %f' % precision)
print('Recall: %f' % recall)

In [None]:
# Evaluate the Keras model once on the held-out test set.
# evaluate() returns the loss followed by the compiled metrics in order
# (accuracy, precision, recall), so all four values are unpacked from a single
# call instead of re-running the evaluation per metric.
test_loss, accuracy, precision, recall = model.evaluate(X_test_std, Y_test)
print('Accuracy: %.2f' % (accuracy*100))
print('Precision: %f' % precision)
print('Recall: %f' % recall)

In [None]:
# Training vs. validation accuracy per epoch, as recorded by model.fit().
fig, ax = plt.subplots()
for curve in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[curve])

ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')

ax.legend(['training data', 'validation data'], loc='lower right')

In [None]:
# Training vs. validation loss per epoch, as recorded by model.fit().
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(history.history[curve])

ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')

ax.legend(['training data', 'validation data'], loc='upper right')

In [None]:
#saving the model

In [None]:

# Write the trained model to 'keras_model.h5' in the working directory.
model.save('keras_model.h5')

In [None]:
# Load the model back from 'keras_model.h5'; this rebinds `model` to the
# loaded copy, which the cells below use.
from keras.models import load_model
model = load_model('keras_model.h5')

# Explainable AI

In [None]:
# Import SHAP and load its JavaScript so the interactive plots can render
# in the notebook.
import shap
shap.initjs()

In [None]:
# Draw 100 training rows (the five selected features, before standardization)
# to explain with SHAP. The sample is seeded so the explained rows, and the
# plots built from them, are the same on every run.
selected_features = X_train.columns[sel_five_cols.get_support()]
X_sample = pd.DataFrame(X_train_new, columns=selected_features).sample(100, random_state=2)

In [None]:
# Display the rows that will be explained.
X_sample

In [None]:
# The network was trained on standardized inputs (X_train_std), while the
# background data and the rows being explained are unscaled selected features.
# The prediction function handed to SHAP therefore applies the fitted scaler
# first, so the model sees inputs on the scale it was trained on.
def predict_from_unscaled(features):
    """Standardize unscaled selected-feature rows, then return the model's predictions."""
    return model.predict(scaler.transform(features), verbose=0)

# KernelExplainer evaluates the model over the whole background set for every
# explained row, so a 100-row subsample is used instead of the full training matrix.
background = shap.sample(X_train_new, 100, random_state=0)
explainer = shap.KernelExplainer(predict_from_unscaled, background)

In [None]:
# Compute SHAP values for the sampled rows.
shap_values = explainer.shap_values(X_sample)

In [None]:
# Bar-type summary plot of the SHAP values for the sampled rows.
shap.summary_plot(shap_values, X_sample, plot_type="bar")

In [None]:
### Force plot
# Built from the first entries of expected_value and shap_values.
# NOTE(review): the [0] indexing assumes shap returns one entry per model
# output here — confirm against the installed shap version.
shap.force_plot(explainer.expected_value[0], shap_values[0], features = X_sample)

In [None]:
# Summary plot for the first entry of shap_values.
# NOTE(review): the [0] indexing assumes shap returns one entry per model
# output here — confirm against the installed shap version.
shap.summary_plot(shap_values[0], features = X_sample)