In [1]:
import numpy as np
import pandas as pd
from sklearn.utils import shuffle

# Load the raw table from ecoli.csv (path is relative to the working directory).
dataset = pd.read_csv('ecoli.csv')

# Randomly permute the rows. A fixed random_state keeps the row order, and
# therefore every preview shown below, identical across Restart & Run All.
dataset = shuffle(dataset, random_state=42)

# Preview the first five shuffled rows.
dataset.head()

Unnamed: 0,NAME,MCG,GVH,LIP,CHG,AAC,ALM1,ALM2,SITE
153,CYDA_ECOLI,0.55,0.47,0.48,0.5,0.57,0.78,0.8,im
90,PTFA_ECOLI,0.34,0.46,0.48,0.5,0.52,0.35,0.44,cp
47,GT_ECOLI,0.43,0.4,0.48,0.5,0.39,0.28,0.39,cp
51,IF1_ECOLI,0.29,0.3,0.48,0.5,0.45,0.03,0.17,cp
110,SOXS_ECOLI,0.32,0.33,0.48,0.5,0.6,0.06,0.2,cp


In [2]:
# Row and column counts of the loaded frame
dataset.shape

(220, 9)

In [3]:
# Remove the columns that are not kept for modelling, in a single call
unused_columns = ["NAME", "LIP", "CHG"]
dataset = dataset.drop(columns=unused_columns)

# Report the distinct class labels and the resulting frame size
print(dataset['SITE'].unique())
print("Number of rows and columns:", dataset.shape)

dataset.head()

['im' 'cp']
Number of rows and columns: (220, 6)


Unnamed: 0,MCG,GVH,AAC,ALM1,ALM2,SITE
153,0.55,0.47,0.57,0.78,0.8,im
90,0.34,0.46,0.52,0.35,0.44,cp
47,0.43,0.4,0.39,0.28,0.39,cp
51,0.29,0.3,0.45,0.03,0.17,cp
110,0.32,0.33,0.6,0.06,0.2,cp


In [4]:
from sklearn import preprocessing

# Replace the string class labels in SITE with integer codes
le = preprocessing.LabelEncoder()
dataset['SITE'] = le.fit_transform(dataset['SITE'])

# Show which integer code the encoder assigned to each class label
targets = ['cp', 'im']
map1 = le.transform(targets)

mapping = pd.DataFrame(data=map1, index=targets)
mapping

Unnamed: 0,0
cp,0
im,1


In [5]:
# Preview the frame now that SITE holds the integer-encoded labels
dataset.head()

Unnamed: 0,MCG,GVH,AAC,ALM1,ALM2,SITE
153,0.55,0.47,0.57,0.78,0.8,1
90,0.34,0.46,0.52,0.35,0.44,0
47,0.43,0.4,0.39,0.28,0.39,0
51,0.29,0.3,0.45,0.03,0.17,0
110,0.32,0.33,0.6,0.06,0.2,0


In [6]:
# Feature matrix = first five columns; target vector = sixth column (SITE)

X, y = dataset.iloc[:, :5], dataset.iloc[:, 5]

X.head()

Unnamed: 0,MCG,GVH,AAC,ALM1,ALM2
153,0.55,0.47,0.57,0.78,0.8
90,0.34,0.46,0.52,0.35,0.44
47,0.43,0.4,0.39,0.28,0.39
51,0.29,0.3,0.45,0.03,0.17
110,0.32,0.33,0.6,0.06,0.2


In [7]:
# Preview the target vector
y.head()

153    1
90     0
47     0
51     0
110    0
Name: SITE, dtype: int32

In [8]:
# Split dataset: 80% train / 20% test, stratified on y so both splits keep
# the same class proportions.

from sklearn.model_selection import train_test_split

# random_state pins the partition so that reported metrics refer to the same
# held-out rows on every run (model weight initialisation is still unseeded).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

In [9]:
# Confirm split sizes: features and labels must have matching row counts
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(176, 5) (176,)
(44, 5) (44,)


In [10]:
# Feature scaling using Standard scaler for "X"

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

# Learn the per-feature mean and standard deviation from the training split only.
X_train = scaler.fit_transform(X_train.astype(np.float64))
# Apply those same training statistics to the test split. Re-fitting on the
# test data would scale the two splits differently and leak test-set
# information into the preprocessing step.
X_test = scaler.transform(X_test.astype(np.float64))

In [11]:
# Plain-text dump of the encoded frame. Note that `dataset` itself is not
# scaled: the StandardScaler output was assigned to X_train / X_test only.
print(dataset)

      MCG   GVH   AAC  ALM1  ALM2  SITE
153  0.55  0.47  0.57  0.78  0.80     1
90   0.34  0.46  0.52  0.35  0.44     0
47   0.43  0.40  0.39  0.28  0.39     0
51   0.29  0.30  0.45  0.03  0.17     0
110  0.32  0.33  0.60  0.06  0.20     0
..    ...   ...   ...   ...   ...   ...
150  0.27  0.35  0.51  0.77  0.79     1
114  0.38  0.44  0.43  0.20  0.31     0
130  0.00  0.38  0.42  0.48  0.55     0
116  0.34  0.42  0.41  0.34  0.43     0
154  0.12  0.67  0.74  0.58  0.63     1

[220 rows x 6 columns]


In [12]:
# MLP using Keras

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical

# Fully connected binary classifier: 5 inputs -> 100 -> 50 -> 1 unit,
# sigmoid activation on every layer.
model = Sequential([
    Dense(100, input_dim=5, activation='sigmoid'),
    Dense(50, activation='sigmoid'),
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output; track accuracy too.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [13]:
# Train on the training split: one sample per gradient update, 100 passes
# over the data. The returned History object is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    batch_size=1,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [14]:
# Score the trained model on the held-out test split.
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
test_scores = model.evaluate(X_test, y_test, batch_size=1)
loss, accuracy = test_scores

# Both values are printed multiplied by 100.
print("\nLoss: ",loss*100)
print("Accuracy: ",accuracy*100)


Loss:  23.932987451553345
Accuracy:  95.45454382896423


# Predicted probability of the positive class for each test row; shape (n, 1).
probabilities = model.predict(X_test)

# Turn probabilities into hard 0/1 labels with a 0.5 threshold.
predictions = (np.asarray(probabilities).ravel() > 0.5).astype(int)

# Accuracy is the fraction of predicted labels that equal the true labels.
# (Averaging the raw probabilities, as before, only gives the mean predicted
# score and says nothing about correctness.)
accuracy = np.mean(predictions == np.asarray(y_test))
print("Prediction Accuracy:",accuracy*100)