In [1]:
from numpy import unique
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import Embedding
from keras.layers.merge import concatenate

In [2]:
def load_dataset(path):
    """Read a headerless CSV and split it into string features and a target column.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file; it is read with ``header=None``, so the
        first line is treated as data.

    Returns
    -------
    features : numpy.ndarray
        Every column except the last, with all values cast to ``str``.
    target : numpy.ndarray
        The last column, reshaped to a 2-D array of shape ``(n_rows, 1)``.
    """
    # Work on the raw numpy values of the whole table.
    raw = pd.read_csv(path, header=None).values
    n_rows = raw.shape[0]
    # Inputs: all but the final column, uniformly formatted as strings.
    features = raw[:, :-1].astype(str)
    # Output: the final column, kept as a column vector.
    target = raw[:, -1].reshape((n_rows, 1))
    return features, target

In [3]:
def prepare_inputs(features_train, features_test):
    """Label-encode every feature column, fitting each encoder on the train split.

    Parameters
    ----------
    features_train, features_test : 2-D arrays
        Indexed as ``[:, column]``; both are assumed to have the same number
        of columns (only ``features_train.shape[1]`` is consulted).

    Returns
    -------
    (list, list)
        Two lists with one encoded 1-D array per column: the first for the
        train split, the second for the test split.

    Notes
    -----
    Each column gets its own ``LabelEncoder`` fitted on the train values only.
    ``LabelEncoder.transform`` rejects labels it was not fitted on, so a
    category that appears only in ``features_test`` makes this function fail.
    """
    encoded_train = []
    encoded_test = []
    n_columns = features_train.shape[1]
    for col in range(n_columns):
        train_col = features_train[:, col]
        test_col = features_test[:, col]
        # One encoder per column, learned from the training values only.
        encoder = LabelEncoder().fit(train_col)
        encoded_train.append(encoder.transform(train_col))
        encoded_test.append(encoder.transform(test_col))
    return encoded_train, encoded_test

In [4]:
def _flatten_column_vector(values):
    """Return ``values`` as a 1-D array when it is an ``(n, 1)`` column vector.

    Any other input is handed back untouched, so the encoder still sees (and
    validates) exactly what the caller passed.
    """
    # Function-scope import: the notebook only imports ``unique`` from numpy.
    from numpy import asarray

    as_array = asarray(values)
    if as_array.ndim == 2 and as_array.shape[1] == 1:
        return as_array.ravel()
    return values


def prepare_targets(target_train, target_test):
    """Label-encode the train and test targets with an encoder fitted on train.

    Parameters
    ----------
    target_train, target_test : array-like
        Target labels, either 1-D or as an ``(n, 1)`` column vector.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The encoded train and test targets as 1-D integer arrays.

    Notes
    -----
    ``LabelEncoder`` expects a 1-D ``y``. Passing the ``(n, 1)`` array
    directly still works but makes scikit-learn emit a data-conversion
    warning, so column vectors are flattened before encoding. The encoded
    values are the same either way.
    """
    target_train = _flatten_column_vector(target_train)
    target_test = _flatten_column_vector(target_test)
    le = LabelEncoder()
    # The label -> integer mapping is learned from the training targets only.
    le.fit(target_train)
    target_train_enc = le.transform(target_train)
    target_test_enc = le.transform(target_test)
    return target_train_enc, target_test_enc

In [5]:
# Machine-specific location of the dataset file; adjust when running elsewhere.
dataset_path = 'D://DS//Stater_Projects//breastcancer.txt'
features, target = load_dataset(dataset_path)

In [6]:
# Hold out 33% of the rows for testing; the fixed random_state keeps the
# split the same from run to run.
features_train, features_test, target_train, target_test = train_test_split(
    features,
    target,
    test_size=0.33,
    random_state=12345,
)

In [7]:
# Integer-encode each feature column; the result is one array per column.
features_train_enc, features_test_enc = prepare_inputs(
    features_train=features_train,
    features_test=features_test,
)

In [8]:
# Integer-encode the class labels of both splits.
target_train_enc, target_test_enc = prepare_targets(
    target_train=target_train,
    target_test=target_test,
)

  return f(*args, **kwargs)


In [9]:
# Give the targets two trailing singleton axes, i.e. shape (n_samples, 1, 1).
# NOTE(review): presumably this matches the 3-D output of the model built
# below (embeddings are concatenated without flattening) - confirm.
n_train_samples = len(target_train_enc)
n_test_samples = len(target_test_enc)
target_train_enc = target_train_enc.reshape((n_train_samples, 1, 1))
target_test_enc = target_test_enc.reshape((n_test_samples, 1, 1))

In [10]:
# Accumulators for the per-column input layers and their embedding outputs.
in_layers, em_layers = [], []

In [11]:
# Build one Input -> Embedding branch per encoded feature column.
# The lists are rebound here so that re-running this cell rebuilds the
# branches from scratch instead of appending a second set of layers to
# whatever an earlier execution left in them.
in_layers, em_layers = [], []
for encoded_column in features_train_enc:
    # vocabulary size = number of distinct codes present in the training column
    n_labels = len(unique(encoded_column))
    # each branch receives a single integer code per sample
    in_layer = Input(shape=(1,))
    # map every code to a 10-dimensional embedding vector
    em_layer = Embedding(n_labels, 10)(in_layer)
    # keep both ends of the branch: inputs for Model(...), outputs for merging
    in_layers.append(in_layer)
    em_layers.append(em_layer)

In [12]:
# Join all embedding branches into a single tensor.
merge = concatenate(em_layers)
# Hidden layer: 10 ReLU units with He-normal weight initialisation.
hidden_layer = Dense(10, activation='relu', kernel_initializer='he_normal')
dense = hidden_layer(merge)
# Single sigmoid unit for the binary prediction.
output_layer = Dense(1, activation='sigmoid')
output = output_layer(dense)
# The model takes one input per feature column and yields one output.
model = Model(inputs=in_layers, outputs=output)

In [13]:
# Binary cross-entropy pairs with the sigmoid output; accuracy is tracked
# so that fit/evaluate report it alongside the loss.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [14]:
# Train on the encoded training split; verbose=2 prints one line per epoch.
model.fit(
    features_train_enc,
    target_train_enc,
    epochs=100,
    batch_size=16,
    verbose=2,
)

Epoch 1/100
12/12 - 27s - loss: 0.6711 - accuracy: 0.6963
Epoch 2/100
12/12 - 0s - loss: 0.6404 - accuracy: 0.6911
Epoch 3/100
12/12 - 0s - loss: 0.6142 - accuracy: 0.6911
Epoch 4/100
12/12 - 0s - loss: 0.5933 - accuracy: 0.6911
Epoch 5/100
12/12 - 0s - loss: 0.5738 - accuracy: 0.6963
Epoch 6/100
12/12 - 0s - loss: 0.5612 - accuracy: 0.7120
Epoch 7/100
12/12 - 0s - loss: 0.5472 - accuracy: 0.7173
Epoch 8/100
12/12 - 0s - loss: 0.5374 - accuracy: 0.7330
Epoch 9/100
12/12 - 0s - loss: 0.5280 - accuracy: 0.7382
Epoch 10/100
12/12 - 0s - loss: 0.5191 - accuracy: 0.7539
Epoch 11/100
12/12 - 0s - loss: 0.5113 - accuracy: 0.7592
Epoch 12/100
12/12 - 0s - loss: 0.5045 - accuracy: 0.7644
Epoch 13/100
12/12 - 0s - loss: 0.4972 - accuracy: 0.7592
Epoch 14/100
12/12 - 0s - loss: 0.4938 - accuracy: 0.7644
Epoch 15/100
12/12 - 0s - loss: 0.4863 - accuracy: 0.7749
Epoch 16/100
12/12 - 0s - loss: 0.4818 - accuracy: 0.7906
Epoch 17/100
12/12 - 0s - loss: 0.4759 - accuracy: 0.7906
Epoch 18/100
12/12 - 0

<keras.callbacks.History at 0x1fc30010548>

In [15]:
# Score the held-out split; the first returned value (the loss) is discarded.
_, accuracy = model.evaluate(
    features_test_enc,
    target_test_enc,
    verbose=0,
)
# Report accuracy as a percentage.
accuracy_percent = accuracy * 100
print('Accuracy: %.2f' % accuracy_percent)

Accuracy: 63.16
