In [1]:
import random
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing

In [2]:
def makeDNN(numOfLayers, numOfNeurons, activationFunc):
    """Assemble an uncompiled Keras ``Sequential`` binary classifier.

    The layer stack is a ``Flatten`` layer, then ``numOfLayers`` repetitions
    of ``Dense(numOfNeurons, activation=activationFunc)`` followed by
    ``BatchNormalization``, and finally a single-unit sigmoid ``Dense`` layer.

    Args:
        numOfLayers: Number of hidden Dense + BatchNormalization pairs.
        numOfNeurons: Number of units in every hidden Dense layer.
        activationFunc: Activation passed to every hidden Dense layer.

    Returns:
        The ``tf.keras.models.Sequential`` model (not compiled).
    """
    kerasLayers = tf.keras.layers

    # Layers are created in the same order in which the model stacks them.
    stack = [kerasLayers.Flatten()]
    for _ in range(numOfLayers):
        stack.append(kerasLayers.Dense(numOfNeurons, activation = activationFunc))
        stack.append(kerasLayers.BatchNormalization())
    stack.append(kerasLayers.Dense(1, activation = 'sigmoid'))

    return tf.keras.models.Sequential(stack)

In [3]:
def embedAttribute(attribute, seed = 5):
    """Project a one-hot matrix with n columns onto n - 1 columns.

    The projection matrix has shape (n, n - 1) and is filled row by row with
    ``10 * random()`` draws from a generator seeded with ``seed``, so the same
    input width always yields the same matrix.

    The matrix is built inside the function because its shape depends on the
    width of ``attribute``; building it at module level referenced
    ``attribute`` before it existed and failed on a fresh kernel. A private
    ``random.Random`` instance is used so that the global ``random`` state is
    left untouched.

    Args:
        attribute: 2-D array-like of shape (rows, n).
        seed: Seed for the projection weights (default 5).

    Returns:
        ``numpy.ndarray`` of shape (rows, n - 1); (rows, 0) when n is 0 or 1.

    Raises:
        ValueError: If ``attribute`` is not two-dimensional.
    """
    attribute = np.asarray(attribute)
    if attribute.ndim != 2:
        raise ValueError("embedAttribute expects a 2-D array, got %d dimension(s)" % attribute.ndim)

    numOfCategories = attribute.shape[1]
    embeddedWidth = max(numOfCategories - 1, 0)

    rng = random.Random(seed)
    weights = np.array(
        [[10 * rng.random() for _ in range(embeddedWidth)] for _ in range(numOfCategories)],
        dtype = float,
    ).reshape(numOfCategories, embeddedWidth)  # keeps a 2-D shape even when a dimension is 0

    return attribute @ weights

In [4]:
from sklearn.datasets import load_breast_cancer

In [5]:
# Load the breast-cancer dataset as a (features, labels) pair instead of a Bunch object.
data, target = load_breast_cancer(return_X_y = True)

In [6]:
# Display the label array to check its contents.
target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [7]:
# Display the feature array as loaded, before any scaling.
data

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [8]:
# Standardize the features with a scaler fitted on the full array;
# std_scale stays available as the fitted scaler.
std_scale = preprocessing.StandardScaler()
data = std_scale.fit_transform(data)

In [9]:
# Display the feature array after standardization.
data

array([[ 1.09706398, -2.07333501,  1.26993369, ...,  2.29607613,
         2.75062224,  1.93701461],
       [ 1.82982061, -0.35363241,  1.68595471, ...,  1.0870843 ,
        -0.24388967,  0.28118999],
       [ 1.57988811,  0.45618695,  1.56650313, ...,  1.95500035,
         1.152255  ,  0.20139121],
       ...,
       [ 0.70228425,  2.0455738 ,  0.67267578, ...,  0.41406869,
        -1.10454895, -0.31840916],
       [ 1.83834103,  2.33645719,  1.98252415, ...,  2.28998549,
         1.91908301,  2.21963528],
       [-1.80840125,  1.22179204, -1.81438851, ..., -1.74506282,
        -0.04813821, -0.75120669]])

In [16]:
# Toy example: five records, each with three categorical fields and one continuous value.
categoricalAttr = np.array([
    ('dog', 2, 'man'),
    ('cat', 1, 'car'),
    ('hen', 0, 'man'),
    ('cat', 3, 'pole'),
    ('hen', 2, 'pole'),
])
# One continuous value per record, kept as a single-column 2-D array.
continuousAttr = np.array([[value] for value in (0.2, 0.53, 0.19, 0.98, 1.0)])

In [17]:
# Wrap both arrays as DataFrames. The continuous column is labelled with the
# number of categorical columns, i.e. the first label not already in use.
categoricalAttr = pd.DataFrame(categoricalAttr)
continuousLabel = categoricalAttr.shape[1]
continuousAttr = pd.DataFrame(continuousAttr, columns = [continuousLabel])

In [18]:
# Preview the categorical and continuous columns side by side (the result is not stored).
pd.concat([categoricalAttr, continuousAttr], axis = 1)

Unnamed: 0,0,1,2,3
0,dog,2,man,0.2
1,cat,1,car,0.53
2,hen,0,man,0.19
3,cat,3,pole,0.98
4,hen,2,pole,1.0


In [13]:
# Empty frame that will hold the embedded categorical columns.
encodedInput = pd.DataFrame()

In [14]:
# Label-encode each categorical column, one-hot it, and project it with
# embedAttribute. The per-column frames are collected in a list and joined
# once at the end, so re-running this cell rebuilds encodedInput instead of
# appending duplicate columns to it.
encodedPieces = []
nextLabel = len(continuousAttr.columns)  # encoded columns are numbered after the continuous ones
for columnPos in range(len(categoricalAttr.columns)):
    labelEncoder = preprocessing.LabelEncoder()
    labelTemp = labelEncoder.fit_transform(categoricalAttr.iloc[:, columnPos])
    print(labelTemp)
    labelTemp = labelTemp.reshape(len(labelTemp), 1)
    #Embedding the One Hot Encoded feature
    sparseTemp = embedAttribute(tf.keras.utils.to_categorical(labelTemp))
    embeddedWidth = sparseTemp.shape[1]
    sparseTemp = pd.DataFrame(sparseTemp, columns = list(range(nextLabel, nextLabel + embeddedWidth)))
    nextLabel += embeddedWidth
    print(sparseTemp)
    encodedPieces.append(sparseTemp)

# pd.concat rejects an empty list, so fall back to an empty frame when there are no categorical columns.
encodedInput = pd.concat(encodedPieces, axis = 1) if encodedPieces else pd.DataFrame()

[1 0 2 0 2]
          1         2
0  7.951936  9.424503
1  6.229017  7.417870
2  7.398986  9.223250
3  6.229017  7.417870
4  7.398986  9.223250
[2 1 0 3 2]
          3         4         5
0  0.290052  4.656227  9.433567
1  9.424503  7.398986  9.223250
2  6.229017  7.417870  7.951936
3  6.489746  9.009005  1.132060
4  0.290052  4.656227  9.433567
[1 0 1 2 2]
          6         7
0  7.951936  9.424503
1  6.229017  7.417870
2  7.951936  9.424503
3  7.398986  9.223250
4  7.398986  9.223250


In [15]:
# Display the embedded categorical columns.
encodedInput

Unnamed: 0,1,2,3,4,5,6,7
0,7.951936,9.424503,0.290052,4.656227,9.433567,7.951936,9.424503
1,6.229017,7.41787,9.424503,7.398986,9.22325,6.229017,7.41787
2,7.398986,9.22325,6.229017,7.41787,7.951936,7.951936,9.424503
3,6.229017,7.41787,6.489746,9.009005,1.13206,7.398986,9.22325
4,7.398986,9.22325,0.290052,4.656227,9.433567,7.398986,9.22325


In [16]:
# Continuous columns go first, relabelled 0..k-1 on a copy. The encoded columns
# are numbered from k = len(continuousAttr.columns) upward, so this keeps every
# label in inputData unique even when continuousAttr carries a different label.
continuousPart = continuousAttr.copy()
continuousPart.columns = list(range(len(continuousPart.columns)))
inputData = pd.concat([continuousPart, encodedInput], axis = 1)

In [17]:
# Display the combined continuous + embedded input frame.
inputData

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.2,7.951936,9.424503,0.290052,4.656227,9.433567,7.951936,9.424503
1,0.53,6.229017,7.41787,9.424503,7.398986,9.22325,6.229017,7.41787
2,0.19,7.398986,9.22325,6.229017,7.41787,7.951936,7.951936,9.424503
3,0.98,6.229017,7.41787,6.489746,9.009005,1.13206,7.398986,9.22325
4,1.0,7.398986,9.22325,0.290052,4.656227,9.433567,7.398986,9.22325


In [16]:
# Build the classifier: 5 hidden Dense + BatchNormalization pairs, 16 ReLU units each.
optimalDNN = makeDNN(5, 16, 'relu')

In [17]:
# Adam optimizer with binary cross-entropy loss; accuracy is tracked as a metric.
optimalDNN.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [18]:
# Train for 6 epochs on all of data/target; no validation or held-out split is passed here.
# NOTE(review): data is reassigned by more than one cell and the execution counts are
# not sequential - confirm which version of data this fit is meant to use.
optimalDNN.fit(data, target, epochs = 6)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x7fc19042cac8>

In [18]:
# Display the combined input frame as a plain NumPy array.
inputData.values

array([[0.2       , 7.95193566, 9.42450284, 0.29005228, 4.65622654,
        9.43356717, 7.95193566, 9.42450284],
       [0.53      , 6.22901695, 7.41786989, 9.42450284, 7.39898575,
        9.22324997, 6.22901695, 7.41786989],
       [0.19      , 7.39898575, 9.22324997, 6.22901695, 7.41786989,
        7.95193566, 7.95193566, 9.42450284],
       [0.98      , 6.22901695, 7.41786989, 6.48974553, 9.00900492,
        1.13205965, 7.39898575, 9.22324997],
       [1.        , 7.39898575, 9.22324997, 0.29005228, 4.65622654,
        9.43356717, 7.39898575, 9.22324997]])

In [20]:
# Normalize data along axis 1 (per row) and overwrite it with the result.
# NOTE(review): the output recorded in the following cell contains no negative values,
# so it looks like it was computed from the raw features rather than from the
# standardized array - confirm the intended input and re-run the notebook top to bottom.
data = tf.keras.utils.normalize(data, axis = 1)

In [21]:
# Display the array after row-wise normalization.
data

array([[7.92541486e-03, 4.57286305e-03, 5.40989964e-02, ...,
        1.16920795e-04, 2.02695018e-04, 5.23808686e-05],
       [8.66575595e-03, 7.48616836e-03, 5.59882822e-02, ...,
        7.83583182e-05, 1.15852352e-04, 3.75024596e-05],
       [9.36668268e-03, 1.01087865e-02, 6.18419883e-02, ...,
        1.15596947e-04, 1.71873157e-04, 4.16624718e-05],
       ...,
       [1.16438847e-02, 1.96964025e-02, 7.59658259e-02, ...,
        9.94640269e-05, 1.55579134e-04, 5.48525170e-05],
       [9.23020523e-03, 1.31418408e-02, 6.27743569e-02, ...,
        1.18738077e-04, 1.83125480e-04, 5.55604587e-05],
       [2.31098880e-02, 7.30820427e-02, 1.42709515e-01, ...,
        0.00000000e+00, 8.55006294e-04, 2.09626935e-04]])