In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
print(tf.__version__)

2.3.0


In [2]:
# Unpickle data
data = pd.read_pickle('data')

In [3]:
# Separate target and features
target = 'diagnosis'
y = data[target]
X = data.drop(columns=[target])
features_list = ['texture_mean', 'area_worst', 'smoothness_worst', 'area_mean', 'concavity_mean']
X = X[features_list]
y

0      M
1      M
2      M
3      M
4      M
      ..
564    M
565    M
566    M
567    M
568    B
Name: diagnosis, Length: 569, dtype: object

In [4]:
# Split the dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, 
                                                    random_state=100) 
                                                    #stratify=y)
X_train

Unnamed: 0,texture_mean,area_worst,smoothness_worst,area_mean,concavity_mean
18,22.15,2398.0,0.15120,1260.0,0.147900
213,25.56,1021.0,0.12430,948.0,0.168200
532,16.33,773.4,0.12640,575.5,0.017520
191,21.41,579.5,0.09388,507.4,0.031120
235,21.25,715.5,0.12870,603.4,0.014620
...,...,...,...,...,...
526,18.75,719.8,0.16240,551.1,0.042010
53,18.70,1321.0,0.12800,1033.0,0.177200
350,17.07,542.5,0.09958,421.0,0.008306
79,18.00,622.1,0.12890,506.3,0.038890


## Keras

In [5]:
# Setup inputs and expected outputs 
cols = features_list

# inputs to Logistic Regression (via Tensorflow)
X_trainTf = X_train[cols].values
X_testTf = X_test[cols].values

# add constant columns to both
X_trainTf = np.hstack((np.ones((X_trainTf.shape[0], 1)), X_trainTf))
X_testTf = np.hstack((np.ones((X_testTf.shape[0], 1)), X_testTf))
X_trainTf

array([[1.000e+00, 2.215e+01, 2.398e+03, 1.512e-01, 1.260e+03, 1.479e-01],
       [1.000e+00, 2.556e+01, 1.021e+03, 1.243e-01, 9.480e+02, 1.682e-01],
       [1.000e+00, 1.633e+01, 7.734e+02, 1.264e-01, 5.755e+02, 1.752e-02],
       ...,
       [1.000e+00, 1.707e+01, 5.425e+02, 9.958e-02, 4.210e+02, 8.306e-03],
       [1.000e+00, 1.800e+01, 6.221e+02, 1.289e-01, 5.063e+02, 3.889e-02],
       [1.000e+00, 1.390e+01, 3.266e+02, 1.850e-01, 2.578e+02, 3.332e-02]])

In [6]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_test = le.fit_transform(y_test)
y_train = le.fit_transform(y_train)

In [7]:
# expectd outputs:
y_trainTf = y_train.reshape(-1,1)
y_testTf = y_test.reshape(-1,1)

In [8]:
from tensorflow import keras

model = tf.keras.models.Sequential([ tf.keras.layers.Flatten(), tf.keras.layers.Dense(2, activation=tf.nn.softmax) ])

# compile
model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# train
model.fit(X_trainTf, y_trainTf, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f9f7f18d150>

In [9]:
# Evaluate the model 
print("Evaluate on test data")
results = model.evaluate(X_testTf, y_testTf, batch_size=128)
print("Test loss: ", results[0])
print("Test acc: ", results[1])

Evaluate on test data
Test loss:  342.17584228515625
Test acc:  0.8304093480110168


## Tensorflow

In [10]:
# Convert outputs into array of binary -> [0,1],[1,0],...
from keras.utils import to_categorical
y_train_binary = to_categorical(y_trainTf)
y_test_binary = to_categorical(y_testTf)

y_train_binary.shape

(398, 2)

In [11]:
# define hyperparameters
num_classes = len(np.unique(y_trainTf))
num_features = X_trainTf.shape[1]

input_layer = num_features # rename

learning_rate = 0.01
training_steps = 1000
batch_size = 256
display_step = 50

y_trainTf.shape

(398, 1)

In [12]:
# model training 
from tensorflow.keras.layers import Input, Dense, Activation,Dropout
from tensorflow.keras.models import Model

input_layer = Input(shape=(num_features,)) # number of features
dense_layer_1 = Dense(15, activation='relu')(input_layer)
dense_layer_2 = Dense(10, activation='relu')(dense_layer_1)
output = Dense(y_train_binary.shape[1], activation='softmax')(dense_layer_2)

# define model
model = Model(inputs=input_layer, outputs=output)

# definre optimizer (Adam) and learning rate
opt = keras.optimizers.Adam(learning_rate=learning_rate)

# compile the model, with loss categorical cross entropy and accuracy metric
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['acc'])

# Compile the model
#model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
#              loss='sparse_categorical_crossentropy',
#              metrics=['accuracy'], learning_rate=0.01)

print(model.summary())

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 6)]               0         
_________________________________________________________________
dense_1 (Dense)              (None, 15)                105       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                160       
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 22        
Total params: 287
Trainable params: 287
Non-trainable params: 0
_________________________________________________________________
None


In [13]:
history = model.fit(X_trainTf, y_train_binary, batch_size=50, epochs=50, verbose=1, validation_split=0.2)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [14]:
score = model.evaluate(X_testTf, y_test_binary, verbose=1)

print("Test Score:", score[0])
print("Test Accuracy:", score[1])

Test Score: 0.36706289649009705
Test Accuracy: 0.8362573385238647
