In [1]:
import tensorflow as tf
import tensorflow.keras as keras
import pandas as pd
import numpy as np

In [2]:
from tensorflow.keras.layers.experimental import RandomFourierFeatures

In [4]:
# Column names for the headerless data file, listed in file order.
columns = [
    'sample_code',
    'clump_thickness',
    'cell_size_uniformity',
    'cell_shape_uniformity',
    'marginal_adhesion',
    'single_epithelial_cell_size',
    'bare_nuclei',
    'bland_chromatin',
    'normal_nucleoli',
    'mitoses',
    'class',
]

# '?' entries are parsed as missing values alongside NaN.
data = pd.read_csv(
    "../data/breast-cancer-wisconsin.data",
    header=None,
    names=columns,
    na_values=[np.nan, '?'],
)
# Every missing cell is replaced by the median of its own column.
data = data.fillna(data.median())

In [9]:
# Seed NumPy's global generator first; presumably this is what makes the
# random split below repeatable between runs.
np.random.seed(1)

# 80% of the rows, drawn at random, become the training set.
# The label is 1 where `class` equals 4 and 0 otherwise.
train = data.sample(frac=0.8).copy()
y_train = train['class'].eq(4).astype(int)
train = train.drop(columns=['sample_code', 'class'])

# Every row whose index was not drawn for training becomes the test set.
test = data.loc[~data.index.isin(train.index)].copy()
y_test = test['class'].eq(4).astype(int)
test = test.drop(columns=['sample_code', 'class'])

In [12]:
def create_svc(feature_columns, feature_layer_inputs, optimizer, loss="hinge", metrics=["accuracy"],
               l2=0.01, output_dim=64, scale=None, activation="sigmoid"):
    """Build and compile a random-Fourier-feature classifier.

    The layers are chained in this order: DenseFeatures ->
    BatchNormalization -> RandomFourierFeatures (``"gaussian"`` kernel
    initializer) -> Dense with a single unit.

    Args:
        feature_columns: Feature columns handed to ``keras.layers.DenseFeatures``.
        feature_layer_inputs: Dict of Keras inputs keyed by feature name. The
            dict itself is fed to the feature layer and its values, in dict
            order, become the model inputs.
        optimizer: Optimizer forwarded to ``model.compile``.
        loss: Loss forwarded to ``model.compile``.
        metrics: Metrics forwarded to ``model.compile``.
        l2: Factor of the L2 kernel regularizer on the output layer.
        output_dim: ``output_dim`` of the random Fourier feature layer.
        scale: ``scale`` of the random Fourier feature layer.
        activation: Activation of the single-unit output layer. The default
            ``"sigmoid"`` keeps the previous behaviour; pass ``None`` for an
            unbounded linear score.
            NOTE(review): the default hinge loss is normally paired with a
            linear output rather than a sigmoid -- confirm which is intended.

    Returns:
        The compiled ``keras.Model``.
    """
    regularizer = keras.regularizers.l2(l2)

    feature_layer = keras.layers.DenseFeatures(feature_columns)
    feature_layer_outputs = feature_layer(feature_layer_inputs)
    norm = keras.layers.BatchNormalization()(feature_layer_outputs)
    rff = RandomFourierFeatures(output_dim=output_dim, scale=scale, kernel_initializer="gaussian")(norm)
    outputs = keras.layers.Dense(
        1,
        kernel_initializer="normal",
        kernel_regularizer=regularizer,
        activation=activation,
    )(rff)

    model = keras.Model(inputs=list(feature_layer_inputs.values()), outputs=outputs)
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
    return model

In [13]:
def define_feature_column_layers(data, categorical_cols, numeric_cols):
    """Create feature columns and matching Keras inputs for the given names.

    Args:
        data: Table indexed by column name; only read for the categorical
            features, whose distinct values form the vocabulary.
        categorical_cols: Names of features to one-hot encode through an
            indicator column over a vocabulary-list categorical column.
        numeric_cols: Names of features exposed as float32 numeric columns.

    Returns:
        A ``(feature_columns, feature_layer_inputs)`` tuple: the list of
        feature columns (numeric ones first) and a dict mapping each feature
        name to a ``tf.keras.Input`` of shape ``(1,)``.
    """
    feature_columns = []
    feature_layer_inputs = {}

    for feature_name in numeric_cols:
        feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32))
        feature_layer_inputs[feature_name] = tf.keras.Input(shape=(1,), name=feature_name)

    for feature_name in categorical_cols:
        column = data[feature_name]
        vocabulary = column.unique()
        cat = tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary)
        feature_columns.append(tf.feature_column.indicator_column(cat))
        # The input dtype has to agree with the vocabulary: string-valued
        # columns need a string input; everything else keeps the int32 input.
        input_dtype = tf.string if pd.api.types.is_string_dtype(column) else tf.int32
        feature_layer_inputs[feature_name] = tf.keras.Input(shape=(1,), name=feature_name, dtype=input_dtype)

    return feature_columns, feature_layer_inputs

In [14]:
def make_input_fn(data, label, num_epochs=10, shuffle=True, batch_size=256):
    """Return a zero-argument function that builds the input dataset.

    Args:
        data: Features; converted with ``dict(data)`` before slicing.
        label: Labels sliced alongside the features.
        num_epochs: Repeat count applied after batching.
        shuffle: When true, shuffle with a buffer of 1000 before batching.
        batch_size: Number of elements per batch.

    Returns:
        A callable that creates a new ``tf.data.Dataset`` each time it is called.
    """
    def input_function():
        features = dict(data)
        dataset = tf.data.Dataset.from_tensor_slices((features, label))
        shuffled = dataset.shuffle(1000) if shuffle else dataset
        batched = shuffled.batch(batch_size)
        return batched.repeat(num_epochs)

    return input_function

In [15]:
import tempfile

def canned_keras(model):
    """Convert a Keras model into an estimator backed by a new temp directory.

    Args:
        model: Keras model passed as ``keras_model`` to
            ``keras.estimator.model_to_estimator``.

    Returns:
        The estimator, whose ``model_dir`` is a directory freshly created
        with ``tempfile.mkdtemp()``.
    """
    return keras.estimator.model_to_estimator(
        keras_model=model,
        model_dir=tempfile.mkdtemp(),
    )

In [16]:
# No categorical features are used; all nine listed features are numeric.
categorical_cols = []
numeric_cols = [
    'clump_thickness',
    'cell_size_uniformity',
    'cell_shape_uniformity',
    'marginal_adhesion',
    'single_epithelial_cell_size',
    'bare_nuclei',
    'bland_chromatin',
    'normal_nucleoli',
    'mitoses',
]

# Build the feature columns / inputs, then the compiled hinge-loss model.
feature_columns, feature_layer_inputs = define_feature_column_layers(
    data, categorical_cols, numeric_cols
)
optimizer = keras.optimizers.Adam(learning_rate=0.00005)
model = create_svc(
    feature_columns,
    feature_layer_inputs,
    optimizer,
    loss="hinge",
    l2=0.001,
    output_dim=512,
)

# Wrap the Keras model as an estimator before training.
estimator = canned_keras(model)

# Training repeats the data 500 times in batches of 512; evaluation makes a
# single unshuffled pass over the test split.
train_input_fn = make_input_fn(train, y_train, num_epochs=500, batch_size=512)
test_input_fn = make_input_fn(test, y_test, num_epochs=1, shuffle=False)

estimator.train(train_input_fn)

result = estimator.evaluate(test_input_fn)
print(result)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
  updates = self.state_updates


{'accuracy': 0.95714283, 'loss': 0.7258061, 'global_step': 1000}
