In [65]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers

In [66]:
df = pd.read_csv('data.csv')

In [67]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [68]:
df.drop('RowNumber', axis=1)

Unnamed: 0,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [69]:
train, val, test = np.split(df.sample(frac=1), [int(0.8*len(df)), int(0.9*len(df))])

In [70]:
print(len(train), 'training examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

8000 training examples
1000 validation examples
1000 test examples


In [71]:
def df_to_dataset(df, shuffle=True, batch_size=32):
  df = df.copy()
  labels = df.pop('Exited')
  df = {key: value[:,tf.newaxis] for key, value in df.items()}
  ds = tf.data.Dataset.from_tensor_slices((dict(df), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(df))
  ds = ds.batch(batch_size)
  ds = ds.prefetch(batch_size)
  return ds

In [72]:
batch_size = 5
train_ds = df_to_dataset(train, batch_size=batch_size)

  df = {key: value[:,tf.newaxis] for key, value in df.items()}


In [73]:
[(train_features, label_batch)] = train_ds.take(1)
print('Every feature:', list(train_features.keys()))
print('A batch of ages:', train_features['Geography'])
print('A batch of targets:', label_batch )

Every feature: ['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary']
A batch of ages: tf.Tensor(
[[b'France']
 [b'France']
 [b'France']
 [b'France']
 [b'France']], shape=(5, 1), dtype=string)
A batch of targets: tf.Tensor([0 0 0 0 0], shape=(5,), dtype=int64)


In [74]:
def get_normalization_layer(name, dataset):
  # Create a Normalization layer for the feature.
  normalizer = layers.Normalization(axis=None)

  # Prepare a Dataset that only yields the feature.
  feature_ds = dataset.map(lambda x, y: x[name])

  # Learn the statistics of the data.
  normalizer.adapt(feature_ds)

  return normalizer

In [75]:
photo_count_col = train_features['CreditScore']
layer = get_normalization_layer('CreditScore', train_ds)
layer(photo_count_col)

<tf.Tensor: shape=(5, 1), dtype=float32, numpy=
array([[ 0.34424615],
       [ 1.3288893 ],
       [-0.7025849 ],
       [-1.0549835 ],
       [-1.0031602 ]], dtype=float32)>

In [76]:
def get_category_encoding_layer(name, dataset, dtype, max_tokens=None):
  # Create a layer that turns strings into integer indices.
  if dtype == 'string':
    index = layers.StringLookup(max_tokens=max_tokens)
  # Otherwise, create a layer that turns integer values into integer indices.
  else:
    index = layers.IntegerLookup(max_tokens=max_tokens)

  # Prepare a `tf.data.Dataset` that only yields the feature.
  feature_ds = dataset.map(lambda x, y: x[name])

  # Learn the set of possible values and assign them a fixed integer index.
  index.adapt(feature_ds)

  # Encode the integer indices.
  encoder = layers.CategoryEncoding(num_tokens=index.vocabulary_size())

  # Apply multi-hot encoding to the indices. The lambda function captures the
  # layer, so you can use them, or include them in the Keras Functional model later.
  return lambda feature: encoder(index(feature))

In [77]:
test_type_col = train_features['Geography']
test_type_layer = get_category_encoding_layer(name='Geography',
                                              dataset=train_ds,
                                              dtype='string')
test_type_layer(test_type_col)

<tf.Tensor: shape=(5, 4), dtype=float32, numpy=
array([[0., 1., 0., 0.],
       [0., 1., 0., 0.],
       [0., 1., 0., 0.],
       [0., 1., 0., 0.],
       [0., 1., 0., 0.]], dtype=float32)>

In [78]:
test_type_col = train_features['Gender']
test_type_layer = get_category_encoding_layer(name='Gender',
                                              dataset=train_ds,
                                              dtype='string')
test_type_layer(test_type_col)

<tf.Tensor: shape=(5, 3), dtype=float32, numpy=
array([[0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.]], dtype=float32)>

In [79]:
batch_size = 256
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

  df = {key: value[:,tf.newaxis] for key, value in df.items()}
  df = {key: value[:,tf.newaxis] for key, value in df.items()}
  df = {key: value[:,tf.newaxis] for key, value in df.items()}


In [80]:
df.columns

Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited'],
      dtype='object')

In [81]:
['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited']

['RowNumber',
 'CustomerId',
 'Surname',
 'CreditScore',
 'Geography',
 'Gender',
 'Age',
 'Tenure',
 'Balance',
 'NumOfProducts',
 'HasCrCard',
 'IsActiveMember',
 'EstimatedSalary',
 'Exited']

In [82]:
NUMERIC_FEATURE_KEYS = [
    'Age','Balance','CreditScore','CustomerId','EstimatedSalary','HasCrCard','IsActiveMember','NumOfProducts','RowNumber','Tenure'
]

In [83]:
SCALE_FEATURES = ['CreditScore','Age','Tenure','Balance','NumOfProducts','EstimatedSalary']

In [84]:
all_inputs = []
encoded_features = []

# Numerical features.
for header in SCALE_FEATURES:
  numeric_col = tf.keras.Input(shape=(1,), name=header)
  normalization_layer = get_normalization_layer(header, train_ds)
  encoded_numeric_col = normalization_layer(numeric_col)
  all_inputs.append(numeric_col)
  encoded_features.append(encoded_numeric_col)

In [85]:
CATEGORICAL_FEATURE_KEYS = ['Geography','Gender']

for header in CATEGORICAL_FEATURE_KEYS:
  categorical_col = tf.keras.Input(shape=(1,), name=header, dtype='string')
  encoding_layer = get_category_encoding_layer(name=header,
                                               dataset=train_ds,
                                               dtype='string',
                                               max_tokens=5)
  encoded_categorical_col = encoding_layer(categorical_col)
  all_inputs.append(categorical_col)
  encoded_features.append(encoded_categorical_col)

In [86]:
all_features = tf.keras.layers.concatenate(encoded_features)
x = tf.keras.layers.Dense(32, activation="relu")(all_features)
x = tf.keras.layers.Dropout(0.5)(x)
output = tf.keras.layers.Dense(1)(x)

model = tf.keras.Model(all_inputs, output)

In [87]:
all_features.ndim


<KerasTensor: shape=(None, 13) dtype=float32 (created by layer 'concatenate_2')>

In [None]:
x = tf.keras.layers.Reshape((12,), input_shape = (1, 12))(inputs)

x = tf.keras.layers.Dense(11, activation='relu')(x) 
x = tf.keras.layers.Dense(11, activation='relu')(x)
x = tf.keras.layers.Dense(11, activation='sigmoid')(x)

In [53]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=["accuracy"])

In [54]:
# Use `rankdir='LR'` to make the graph horizontal.
tf.keras.utils.plot_model(model, show_shapes=True, rankdir="LR")

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


In [55]:
model.fit(train_ds, epochs=10, validation_data=val_ds)

Epoch 1/10


  inputs = self._flatten_to_reference_inputs(inputs)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1f4b7154a30>

In [56]:
loss, accuracy = model.evaluate(test_ds)
print("Accuracy", accuracy)

Accuracy 0.7889999747276306


In [57]:
#Define imports
from pyexpat import model
# from kerastuner.engine import base_tuner
# import kerastuner as kt
from tensorflow import keras
from typing import NamedTuple, Dict, Text, Any
from tfx.components.trainer.fn_args_utils import FnArgs  
import tensorflow as tf
import tensorflow_transform as tft
import sklearn
from sklearn.ensemble import RandomForestClassifier
from tfx.components.trainer.fn_args_utils import DataAccessor
import os
import pickle
from typing import Tuple
from tfx.components.trainer.fn_args_utils import DataAccessor
from tfx.components.trainer.fn_args_utils import FnArgs
from tfx.dsl.io import fileio
from tfx.utils import io_utils
from tfx_bsl.tfxio import dataset_options
from tensorflow_metadata.proto.v0 import schema_pb2
import numpy as np
import absl
# import tensorflow_decision_forests as tfdf
import tensorflow_transform as tft
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense

ONE_HOT_FEATURES2   = {
    'Gender':2,'Geography':3
}

ONE_HOT_FEATURES   = [
    'Gender','Geography'
]

BUCKETIZE = {
    'Age' : 10
}

NUMERIC_FEATURE_KEYS = [
    'Age','Balance','CreditScore','CustomerId','EstimatedSalary','HasCrCard','IsActiveMember','NumOfProducts','RowNumber','Tenure'
]

CATEGORICAL_FEATURE_KEYS = ['Geography','Gender']

SCALE_FEATURES = ['CreditScore','Age','Tenure','Balance','NumOfProducts','EstimatedSalary']

LABEL_KEY = 'Exited'

# Renamimg Features   
def transformed_name(key):
    return key + '_xf'

#Define Callbacks
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience= 10)

#Load compressed data
def _gzip_reader_fn(filenames):
    return tf.data.TFRecordDataset(filenames, compression_type= 'GZIP')

#Load data
def _input_fn(file_pattern: str, tf_transform_output: tft.TFTransformOutput, num_epochs= None, batch_size: int = 200,) -> tf.data.Dataset:

    # Get post transform feature specification
    transformed_feature_spec = (
        tf_transform_output.transformed_feature_spec().copy()
    )

    #create batches of features and labels
    dataset = tf.data.experimental.make_batched_features_dataset(
        file_pattern= file_pattern,
        batch_size= batch_size,
        features= transformed_feature_spec,
        reader = _gzip_reader_fn,
        num_epochs= num_epochs,
        label_key= transformed_name(LABEL_KEY)
    )

    return dataset
  

#Build model
def get_model():
    #One-hot Categorical Features
    input_features = []
    for key in ONE_HOT_FEATURES:
        input_features.append(
            tf.keras.Input(shape = (1,),
            dtype = "string",
            name = transformed_name(key))
        )
    #Scale Features
    for key in SCALE_FEATURES:
        input_features.append(
            tf.keras.Input(shape = (1,),
            name = transformed_name(key))
        )
    
    inputs = input_features
    all_inputs = tf.keras.layers.concatenate(inputs)
    #reshaped_narrative = tf.reshape(inputs[0], [-1])

    #x = tf.keras.layers.Reshape((3, 4), input_shape = (12,))(all_inputs)

    # d = tf.keras.layers.concatenate(inputs)
      
    x = tf.keras.layers.Dense(8, activation='relu')(all_inputs) 
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    x = tf.keras.layers.Dense(16, activation='sigmoid')(x)

    outputs = tf.keras.layers.Dense(3, activation = 'sigmoid')(x)

    keras_model = tf.keras.Model(inputs= inputs, outputs=outputs) 

    keras_model.compile(   
                   optimizer=tf.keras.optimizers.Adam(1e-2), 
                    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),  
                    metrics=[tf.keras.metrics.SparseCategoricalAccuracy(),
                             tf.keras.metrics.BinaryAccuracy(),
                             tf.keras.metrics.TruePositives()])
    keras_model.summary()
    return keras_model
 

def _get_serve_tf_examples_fn(model, tf_transform_output):
    
    model.tft_layer = tf_transform_output.transform_features_layer()
    
    @tf.function
    def serve_tf_examples_fn(serialized_tf_examples):
        
        feature_spec = tf_transform_output.raw_feature_spec()
        
        feature_spec.pop("Exited")
        
        parsed_features = tf.io.parse_example(serialized_tf_examples, feature_spec)
        
        transformed_features = model.tft_layer(parsed_features)
        
        # get predictions using the transformed features
        return model(transformed_features)
        
    return serve_tf_examples_fn

#Run
def run_fn(fn_args: FnArgs) -> None:
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
      log_dir = fn_args.model_run_dir, update_freq = 'batch'
    )
    es = tf.keras.callbacks.EarlyStopping(monitor = 'val_binary_accuracy', mode = 'max', verbose = 1, patience = 10)
    mc = tf.keras.callbacks.ModelCheckpoint(fn_args.serving_model_dir, monitor = 'val_binary_accuracy', mode = 'max', verbose =1, save_best_only = True)

    #
    schema = io_utils.parse_pbtxt_file(fn_args.schema_file, schema_pb2.Schema())

    # Load tf_transform_output
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)

    # Create batches of data
    train_set = _input_fn(fn_args.train_files, tf_transform_output, 10)
    eval_set = _input_fn(fn_args.eval_files, tf_transform_output, 10)

    train_dataset = _input_fn(fn_args.train_files,tf_transform_output)
    eval_dataset = _input_fn(fn_args.eval_files,tf_transform_output)


In [58]:
#Build model
def get_model():
    #One-hot Categorical Features
    input_features = []
    for key in ONE_HOT_FEATURES:
        input_features.append(
            tf.keras.Input(shape = (1,),
            dtype = "string",
            name = transformed_name(key))
        )
    #Scale Features
    for key in SCALE_FEATURES:
        input_features.append(
            tf.keras.Input(shape = (1,),
            name = transformed_name(key))
        )
    
    inputs = input_features
    all_inputs = tf.keras.layers.concatenate(inputs)
    #reshaped_narrative = tf.reshape(inputs[0], [-1])

    #x = tf.keras.layers.Reshape((3, 4), input_shape = (12,))(all_inputs)

    # d = tf.keras.layers.concatenate(inputs)
      
    x = tf.keras.layers.Dense(8, activation='relu')(all_inputs) 
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    x = tf.keras.layers.Dense(16, activation='sigmoid')(x)

    outputs = tf.keras.layers.Dense(3, activation = 'sigmoid')(x)

    keras_model = tf.keras.Model(inputs= inputs, outputs=outputs) 

    keras_model.compile(   
                   optimizer=tf.keras.optimizers.Adam(1e-2), 
                    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),  
                    metrics=[tf.keras.metrics.SparseCategoricalAccuracy(),
                             tf.keras.metrics.BinaryAccuracy(),
                             tf.keras.metrics.TruePositives()])
    keras_model.summary()
    return keras_model
 

def _get_serve_tf_examples_fn(model, tf_transform_output):
    
    model.tft_layer = tf_transform_output.transform_features_layer()
    
    @tf.function
    def serve_tf_examples_fn(serialized_tf_examples):
        
        feature_spec = tf_transform_output.raw_feature_spec()
        
        feature_spec.pop("Exited")
        
        parsed_features = tf.io.parse_example(serialized_tf_examples, feature_spec)
        
        transformed_features = model.tft_layer(parsed_features)
        
        # get predictions using the transformed features
        return model(transformed_features)
        
    return serve_tf_examples_fn

#Run
def run_fn(fn_args: FnArgs) -> None:
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
      log_dir = fn_args.model_run_dir, update_freq = 'batch'
    )
    es = tf.keras.callbacks.EarlyStopping(monitor = 'val_binary_accuracy', mode = 'max', verbose = 1, patience = 10)
    mc = tf.keras.callbacks.ModelCheckpoint(fn_args.serving_model_dir, monitor = 'val_binary_accuracy', mode = 'max', verbose =1, save_best_only = True)

    #
    schema = io_utils.parse_pbtxt_file(fn_args.schema_file, schema_pb2.Schema())

    # Load tf_transform_output
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)

    # Create batches of data
    train_set = _input_fn(fn_args.train_files, tf_transform_output, 10)
    eval_set = _input_fn(fn_args.eval_files, tf_transform_output, 10)

    train_dataset = _input_fn(fn_args.train_files,tf_transform_output)
    eval_dataset = _input_fn(fn_args.eval_files,tf_transform_output)

    # Build the model
    model = get_model()
    model.fit(##tf.expand_dims(train_set, axis= -1)##,
              train_set, 
              validation_steps = 32, 
              validation_data = eval_set)
    absl.logging.info(model)

    evaluation = model.evaluate(eval_set, steps = 32)
    absl.logging.info('Accuracy: %f', evaluation)
    
    signatures = {
        'serving_default':
        _get_serve_tf_examples_fn(model, 
                                 tf_transform_output).get_concrete_function(
                                    tf.TensorSpec(
                                    shape=[None],
                                    dtype=tf.string,
                                    name='examples')) 
    }
    # model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)
    model.save(fn_args.serving_model_dir,
               signatures=signatures, 
               save_format='tf')

    # Export the model as a pickle named model.pkl. AI Platform Prediction expects
  # sklearn model artifacts to follow this naming convention.
    

 

In [64]:
fn_args = FnArgs

# tensorboard_callback = tf.keras.callbacks.TensorBoard(
#       log_dir = fn_args.model_run_dir, update_freq = 'batch'
    # )
# es = tf.keras.callbacks.EarlyStopping(monitor = 'val_binary_accuracy', mode = 'max', verbose = 1, patience = 10)
# mc = tf.keras.callbacks.ModelCheckpoint(fn_args.serving_model_dir, monitor = 'val_binary_accuracy', mode = 'max', verbose =1, save_best_only = True)

    #
schema = io_utils.parse_pbtxt_file(fn_args.schema_file, schema_pb2.Schema())

    # Load tf_transform_output
tf_transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)

    # Create batches of data
train_set = _input_fn(fn_args.train_files, tf_transform_output, 10)
eval_set = _input_fn(fn_args.eval_files, tf_transform_output, 10)

train_dataset = _input_fn(fn_args.train_files,tf_transform_output)
eval_dataset = _input_fn(fn_args.eval_files,tf_transform_output)

    # Build the model
model = get_model()
model.fit(##tf.expand_dims(train_set, axis= -1)##,
              train_set, 
              validation_steps = 32, 
              validation_data = eval_set)
absl.logging.info(model)

evaluation = model.evaluate(eval_set, steps = 32)
absl.logging.info('Accuracy: %f', evaluation)
    
signatures = {
        'serving_default':
        _get_serve_tf_examples_fn(model, 
                                 tf_transform_output).get_concrete_function(
                                    tf.TensorSpec(
                                    shape=[None],
                                    dtype=tf.string,
                                    name='examples')) 
    }
    # model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)
model.save(fn_args.serving_model_dir,
               signatures=signatures, 
               save_format='tf')

    # Export the model as a pickle named model.pkl. AI Platform Prediction expects
  # sklearn model artifacts to follow this naming convention.
    


AttributeError: type object 'FnArgs' has no attribute 'schema_file'