In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import utils

In [2]:
df_train, df_val, df_test = utils.load_data()
print(f"len(df_train) = {len(df_train)}, len(df_val) = {len(df_val)}, len(df_test) = {len(df_test)}")

len(df_train) = 7383, len(df_val) = 1846, len(df_test) = 2308


In [3]:
df_train.head()

Unnamed: 0,Type,Age,Breed1,Gender,Color1,Color2,MaturitySize,FurLength,Vaccinated,Sterilized,Health,Fee,PhotoAmt,target
8629,Cat,2,Domestic Short Hair,Female,Brown,Cream,Medium,Short,No,No,Healthy,0,4,1
6940,Dog,12,Mixed Breed,Female,Black,White,Medium,Short,Yes,Yes,Healthy,0,3,0
8030,Dog,1,Mixed Breed,Male,Black,Brown,Medium,Short,No,No,Healthy,0,11,1
299,Cat,3,Domestic Short Hair,Female,White,No Color,Small,Short,No,No,Healthy,0,3,1
10244,Cat,12,Oriental Short Hair,Male,Black,White,Large,Medium,No,No,Healthy,0,3,1


In [4]:
feature_columns = []

In [5]:
# numeric cols
for column_name in ['PhotoAmt', 'Fee', 'Age']:
    feature_columns.append(tf.feature_column.numeric_column(column_name))

In [6]:
# bucketized cols
age = tf.feature_column.numeric_column('Age')
age_buckets = tf.feature_column.bucketized_column(age, boundaries=[1, 2, 3, 4, 5])
feature_columns.append(age_buckets)

In [7]:
# indicator_columns
indicator_column_names = [ 'Type', 'Color1', 'Color2', 'Gender', 'MaturitySize', 'FurLength', 'Vaccinated', 'Sterilized', 'Health']
for column_name in indicator_column_names:
    categorical_column = tf.feature_column.categorical_column_with_vocabulary_list(column_name, df_train[column_name].unique())
    indicator_column = tf.feature_column.indicator_column(categorical_column)
    feature_columns.append(indicator_column)

In [8]:
# embedding columns
breed1 = tf.feature_column.categorical_column_with_vocabulary_list('Breed1', df_train.Breed1.unique())
breed1_embedding = tf.feature_column.embedding_column(breed1, dimension=8)
feature_columns.append(breed1_embedding)

In [9]:
# crossed columns
age = tf.feature_column.numeric_column('Age')
age_buckets = tf.feature_column.bucketized_column(age, boundaries=[1, 3, 5])

animal_type = tf.feature_column.categorical_column_with_vocabulary_list('Type', ['Cat', 'Dog'])

age_type_feature = tf.feature_column.crossed_column([age_buckets, animal_type], hash_bucket_size=100)
feature_columns.append(tf.feature_column.indicator_column(age_type_feature))

In [10]:
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

In [11]:
def build_dataset(df_data, batch_size, shuffle=True):
    x = df_data.copy()
    y = x.pop('target')
    ds = tf.data.Dataset.from_tensor_slices((dict(x), y))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(x))
    ds = ds.batch(batch_size)
    return ds

In [12]:
batch_size = 32
train_ds = build_dataset(df_train, batch_size)
val_ds = build_dataset(df_val, batch_size, shuffle=False)
test_ds = build_dataset(df_test, batch_size, shuffle=False)

In [13]:
model = tf.keras.Sequential([
    feature_layer,
    layers.Dense(128, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dropout(.1),
    layers.Dense(1)
])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-5),
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=['accuracy']
)

model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10
)

Epoch 1/10
Consider rewriting this model with the Functional API.
Consider rewriting this model with the Functional API.
Consider rewriting this model with the Functional API.
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1f1550e74c0>

In [14]:
loss, accuracy = model.evaluate(test_ds)
print("Accuracy", accuracy)

Accuracy 0.7352686524391174
