In [6]:
import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow.keras import layers

In [7]:
# Display the installed TensorFlow version so readers can see which release
# produced the outputs recorded in this notebook.
tf.__version__

'2.11.0'

In [13]:
# Archive holding the PetFinder "mini" CSV. Fetched over HTTPS because the
# downloaded zip is extracted onto the local disk, so it should not travel
# over an unauthenticated plain-HTTP connection.
dataset_url = 'https://storage.googleapis.com/download.tensorflow.org/data/petfinder-mini.zip'
# Where the CSV is expected after extraction, relative to the notebook's
# working directory (cache_dir='.' below).
csv_file = 'datasets/petfinder-mini/petfinder-mini.csv'

# Download the archive (if not already cached) and unpack it next to the
# notebook; the return value is not needed because the CSV path is fixed above.
tf.keras.utils.get_file('petfinder_mini.zip', dataset_url,
                        extract=True, cache_dir='.')
dataframe = pd.read_csv(csv_file)

In [10]:
# Preview the first five rows of the raw data to see the available columns
# before any feature engineering.
dataframe.head()

Unnamed: 0,Type,Age,Breed1,Gender,Color1,Color2,MaturitySize,FurLength,Vaccinated,Sterilized,Health,Fee,Description,PhotoAmt,AdoptionSpeed
0,Cat,3,Tabby,Male,Black,White,Small,Short,No,No,Healthy,100,Nibble is a 3+ month old ball of cuteness. He ...,1,2
1,Cat,1,Domestic Medium Hair,Male,Black,Brown,Medium,Medium,Not Sure,Not Sure,Healthy,0,I just found it alone yesterday near my apartm...,2,0
2,Dog,1,Mixed Breed,Male,Brown,White,Medium,Medium,Yes,No,Healthy,0,Their pregnant mother was dumped by her irresp...,7,3
3,Dog,4,Mixed Breed,Female,Black,Brown,Medium,Short,Yes,No,Healthy,150,"Good guard dog, very alert, active, obedience ...",8,2
4,Dog,1,Mixed Breed,Male,Black,No Color,Medium,Short,No,No,Healthy,0,This handsome yet cute boy is up for adoption....,3,2


In [16]:
# Build a binary label: in the original dataset an `'AdoptionSpeed'` of `4`
# marks a pet that was not adopted (label 0); every other value is
# treated as adopted (label 1).
dataframe['target'] = np.where(dataframe['AdoptionSpeed'] != 4, 1, 0)

# Remove the columns that are not used as features: the raw adoption speed
# (now encoded in `target`) and the free-text description.
dataframe = dataframe.drop(['AdoptionSpeed', 'Description'], axis=1)

In [17]:
# Shuffle all rows once, with a fixed seed so that Restart & Run All yields the
# same split every time, then cut the shuffled frame 80% / 10% / 10% into
# train / validation / test.
# Positional slicing is used instead of np.split because np.split on a
# DataFrame goes through DataFrame.swapaxes, which newer pandas deprecates.
_shuffled = dataframe.sample(frac=1, random_state=42)
_train_end = int(0.8 * len(_shuffled))
_val_end = int(0.9 * len(_shuffled))
train = _shuffled.iloc[:_train_end]
val = _shuffled.iloc[_train_end:_val_end]
test = _shuffled.iloc[_val_end:]

In [20]:
# Report how many rows landed in each of the three splits.
for _subset, _caption in ((train, 'training examples'),
                          (val, 'validation examples'),
                          (test, 'test examples')):
  print(len(_subset), _caption)

9229 training examples
1154 validation examples
1154 test examples


In [22]:
# Inspect the training split: rows appear in shuffled order, and `target` is
# present while `AdoptionSpeed` and `Description` have been dropped.
train

Unnamed: 0,Type,Age,Breed1,Gender,Color1,Color2,MaturitySize,FurLength,Vaccinated,Sterilized,Health,Fee,PhotoAmt,target
4565,Dog,0,Mixed Breed,Female,Brown,White,Medium,Medium,No,No,Healthy,0,5,1
1668,Dog,12,Mixed Breed,Male,Black,White,Medium,Medium,Not Sure,Not Sure,Minor Injury,0,3,0
2766,Dog,4,German Shepherd Dog,Male,Black,Brown,Large,Medium,Not Sure,Not Sure,Healthy,0,2,1
9020,Cat,3,Tabby,Female,Black,Brown,Medium,Medium,No,No,Healthy,0,5,1
2728,Dog,2,Labrador Retriever,Male,Brown,No Color,Medium,Short,Yes,No,Healthy,0,6,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10178,Cat,1,Domestic Short Hair,Male,Golden,Yellow,Medium,Short,No,No,Healthy,0,3,1
7172,Dog,3,Mixed Breed,Female,Black,Brown,Small,Short,Yes,No,Healthy,0,6,0
6466,Dog,12,Mixed Breed,Male,Brown,Golden,Large,Short,Yes,No,Healthy,0,2,0
7647,Cat,10,Domestic Short Hair,Male,Golden,No Color,Medium,Short,Yes,No,Healthy,0,5,1


In [23]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  """Convert a pandas DataFrame into a batched `tf.data.Dataset`.

  Args:
    dataframe: DataFrame holding the feature columns plus a `'target'` label
      column. The caller's frame is left untouched; a copy is used internally.
    shuffle: If True, shuffle the examples using a buffer as large as the
      whole frame.
    batch_size: Number of examples per batch; the same value is passed to
      `prefetch` as its buffer size.

  Returns:
    A `tf.data.Dataset` yielding `(features, labels)` pairs, where `features`
    is a dict mapping each non-label column name to a batch of shape
    `(batch, 1)` and `labels` is the matching batch of `'target'` values.

  Raises:
    KeyError: If `dataframe` has no `'target'` column.
  """
  features = dataframe.copy()
  labels = features.pop('target')
  # Build the feature dict from the label-free copy (not the caller's frame)
  # so that 'target' does not leak into the model inputs. Each column is
  # converted to a NumPy array before the trailing axis is added, because
  # indexing a Series with `[:, newaxis]` is not supported by pandas 2.x.
  features = {
      name: column.to_numpy()[:, tf.newaxis]
      for name, column in features.items()
  }
  ds = tf.data.Dataset.from_tensor_slices((features, labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  ds = ds.prefetch(batch_size)
  return ds