# Feature Columns
Tensorflow offers a Feature columns API, which is the way we can pre-process our incoming datasets to adopt the shape most appropiated for our model.
- *One important factor to consider the usage of the feature_column API is that it is embeded in the model, so the feature engineering is easier to transport-export...*


In [34]:
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers
import pandas as pd

# Create the Dataset

In [11]:


dataset_url = 'http://storage.googleapis.com/download.tensorflow.org/data/petfinder-mini.zip'
file_path = '../datasets/petfinder-mini/petfinder-mini.csv'

# create dataset
test_dataset = tf.data.experimental.make_csv_dataset(file_path, 
                                                    batch_size=5, 
                                                    label_name='AdoptionSpeed')
#!head {file_path}

# Create Demo function
## 1) Set a example features dictionary
## 2) Define demo function, creates a FeatureLayer with a feature_column, then it will apply the transformations to a features dictionary

In [12]:
# A example_batch is defined as the next element in the streaming of the dataset
example_batch = next(iter(test_dataset))
# extract the features and target_label
features, target_label = example_batch
# target label is a tensor
print(type(target_label))
# Features is an ordered dictionary
print(type(features))
for key, value in features.items():
  print("{:20s}:{}".format(key, value))

# Demo function
def demo(feature_column):
  # Create a feature layer (a Dense Features Layer) with feature_column
  feature_layer = layers.DenseFeatures(feature_column)
  # Prints how the data looks like after the transformation
  print(feature_layer(features).numpy())


<class 'tensorflow.python.framework.ops.EagerTensor'>
<class 'collections.OrderedDict'>
Type                :[b'Cat' b'Cat' b'Cat' b'Cat' b'Dog']
Age                 :[12 12 12  9 72]
Breed1              :[b'Domestic Medium Hair' b'Domestic Short Hair' b'Domestic Short Hair'
 b'Domestic Short Hair' b'Terrier']
Gender              :[b'Male' b'Female' b'Female' b'Male' b'Female']
Color1              :[b'Brown' b'Black' b'Brown' b'Golden' b'Brown']
Color2              :[b'No Color' b'Yellow' b'No Color' b'White' b'Cream']
MaturitySize        :[b'Medium' b'Medium' b'Medium' b'Medium' b'Medium']
FurLength           :[b'Medium' b'Short' b'Short' b'Short' b'Medium']
Vaccinated          :[b'No' b'No' b'No' b'Yes' b'Yes']
Sterilized          :[b'Yes' b'No' b'No' b'Yes' b'Yes']
Health              :[b'Healthy' b'Healthy' b'Healthy' b'Healthy' b'Healthy']
Fee                 :[0 0 0 0 0]
Description         :[b'Hi, Any pet lover, nak donate seekor kucing kesayangan. Kesihatan memuaskan. Nak adopt

# Numeric Feature Columns


In [13]:
photo_count = feature_column.numeric_column('PhotoAmt')
demo(photo_count)

[[3.]
 [1.]
 [1.]
 [3.]
 [3.]]


# Bucketized Columns

In [14]:

age = feature_column.numeric_column('Age')
age_buckets = feature_column.bucketized_column(age, boundaries=[1,3,5])
demo(age_buckets)

[[0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]]


# Categorical Columns

In [15]:
animal_type = feature_column.categorical_column_with_vocabulary_list('Type', ['Cat','Dog'])
animal_type_one_hot = feature_column.indicator_column(animal_type)
demo(animal_type_one_hot)

[[1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]]


# Embedding Columns

In [40]:

# Get all values of Breed1 column with a panda dataframe
breed_df = pd.read_csv(file_path, usecols=['Breed1'])
breed_df
""""
for item in (features['Breed1'].numpy()):
    print(item)
unique = {val for val in (features['Breed1'].numpy())}
print(unique)
"""
    
#breed1 = feature_column.categorical_column_with_vocabulary_list('Breed1', dataframe.Breed1)

'"\nfor item in (features[\'Breed1\'].numpy()):\n    print(item)\nunique = {val for val in (features[\'Breed1\'].numpy())}\nprint(unique)\n'