In [2]:
# Colab line magic: selects the TensorFlow 2.x series for this runtime. It must
# run before the first `import tensorflow` (the import cell follows below).
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [0]:
import numpy as np
import tensorflow as tf
import pandas as pd

from IPython.display import Image

In [0]:
# Pin the global NumPy and TensorFlow seeds so that randomly initialised values
# printed further down the notebook are repeatable from run to run.
np.random.seed(1)
tf.random.set_seed(1)

In [0]:
# Ten sample exam marks, stored under the feature key that the 'marks'
# feature columns look up.
data = dict(marks=[55, 21, 63, 88, 74, 54, 95, 41, 84, 52])

# Bucket edges 30, 40, ..., 90 (step of 10) used to bucketize the marks.
mark_boundaries = list(range(30, 91, 10))

**Numeric Data**

In [0]:
# Numeric feature column that reads the raw values stored under the 'marks' key.
marks = tf.feature_column.numeric_column('marks')

In [0]:
# Layer that converts a feature dict into a dense tensor using only the
# numeric `marks` column.
feature_layer = tf.keras.layers.DenseFeatures([marks])

In [130]:
# Apply the layer to the sample marks and show the result as a NumPy array
# (one row per mark).
marks_dense = feature_layer(data)
print(marks_dense.numpy())

[[55.]
 [21.]
 [63.]
 [88.]
 [74.]
 [54.]
 [95.]
 [41.]
 [84.]
 [52.]]


**Bucketized columns**

In [0]:
# Bucketized view of `marks`: the boundaries in `mark_boundaries` split the
# value range into len(mark_boundaries) + 1 buckets.
marks_buckets = tf.feature_column.bucketized_column(
    source_column=marks,
    boundaries=mark_boundaries,
)

In [93]:
# Inspect the bucketized column definition (its source column and boundaries).
# The column built above is bound to `marks_buckets`; no `input_buckets`
# variable exists in this notebook.
tf.print(marks_buckets)

BucketizedColumn(source_column=NumericColumn(key='marks', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(30, 40, 50, 60, 70, 80, 90))


In [0]:
# Rebind `feature_layer` to a layer that emits the bucketized (one-hot)
# encoding of the marks.
feature_layer = tf.keras.layers.DenseFeatures([marks_buckets])

In [150]:
print(data)
print(feature_layer(data).numpy())

{'marks': [55, 21, 63, 88, 74, 54, 95, 41, 84, 52]}
[[0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0.]]


In [0]:
# Categorical grade label for each of the ten samples, stored under the
# 'grade' feature key.
grade_data = dict(
    grade=[
        'average', 'poor', 'average', 'good', 'good',
        'average', 'good', 'average', 'good', 'average',
    ],
)

In [0]:
# Categorical column over a fixed vocabulary. The position in the list is the
# category id: 'poor' -> 0, 'average' -> 1, 'good' -> 2.
grade = tf.feature_column.categorical_column_with_vocabulary_list(
    key='grade',
    vocabulary_list=['poor', 'average', 'good'],
)

In [0]:
# Wrap the categorical column in an indicator column so it can be fed to a
# dense layer as a one-hot vector.
grade_one_hot = tf.feature_column.indicator_column(categorical_column=grade)

In [0]:
# Rebind `feature_layer` to a layer that emits the one-hot grade encoding.
feature_layer = tf.keras.layers.DenseFeatures([grade_one_hot])

In [127]:
# One row per sample; the column holding the 1 follows the vocabulary order
# (poor, average, good).
grade_dense = feature_layer(grade_data)
print(grade_dense.numpy())

Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
[[0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]]


**Embedding columns**

In [0]:
# Letter grade ("point") for each of the ten samples, stored under the
# 'point' feature key.
data_point = dict(
    point=['c', 'f', 'c+', 'b+', 'b', 'c', 'a', 'd+', 'b+', 'c'],
)

In [0]:
# Tabular view of the letter grades; used below to derive the distinct values.
df = pd.DataFrame(data=data_point)


In [0]:
# Vocabulary = the distinct letter grades found in the data.
point_vocabulary = df['point'].unique()
point = tf.feature_column.categorical_column_with_vocabulary_list(
    key='point',
    vocabulary_list=point_vocabulary,
)

# Represent each vocabulary entry as a 4-dimensional embedding vector instead
# of a one-hot vector.
point_embedding = tf.feature_column.embedding_column(
    categorical_column=point,
    dimension=4,
)

In [141]:
# Rebind `feature_layer` to the embedding lookup and print one 4-value row per
# sample. Samples with the same letter grade get the same row.
feature_layer = tf.keras.layers.DenseFeatures([point_embedding])
point_vectors = feature_layer(data_point)
print(point_vectors.numpy())

[[-0.5506101   0.77287585  0.191822   -0.43982893]
 [ 0.70276004  0.18992953  0.18149893 -0.2749475 ]
 [ 0.02970489 -0.25479472  0.17840278  0.594821  ]
 [ 0.1777171   0.05057557 -0.08051997  0.00889664]
 [-0.5663202  -0.53726435 -0.08880581  0.30030224]
 [-0.5506101   0.77287585  0.191822   -0.43982893]
 [ 0.16444746 -0.72300017  0.6914677  -0.3649164 ]
 [ 0.62651634 -0.324936   -0.2612484   0.02435946]
 [ 0.1777171   0.05057557 -0.08051997  0.00889664]
 [-0.5506101   0.77287585  0.191822   -0.43982893]]


In [0]:
# Hash each letter grade into one of 4 buckets instead of using a vocabulary.
# The data holds 7 distinct grades, so some of them must share a bucket.
point_hashed = tf.feature_column.categorical_column_with_hash_bucket(
    key='point',
    hash_bucket_size=4,
)


In [0]:
# Indicator (one-hot) encoding over the 4 hash buckets.
point_hash = tf.feature_column.indicator_column(categorical_column=point_hashed)

In [145]:
# Rebind `feature_layer` to the hashed one-hot encoding and print one row per
# sample; each row marks the hash bucket of that sample's letter grade.
feature_layer = tf.keras.layers.DenseFeatures([point_hash])
point_bucketed = feature_layer(data_point)
print(point_bucketed.numpy())

Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
[[1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 1. 0.]
 [1. 0. 0. 0.]
 [0. 0. 0. 1.]
 [1. 0. 0. 0.]
 [0. 0. 0. 1.]
 [1. 0. 0. 0.]]


**Crossed Features**

In [0]:
# Combined input for the crossed column: the same ten marks and grades as
# above, under their respective feature keys.
data_cross = dict(
    marks=[55, 21, 63, 88, 74, 54, 95, 41, 84, 52],
    grade=[
        'average', 'poor', 'average', 'good', 'good',
        'average', 'good', 'average', 'good', 'average',
    ],
)

In [0]:
# Cross the mark bucket with the grade category and hash every combination
# into one of 10 buckets.
crossed_feature = tf.feature_column.crossed_column(
    keys=[marks_buckets, grade],
    hash_bucket_size=10,
)


In [0]:
# Indicator (one-hot) encoding over the 10 hash buckets of the crossed feature.
cross_hash = tf.feature_column.indicator_column(categorical_column=crossed_feature)

In [158]:
# Rebind `feature_layer` to the crossed-feature encoding and print one
# 10-column row per sample, marking the bucket its (mark bucket, grade) pair
# hashes to.
feature_layer = tf.keras.layers.DenseFeatures([cross_hash])
crossed_dense = feature_layer(data_cross)
print(crossed_dense.numpy())

Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
[[0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]]
