# Feature columns bridge the gap between the columns in a CSV file and the features used to train a model

In [2]:
import tensorflow as tf
import numpy as np


In [None]:
# Under the hood, feature columns describe how each raw input is packed
# into the model's input vector.
featcols = [
    # Real-valued input, used as-is.
    tf.feature_column.numeric_column(key="sq_footage"),
    # String input mapped through a fixed vocabulary; conceptually this is a
    # "one-hot" encoding of the category:
    #   "house" -> 1, 0
    #   "apt"   -> 0, 1
    tf.feature_column.categorical_column_with_vocabulary_list(
        key="type",
        vocabulary_list=["house", "apt"],
    ),
]

# `fc.bucketized_column` splits a numeric feature into categories based on numeric ranges

In [5]:
NBUCKETS = 16
# Evenly spaced bucket boundaries over the latitude and longitude ranges.
# Note: a list of N boundaries produces N + 1 buckets, because values below
# the first boundary and at/above the last boundary each get their own bucket.
latbuckets = np.linspace(start=38.0, stop=42.0, num=NBUCKETS).tolist()
lonbuckets = np.linspace(start=-76.0, stop=-72.0, num=NBUCKETS).tolist()

fc = tf.feature_column
# Create bucketized columns for pickup latitude and pickup longitude.
# `source_column` must be a numeric column (not another bucketized column),
# and each coordinate is paired with its own boundary list.
fc_bucketized_plat = fc.bucketized_column(
    source_column=fc.numeric_column("pickup_latitude"),
    boundaries=latbuckets
)
fc_bucketized_plon = fc.bucketized_column(
    source_column=fc.numeric_column("pickup_longitude"),
    boundaries=lonbuckets
)

# Representing feature columns as sparse vectors

In [4]:
# Vocabulary known ahead of time: list every expected key explicitly.
tf.feature_column.categorical_column_with_vocabulary_list(
    key='zipcode',
    vocabulary_list=['12345', '45678', '78900'],
)

# Data already indexed, i.e. integer values in the range [0, num_buckets):
tf.feature_column.categorical_column_with_identity(
    key='schoolsRatings',
    num_buckets=2,
)

# No vocabulary of all possible values available: hash each value into a
# fixed number of buckets instead.
tf.feature_column.categorical_column_with_hash_bucket(
    key='nearStoreID',
    hash_bucket_size=500,
)

HashedCategoricalColumn(key='nearStoreID', hash_bucket_size=500, dtype=tf.string)

`fc.embedding_column` represents data as a lower-dimensional, dense vector in which each cell contains a number, not just 0 or 1
```jupyterpython
# Embed the categorical column into a 3-dimensional dense vector.
# NOTE(review): `fc_crossed_ploc` is not defined in this excerpt — presumably a
# crossed column built from the bucketized pickup coordinates; confirm.
fc_ploc = fc.embedding_column(
    categorical_column=fc_crossed_ploc,
    dimension=3,
)
```