In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.train import Example, FloatList, Feature, Features

In [2]:
# Load the California housing data, then split it into train / validation / test sets.
housing = fetch_california_housing()
x_train, x_test, y_train, y_test = train_test_split(
    housing.data, housing.target, random_state=42
)
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, random_state=42
)

# Fit the scaler on the training split only and keep its per-column statistics
# so they can be reused when normalizing individual features later on.
scaler = StandardScaler().fit(x_train)
x_means, x_stds = scaler.mean_, scaler.scale_

In [3]:
# Combine the training features and the training target into one labelled
# DataFrame for inspection. `housing` is already loaded by the data-loading
# cell, so the dataset is not fetched a second time here.
df_data = pd.DataFrame(
    np.c_[x_train, y_train],
    columns=housing.feature_names + housing.target_names,
)
df_data.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,3.5214,15.0,3.049945,1.106548,1447.0,1.605993,37.63,-122.43,1.442
1,5.3275,5.0,6.49006,0.991054,3464.0,3.44334,33.69,-117.39,1.687
2,3.1,29.0,7.542373,1.591525,1328.0,2.250847,38.44,-122.98,1.621
3,7.1736,12.0,6.289003,0.997442,1054.0,2.695652,33.55,-117.7,2.621
4,2.0549,13.0,5.312457,1.085092,3297.0,2.244384,33.93,-116.93,0.956


In [4]:
# Numeric feature column bound to the 'med_house_val' key (popped off as the target when parsing).
med_house_val = tf.feature_column.numeric_column(key='med_house_val')

In [5]:
# Locate the 'HouseAge' column by name instead of hard-coding its position,
# then pick that column's mean / scale from the scaler fitted on the training set.
age_idx = housing.feature_names.index('HouseAge')
age_mean, age_std = x_means[age_idx], x_stds[age_idx]

# Numeric feature column whose normalizer standardizes the raw house age
# with the training statistics above.
house_age = tf.feature_column.numeric_column(
    'house_age',
    normalizer_fn=lambda x: (x - age_mean) / age_std,
)

In [6]:
# Build the parsing spec for serialized Examples from the feature columns,
# and display it as the cell output.
columns = [house_age, med_house_val]
feature_descriptions = tf.feature_column.make_parse_example_spec(
    feature_columns=columns
)
feature_descriptions

{'house_age': FixedLenFeature(shape=(1,), dtype=tf.float32, default_value=None),
 'med_house_val': FixedLenFeature(shape=(1,), dtype=tf.float32, default_value=None)}

In [7]:
# Write one serialized Example per DataFrame row, carrying the house age and
# the median house value as single-element float lists.
with tf.io.TFRecordWriter('my_data_with_features.tfrecords') as writer:
    for age, value in zip(df_data['HouseAge'], df_data['MedHouseVal']):
        feature_map = {
            'house_age': Feature(float_list=FloatList(value=[age])),
            'med_house_val': Feature(float_list=FloatList(value=[value])),
        }
        example = Example(features=Features(feature=feature_map))
        writer.write(example.SerializeToString())

In [8]:
def parse_examples(serialized_examples):
    """Parse serialized Examples into a ``(features, targets)`` pair.

    Parsing uses the module-level ``feature_descriptions`` spec. The
    'med_house_val' entry is removed from the parsed dict and returned
    separately as the targets; the remaining dict is returned as the features.
    """
    parsed = tf.io.parse_example(serialized_examples, feature_descriptions)
    labels = parsed.pop('med_house_val')
    return parsed, labels

batch_size = 32
dataset = tf.data.TFRecordDataset(['my_data_with_features.tfrecords'])
# Shuffle *before* repeating: with repeat() first, the shuffle buffer mixes
# records across epoch boundaries, so a record can be drawn twice before
# another has been seen once. Shuffling first keeps each pass over the data
# complete, and the order is reshuffled on every repetition by default.
# Parsing is mapped after batching so each call parses a whole batch.
dataset = dataset.shuffle(10000).repeat().batch(batch_size).map(parse_examples)

In [9]:
# Peek at a single parsed batch: the feature dict first, then the targets.
for x_batch, y_batch in dataset.take(1):
    print(x_batch)
    print(y_batch)

{'house_age': <tf.Tensor: shape=(32, 1), dtype=float32, numpy=
array([[52.],
       [50.],
       [50.],
       [52.],
       [49.],
       [16.],
       [28.],
       [52.],
       [42.],
       [27.],
       [36.],
       [11.],
       [38.],
       [38.],
       [31.],
       [29.],
       [ 7.],
       [16.],
       [20.],
       [16.],
       [36.],
       [18.],
       [28.],
       [52.],
       [32.],
       [12.],
       [35.],
       [15.],
       [15.],
       [42.],
       [21.],
       [42.]], dtype=float32)>}
tf.Tensor(
[[1.625]
 [1.587]
 [1.066]
 [3.457]
 [2.682]
 [0.894]
 [0.48 ]
 [3.114]
 [1.649]
 [2.177]
 [2.882]
 [1.748]
 [2.794]
 [1.504]
 [0.938]
 [2.603]
 [1.417]
 [0.761]
 [0.991]
 [1.469]
 [4.414]
 [3.786]
 [1.79 ]
 [2.949]
 [1.5  ]
 [1.27 ]
 [1.797]
 [0.878]
 [1.804]
 [0.78 ]
 [0.847]
 [2.47 ]], shape=(32, 1), dtype=float32)
