In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import pandas as pd

In [3]:
# Fetch the Auto MPG data file via Keras' download helper and remember the
# local path it returns; later cells read the file from that path.
dataset_path = tf.keras.utils.get_file(
    fname="auto-mpg.data",
    origin=(
        "http://archive.ics.uci.edu/ml/machine-learning"
        "-databases/auto-mpg/auto-mpg.data"
    ),
)

In [4]:
# Header names to assign to the parsed columns, in file order.
column_names = [
    'MPG',
    'Cylinders',
    'Displacement',
    'Horsepower',
    'Weight',
    'Acceleration',
    'ModelYear',
    'Origin',
]

In [5]:
# Parse the downloaded file into a DataFrame labelled with `column_names`.
#   * sep=' ' with skipinitialspace=True: fields are space-delimited and any
#     extra spaces following a delimiter are skipped.
#   * na_values='?': a '?' entry is read as NaN.
#   * comment='\t': a tab is treated as the comment character, so the rest of
#     each line from the first tab onward is ignored (presumably a trailing
#     free-text field in the raw file; TODO confirm against the data).
df = pd.read_csv(
    dataset_path,
    sep=' ',
    skipinitialspace=True,
    comment='\t',
    na_values='?',
    names=column_names,
)
df.head()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,ModelYear,Origin
0,18.0,8,307.0,130.0,3504.0,12.0,70,1
1,15.0,8,350.0,165.0,3693.0,11.5,70,1
2,18.0,8,318.0,150.0,3436.0,11.0,70,1
3,16.0,8,304.0,150.0,3433.0,12.0,70,1
4,17.0,8,302.0,140.0,3449.0,10.5,70,1


In [6]:
# Discard every row that contains a missing value, then renumber the remaining
# rows from 0 (drop=True discards the old index instead of keeping it as a
# column).
df = df.dropna().reset_index(drop=True)
df.head()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,ModelYear,Origin
0,18.0,8,307.0,130.0,3504.0,12.0,70,1
1,15.0,8,350.0,165.0,3693.0,11.5,70,1
2,18.0,8,318.0,150.0,3436.0,11.0,70,1
3,16.0,8,304.0,150.0,3433.0,12.0,70,1
4,17.0,8,302.0,140.0,3449.0,10.5,70,1


In [7]:
import sklearn
import sklearn.model_selection
# Split the rows 80% / 20% into training and test sets. A fixed random_state
# pins the shuffle, so the split (and every statistic derived from it) is the
# same on each Restart & Run All instead of changing from run to run.
df_train, df_test = sklearn.model_selection.train_test_split(
    df, train_size=0.8, random_state=1)

# Summary statistics of the TRAINING split only, transposed so that each
# original column becomes one row; the 'mean' and 'std' entries are read later
# when the numeric features are standardized.
train_stats = df_train.describe().transpose()
train_stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
MPG,313.0,23.713099,7.745904,9.0,17.6,23.0,29.0,46.6
Cylinders,313.0,5.405751,1.690347,3.0,4.0,4.0,6.0,8.0
Displacement,313.0,188.92492,101.093516,68.0,98.0,140.0,260.0,455.0
Horsepower,313.0,102.539936,37.192403,46.0,75.0,92.0,120.0,230.0
Weight,313.0,2940.731629,836.720428,1755.0,2215.0,2725.0,3530.0,5140.0
Acceleration,313.0,15.664217,2.691403,8.0,14.0,15.5,17.2,24.8
ModelYear,313.0,76.047923,3.703897,70.0,73.0,76.0,79.0,82.0
Origin,313.0,1.594249,0.807556,1.0,1.0,1.0,2.0,3.0


In [8]:
# Columns that are standardized and fed to the model as plain numeric features.
numeric_column_names = [
    'Cylinders',
    'Displacement',
    'Horsepower',
    'Weight',
    'Acceleration',
]

In [9]:
# Work on copies so that rescaling the features leaves df_train / df_test
# themselves unchanged.
df_train_norm = df_train.copy()
df_test_norm = df_test.copy()

In [10]:
# Standardize each numeric feature: subtract the training-set mean and divide
# by the training-set standard deviation. The SAME training statistics are
# applied to the test split, so nothing computed from test rows is used.
for col_name in numeric_column_names:
    mean = train_stats.loc[col_name, 'mean']
    std = train_stats.loc[col_name, 'std']

    # Replace the whole column (df[col] = ...) rather than writing through
    # .loc[:, col]. The .loc form sets values into the existing column, and for
    # an integer column such as 'Cylinders' that means storing floats in an
    # int64 column, which pandas deprecates as an incompatible-dtype
    # assignment. Whole-column assignment simply installs the new float Series.
    df_train_norm[col_name] = (df_train_norm[col_name] - mean) / std
    df_test_norm[col_name] = (df_test_norm[col_name] - mean) / std

In [11]:
# Build one numeric feature column per entry of numeric_column_names, keyed by
# the column name.
numeric_features = [
    tf.feature_column.numeric_column(key=name)
    for name in numeric_column_names
]

In [12]:
# Echo the list so the notebook shows the repr of every feature column built
# in the previous cell.
numeric_features

[NumericColumn(key='Cylinders', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Displacement', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Horsepower', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Weight', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Acceleration', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

In [13]:
# Numeric source column for the model year; it is not used directly but fed
# into the bucketized column below.
feature_year = tf.feature_column.numeric_column(key='ModelYear')

# Discretize ModelYear at the boundaries 73, 76 and 79 (three cut points,
# hence four ranges) and collect the resulting column in a list.
bucketized_features = [
    tf.feature_column.bucketized_column(
        source_column=feature_year,
        boundaries=[73, 76, 79],
    ),
]
bucketized_features

[BucketizedColumn(source_column=NumericColumn(key='ModelYear', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(73, 76, 79))]

In [16]:
# Treat 'Origin' as a categorical feature whose known values are the integer
# codes 1, 2 and 3.
feature_origin = tf.feature_column.categorical_column_with_vocabulary_list(
    key='Origin',
    vocabulary_list=[1, 2, 3],
)

In [17]:
# Wrap the categorical 'Origin' column in an indicator column and keep it in a
# list, mirroring how the other feature groups are collected.
categorical_indicator_features = [
    tf.feature_column.indicator_column(feature_origin),
]
categorical_indicator_features

[IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='Origin', vocabulary_list=(1, 2, 3), dtype=tf.int64, default_value=-1, num_oov_buckets=0))]