## 3. Create TensorFlow Models using Estimator APIs

In [None]:
import tensorflow as tf
from tensorflow import data

# Function-call form of print: a single parenthesised argument prints the same
# text on Python 2 and is valid on Python 3, matching the print(...) calls used
# in the prediction cell of this notebook.
print('TensorFlow: {}'.format(tf.__version__))

## Experiment A: Train Linear Regression Model

1. Define dataset **metadata + input function** (to read and parse the data files)

2. Create **feature columns** based on metadata

3. Instantiate the **estimator** with feature columns 

4. **Train, evaluate, and predict** using the estimator and the data input function


### 1- Define Metadata &  Input Function

In [None]:
# Column names of the CSV files, in file order.
HEADER = [
    'weight_pounds',
    'is_male',
    'mother_age',
    'mother_race',
    'plurality',
    'gestation_weeks',
    'mother_married',
    'cigarette_use',
    'alcohol_use',
    'key',
]

# Column the model is trained to predict.
TARGET_FEATURE_NAME = 'weight_pounds'

# Row identifier column; parsed but not used as a model feature.
KEY_COLUMN = 'key'

# Per-column fallback values handed to tf.decode_csv as record_defaults,
# positionally aligned with HEADER (one single-element list per column).
DEFAULTS = [
    [0.0],      # weight_pounds
    ['null'],   # is_male
    [0.0],      # mother_age
    ['null'],   # mother_race
    [0.0],      # plurality
    [0.0],      # gestation_weeks
    ['null'],   # mother_married
    ['null'],   # cigarette_use
    ['null'],   # alcohol_use
    ['nokey'],  # key
]

In [None]:
def parse_csv_row(csv_row):
    """Decode CSV text into a feature dict and a target tensor.

    Args:
        csv_row: string tensor of CSV line(s); csv_input_fn calls this on a
            batch of lines.

    Returns:
        A (features, target) pair: `features` maps each HEADER name except the
        key and target columns to its decoded tensor, and `target` is the
        decoded TARGET_FEATURE_NAME column.
    """
    # A trailing axis is added before decoding, so each decoded column keeps it.
    expanded_rows = tf.expand_dims(csv_row, -1)
    decoded_columns = tf.decode_csv(expanded_rows, record_defaults=DEFAULTS)

    parsed = {name: tensor for name, tensor in zip(HEADER, decoded_columns)}

    label = parsed.pop(TARGET_FEATURE_NAME)
    # The key column is only an identifier, so it is dropped from the features.
    parsed.pop(KEY_COLUMN)
    return parsed, label

In [None]:
def csv_input_fn(file_name, mode=tf.estimator.ModeKeys.EVAL,
                 skip_header_lines=0,
                 num_epochs=1,
                 batch_size=500):
    """Build the (features, target) tensors an Estimator reads from CSV files.

    Args:
        file_name: path or glob pattern of the CSV file(s) to read.
        mode: a tf.estimator.ModeKeys value; rows are shuffled only for TRAIN.
        skip_header_lines: number of leading lines to drop from each matched
            file.
        num_epochs: number of passes over the data (passed to Dataset.repeat).
        batch_size: number of CSV lines per batch.

    Returns:
        A (features, target) pair, as produced by parse_csv_row, for the next
        batch of the one-shot iterator.
    """
    shuffle = mode == tf.estimator.ModeKeys.TRAIN

    file_names = tf.matching_files(file_name)

    # Build one line dataset per file and skip inside it. Skipping on a single
    # TextLineDataset over all files would only drop lines from the first file,
    # leaving the header rows of the remaining files in the stream.
    dataset = data.Dataset.from_tensor_slices(file_names).flat_map(
        lambda path: data.TextLineDataset(path).skip(skip_header_lines))

    if shuffle:
        dataset = dataset.shuffle(buffer_size=2 * batch_size + 1)

    # Lines are batched first, so parse_csv_row decodes a whole batch at once.
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(parse_csv_row)
    dataset = dataset.repeat(num_epochs)
    iterator = dataset.make_one_shot_iterator()

    features, target = iterator.get_next()
    return features, target

### 2- Create Feature Columns

In [None]:
def create_feature_columns():
    """Return the list of feature columns describing the model inputs.

    Numeric inputs become numeric columns; string inputs become categorical
    columns with an explicit vocabulary list. The returned list order is:
    is_male, mother_age, mother_race, plurality, gestation_weeks,
    mother_married, cigarette_use, alcohol_use.
    """
    numeric = tf.feature_column.numeric_column
    categorical = tf.feature_column.categorical_column_with_vocabulary_list

    # NOTE(review): 'Samaon' looks like a misspelling of 'Samoan', but the
    # vocabulary must match the strings in the data files exactly — confirm
    # against the data before changing it.
    race_vocabulary = [
        'White', 'Black', 'American Indian', 'Chinese',
        'Japanese', 'Hawaiian', 'Filipino', 'Unknown',
        'Asian Indian', 'Korean', 'Samaon', 'Vietnamese',
    ]

    return [
        categorical('is_male', ['True', 'False']),
        numeric('mother_age'),
        categorical('mother_race', race_vocabulary),
        numeric('plurality'),
        numeric('gestation_weeks'),
        categorical('mother_married', ['True', 'False']),
        categorical('cigarette_use', ['True', 'False', 'None']),
        categorical('alcohol_use', ['True', 'False', 'None']),
    ]

### 3- Instantiate a Linear Regression Estimator

In [None]:
# NOTE(review): `os` and `local_models_dir` are not defined in the cells shown
# in this section — confirm an earlier cell imports/defines them.
feature_columns = create_feature_columns()

# The estimator is given the "lr_estimator" sub-directory as its model_dir.
model_dir = os.path.join(local_models_dir, "lr_estimator")

lr_estimator = tf.estimator.LinearRegressor(
    model_dir=model_dir,
    feature_columns=feature_columns,
)


### 4- Train, Evaluate, and Predict

In [None]:
%%bash

# List the local data directory that the training and evaluation cells read from.
ls data/babyweight

##### a) Train the model

In [None]:
import shutil

train_data_files = "data/babyweight/train.csv"


def train_input_fn():
    """Training input: TRAIN mode (shuffled), 10 epochs, batches of 200 lines."""
    return csv_input_fn(
        train_data_files,
        mode=tf.estimator.ModeKeys.TRAIN,
        num_epochs=10,
        batch_size=200,
    )


# remove the following line of code to resume training
shutil.rmtree(model_dir, ignore_errors=True)

lr_estimator.train(train_input_fn, max_steps=1000)

In [None]:
%%bash

# List what training wrote for the linear-regression estimator.
# NOTE(review): this path is hard-coded; presumably it is the same directory as
# `model_dir` in the Python cells — confirm.
ls models/babyweight/lr_estimator

##### b) Evaluate the trained model

In [None]:
eval_data_files = "data/babyweight/eval.csv"


def eval_input_fn():
    """Evaluation input: csv_input_fn defaults (EVAL mode, one epoch, batches of 500)."""
    return csv_input_fn(eval_data_files)


lr_estimator.evaluate(eval_input_fn)

##### c) Predict using the trained model

In [None]:
import itertools

predictions = lr_estimator.predict(eval_input_fn)

# islice stops after five items, so only the first five predictions are pulled;
# each item is indexed as item["predictions"][0] to get the predicted value.
values = list(
    item["predictions"][0] for item in itertools.islice(predictions, 5)
)

print("")
print("Predicted Values: {}".format(values))