In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import shutil

import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
train_df = pd.read_csv('./train.csv', index_col='ID')

In [3]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 333 entries, 1 to 506
Data columns (total 14 columns):
crim       333 non-null float64
zn         333 non-null float64
indus      333 non-null float64
chas       333 non-null int64
nox        333 non-null float64
rm         333 non-null float64
age        333 non-null float64
dis        333 non-null float64
rad        333 non-null int64
tax        333 non-null int64
ptratio    333 non-null float64
black      333 non-null float64
lstat      333 non-null float64
medv       333 non-null float64
dtypes: float64(11), int64(3)
memory usage: 39.0 KB


In [4]:
train = train_df[:222]
test = train_df[222:]

train[['medv','crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax', 'ptratio', 'black', 'lstat']].to_csv('./train_headless.csv', index=False, header=False)
test[['medv','crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax', 'ptratio', 'black', 'lstat']].to_csv('./valid_headless.csv', index=False, header=False)

# Define Feature Columns

In [5]:
crim = tf.feature_column.numeric_column('crim', dtype=tf.float64)
zn = tf.feature_column.numeric_column('zn', dtype=tf.float64)
indus = tf.feature_column.numeric_column('indus', dtype=tf.float64)
chas = tf.feature_column.numeric_column('chas', dtype=tf.int64)
nox = tf.feature_column.numeric_column('nox', dtype=tf.float64)
rm = tf.feature_column.numeric_column('rm', dtype=tf.float64)
age = tf.feature_column.numeric_column('age', dtype=tf.float64)
dis = tf.feature_column.numeric_column('dis', dtype=tf.float64)
rad = tf.feature_column.numeric_column('rad', dtype=tf.int64)
tax = tf.feature_column.numeric_column('tax', dtype=tf.int64)
ptratio = tf.feature_column.numeric_column('ptratio', dtype=tf.float64)
black = tf.feature_column.numeric_column('black', dtype=tf.float64)
lstat = tf.feature_column.numeric_column('lstat', dtype=tf.float64)

In [6]:
feature_cols = [crim, zn, indus, chas, nox, rm, age, dis, rad, tax, ptratio, black, lstat]

# Prepare Data for Input Functions

In [7]:
CSV_COLUMNS = ['medv', 'crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax', 'ptratio', 'black', 'lstat']
LABEL_COLUMN = 'medv'
DEFAULTS = [[0.0], [0.0], [0.0], [0.0], [0], [0.0], [0.0], [0.0], [0.0], [0], [0], [0.0], [0.0], [0.0]]

def read_dataset(filename, mode, batch_size = 16):
  def _input_fn():
    def decode_csv(value_column):
      columns = tf.decode_csv(value_column, record_defaults = DEFAULTS)
      features = dict(zip(CSV_COLUMNS, columns))
      label = features.pop(LABEL_COLUMN)
      return features, label

    # Create list of files that match pattern
    file_list = tf.gfile.Glob(filename)

    # Create dataset from file list
    dataset = tf.data.TextLineDataset(file_list).map(decode_csv)
    if mode == tf.estimator.ModeKeys.TRAIN:
        num_epochs = None # indefinitely
        dataset = dataset.shuffle(buffer_size = 10 * batch_size)
    else:
        num_epochs = 1 # end-of-input after this

    dataset = dataset.repeat(num_epochs).batch(batch_size)
    return dataset.make_one_shot_iterator().get_next()
  return _input_fn

# Train and Eval

In [8]:
def serving_input_fn():
  feature_placeholders = {
    'crim' : tf.placeholder(tf.float64, [None]),
    'zn' : tf.placeholder(tf.float64, [None]),
    'indus' : tf.placeholder(tf.float64, [None]),
    'chas' : tf.placeholder(tf.int64, [None]),
    'nox' : tf.placeholder(tf.float64, [None]),
    'rm' : tf.placeholder(tf.float64, [None]),
    'age' : tf.placeholder(tf.float64, [None]),
    'dis' : tf.placeholder(tf.float64, [None]),
    'rad' : tf.placeholder(tf.int64, [None]),
    'tax' : tf.placeholder(tf.int64, [None]),
    'ptratio' : tf.placeholder(tf.float64, [None]),
    'black' : tf.placeholder(tf.float64, [None]),
    'lstat' : tf.placeholder(tf.float64, [None])
  }
  features = {
      key: tf.expand_dims(tensor, -1)
      for key, tensor in feature_placeholders.items()
  }
  return tf.estimator.export.ServingInputReceiver(features, feature_placeholders)

In [9]:
def train_and_evaluate(output_dir, num_train_steps):
  estimator = tf.estimator.LinearRegressor(
                       model_dir = output_dir,
                       feature_columns = feature_cols)
  train_spec=tf.estimator.TrainSpec(
                       input_fn = read_dataset('./train_headless.csv', mode = tf.estimator.ModeKeys.TRAIN),
                       max_steps = num_train_steps)
  exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
  eval_spec=tf.estimator.EvalSpec(
                       input_fn = read_dataset('./valid_headless.csv', mode = tf.estimator.ModeKeys.EVAL),
                       steps = None,
                       start_delay_secs = 1, # start evaluating after N seconds
                       throttle_secs = 10,  # evaluate every N seconds
                       exporters = exporter)
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

# Run Training

In [26]:
# Run training    
OUTDIR = 'boston_trained'
shutil.rmtree(OUTDIR, ignore_errors = True) # start fresh each time
train_and_evaluate(OUTDIR, num_train_steps = 1000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'boston_trained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x109d99cc0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 10 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:

In [20]:
test_df = pd.read_csv('./test.csv', index_col='ID')
print(test[:1])

        crim   zn  indus  chas    nox     rm   age     dis  rad  tax  ptratio  \
ID                                                                              
337  0.03427  0.0   5.19     0  0.515  5.869  46.3  5.2311    5  224     20.2   

     black  lstat  medv  
ID                       
337  396.9    9.8  19.5  


In [21]:
%%writefile ./test_2.json
{"crim":0.03427, "zn": 0.0, "indus": 5.19, "chas": 0, "nox": 0.515, "rm": 5.869, "age": 46.3, "dis": 5.2311, "rad": 5, "tax": 224, "ptratio": 20.2, "black": 396.9, "lstat": 9.8}

Writing ./test_2.json


In [25]:
%%bash
python -c 'import tensorflow'

Traceback (most recent call last):
  File "<string>", line 1, in <module>
ModuleNotFoundError: No module named 'tensorflow'


In [23]:
%%bash
gcloud ml-engine local predict --model-dir=./boston_trained/export/exporter/1534303990 --json-instances=./test_2.json --verbosity debug

DEBUG: Running [gcloud.ml-engine.local.predict] with arguments: [--json-instances: "./test_2.json", --model-dir: "./boston_trained/export/exporter/1534303990", --verbosity: "debug"]
DEBUG: (gcloud.ml-engine.local.predict) Cannot import Tensorflow. Please verify "python -c 'import tensorflow'" works.
Traceback (most recent call last):
  File "/Users/robert/google-cloud-sdk/lib/googlecloudsdk/calliope/cli.py", line 848, in Execute
    resources = calliope_command.Run(cli=self, args=args)
  File "/Users/robert/google-cloud-sdk/lib/googlecloudsdk/calliope/backend.py", line 770, in Run
    resources = command_instance.Run(args)
  File "/Users/robert/google-cloud-sdk/lib/surface/ml_engine/local/predict.py", line 76, in Run
    framework=framework_flag)
  File "/Users/robert/google-cloud-sdk/lib/googlecloudsdk/command_lib/ml_engine/local_utils.py", line 101, in RunPredict
    raise LocalPredictRuntimeError(err)
LocalPredictRuntimeError: Cannot import Tensorflow. Please verify "python -c 'impo

In [13]:
%%bash
echo $PWD

/Users/robert/Documents/Kaggle/Boston


In [14]:
%%bash
ls $PWD/boston_trained/export/exporter/

1534303982
1534303990


In [16]:
%%bash
model_dir=$(ls ${PWD}/boston_trained/export/exporter)
gcloud ml-engine local predict \
    --model-dir=${PWD}/boston_trained/export/exporter/${model_dir} \
    --json-instances=./test.json

ERROR: (gcloud.ml-engine.local.predict) unrecognized arguments: 1534303990
Usage: gcloud ml-engine local predict --model-dir=MODEL_DIR (--json-instances=JSON_INSTANCES | --text-instances=TEXT_INSTANCES) [optional flags]
  optional flags may be  --framework | --help | --json-instances |
                         --text-instances

For detailed information on this command and its flags, run:
  gcloud ml-engine local predict --help


In [15]:
preds = estimator.predict(input_fn=val_input)

NameError: name 'estimator' is not defined

In [None]:
predictions = np.array([item['predictions'][0] for item in preds])

In [None]:
predictions

In [None]:
print(train_e)

In [None]:
print(test_e)