[View in Colaboratory](https://colab.research.google.com/github/santoshgurujula/ML_TF/blob/master/pedictVolume_colab.ipynb)

In [5]:
import tensorflow as tf
import numpy as np
import shutil
# Show the installed TensorFlow version (the recorded output of this cell is 1.10.1).
print(tf.__version__)

1.10.1


In [0]:
# Column names, in the order the fields appear on each CSV line.
CSV_COLUMNS = ['radius', 'height', 'volume']
# Name of the column that is split off from the features and used as the label.
LABEL_COLUMN = 'volume'
# One single-element float default per CSV column (passed as `record_defaults`).
DEFAULTS = [[0.0] for _ in CSV_COLUMNS]

def read_dataset(filename, mode, batch_size = 512):
  """Build an input function that reads (features, label) batches from CSV.

  Args:
    filename: file name or glob pattern handed to `tf.data.Dataset.list_files`
      (e.g. data_file_*.csv).
    mode: a `tf.estimator.ModeKeys` value. In TRAIN mode the records are
      shuffled and repeated indefinitely; in any other mode they are read
      exactly once, so the input ends after one pass.
    batch_size: number of records per batch; the shuffle buffer holds
      10 * batch_size records.

  Returns:
    A zero-argument function. Calling it builds the dataset pipeline and
    returns the `get_next()` tensors of a one-shot iterator: a dict of
    feature tensors keyed by column name, and the label tensor.
  """
  def _parse_line(line):
    # One text line -> {column name: tensor}; the label column is popped out
    # of the dict and returned separately.
    fields = tf.decode_csv(line, record_defaults = DEFAULTS)
    features = dict(zip(CSV_COLUMNS, fields))
    label = features.pop(LABEL_COLUMN)
    return features, label

  def _input_fn():
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # flat_map: one-to-many (file name -> its text lines)
    # map:      one-to-one  (text line -> parsed features and label)
    dataset = (tf.data.Dataset.list_files(filename)
               .flat_map(tf.data.TextLineDataset)
               .map(_parse_line))

    if is_training:
      dataset = dataset.shuffle(buffer_size = 10 * batch_size)

    # None repeats forever (training); 1 yields end-of-input after one pass.
    num_epochs = None if is_training else 1
    batched = dataset.repeat(num_epochs).batch(batch_size)

    return batched.make_one_shot_iterator().get_next()
  return _input_fn
    

def get_train():
  """Return the TRAIN-mode input function for the training CSV."""
  train_path = './ML_TF/volume-train.csv'
  return read_dataset(train_path, mode = tf.estimator.ModeKeys.TRAIN)

def get_valid():
  """Return the EVAL-mode input function for the validation CSV."""
  valid_path = './ML_TF/volume-valid.csv'
  return read_dataset(valid_path, mode = tf.estimator.ModeKeys.EVAL)

def get_test():
  """Return the EVAL-mode input function for the test CSV."""
  test_path = './ML_TF/volume-test.csv'
  return read_dataset(test_path, mode = tf.estimator.ModeKeys.EVAL)

In [0]:
# One numeric feature column per input feature read from the CSV files.
INPUT_COLUMNS = [
    tf.feature_column.numeric_column(column_name)
    for column_name in ('radius', 'height')
]

def add_more_features(feats):
  """Hook for adding engineered feature columns.

  Nothing is added yet: the `feats` object passed in is returned as-is.
  """
  return feats

# Feature columns handed to the estimator: INPUT_COLUMNS passed through the
# add_more_features hook.
feature_cols = add_more_features(INPUT_COLUMNS)

In [15]:
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once per notebook.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# List files in the Drive root whose title contains '.csv', printing each
# file's title and id.
#
# Search query reference:
# https://developers.google.com/drive/v2/web/search-parameters
listed = drive.ListFile({'q': "title contains '.csv' and 'root' in parents"}).GetList()
for file in listed:
  print('title {}, id {}'.format(file['title'], file['id']))

title volume-test.csv, id 1uy39AC75Oo2MmMfd73bktPthXrm0-85X
title volume-valid.csv, id 1Ev_aeAgvyoJnYPcPN6_40X4QimDByw3C
title volume-train.csv, id 1o1QdWY6HZsz7ChdKPpxg0rnqARM_Wh4X


In [0]:
import os

# Local directory that files fetched from Google Drive are written into.
download_path = os.path.expanduser('~/data')
# exist_ok=True makes re-running this cell a no-op when the directory already
# exists, while still raising if the path is occupied by something that is
# not a directory (a bare `except FileExistsError: pass` would hide that).
os.makedirs(download_path, exist_ok=True)

In [14]:
# Fetch the Drive file with this id and write its content to
# <download_path>/volume-train.csv.
# NOTE(review): the recorded output of this cell is an InvalidConfigError, and
# its execution count (14) is lower than the authentication cell's (15), so it
# looks like it ran before `drive` was authenticated — confirm by re-running
# the cells in order. The input functions read their CSVs from './ML_TF/', not
# from this download location.
output_file = os.path.join(download_path, 'volume-train.csv')
temp_file = drive.CreateFile({'id': '1o1QdWY6HZsz7ChdKPpxg0rnqARM_Wh4X'})
temp_file.GetContentFile(output_file)

InvalidConfigError: ignored

In [16]:
# Clone the repository into ./ML_TF; get_train/get_valid/get_test read their
# CSV files from that directory.
!git clone https://github.com/santoshgurujula/ML_TF

Cloning into 'ML_TF'...
remote: Counting objects: 11, done.[K
remote: Compressing objects: 100% (9/9), done.[K
remote: Total 11 (delta 1), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (11/11), done.


In [20]:
# Emit INFO-level TensorFlow logs so training progress is visible.
tf.logging.set_verbosity(tf.logging.INFO)
# Model directory for this run (checkpoints are written here).
OUTDIR = 'volume_trained3'
#TensorBoard().start(OUTDIR)
shutil.rmtree(OUTDIR, ignore_errors = True) # start fresh each time
# NOTE(review): in the recorded evaluation logs prediction/mean (~4e4) is far
# below label/mean (~6e8 and ~3e10); a model that is linear in radius and
# height presumably cannot capture the volume target — consider adding
# engineered features through add_more_features. TODO confirm.
model = tf.estimator.LinearRegressor(
      feature_columns = feature_cols, model_dir = OUTDIR)
# max_steps bounds training: the TRAIN-mode input function repeats indefinitely.
model.train(input_fn = get_train(), max_steps = 3000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'volume_trained3', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f90bffb1898>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into volume_trained3/model.ckpt.
INFO:tenso

<tensorflow.python.estimator.canned.linear.LinearRegressor at 0x7f90bffb1fd0>

In [21]:
def print_rmse(model, name, input_fn):
  """Evaluate `model` and print the square root of its average loss.

  Args:
    model: object exposing `evaluate(input_fn=..., steps=...)` that returns
      a metrics dict containing the key 'average_loss'.
    name: dataset label interpolated into the printed message.
    input_fn: input function forwarded to `model.evaluate`.

  `steps = None` is passed through to `evaluate` unchanged.
  """
  eval_results = model.evaluate(input_fn = input_fn, steps = None)
  rmse = np.sqrt(eval_results['average_loss'])
  print('RMSE on {} dataset = {}'.format(name, rmse))
# Report the trained model's RMSE on the validation split.
print_rmse(model, 'validation', get_valid())

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-09-24-09:38:42
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from volume_trained3/model.ckpt-3000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-09-24-09:38:42
INFO:tensorflow:Saving dict for global step 3000: average_loss = 7.627669e+17, global_step = 3000, label/mean = 583032100.0, loss = 3.8138343e+20, prediction/mean = 43370.723
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 3000: volume_trained3/model.ckpt-3000
RMSE on validation dataset = 873365248.0


In [22]:
# Report the trained model's RMSE on the test split. The label must read
# 'test' so the printed line is not mistaken for the validation result.
print_rmse(model, 'test', get_test())

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-09-24-09:39:00
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from volume_trained3/model.ckpt-3000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-09-24-09:39:00
INFO:tensorflow:Saving dict for global step 3000: average_loss = 1.7619267e+21, global_step = 3000, label/mean = 30846015000.0, loss = 3.224326e+23, prediction/mean = 47790.754
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 3000: volume_trained3/model.ckpt-3000
RMSE on validation dataset = 41975308288.0
