In [2]:
#@title Imports
import math
import os

from IPython import display
from matplotlib import cm
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
import tensorflow as tf
from tensorflow.python.data import Dataset

# Only surface TensorFlow log messages at ERROR level.
tf.logging.set_verbosity(tf.logging.ERROR)
# Cap the number of rows pandas renders for a DataFrame at 10.
pd.options.display.max_rows = 10
# Render floats with a single digit after the decimal point.
pd.options.display.float_format = '{:.1f}'.format

ImportError: No module named data

In [0]:
#@title Get Data
california_housing_dataframe = pd.read_csv("https://storage.googleapis.com/mledu-datasets/california_housing_train.csv", sep=",")
# Shuffle the rows so the head/tail split below is a random split.
california_housing_dataframe = california_housing_dataframe.reindex(
    np.random.permutation(california_housing_dataframe.index))
# Display the summary explicitly: a bare `.describe()` that is not the last
# expression of a cell is evaluated and then silently discarded.
display.display(california_housing_dataframe.describe())

# Percentage of rows assigned to the training split. This parameter used to be
# ignored in favour of a hard-coded 0.722 fraction; it now drives the split and
# defaults to that same fraction.
train_percent = 72.2 #@param
num_rows = california_housing_dataframe.shape[0]
# Divide by 100.0 (not 100) so the arithmetic stays in floating point on Python 2.
train_size = int(num_rows * train_percent / 100.0)
test_size = num_rows - train_size
# Copy the slices so they do not keep referencing the full frame deleted below.
train_df = california_housing_dataframe.head(train_size).copy()
test_df = california_housing_dataframe.tail(test_size).copy()
del california_housing_dataframe

In [0]:
# Name of the label column.
target_col = 'median_house_value'

# Names of the columns used as model inputs.
feature_cols = [
    'total_rooms',
    'total_bedrooms',
    'population',
    'households',
    'median_income',
]


In [0]:
# Select the input feature columns for the training and test splits.
my_feature_train = train_df[feature_cols]
my_feature_test = test_df[feature_cols]

# Select the label column for the training and test splits.
targets_train = train_df[target_col]
targets_test = test_df[target_col]

# Build one numeric TensorFlow feature column per input feature name.
feature_columns = [tf.feature_column.numeric_column(name) for name in feature_cols]


In [0]:
def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Builds a tf.data input pipeline and returns tensors for its next batch.

    Args:
      features: pandas DataFrame of features
      targets: pandas Series or DataFrame of targets
      batch_size: Size of batches to be passed to the model
      shuffle: True or False. Whether to shuffle the data.
      num_epochs: Number of epochs for which data should be repeated. None = repeat indefinitely
    Returns:
      Tuple of (features, labels) for next data batch
    """

    # Convert pandas data into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct the dataset through the public tf.data API rather than the
    # private tensorflow.python.data module.
    ds = tf.data.Dataset.from_tensor_slices((features, targets))  # warning: 2GB limit

    # Shuffle individual examples *before* batching and repeating. Shuffling
    # afterwards only reorders whole batches (their contents never change) and
    # lets batches from different epochs interleave.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    # Configure batching/repeating.
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Return the next batch of data
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels

## LINEAR REGRESSION

In [0]:
# Gradient descent with a learning rate of 0.0000001, wrapped by
# clip_gradients_by_norm with a clip value of 5.0.
my_optimizer = tf.contrib.estimator.clip_gradients_by_norm(
    tf.train.GradientDescentOptimizer(learning_rate=0.0000001),
    5.0)

# Linear regression model over the feature columns, trained with the
# optimizer configured above.
linear_regressor = tf.estimator.LinearRegressor(
    optimizer=my_optimizer,
    feature_columns=feature_columns)

In [0]:
#@title TRAIN
def train_input_fn():
    """Training input: batches of 100 rows for 4 epochs, my_input_fn's default shuffling."""
    return my_input_fn(my_feature_train,
                       targets_train,
                       batch_size=100,
                       num_epochs=4)

# Assign the result to `_` so the cell does not display it.
_ = linear_regressor.train(input_fn=train_input_fn)

In [110]:
def pred_input_fn_train():
    """Prediction input: one unshuffled pass over the training split."""
    return my_input_fn(my_feature_train,
                       targets_train,
                       num_epochs=1,
                       shuffle=False)


def pred_input_fn_test():
    """Prediction input: one unshuffled pass over the test split."""
    return my_input_fn(my_feature_test,
                       targets_test,
                       num_epochs=1,
                       shuffle=False)

def get_preds_with_metric(model,input_fn,targets,desc,is_dict=True):
  """Runs `model.predict` and prints the prediction mean, MSE and RMSE.

  Args:
    model: Object whose `predict(input_fn=...)` yields one item per example.
    input_fn: Input function forwarded to `model.predict`.
    targets: True target values, paired positionally with the predictions.
    desc: Name of the data split, interpolated into the printed messages.
    is_dict: True when each yielded item is a mapping whose 'predictions'
      entry holds the predicted value at index 0; False when each yielded
      item is the predicted value itself.
  """
  predictions = model.predict(input_fn=input_fn)
  if is_dict:
    preds = [pred['predictions'][0] for pred in predictions]
  else:
    preds = list(predictions)
  # scikit-learn's signature is (y_true, y_pred): pass the targets first.
  mean_squared_error = metrics.mean_squared_error(targets, preds)
  root_mean_squared_error = math.sqrt(mean_squared_error)
  # A single parenthesised argument prints identically with the Python 2 print
  # statement and the Python 3 print function.
  print("Mean value of predictions: %f, Mean of Targets: %f" % (np.mean(preds), np.mean(targets)))
  print("Mean Squared Error (on %s data): %0.3f" % (desc, mean_squared_error))
  print("Root Mean Squared Error (on %s data): %0.3f" % (desc, root_mean_squared_error))
  
# Report prediction metrics for the linear model on both splits.
get_preds_with_metric(model=linear_regressor,
                      input_fn=pred_input_fn_train,
                      targets=targets_train,
                      desc='training')
get_preds_with_metric(model=linear_regressor,
                      input_fn=pred_input_fn_test,
                      targets=targets_test,
                      desc='test')

Mean value of predictions: 2.647816, Mean of Targets: 207017.840802
Mean Squared Error (on training data): 56315851466.948
Root Mean Squared Error (on training data): 237309.611
Mean value of predictions: 2.683237, Mean of Targets: 208036.083792
Mean Squared Error (on test data): 56704733651.562
Root Mean Squared Error (on test data): 238127.558


## DNN REGRESSOR

In [0]:
# DNN regressor with hidden_units=[10, 10] over the same feature columns,
# trained with the same input function as the linear model.
dnn_regressor = tf.estimator.DNNRegressor(
    hidden_units=[10, 10],
    feature_columns=feature_columns)
dnn_regressor.train(input_fn=train_input_fn)


In [112]:
# Report prediction metrics for the DNN model on both splits.
get_preds_with_metric(model=dnn_regressor,
                      input_fn=pred_input_fn_train,
                      targets=targets_train,
                      desc='training')
get_preds_with_metric(model=dnn_regressor,
                      input_fn=pred_input_fn_test,
                      targets=targets_test,
                      desc='test')

Mean value of predictions: 131470.515625, Mean of Targets: 207017.840802
Mean Squared Error (on training data): 27866569704.388
Root Mean Squared Error (on training data): 166932.830
Mean value of predictions: 133315.437500, Mean of Targets: 208036.083792
Mean Squared Error (on test data): 27395570065.701
Root Mean Squared Error (on test data): 165516.072


## CUSTOM ESTIMATOR

In [0]:
def my_model_fn(
   features, # This is batch_features from input_fn
   labels,   # This is batch_labels from input_fn
   mode):    # Instance of tf.estimator.ModeKeys, see below
  """Model function for a two-hidden-layer regression network.

  Args:
    features: Batch of features produced by the input function.
    labels: Batch of labels produced by the input function. Not read in
      PREDICT mode.
    mode: One of the tf.estimator.ModeKeys values.
  Returns:
    A tf.estimator.EstimatorSpec for the requested mode.
  Raises:
    ValueError: If `mode` is not PREDICT, EVAL or TRAIN.
  """
  # Create the layer of input
  input_layer = tf.feature_column.input_layer(features, feature_columns)

  # Two hidden layers of 10 ReLU units each.
  # (Dense returns a Callable so we can provide the previous layer as argument to it)
  h1 = tf.layers.Dense(10, activation=tf.nn.relu)(input_layer)
  h2 = tf.layers.Dense(10, activation=tf.nn.relu)(h1)

  # Output layer: a single unit per example, i.e. the regression estimate.
  logits = tf.layers.Dense(1)(h2)
  predictions = tf.squeeze(logits, 1)# Convert to shape [batch_size]

  # PREDICT is handled before anything touches `labels`.
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode, predictions=predictions)

  loss = tf.losses.mean_squared_error(labels=labels, predictions=predictions)

  # EVAL previously fell through and returned None; report the loss and an
  # RMSE metric instead.
  if mode == tf.estimator.ModeKeys.EVAL:
    eval_metric_ops = {
        'rmse': tf.metrics.root_mean_squared_error(
            # Cast so labels and predictions share a dtype inside the metric.
            labels=tf.cast(labels, predictions.dtype),
            predictions=predictions)
    }
    return tf.estimator.EstimatorSpec(mode,
                                      loss=loss,
                                      eval_metric_ops=eval_metric_ops)

  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode,
                                      loss=loss,
                                      train_op=train_op)

  raise ValueError('Unsupported mode: %r' % (mode,))


In [80]:
# Path to where checkpoints etc are stored. The location was a hard-coded
# absolute path; it can now be overridden through the CHECKPOINT_DIR
# environment variable, with the original path kept as the default.
checkpoint_dir = os.environ.get('CHECKPOINT_DIR', '/Users/sunupi/Desktop/Checkpoints')

custom_regressor = tf.estimator.Estimator(
    model_fn=my_model_fn,
    model_dir=checkpoint_dir)

custom_regressor.train(input_fn=train_input_fn,steps=30000)

<tensorflow.python.estimator.estimator.Estimator at 0x7f92b6567e10>

In [113]:
# Report prediction metrics for the custom estimator on both splits. Its
# predict() output is consumed as plain values, hence is_dict=False.
get_preds_with_metric(model=custom_regressor,
                      input_fn=pred_input_fn_train,
                      targets=targets_train,
                      desc='training',
                      is_dict=False)
get_preds_with_metric(model=custom_regressor,
                      input_fn=pred_input_fn_test,
                      targets=targets_test,
                      desc='test',
                      is_dict=False)

Mean value of predictions: 133536.421875, Mean of Targets: 207017.840802
Mean Squared Error (on training data): 27550601947.960
Root Mean Squared Error (on training data): 165983.740
Mean value of predictions: 135381.781250, Mean of Targets: 208036.083792
Mean Squared Error (on test data): 27079192156.630
Root Mean Squared Error (on test data): 164557.565


## Custom Deep and Wide

In [0]:
def my_model_fn(
   features, # This is batch_features from input_fn
   labels,   # This is batch_labels from input_fn
   mode):    # Instance of tf.estimator.ModeKeys, see below
  """Model function for a two-hidden-layer regression network.

  NOTE(review): this definition reuses the name of the model function defined
  earlier in the notebook and therefore shadows it once this cell runs. As
  written it builds only the deep path; no "wide" path is implemented.

  Args:
    features: Batch of features produced by the input function.
    labels: Batch of labels produced by the input function. Not read in
      PREDICT mode.
    mode: One of the tf.estimator.ModeKeys values.
  Returns:
    A tf.estimator.EstimatorSpec for the requested mode.
  Raises:
    ValueError: If `mode` is not PREDICT, EVAL or TRAIN.
  """
  # Create the layer of input
  input_layer = tf.feature_column.input_layer(features, feature_columns)

  # Two hidden layers of 10 ReLU units each. The first layer used to be
  # constructed twice, which left an extra, unused Dense layer in the graph.
  # (Dense returns a Callable so we can provide the previous layer as argument to it)
  h1 = tf.layers.Dense(10, activation=tf.nn.relu)(input_layer)
  h2 = tf.layers.Dense(10, activation=tf.nn.relu)(h1)

  # Output layer: a single unit per example, i.e. the regression estimate.
  logits = tf.layers.Dense(1)(h2)
  predictions = tf.squeeze(logits, 1)# Convert to shape [batch_size]

  # PREDICT is handled before anything touches `labels`.
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode, predictions=predictions)

  loss = tf.losses.mean_squared_error(labels=labels, predictions=predictions)

  # EVAL previously fell through and returned None; report the loss and an
  # RMSE metric instead.
  if mode == tf.estimator.ModeKeys.EVAL:
    eval_metric_ops = {
        'rmse': tf.metrics.root_mean_squared_error(
            # Cast so labels and predictions share a dtype inside the metric.
            labels=tf.cast(labels, predictions.dtype),
            predictions=predictions)
    }
    return tf.estimator.EstimatorSpec(mode,
                                      loss=loss,
                                      eval_metric_ops=eval_metric_ops)

  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode,
                                      loss=loss,
                                      train_op=train_op)

  raise ValueError('Unsupported mode: %r' % (mode,))

In [114]:
# NOTE(review): scratch call left over from exploration. It passes no
# arguments, and the recorded run of this cell ended in a NameError.
# TODO: remove this cell or complete the call.
tf.layers.flatten()

NameError: ignored