# Pandas Solution

In [5]:
import pandas as pd 
from sklearn import datasets
import tensorflow as tf
import itertools

In [None]:
# Column names, in file order, assigned to the CSV data when it is loaded.
COLUMNS = [
    "crim", "zn", "indus", "nox", "rm",
    "age", "dis", "tax", "ptratio", "medv",
]

In [None]:
# Load the three splits with identical parsing options: the first line of each
# file is skipped (presumably the header row) and COLUMNS supplies the names.
training_set, test_set, prediction_set = (
    pd.read_csv(csv_path, skipinitialspace=True, skiprows=1, names=COLUMNS)
    for csv_path in (
        '../data/boston_train.csv',
        '../data/boston_test.csv',
        '../data/boston_predict.csv',
    )
)

In [None]:
# Sanity check: (rows, columns) of each split, in train / test / predict order.
print(*(split.shape for split in (training_set, test_set, prediction_set)))

In [None]:
# Model inputs: every column except the target.
FEATURES = [
    "crim", "zn", "indus", "nox", "rm",
    "age", "dis", "tax", "ptratio",
]
# Target column the regressor learns to predict.
LABEL = "medv"

## Convert the data

In [None]:
# One numeric feature column per entry of FEATURES, in the same order.
feature_cols = list(map(tf.feature_column.numeric_column, FEATURES))

## Define the estimator

In [None]:
# Linear regressor over the numeric feature columns; "train" is the directory
# handed to the estimator as its model_dir.
estimator = tf.estimator.LinearRegressor(feature_columns=feature_cols, model_dir="train")

### How to feed the data using `pandas_input_fn`. This function takes 5 parameters:
* `x`: feature data
* `y`: label data
* `batch_size`: batch size. By default 128
* `num_epochs`: number of epochs. By default 1
* `shuffle`: whether to shuffle the data. By default None

In [None]:
# To feed the data multiple times, we wrap pandas_input_fn in a helper function, get_input_fn
def get_input_fn(data_set, num_epochs=None, n_batch=128, shuffle=True):
    """Build a pandas-backed input function for an estimator.

    Args:
        data_set: DataFrame containing every column named in FEATURES plus
            the LABEL column.
        num_epochs: forwarded to pandas_input_fn as ``num_epochs``.
        n_batch: forwarded to pandas_input_fn as ``batch_size``.
        shuffle: forwarded to pandas_input_fn as ``shuffle``.

    Returns:
        The object returned by ``tf.estimator.inputs.pandas_input_fn``,
        suitable for the ``input_fn`` argument of train/evaluate/predict.
    """
    # Rebuild both objects from raw values so they share a fresh default index.
    features = pd.DataFrame({name: data_set[name].values for name in FEATURES})
    labels = pd.Series(data_set[LABEL].values)
    return tf.estimator.inputs.pandas_input_fn(
        x=features,
        y=labels,
        batch_size=n_batch,
        num_epochs=num_epochs,
        shuffle=shuffle,
    )

## Train the model

In [None]:
# Build the training feed, then fit for 1000 steps. With num_epochs=None the
# input function keeps cycling over the data, so `steps` is what ends training.
train_input_fn = get_input_fn(training_set,
                              num_epochs=None,
                              n_batch=128,
                              shuffle=False)
estimator.train(input_fn=train_input_fn, steps=1000)

## Evaluate your model

In [None]:
# Score the trained model on the test split: one unshuffled pass over the data.
eval_input_fn = get_input_fn(test_set,
                             num_epochs=1,
                             n_batch=128,
                             shuffle=False)
ev = estimator.evaluate(input_fn=eval_input_fn)

In [None]:
# Report the "loss" entry of the metrics returned by estimator.evaluate.
loss_score = ev["loss"]
loss_message = "Loss: {0:f}".format(loss_score)
print(loss_message)

In [None]:
# Summary statistics of the target column in the training split
# (presumably shown to put the loss above into context).
training_set['medv'].describe()

## Make the prediction

In [None]:
# Run the trained model over the prediction split: one unshuffled pass.
predict_input_fn = get_input_fn(prediction_set,
                                num_epochs=1,
                                n_batch=128,
                                shuffle=False)
y = estimator.predict(input_fn=predict_input_fn)

In [None]:
# Take at most the first six results from the prediction iterable and show them.
predictions = [p["predictions"] for p in itertools.islice(y, 6)]
print("Predictions: {}".format(str(predictions)))

# Numpy Solution

In [None]:
# Load the same three CSV files again, this time keeping only the underlying
# NumPy arrays (pandas consumes the first line of each file as the header).
training_set_n, test_set_n, prediction_set_n = (
    pd.read_csv(csv_path).values
    for csv_path in (
        "../data/boston_train.csv",
        "../data/boston_test.csv",
        "../data/boston_predict.csv",
    )
)

In [None]:
def prepare_data(df):
    """Split a 2-D array into a feature matrix and a label vector.

    The label is taken from the LAST column and every other column is kept as
    a feature. This matches the column layout used elsewhere in this notebook,
    where the target ``medv`` is the final entry of COLUMNS. (The previous
    ``-3`` slicing returned the third-from-last column as the label and
    silently dropped the two feature columns that follow it.)

    Args:
        df: 2-D NumPy array with one row per example and the label in the
            last column.

    Returns:
        (X, y): ``X`` holds all columns but the last, ``y`` is the last
        column as a 1-D array.

    Note:
        Any array later passed to predict must have the same number of
        feature columns as the ``X`` returned here.
    """
    X_train = df[:, :-1]
    y_train = df[:, -1]
    return X_train, y_train

In [None]:
# Split the training and test arrays into (features, label) pairs.
(X_train, y_train), (X_test, y_test) = map(prepare_data, (training_set_n, test_set_n))

In [None]:
# Keep only the leading feature columns of the prediction data. The width is
# taken from X_train so the array always matches the shape declared for the
# 'x' feature column; any trailing column (e.g. an empty/NaN label) is dropped.
x_predict = prediction_set_n[:, :X_train.shape[1]]

In [None]:
# Sanity check: X_train and x_predict should agree on the number of columns.
print(*(array.shape for array in (X_train, y_train, x_predict)))

In [None]:
# A single vector-valued numeric column named 'x'; its shape is everything
# after the row dimension of X_train, i.e. the number of feature columns.
feature_columns = [tf.feature_column.numeric_column('x',shape=X_train.shape[1:])]

In [None]:
# Same kind of estimator as in the pandas solution, but built on the single
# 'x' column and given its own model_dir. Note: this rebinds `estimator`.
estimator = tf.estimator.LinearRegressor(feature_columns=feature_columns, model_dir="train1")

In [None]:
# Feed the NumPy arrays through numpy_input_fn and train the estimator.
# The feature dict key 'x' must match the name of the feature column.
train_input = tf.estimator.inputs.numpy_input_fn(
    x={'x': X_train}, y=y_train,
    batch_size=128, num_epochs=None, shuffle=False,
)
estimator.train(input_fn=train_input, steps=5000)

In [None]:
# Evaluate on the test arrays: a single unshuffled pass, no step limit.
eval_input = tf.estimator.inputs.numpy_input_fn(
    x={'x': X_test}, y=y_test,
    batch_size=128, num_epochs=1, shuffle=False,
)
estimator.evaluate(input_fn=eval_input, steps=None)

In [None]:
# Predict on the unlabeled rows (no `y` is supplied) and show up to six results.
test_input = tf.estimator.inputs.numpy_input_fn(
    x={"x": x_predict},
    batch_size=128, num_epochs=1, shuffle=False,
)
y = estimator.predict(input_fn=test_input)
predictions = [p["predictions"] for p in itertools.islice(y, 6)]
print("Predictions: {}".format(str(predictions)))

# Tensorflow solution

In [2]:
# Paths of the training and evaluation CSV files. Despite the df_ prefix these
# are plain strings, not DataFrames; they are handed to input_fn as data_file.
df_train = '../data/boston_train.csv'
df_eval = '../data/boston_test.csv'

In [4]:
# Names of the CSV fields, in file order; the last one ('medv') is the label.
COLUMNS = [
    "crim", "zn", "indus", "nox", "rm",
    "age", "dis", "tax", "ptratio", "medv",
]
# Per-field defaults for decode_csv: one [0.0] per column, so every field is
# parsed as a float.
RECORDS_ALL = [[0.0] for _ in COLUMNS]

## Define the input_fn 
The function can be broken into three parts:
1. Import the data
2. Create the iterator
3. Consume the data

In [11]:
def input_fn(data_file, batch_size, num_epoch = None):
    """Build a CSV input pipeline and return its next-batch tensors.

    Args:
        data_file: path of the CSV file to read; its first line is skipped.
        batch_size: number of rows grouped into each batch.
        num_epoch: forwarded to ``Dataset.repeat``; with None (the default)
            the data is repeated indefinitely.

    Returns:
        (features, labels) as produced by the iterator's ``get_next()``:
        ``features`` is a dict keyed by every COLUMNS name except 'medv',
        ``labels`` is the 'medv' field.
    """
    def parse_csv(value):
        # Decode one text line into one tensor per column, using RECORDS_ALL
        # as the per-field defaults, then split off the label.
        fields = tf.decode_csv(value, record_defaults=RECORDS_ALL)
        parsed = dict(zip(COLUMNS, fields))
        target = parsed.pop('medv')
        return parsed, target

    # Step 1: import the data -- read lines, skip the first line, parse each
    # row, repeat for the requested number of epochs, and group into batches.
    lines = tf.data.TextLineDataset(data_file).skip(1)
    batched = lines.map(parse_csv).repeat(num_epoch).batch(batch_size)

    # Step 2: create the iterator.
    batch_iterator = batched.make_one_shot_iterator()

    # Step 3: consume the data.
    features, labels = batch_iterator.get_next()
    return features, labels

In [13]:
# Consume the data: pull a single one-row batch from the training file to
# check that the pipeline yields a (features dict, label) pair.
next_batch = input_fn(df_train, batch_size=1, num_epoch=None)
with tf.Session() as sess:
    first_batch = sess.run(next_batch)
print(first_batch)

({'crim': array([2.3004], dtype=float32), 'zn': array([0.], dtype=float32), 'indus': array([19.58], dtype=float32), 'nox': array([0.605], dtype=float32), 'rm': array([6.319], dtype=float32), 'age': array([96.1], dtype=float32), 'dis': array([2.1], dtype=float32), 'tax': array([403.], dtype=float32), 'ptratio': array([14.7], dtype=float32)}, array([23.8], dtype=float32))


In [14]:
# Define the feature columns: one numeric column per input variable,
# bound to X1..X9 in the same order as the names listed below.
X1, X2, X3, X4, X5, X6, X7, X8, X9 = (
    tf.feature_column.numeric_column(column_name)
    for column_name in (
        'crim', 'zn', 'indus', 'nox', 'rm',
        'age', 'dis', 'tax', 'ptratio',
    )
)



In [15]:
# Collect the nine feature columns into the list handed to the estimator.
base_columns = [X1,X2, X3, X4, X5, X6, X7, X8, X9]

## Step 5) Build the model

In [17]:
# Linear regressor over the nine numeric columns; 'train3' is its model_dir.
model = tf.estimator.LinearRegressor(
    feature_columns=base_columns,
    model_dir='train3',
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'train3', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f7130343d68>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [18]:
# Train the estimator for 1000 steps. The lambda defers the input_fn call so
# the estimator can invoke it itself, with no arguments.
model.train(
    input_fn=lambda: input_fn(df_train, batch_size=128, num_epoch=None),
    steps=1000,
)

Instructions for updating:
Colocations handled automatically by placer.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Use tf.cast instead.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into train3/model.ckpt.
INFO:tensorflow:loss = 83729.64, step = 1
INFO:tensorflow:global_step/sec: 137.919
INFO:tensorflow:loss = 13909.657, step = 101 (0.727 sec)
INFO:tensorflow:global_step/sec: 160.816
INFO:tensorflow:loss = 12881.449, step = 201 (0.620 sec)
INFO:tensorflow:global_step/sec: 168.666
INFO:tensorflow:loss = 12391.541, step = 301 (0.593 sec)
INFO:tensorflow:global_step/sec: 138.078
INFO:tensorflow:loss = 12050.5625, step = 401 (0.724 sec)
INFO:tensorflow:global_step/sec: 150.744
INFO:tensorflow:loss = 11766.134, step = 501 (0.664 sec)
INFO:tensorflow:global_step/sec: 145.927
INFO

<tensorflow_estimator.python.estimator.canned.linear.LinearRegressor at 0x7f7130343588>

In [20]:
# Evaluate on the test file with a single pass, then list every metric returned.
results = model.evaluate(
    input_fn=lambda: input_fn(df_eval, batch_size=128, num_epoch=1),
    steps=None,
)
for key, value in results.items():
    print("   {}, was: {}".format(key, value))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-06-20T14:41:35Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from train3/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-06-20-14:41:35
INFO:tensorflow:Saving dict for global step 1000: average_loss = 32.15896, global_step = 1000, label/mean = 22.08, loss = 3215.896, prediction/mean = 22.404533
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1000: train3/model.ckpt-1000
   average_loss, was: 32.158958435058594
   label/mean, was: 22.079999923706055
   loss, was: 3215.89599609375
   prediction/mean, was: 22.40453338623047
   global_step, was: 1000


In [22]:
# Six hand-entered rows to score, stored column-wise: position i of every
# list belongs to the same row.
prediction_input = {
    'crim': [0.03359, 5.09017, 0.12650, 0.05515, 8.15174, 0.24522],
    'zn': [75.0, 0.0, 25.0, 33.0, 0.0, 0.0],
    'indus': [2.95, 18.10, 5.13, 2.18, 18.10, 9.90],
    'nox': [0.428, 0.713, 0.453, 0.472, 0.700, 0.544],
    'rm': [7.024, 6.297, 6.762, 7.236, 5.390, 5.782],
    'age': [15.8, 91.8, 43.4, 41.1, 98.9, 71.7],
    'dis': [5.4011, 2.3682, 7.9809, 4.0220, 1.7281, 4.0317],
    'tax': [252, 666, 284, 222, 666, 304],
    'ptratio': [18.3, 20.2, 19.7, 18.4, 20.2, 18.4],
}
def test_input_fn():
    """Return prediction_input wrapped as a Dataset for model.predict.

    from_tensors turns the whole dict into a single dataset element, so the
    six values held in each list are presented to the model together.
    """
    return tf.data.Dataset.from_tensors(prediction_input)

In [23]:
# Request predictions for every row of prediction_input. The result is only
# iterated in the next cell (presumably predict evaluates lazily -- the
# TensorFlow log lines appear there, not here).
pred_results = model.predict(input_fn=test_input_fn)

In [24]:
# Print each prediction as an (index, result) tuple.
for indexed_prediction in enumerate(pred_results):
    print(indexed_prediction)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from train3/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
(0, {'predictions': array([32.297546], dtype=float32)})
(1, {'predictions': array([18.96125], dtype=float32)})
(2, {'predictions': array([27.270979], dtype=float32)})
(3, {'predictions': array([29.299236], dtype=float32)})
(4, {'predictions': array([16.436684], dtype=float32)})
(5, {'predictions': array([21.460876], dtype=float32)})
