### Step 1: Import the training data

In [1]:
import pandas as pd

In [2]:
housing = pd.read_csv('/Users/jamessmith/Documents/Tensorflow-Bootcamp-master/02-TensorFlow-Basics/cal_housing_clean.csv')

In [3]:
housing.head()

Unnamed: 0,housingMedianAge,totalRooms,totalBedrooms,population,households,medianIncome,medianHouseValue
0,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0
1,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0
2,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0
3,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0
4,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0


### Step 2: Perform the Train-Test Split

In [4]:
y_val = housing['medianHouseValue'] # This is what we are trying to predict

In [5]:
x_data = housing.drop('medianHouseValue', axis = 1 ) # This is the remainder of the features - used for prediction

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
x_train, x_test, y_train, y_test = train_test_split(x_data, y_val, test_size = 0.3, random_state = 101)

In [8]:
print(x_train.size) # 70% of x_data
print(x_test.size) # 30% of ""
print(y_train.size) # 70% of y_val
print(y_test.size) # 30% of ""
# y_val.size is 20639 (1 col), x_data.size is 6 * y_val (6 col) & housing = x _data + y_val

86688
37152
14448
6192


### Step 3: Scale the Feature Data

In [9]:
from sklearn.preprocessing import MinMaxScaler

In [10]:
scaler = MinMaxScaler()

In [11]:
scaler.fit(x_train)

MinMaxScaler(copy=True, feature_range=(0, 1))

In [12]:
x_train = pd.DataFrame(data=scaler.transform(x_train), columns = x_train.columns, index = x_train.index)

In [13]:
x_test = pd.DataFrame(data=scaler.transform(x_test), columns = x_test.columns, index = x_test.index)

In [14]:
print(x_train) # How have we scaled the data? - zero mean & unit variance, why though?

       housingMedianAge  totalRooms  totalBedrooms  population  households  \
6761           0.352941    0.069688       0.117163    0.048769    0.115442   
3010           0.607843    0.011242       0.015673    0.008367    0.014142   
7812           0.666667    0.025230       0.031347    0.020971    0.030258   
8480           0.666667    0.032530       0.033830    0.024752    0.030094   
1051           0.294118    0.031919       0.035692    0.019466    0.034863   
16312          0.607843    0.113739       0.144010    0.076112    0.145206   
2042           0.431373    0.053283       0.059590    0.031789    0.066436   
1755           0.882353    0.048298       0.045624    0.029059    0.057721   
16022          1.000000    0.061015       0.053538    0.028323    0.053774   
20441          0.450980    0.103515       0.090161    0.064104    0.092748   
13243          0.156863    0.098555       0.075885    0.063439    0.080414   
6450           0.843137    0.050994       0.044538    0.032139  

### Step 4: Create the Feature Columns - Transform the data into formats the estimators can use

In [15]:
housing.columns

Index(['housingMedianAge', 'totalRooms', 'totalBedrooms', 'population',
       'households', 'medianIncome', 'medianHouseValue'],
      dtype='object')

In [16]:
import tensorflow as tf

In [17]:
age = tf.feature_column.numeric_column('housingMedianAge')
bedrooms = tf.feature_column.numeric_column('totalBedrooms')
pop = tf.feature_column.numeric_column('population')
households = tf.feature_column.numeric_column('households')
income = tf.feature_column.numeric_column('medianIncome')

In [18]:
feat_cols = [age, bedrooms, pop, households, income]

### Step 5: Create Input Function for Estimator Object

In [19]:
input_func = tf.estimator.inputs.pandas_input_fn(x = x_train, y = y_train, batch_size = 10, num_epochs = 1000, shuffle = True)

### Step 6: Create the Estimator Model (DNN Regressor)

In [20]:
model = tf.estimator.DNNRegressor(hidden_units=[12,12,12], feature_columns = feat_cols)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/89/twfbkbns2g52_wr7z25fqz900000gp/T/tmpqxnp0zyg', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fbd0a42d6a0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


### Step 7: Train the Model

In [21]:
model.train(input_fn = input_func, steps = 1000)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Use tf.cast instead.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/89/twfbkbns2g52_wr7z25fqz900000gp/T/tmpqxnp0zyg/model.ckpt.
INFO:tensorflow:loss = 831592100000.0, step = 1
INFO:tensorflow:global_step/sec: 334.44
INFO:tensorflow:loss = 413211800000.0, step = 101 (0.299 sec)
INFO:tensorflow:global_step/sec: 692.161
INFO:tensorflow:loss = 418990130000.0, step = 201 (0.145 sec)
INFO:tensorflow:global

<tensorflow_estimator.python.estimator.canned.dnn.DNNRegressor at 0x7fbd0a42def0>

### Step 8: Create the prediction function

In [22]:
predict_input_func = tf.estimator.inputs.pandas_input_fn(x = x_test, batch_size = 10, num_epochs = 1, shuffle = False)

In [23]:
pred_gen = model.predict(predict_input_func)

In [24]:
predictions = list(pred_gen)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /var/folders/89/twfbkbns2g52_wr7z25fqz900000gp/T/tmpqxnp0zyg/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [25]:
predictions

[{'predictions': array([223691.66], dtype=float32)},
 {'predictions': array([268266.53], dtype=float32)},
 {'predictions': array([219508.3], dtype=float32)},
 {'predictions': array([177530.31], dtype=float32)},
 {'predictions': array([239146.47], dtype=float32)},
 {'predictions': array([195561.81], dtype=float32)},
 {'predictions': array([227003.53], dtype=float32)},
 {'predictions': array([203802.25], dtype=float32)},
 {'predictions': array([203778.44], dtype=float32)},
 {'predictions': array([168279.45], dtype=float32)},
 {'predictions': array([202676.8], dtype=float32)},
 {'predictions': array([219806.17], dtype=float32)},
 {'predictions': array([190604.33], dtype=float32)},
 {'predictions': array([175400.34], dtype=float32)},
 {'predictions': array([253805.31], dtype=float32)},
 {'predictions': array([170342.3], dtype=float32)},
 {'predictions': array([204395.06], dtype=float32)},
 {'predictions': array([178658.12], dtype=float32)},
 {'predictions': array([171601.88], dtype=float32

### Step 9: Calculate the RMSE

In [26]:
final_preds = []

for pred in predictions:
    final_preds.append(pred['predictions'])

In [27]:
from sklearn.metrics import mean_squared_error

In [28]:
mean_squared_error(y_test, final_preds)**0.5 # Is this the error in our model? i.e the value we want to minimize...

105354.27038126365