# Import Data with Pandas

In [1]:
import pandas as pd

In [2]:
utah_vineyard = pd.read_csv('utah_lake_vineyard_no_chloro_cond.csv')

In [3]:
utah_vineyard.head()

Unnamed: 0,Temperature,pH,Turbidity,ODOSat,Classification
0,15.02,8.36,16.84,90.2,0
1,14.99,8.36,16.76,90.2,0
2,14.96,8.36,16.82,90.1,0
3,14.95,8.36,17.19,90.0,0
4,14.92,8.36,16.85,89.8,0


In [4]:
utah_vineyard.shape

(18947, 5)

# Normalize Data

In [5]:
utah_vineyard.columns

Index(['Temperature', 'pH', 'Turbidity', 'ODOSat', 'Classification'], dtype='object')

In [6]:
cols_to_normalize = ['Temperature', 'pH', 'Turbidity', 'ODOSat']

In [7]:
utah_vineyard[cols_to_normalize] = utah_vineyard[cols_to_normalize].apply(lambda x: (x - x.min()) / 
                                                                          (x.max() - x.min() ))

In [8]:
utah_vineyard.head()

Unnamed: 0,Temperature,pH,Turbidity,ODOSat,Classification
0,0.409329,0.330769,0.025439,0.186521,0
1,0.408021,0.330769,0.025313,0.186521,0
2,0.406713,0.330769,0.025407,0.185841,0
3,0.406277,0.330769,0.025989,0.18516,0
4,0.404969,0.330769,0.025454,0.183799,0


In [9]:
utah_vineyard.shape

(18947, 5)

# Import Tensorflow and Create Columns

In [10]:
import tensorflow as tf

In [11]:
temperature = tf.feature_column.numeric_column('Temperature')
ph = tf.feature_column.numeric_column('pH')
turbidity = tf.feature_column.numeric_column('Turbidity')
odo_sat = tf.feature_column.numeric_column('ODOSat')

In [12]:
feat_cols = [temperature, ph, turbidity, odo_sat]

In [13]:
x_data = utah_vineyard.drop('Classification', axis=1)

In [14]:
x_data.head()

Unnamed: 0,Temperature,pH,Turbidity,ODOSat
0,0.409329,0.330769,0.025439,0.186521
1,0.408021,0.330769,0.025313,0.186521
2,0.406713,0.330769,0.025407,0.185841
3,0.406277,0.330769,0.025989,0.18516
4,0.404969,0.330769,0.025454,0.183799


In [15]:
labels = utah_vineyard['Classification']

# Create Test Train Split

In [16]:
from sklearn.model_selection import train_test_split

In [17]:
X_train, X_test, y_train, y_test = train_test_split(x_data, labels, test_size=0.3, random_state=101)

In [18]:
X_train.head()

Unnamed: 0,Temperature,pH,Turbidity,ODOSat
17321,0.177419,0.561538,0.054478,0.12934
7754,0.887969,0.761538,0.052512,0.502383
15906,0.170881,0.576923,0.050626,0.161334
4030,0.62816,0.438462,0.037356,0.151123
18560,0.055798,0.561538,0.047811,0.111641


In [19]:
X_train.shape

(13262, 4)

In [20]:
X_test.head()

Unnamed: 0,Temperature,pH,Turbidity,ODOSat
10571,0.754141,0.646154,0.117508,0.109598
13426,0.440279,0.6,0.722486,0.112321
12773,0.639494,0.623077,0.165524,0.058543
13004,0.527899,0.723077,0.086661,0.170184
16294,0.166085,0.553846,0.077495,0.133424


In [21]:
y_train.head()

17321    0
7754     1
15906    0
4030     0
18560    0
Name: Classification, dtype: int64

In [22]:
y_test.head()

10571    0
13426    1
12773    0
13004    1
16294    0
Name: Classification, dtype: int64

# Build [Estimator](https://www.tensorflow.org/programmers_guide/estimators)

In [23]:
input_func = tf.estimator.inputs.pandas_input_fn(x = X_train, y = y_train, 
                                                 batch_size=10, num_epochs = 1000, 
                                                 shuffle = True)

In [24]:
model = tf.estimator.LinearClassifier(feature_columns = feat_cols, n_classes = 2)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_tf_random_seed': 1, '_save_summary_steps': 100, '_keep_checkpoint_max': 5, '_log_step_count_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_save_checkpoints_secs': 600, '_session_config': None, '_save_checkpoints_steps': None, '_model_dir': 'C:\\Users\\MattS\\AppData\\Local\\Temp\\tmpdhx9dc0r'}


In [25]:
model.train(input_fn = input_func, steps = 10000)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\MattS\AppData\Local\Temp\tmpdhx9dc0r\model.ckpt.
INFO:tensorflow:step = 1, loss = 6.93147
INFO:tensorflow:global_step/sec: 225.499
INFO:tensorflow:step = 101, loss = 7.07221 (0.454 sec)
INFO:tensorflow:global_step/sec: 340.319
INFO:tensorflow:step = 201, loss = 5.73005 (0.283 sec)
INFO:tensorflow:global_step/sec: 302.513
INFO:tensorflow:step = 301, loss = 5.31579 (0.331 sec)
INFO:tensorflow:global_step/sec: 331.031
INFO:tensorflow:step = 401, loss = 6.39266 (0.318 sec)
INFO:tensorflow:global_step/sec: 286.172
INFO:tensorflow:step = 501, loss = 6.83793 (0.349 sec)
INFO:tensorflow:global_step/sec: 300.252
INFO:tensorflow:step = 601, loss = 4.43518 (0.317 sec)
INFO:tensorflow:global_step/sec: 334.309
INFO:tensorflow:step = 701, loss = 4.66307 (0.315 sec)
INFO:tensorflow:global_step/sec: 333.427
INFO:tensorflow:step = 801, loss = 2.19827 (0.300 sec)
INFO:tensorflow:global_step/sec: 315.305
IN

INFO:tensorflow:global_step/sec: 354.355
INFO:tensorflow:step = 8401, loss = 2.15962 (0.282 sec)
INFO:tensorflow:global_step/sec: 335.152
INFO:tensorflow:step = 8501, loss = 1.64393 (0.283 sec)
INFO:tensorflow:global_step/sec: 351.677
INFO:tensorflow:step = 8601, loss = 1.74046 (0.284 sec)
INFO:tensorflow:global_step/sec: 336.955
INFO:tensorflow:step = 8701, loss = 1.50012 (0.297 sec)
INFO:tensorflow:global_step/sec: 259.925
INFO:tensorflow:step = 8801, loss = 2.1424 (0.397 sec)
INFO:tensorflow:global_step/sec: 309.138
INFO:tensorflow:step = 8901, loss = 3.85431 (0.328 sec)
INFO:tensorflow:global_step/sec: 350.894
INFO:tensorflow:step = 9001, loss = 1.56329 (0.284 sec)
INFO:tensorflow:global_step/sec: 334.283
INFO:tensorflow:step = 9101, loss = 3.45074 (0.299 sec)
INFO:tensorflow:global_step/sec: 351.812
INFO:tensorflow:step = 9201, loss = 3.99284 (0.284 sec)
INFO:tensorflow:global_step/sec: 334.954
INFO:tensorflow:step = 9301, loss = 2.65935 (0.283 sec)
INFO:tensorflow:global_step/sec

<tensorflow.python.estimator.canned.linear.LinearClassifier at 0x21171194c50>

In [26]:
eval_input_func = tf.estimator.inputs.pandas_input_fn(x = X_test, y = y_test, batch_size = 10,
                                                     num_epochs = 10, shuffle = False)

In [27]:
results = model.evaluate(eval_input_func)

INFO:tensorflow:Starting evaluation at 2018-04-20-18:39:43
INFO:tensorflow:Restoring parameters from C:\Users\MattS\AppData\Local\Temp\tmpdhx9dc0r\model.ckpt-10000
INFO:tensorflow:Finished evaluation at 2018-04-20-18:40:01
INFO:tensorflow:Saving dict for global step 10000: accuracy = 0.85066, accuracy_baseline = 0.786456, auc = 0.912676, auc_precision_recall = 0.735056, average_loss = 0.336368, global_step = 10000, label/mean = 0.213544, loss = 3.36368, prediction/mean = 0.224187


In [28]:
results

{'accuracy': 0.85065961,
 'accuracy_baseline': 0.78645557,
 'auc': 0.91267639,
 'auc_precision_recall': 0.73505622,
 'average_loss': 0.33636752,
 'global_step': 10000,
 'label/mean': 0.21354441,
 'loss': 3.3636754,
 'prediction/mean': 0.22418685}

# Predictions

In [29]:
pred_input_func = tf.estimator.inputs.pandas_input_fn(x = X_test, batch_size = 10,
                                                      num_epochs = 1, shuffle = False)

In [30]:
predictions = model.predict(pred_input_func)

In [31]:
my_pred = list(predictions)

INFO:tensorflow:Restoring parameters from C:\Users\MattS\AppData\Local\Temp\tmpdhx9dc0r\model.ckpt-10000


In [32]:
my_pred

[{'class_ids': array([0], dtype=int64),
  'classes': array([b'0'], dtype=object),
  'logistic': array([ 0.23185065], dtype=float32),
  'logits': array([-1.19789076], dtype=float32),
  'probabilities': array([ 0.76814938,  0.23185065], dtype=float32)},
 {'class_ids': array([1], dtype=int64),
  'classes': array([b'1'], dtype=object),
  'logistic': array([ 0.95692801], dtype=float32),
  'logits': array([ 3.1008563], dtype=float32),
  'probabilities': array([ 0.04307194,  0.95692801], dtype=float32)},
 {'class_ids': array([0], dtype=int64),
  'classes': array([b'0'], dtype=object),
  'logistic': array([ 0.18515651], dtype=float32),
  'logits': array([-1.4817946], dtype=float32),
  'probabilities': array([ 0.81484348,  0.18515651], dtype=float32)},
 {'class_ids': array([0], dtype=int64),
  'classes': array([b'0'], dtype=object),
  'logistic': array([ 0.19719082], dtype=float32),
  'logits': array([-1.40394521], dtype=float32),
  'probabilities': array([ 0.80280918,  0.19719081], dtype=float

In [33]:
X_test.head()

Unnamed: 0,Temperature,pH,Turbidity,ODOSat
10571,0.754141,0.646154,0.117508,0.109598
13426,0.440279,0.6,0.722486,0.112321
12773,0.639494,0.623077,0.165524,0.058543
13004,0.527899,0.723077,0.086661,0.170184
16294,0.166085,0.553846,0.077495,0.133424


In [34]:
utah_vineyard.loc[[10571, 13426, 12773, 13004, 16294]]

Unnamed: 0,Temperature,pH,Turbidity,ODOSat,Classification
10571,0.754141,0.646154,0.117508,0.109598,0
13426,0.440279,0.6,0.722486,0.112321,1
12773,0.639494,0.623077,0.165524,0.058543,0
13004,0.527899,0.723077,0.086661,0.170184,1
16294,0.166085,0.553846,0.077495,0.133424,0


In [35]:
dnn_model = ''

In [36]:
dnn_model = tf.estimator.DNNClassifier(hidden_units=[5,5,5,5,5], feature_columns=feat_cols, n_classes=2)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_tf_random_seed': 1, '_save_summary_steps': 100, '_keep_checkpoint_max': 5, '_log_step_count_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_save_checkpoints_secs': 600, '_session_config': None, '_save_checkpoints_steps': None, '_model_dir': 'C:\\Users\\MattS\\AppData\\Local\\Temp\\tmpl9podhqg'}


In [37]:
embedded_group_col = tf.feature_column.embedding_column(labels, dimension=2)

In [38]:
feat_cols = [temperature, ph, turbidity, odo_sat, embedded_group_col]

In [39]:
input_func = tf.estimator.inputs.pandas_input_fn(X_train, y_train, batch_size=10, num_epochs=1000, shuffle=True)

In [40]:
dnn_model.train(input_fn=input_func, steps=10000)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\MattS\AppData\Local\Temp\tmpl9podhqg\model.ckpt.
INFO:tensorflow:step = 1, loss = 6.8968
INFO:tensorflow:global_step/sec: 239.583
INFO:tensorflow:step = 101, loss = 3.02233 (0.417 sec)
INFO:tensorflow:global_step/sec: 277.566
INFO:tensorflow:step = 201, loss = 1.74832 (0.376 sec)
INFO:tensorflow:global_step/sec: 256.397
INFO:tensorflow:step = 301, loss = 1.69564 (0.392 sec)
INFO:tensorflow:global_step/sec: 289.762
INFO:tensorflow:step = 401, loss = 5.71739 (0.364 sec)
INFO:tensorflow:global_step/sec: 247.097
INFO:tensorflow:step = 501, loss = 1.61906 (0.387 sec)
INFO:tensorflow:global_step/sec: 335.619
INFO:tensorflow:step = 601, loss = 4.55782 (0.295 sec)
INFO:tensorflow:global_step/sec: 286.527
INFO:tensorflow:step = 701, loss = 1.32006 (0.338 sec)
INFO:tensorflow:global_step/sec: 244.476
INFO:tensorflow:step = 801, loss = 1.12178 (0.425 sec)
INFO:tensorflow:global_step/sec: 241.504
INF

INFO:tensorflow:global_step/sec: 297.632
INFO:tensorflow:step = 8401, loss = 2.1837 (0.354 sec)
INFO:tensorflow:global_step/sec: 288.219
INFO:tensorflow:step = 8501, loss = 0.353883 (0.329 sec)
INFO:tensorflow:global_step/sec: 330.333
INFO:tensorflow:step = 8601, loss = 0.605949 (0.303 sec)
INFO:tensorflow:global_step/sec: 336.058
INFO:tensorflow:step = 8701, loss = 2.32505 (0.314 sec)
INFO:tensorflow:global_step/sec: 330.257
INFO:tensorflow:step = 8801, loss = 1.09694 (0.303 sec)
INFO:tensorflow:global_step/sec: 317.071
INFO:tensorflow:step = 8901, loss = 4.64374 (0.314 sec)
INFO:tensorflow:global_step/sec: 271.018
INFO:tensorflow:step = 9001, loss = 4.21876 (0.369 sec)
INFO:tensorflow:global_step/sec: 308.713
INFO:tensorflow:step = 9101, loss = 1.25142 (0.325 sec)
INFO:tensorflow:global_step/sec: 327.914
INFO:tensorflow:step = 9201, loss = 4.40365 (0.289 sec)
INFO:tensorflow:global_step/sec: 350.305
INFO:tensorflow:step = 9301, loss = 0.938541 (0.302 sec)
INFO:tensorflow:global_step/

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x211736da438>

In [41]:
eval_input_func = tf.estimator.inputs.pandas_input_fn(x=X_test, y=y_test, batch_size=10, num_epochs=1, shuffle=False)

In [42]:
dnn_model.evaluate(eval_input_func)

INFO:tensorflow:Starting evaluation at 2018-04-20-18:40:55
INFO:tensorflow:Restoring parameters from C:\Users\MattS\AppData\Local\Temp\tmpl9podhqg\model.ckpt-10000
INFO:tensorflow:Finished evaluation at 2018-04-20-18:40:58
INFO:tensorflow:Saving dict for global step 10000: accuracy = 0.90044, accuracy_baseline = 0.786456, auc = 0.951007, auc_precision_recall = 0.837407, average_loss = 0.227514, global_step = 10000, label/mean = 0.213544, loss = 2.27314, prediction/mean = 0.212437


{'accuracy': 0.90043974,
 'accuracy_baseline': 0.78645557,
 'auc': 0.95100665,
 'auc_precision_recall': 0.83740711,
 'average_loss': 0.22751409,
 'global_step': 10000,
 'label/mean': 0.21354441,
 'loss': 2.2731416,
 'prediction/mean': 0.21243657}