In [1]:
import pandas as pd
import numpy as np
import pickle as p

import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)

from sklearn.model_selection import train_test_split

Compile the training data

In [2]:
meta_data = pd.read_csv('/home/ec2-user/training_data_meta.csv')

In [3]:
#Join cleaned class data with training metadata

class_data = pd.read_csv('/home/ec2-user/stanford_labels_cleaned.csv')

In [4]:
meta_data = pd.merge(meta_data, class_data, on = 'class', how = 'left')

In [5]:
meta_data['Body Type'].value_counts()

Sedan          2075
SUV            1558
Coupe          1540
Convertible    1036
Crew Cab        381
Hatchback       380
Cab             297
Van             291
Wagon           253
Minivan         250
Quad Cab         44
Club Cab         39
Name: Body Type, dtype: int64

In [6]:
meta_data = meta_data.loc[meta_data['Body Type'].isin(['Coupe', 'Sedan'])].copy()

In [7]:
meta_data['is_sedan_target'] = (meta_data['Body Type'] == 'Sedan').astype(int)

In [8]:
image_dict = p.load(open('/home/ec2-user/scaled_grayscale_dict.p', 'rb'))

In [9]:
training_data = []
for i in meta_data[['is_sedan_target', 'fname']].iterrows():
    row = [i[1]['is_sedan_target']]
    row.extend(image_dict[i[1]['fname']].flatten())
    training_data.append(row)
training_data = np.array(training_data).astype(float)

Prep the data

In [10]:
X_train, X_test, y_train, y_test = train_test_split(training_data[:,1:], 
                                                    training_data[:,0], 
                                                    test_size=0.33, 
                                                    random_state=42)

In [11]:
y_train = y_train.astype('int32')
y_test = y_test.astype('int32')

y_train = np.concatenate([1 - y_train.reshape(-1,1), y_train.reshape(-1,1)], axis = 1)
y_test = np.concatenate([1 - y_test.reshape(-1,1), y_test.reshape(-1,1)], axis = 1)

Define the model

In [12]:
def cnn_model_fn(features, labels, mode):
    """Model function for CNN."""
    
    #Define model architecture
    
    # Input Layer
    # Reshape X to 4-D tensor: [batch_size, width, height, channels]
    input_layer = tf.reshape(features["x"], [-1, 25, 25, 1])

    # Convolutional Layer #1
    # Computes 32 features using a 5x5 filter with ReLU activation.
    # Padding is added to preserve width and height.
    # Input Tensor Shape: [batch_size, 25, 25, 1]
    # Output Tensor Shape: [batch_size, 25, 25, 32]
    conv1 = tf.layers.conv2d(
      inputs=input_layer,
      filters=32,
      kernel_size=[5, 5],
      padding="same",
      activation=tf.nn.relu)

    # Pooling Layer #1
    # First max pooling layer with a 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 25, 25, 32]
    # Output Tensor Shape: [batch_size, 12, 12, 32]
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional Layer #2
    # Computes 64 features using a 5x5 filter.
    # Padding is added to preserve width and height.
    # Input Tensor Shape: [batch_size, 12, 12, 32]
    # Output Tensor Shape: [batch_size, 12, 12, 64]
    conv2 = tf.layers.conv2d(
      inputs=pool1,
      filters=64,
      kernel_size=[5, 5],
      padding="same",
      activation=tf.nn.relu)

    # Pooling Layer #2
    # Second max pooling layer with a 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 12, 12, 64]
    # Output Tensor Shape: [batch_size, 6, 6, 64]
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Flatten tensor into a batch of vectors
    # Input Tensor Shape: [batch_size, 7, 7, 64]
    # Output Tensor Shape: [batch_size, 7 * 7 * 64]
    pool2_flat = tf.reshape(pool2, [-1, 6 * 6 * 64])

    # Dense Layer
    # Densely connected layer with 1024 neurons
    # Input Tensor Shape: [batch_size, 7 * 7 * 64]
    # Output Tensor Shape: [batch_size, 1024]
    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)

    # Add dropout operation; 0.6 probability that element will be kept
    dropout = tf.layers.dropout(
      inputs=dense, rate=0.4, training=(mode == tf.estimator.ModeKeys.TRAIN))

    # Logits layer
    # Input Tensor Shape: [batch_size, 1024]
    # Output Tensor Shape: [batch_size, 10]
    logits = tf.layers.dense(inputs=dropout, units=2)
    
    #Define model outputs
    # Calculate Loss (for both TRAIN and EVAL modes)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels[:,1], logits=logits)

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
          # Generate predictions (for PREDICT and EVAL mode)
          "classes": tf.argmax(input=logits, axis=1),
          # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
          # `logging_hook`.
          "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
        }
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    # Configure the Training Op (for TRAIN mode)
    elif mode == tf.estimator.ModeKeys.TRAIN:
        logging_hook = tf.train.LoggingTensorHook(
            {"loss" : loss
            },
            every_n_iter=100)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op, training_hooks = [logging_hook])
    # Add evaluation metrics (for EVAL mode)
    elif mode == tf.estimator.ModeKeys.EVAL:
        predictions = {
          # Generate predictions (for PREDICT and EVAL mode)
          "classes": tf.argmax(input=logits, axis=1),
          # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
          # `logging_hook`.
          "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
        }
        #swap this for AUROC?
        eval_metric_ops = {
          "accuracy": tf.metrics.auc(
              labels=labels, predictions=predictions["probabilities"])}

        return tf.estimator.EstimatorSpec(
          mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
    else:
        print('Error')

In [13]:
# Load training and eval data
#mnist = tf.contrib.learn.datasets.load_dataset("mnist")
#train_data = mnist.train.images  # Returns np.array
#train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
#eval_data = mnist.test.images  # Returns np.array
#eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)

# Create the Estimator
car_classifier = tf.estimator.Estimator(
  model_fn=cnn_model_fn, model_dir="/home/ec2-user/convnet_model")

# Train the model
train_input_fn = tf.estimator.inputs.numpy_input_fn(
  x={"x": X_train},
  y=y_train,
  batch_size=200,
  num_epochs=None,
  shuffle=True)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/home/ec2-user/convnet_model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fbf1db78550>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [15]:
car_classifier.train(
  input_fn=train_input_fn,
  steps=10000,
  #hooks=[logging_hook]
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /home/ec2-user/convnet_model/model.ckpt-1
Instructions for updating:
Use standard file utilities to get mtimes.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /home/ec2-user/convnet_model/model.ckpt.
INFO:tensorflow:loss = 98.12841796875, step = 2
INFO:tensorflow:loss = 98.12841796875
INFO:tensorflow:global_step/sec: 1.88207
INFO:tensorflow:loss = 0.791900634765625, step = 102 (53.134 sec)
INFO:tensorflow:loss = 0.791900634765625 (53.134 sec)
INFO:tensorflow:global_step/sec: 2.07372
INFO:tensorflow:loss = 0.6902719140052795, step = 202 (48.223 sec)
INFO:tensorflow:loss = 0.6902719140052795 (48.223 sec)
INFO:tensorflow:global_st

INFO:tensorflow:loss = 0.4042145609855652 (48.411 sec)
INFO:tensorflow:global_step/sec: 2.07221
INFO:tensorflow:loss = 0.38334956765174866, step = 4302 (48.258 sec)
INFO:tensorflow:loss = 0.38334956765174866 (48.258 sec)
INFO:tensorflow:global_step/sec: 2.0657
INFO:tensorflow:loss = 0.40208935737609863, step = 4402 (48.411 sec)
INFO:tensorflow:loss = 0.40208935737609863 (48.411 sec)
INFO:tensorflow:global_step/sec: 2.07488
INFO:tensorflow:loss = 0.4081370234489441, step = 4502 (48.194 sec)
INFO:tensorflow:loss = 0.4081370234489441 (48.194 sec)
INFO:tensorflow:global_step/sec: 2.06783
INFO:tensorflow:loss = 0.3487881124019623, step = 4602 (48.359 sec)
INFO:tensorflow:loss = 0.3487881124019623 (48.359 sec)
INFO:tensorflow:global_step/sec: 2.06251
INFO:tensorflow:loss = 0.38335728645324707, step = 4702 (48.486 sec)
INFO:tensorflow:loss = 0.38335728645324707 (48.486 sec)
INFO:tensorflow:global_step/sec: 2.06302
INFO:tensorflow:loss = 0.38342994451522827, step = 4802 (48.472 sec)
INFO:tenso

INFO:tensorflow:loss = 0.1761423498392105 (114.481 sec)
INFO:tensorflow:global_step/sec: 0.871814
INFO:tensorflow:loss = 0.183481365442276, step = 8802 (114.702 sec)
INFO:tensorflow:loss = 0.183481365442276 (114.702 sec)
INFO:tensorflow:Saving checkpoints for 8826 into /home/ec2-user/convnet_model/model.ckpt.
INFO:tensorflow:global_step/sec: 0.867327
INFO:tensorflow:loss = 0.16881972551345825, step = 8902 (115.296 sec)
INFO:tensorflow:loss = 0.16881972551345825 (115.296 sec)
INFO:tensorflow:global_step/sec: 0.870045
INFO:tensorflow:loss = 0.20439046621322632, step = 9002 (114.939 sec)
INFO:tensorflow:loss = 0.20439046621322632 (114.939 sec)
INFO:tensorflow:global_step/sec: 0.872048
INFO:tensorflow:loss = 0.16857871413230896, step = 9102 (114.671 sec)
INFO:tensorflow:loss = 0.16857871413230896 (114.671 sec)
INFO:tensorflow:global_step/sec: 0.869426
INFO:tensorflow:loss = 0.17207175493240356, step = 9202 (115.019 sec)
INFO:tensorflow:loss = 0.17207175493240356 (115.018 sec)
INFO:tensorfl

<tensorflow_estimator.python.estimator.estimator.Estimator at 0x7fbf1db782e8>

In [16]:
# Evaluate the model and print results
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
  x={"x": X_train}, y=y_train, num_epochs=1, shuffle=False)
eval_results = car_classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)

INFO:tensorflow:Calling model_fn.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-02-27T04:44:10Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /home/ec2-user/convnet_model/model.ckpt-10001
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-02-27-04:44:13
INFO:tensorflow:Saving dict for global step 10001: accuracy = 0.9986844, global_step = 10001, loss = 0.10918891
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 10001: /home/ec2-user/convnet_model/model.ckpt-10001
{'accuracy': 0.9986844, 'loss': 0.10918891, 'global_step': 10001}


In [17]:
# Evaluate the model and print results
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
  x={"x": X_test}, y=y_test, num_epochs=1, shuffle=False)
eval_results = car_classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-02-27T04:44:21Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /home/ec2-user/convnet_model/model.ckpt-10001
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-02-27-04:44:22
INFO:tensorflow:Saving dict for global step 10001: accuracy = 0.5730347, global_step = 10001, loss = 1.0679727
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 10001: /home/ec2-user/convnet_model/model.ckpt-10001
{'accuracy': 0.5730347, 'loss': 1.0679727, 'global_step': 10001}
