In [1]:
import pickle

import numpy as np
from sklearn import model_selection, preprocessing
import tensorflow as tf

# Data Preprocessing

In [29]:
# Load the pickled dataset; later cells read its "images" and "targets" entries.
# NOTE(review): pickle.load can execute arbitrary code while deserialising,
# so only run this cell against a file from a trusted source.
with open("../data/raw/all_object_data_in_dictionary_format.pkl", "rb") as pickled_data:
    all_data = pickle.load(pickled_data)

In [50]:
# Pull the model inputs and the targets out of the loaded dictionary.
X = all_data["images"]
y = all_data["targets"]

In [None]:
# Flatten every image into one row of 3 * 51 * 51 values, then min-max scale
# each column.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test-set statistics influence the scaling; consider fitting on the
# training rows only.
scaler = preprocessing.MinMaxScaler()
flattened_images = X.reshape(-1, 3 * 51 * 51)
Z = scaler.fit_transform(flattened_images)

In [None]:
# train_test_split returns a flat list [X_train, X_test, y_train, y_test], so it
# must be unpacked into four names in that order (the previous nested-tuple
# unpacking raised ValueError). The *_labels names match what the input
# functions in the training and evaluation cells reference.
# random_state pins the shuffle so the split is reproducible after a kernel restart.
training_features, testing_features, training_labels, testing_labels = model_selection.train_test_split(
    Z, y, test_size=0.2, random_state=42
)

# Estimator API

In [23]:
# Reference: tf.estimator.Estimator is constructed from a model_fn and a model_dir
# (see the "Creating the Estimator" cell); run `tf.estimator.Estimator?`
# interactively for the full docstring.

In [12]:
def cnn_model_fn(features: tf.Tensor, labels: tf.Tensor, mode: tf.estimator.ModeKeys) -> tf.estimator.EstimatorSpec:
    """Build the CNN graph for a binary classifier and wrap it in an EstimatorSpec.

    The network is two (5x5 convolution + ReLU -> 2x2 max-pool) stages, a
    1024-unit dense layer with dropout, and a single output logit that is
    turned into a probability with a sigmoid.

    Parameters
    ----------
    features : tf.Tensor
        Batch of flattened images; reshaped here to (batch, 3, 51, 51).
    labels : tf.Tensor
        Targets for the batch. Assumed to be one 0/1 value per example --
        TODO confirm against the dataset. Not used (may be None) in PREDICT mode.
    mode : tf.estimator.ModeKeys
        TRAIN, EVAL or PREDICT; selects which EstimatorSpec is returned.

    Returns
    -------
    tf.estimator.EstimatorSpec
        PREDICT: predictions with keys "classes" and "probabilities".
        TRAIN: loss and an Adam train_op.
        EVAL: loss and the metrics "accuracy", "recall", "area_under_curve"
        and "f_score".

    Notes
    -----
    The graph differs from earlier revisions of this function (the second
    convolution now consumes the first pooling layer), so checkpoints written
    by an earlier revision into the same model_dir are presumably not
    restorable -- use a fresh model_dir.
    """

    # reshape the inputs; the cast guards against float64 arrays coming from
    # the NumPy input pipeline (assumption -- the scaler output dtype is not
    # visible here)
    input_layer = tf.reshape(tf.cast(features, tf.float32), [-1, 3, 51, 51])

    # convolutional layers
    # NOTE(review): channels_first kernels are not available on every CPU
    # build of TensorFlow -- confirm on the target device.
    convolution_layer_1 = tf.layers.conv2d(inputs=input_layer,
                                           filters=32,
                                           kernel_size=(5, 5),
                                           padding="same",
                                           data_format="channels_first",
                                           activation=tf.nn.relu)

    pooling_layer_1 = tf.layers.max_pooling2d(inputs=convolution_layer_1,
                                              pool_size=(2, 2),
                                              strides=2,
                                              data_format="channels_first")

    # The second stage must consume the first stage's output; feeding it
    # input_layer again left the first convolution/pooling pair unused.
    convolution_layer_2 = tf.layers.conv2d(inputs=pooling_layer_1,
                                           filters=64,
                                           kernel_size=(5, 5),
                                           padding="same",
                                           data_format="channels_first",
                                           activation=tf.nn.relu)

    pooling_layer_2 = tf.layers.max_pooling2d(inputs=convolution_layer_2,
                                              pool_size=(2, 2),
                                              strides=2,
                                              data_format="channels_first")

    # dense layers
    flatten_layer = tf.layers.flatten(inputs=pooling_layer_2)
    dense_layer = tf.layers.dense(inputs=flatten_layer,
                                  units=1024,
                                  activation=tf.nn.relu)
    dropout_layer = tf.layers.dropout(inputs=dense_layer,
                                      rate=0.45,
                                      training=(mode == tf.estimator.ModeKeys.TRAIN))

    # output layer: one logit per example. argmax over a single column is
    # always 0, so the class is obtained by thresholding the sigmoid instead.
    logit_layer = tf.layers.dense(inputs=dropout_layer, units=1)
    probabilities = tf.nn.sigmoid(logit_layer)
    predicted_labels = tf.cast(tf.greater(probabilities, 0.5), tf.int64)

    # PREDICT is handled first because no labels are supplied in that mode,
    # so nothing below this branch may be built for it.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {"classes": predicted_labels,
                       "probabilities": probabilities}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # Shape the labels like the logits, (batch, 1), and use sigmoid
    # cross-entropy: softmax cross-entropy over one logit is identically zero.
    binary_labels = tf.reshape(tf.cast(labels, tf.float32), [-1, 1])
    loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=binary_labels,
                                           logits=logit_layer)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(loss,
                                      global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

    # EVAL: every metric is a (value, update_op) pair.
    accuracy = tf.metrics.accuracy(binary_labels, predicted_labels)
    precision_value, precision_update = tf.metrics.precision(binary_labels, predicted_labels)
    recall_value, recall_update = tf.metrics.recall(binary_labels, predicted_labels)
    # AUC is computed from the probabilities, not the thresholded classes.
    auc = tf.metrics.auc(binary_labels, probabilities)
    # tf.metrics has no f_score; derive F1 from the streaming precision and
    # recall. tf.maximum keeps the division defined when both are zero.
    f_score_value = (2.0 * precision_value * recall_value
                     / tf.maximum(precision_value + recall_value, 1e-7))
    f_score = (f_score_value, tf.group(precision_update, recall_update))

    eval_metric_ops = {"accuracy": accuracy,
                       "recall": (recall_value, recall_update),
                       "area_under_curve": auc,
                       "f_score": f_score}
    return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=eval_metric_ops)
    
    

# Creating the Estimator

In [10]:
# Wrap cnn_model_fn in an Estimator, passing the directory for this model as model_dir.
super_nova_estimator = tf.estimator.Estimator(
    model_fn=cnn_model_fn,
    model_dir="../models/super-nova-classifiers/cnn",
)

# Training the model

In [24]:
# Reference: tf.estimator.inputs.numpy_input_fn builds an Estimator input function
# from in-memory NumPy arrays; run `tf.estimator.inputs.numpy_input_fn?`
# interactively for the full docstring.

In [18]:
# Training input function: shuffled batches of 128 examples. num_epochs=None
# puts no epoch limit on the input, so the length of training is set by the
# `steps` argument of train().
training_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=training_features,
    y=training_labels,
    batch_size=128,
    num_epochs=None,
    shuffle=True,
)

In [None]:
# Train for 1000 steps on batches drawn from the training input function.
super_nova_estimator.train(
    input_fn=training_input_fn,
    steps=1000,
)


# Evaluating the model

In [21]:
# Evaluation input function: a single, unshuffled pass over the held-out data.
evaluation_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=testing_features,
    y=testing_labels,
    num_epochs=1,
    shuffle=False,
)

# Compute the loss and the metrics declared in cnn_model_fn on the held-out data.
evaluation_results = super_nova_estimator.evaluate(
    input_fn=evaluation_input_fn,
)