In [1]:
import math
from IPython import display
from matplotlib import cm, gridspec
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
import tensorflow as tf
from tensorflow.python.data import Dataset

tf.logging.set_verbosity(tf.logging.ERROR)
pd.options.display.max_rows = 10

ship_data = pd.read_csv("./objects.csv", sep=",")
ship_data_mix = ship_data.reindex(np.random.permutation(ship_data.index))

In [2]:
ship_data.describe()

Unnamed: 0,cls,xtl,ytl,xbr,ybr
count,7073.0,7073.0,7073.0,7073.0,7073.0
mean,1.0,111.337905,115.054856,142.125548,141.189594
std,0.0,79.475063,78.942056,79.61999,79.043337
min,1.0,0.0,0.0,0.0,0.0
25%,1.0,38.0,45.0,73.0,73.0
50%,1.0,109.0,114.0,142.0,141.0
75%,1.0,180.0,182.0,216.0,213.0
max,1.0,254.0,254.0,255.0,255.0


In [3]:
ship_data.head()

Unnamed: 0,image_id,cls,xtl,ytl,xbr,ybr
0,152d4ae2a637a0c5cbcc48a26d52c0e5/relevant/3704...,1,186,149,217,212
1,aa56582d9daf19519b8d3b3bb7c5fa54/relevant/6f90...,1,20,39,31,52
2,aa56582d9daf19519b8d3b3bb7c5fa54/relevant/62f4...,1,103,120,108,131
3,aa56582d9daf19519b8d3b3bb7c5fa54/relevant/62f4...,1,89,121,94,136
4,aa56582d9daf19519b8d3b3bb7c5fa54/relevant/62f4...,1,108,103,113,117


In [112]:
def read_img(file):
    img = plt.imread("./images/" + file)
    return img

def split_color(img):
    r = img[:,:,0].reshape(65536)
    g = img[:,:,1].reshape(65536)
    b = img[:,:,2].reshape(65536)
    return (r,g,b)

def preprocess_features(ship_data):
    selected_features = ship_data["image_id"]
    image_features = pd.DataFrame(columns=[range(196608)])
    for feature in selected_features:
        #print(feature)
        #r, g, b = split_color(read_img(feature))
        #np.append(r, g, b)
        #image.append(g)
        #image.append(b)
        image = read_img(feature)
        img = dict(enumerate(image.flatten(), 1))
        image_features["images"].append(img)
    return image_features

def preprocess_targets(ship_data):
    selected_targets = ship_data[[
        "xtl",
        "ytl",
        "xbr",
        "ybr"
    ]]
    preprocessed_targets = selected_targets.copy()
    return preprocessed_targets

In [113]:
training_examples = preprocess_features(ship_data_mix.head(100))
training_targets = preprocess_targets(ship_data_mix.head(100))
validation_examples = preprocess_features(ship_data_mix.tail(20))
validation_targets = preprocess_targets(ship_data_mix.tail(20))

print("Training Examples Summary:")
display.display(training_examples.describe())
print("Training Targets Summary:")
display.display(training_targets.describe())
print("Validation Examples Summary:")
display.display(validation_examples.describe())
print("Validation Targets Summary:")
display.display(validation_targets.describe())

KeyError: 'images'

In [108]:
def my_input_fn(features, targets, batch_size = 1, shuffle = True, num_epochs = None):
    features = {key:np.array(value) for key,value in dict(features).items()}
    targets = {key:np.array(value) for key,value in dict(targets).items()}
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)
    if shuffle:
        ds = ds.shuffle(10000)
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels

def construct_feature_columns(input_features):
    return set([tf.feature_column.numeric_column(my_feature) for my_feature in input_features])

def train_linear_classifier_model(
    learning_rate,
    steps,
    batch_size,
    training_examples,
    training_targets,
    validation_examples,
    validation_targets):
    
    periods = 10
    steps_per_period = steps / periods
    
    my_optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
    my_optimizer = tf.contrib.estimator.clip_gradients_by_norm(my_optimizer, 5.0)
    
    linear_classifier = tf.estimator.LinearClassifier(
        feature_columns = construct_feature_columns(training_examples),
        optimizer = my_optimizer)
    
    training_input_fn = lambda:my_input_fn(
        training_examples,
        training_targets,
        batch_size = batch_size)
    predict_training_input_fn = lambda:my_input_fn(
        training_examples,
        training_targets,
        num_epochs = 1,
        shuffle = False)
    predict_validation_input_fn = lambda:my_input_fn(
        validation_examples,
        validation_targets,
        num_epochs = 1,
        shuffle = False)
    
    print("Training model...")
    print("RMSE (on training data):")
    training_rmse = []
    validation_rmse = []
    for period in range (0, periods):
        linear_classifier.train(
            input_fn = training_input_fn,
            steps = steps_per_period)
        
        training_predictions = linear_classifier.predict(input_fn = predict_training_input_fn)
        training_predictions = np.array([item['predictions'][0] for item in training_predictions])
        validation_predictions = linear_classifier.predict(input_fn = predict_validation_input_fn)
        validation_predictions = np.array([item['predictions'][0] for item in validation_predictions])
        
        training_RMSE = math.sqrt(metrics.mean_squared_error(training_predictions, training_targets))
        validation_RMSE = math.sqrt(metrics.mean_squared_error(validation_predictions, validation_targets))
        
        print("period %02d: %0.4f" % (period, training_RMSE))
        training_rmse.append(training_RMSE)
        validation_rmse.append(validation_RMSE)
    print("Model training finished.")
    
    plt.ylabel("RMSE")
    plt.xlabel("Periods")
    plt.title("Root Mean Squared Error vs. Periods")
    plt.tight_layout()
    plt.plot(training_rmse, label= "training")
    plt.plot(validation_rmse, label= "validation")
    plt.legend()
    
    print("Final RMSE (on validation data): %0.2f" % validation_RMSE)
    
    return linear_classifier

In [109]:
linear_classifier = train_linear_classifier_model(
    learning_rate=0.005,
    steps=100,
    batch_size=20,
    training_examples=training_examples,
    training_targets=training_targets,
    validation_examples=validation_examples,
    validation_targets=validation_targets
)

Training model...
RMSE (on training data):


ValueError: Dimensions 0 and 100 are not compatible