In [2]:
import os
import sys
import time

import numpy as np
import matplotlib.pyplot as plt
import scipy
import cv2
import theano
import theano.tensor as T

%matplotlib inline
import matplotlib

import theano.sandbox.cuda
theano.sandbox.cuda.use("gpu0")

Using gpu device 0: GeForce GTX TITAN X (CNMeM is disabled, CuDNN 4004)


In [3]:
sys.path.append('/usr0/home/glample/Research/perso/UltraDeep/')

from experiment import Experiment
from learning_method import LearningMethod
from layer import HiddenLayer, EmbeddingLayer, DropoutLayer
from network import FastLSTM
from convolution import Conv2DLayer

  "downsample module has been moved to the theano.tensor.signal.pool module.")


In [4]:
# Dataset root directory; the image folder is resolved relative to it.
data_path = '/usr0/home/glample/Research/kaggle/DataScienceGame/data/'
images_path = os.path.join(data_path, 'roof_images')

# Keep regular files only, so that sub-directories are not counted as images.
images_filenames = []
for f in os.listdir(images_path):
    if os.path.isfile(os.path.join(images_path, f)):
        images_filenames.append(f)
print('Found %i images' % len(images_filenames))

Found 42759 images


In [5]:
# Parse id_train.csv (first line skipped, then "image_id,label" rows) and load
# the image of every row whose image id is not negative.
#   img_to_id: image id as written in the CSV (possibly negative) -> 0-based row index
#   id_to_img: 0-based row index -> {'img_id', 'label', 'image'}
id_to_img = {}
img_to_id = {}
# `with` guarantees the CSV handle is closed, even when one of the asserts fails.
with open(os.path.join(data_path, 'id_train.csv')) as id_file:
    for i, line in enumerate(id_file):
        if i == 0:
            # first line is skipped (presumably the CSV header)
            continue
        line = line.rstrip().split(',')
        # startswith() keeps an empty id field an assertion failure instead of an IndexError
        assert len(line) == 2 and line[1].isdigit() and (line[0].isdigit() or line[0].startswith('-') and line[0][1:].isdigit())
        # parse each field once instead of calling int() at every use
        img_id, label = int(line[0]), int(line[1])
        assert img_id not in img_to_id
        img_to_id[img_id] = i - 1
        if img_id < 0: # TODO: why are ID negative?????
            continue
        # abs() is a no-op while negative ids are skipped above; kept so the
        # file name stays valid if that skip is ever removed
        image_path = os.path.join(images_path, '%i.jpg' % abs(img_id))
        assert os.path.isfile(image_path), image_path
        id_to_img[i - 1] = {
            'img_id': img_id,
            'label': label,
            'image': scipy.misc.imread(image_path)
        }
print('Found %i elements' % len(id_to_img))
# print(len([True for k, v in id_to_img.items() if v['img_id'] > 0]))
# print(len([True for k, v in id_to_img.items() if v['img_id'] <= 0]))

Found 7987 elements


In [6]:
def process_image(image, gray, height, width):
    """Resize an image and put it in the layout used by the network.

    Parameters
    ----------
    image : numpy.ndarray
        Image of shape (H, W, C). When `gray` is true, an image that is
        already single-channel with shape (H, W) is accepted as well.
    gray : bool
        If true, average the channel axis and return a float32 array of shape
        (height, width). If false, keep the channels (and the input dtype) and
        return a channel-first array of shape (C, height, width).
    height, width : int
        Target spatial size. The image is resized with cv2.INTER_AREA only
        when its current (H, W) differs from it.

    Returns
    -------
    numpy.ndarray
        (height, width) float32 if `gray`, else (C, height, width).

    Raises
    ------
    ValueError
        If `gray` is false and `image` is not 3-dimensional.
    """
    if gray:
        image = image.astype(np.float32)
        # A 2-D input is already grayscale: there is no channel axis to average.
        if image.ndim != 2:
            image = image.mean(axis=2)
    elif image.ndim != 3:
        # Fail early with a readable message instead of a late transpose error.
        raise ValueError('expected a (H, W, C) image, got shape %s' % (image.shape,))
    if image.shape[:2] != (height, width):
        # cv2.resize takes its target size as (width, height)
        image = cv2.resize(image, (width, height), interpolation=cv2.INTER_AREA)
    return image if gray else image.transpose(2, 0, 1)

In [7]:
# Build one (input, target) pair per loaded image:
#   input  - colour image resized to 80x80, as float32 divided by 255
#   target - the stored label minus one
pairs = []
for k, v in id_to_img.items():
    pixels = process_image(v['image'], False, 80, 80)
    pairs.append((pixels.astype(np.float32) / 255., v['label'] - 1))
# Split the pairs into two parallel tuples.
x_data, y_data = zip(*pairs)

In [8]:
# Symbolic graph of the classifier: two convolutions, dropout, one hidden
# layer, dropout, and a 4-way softmax trained with mean cross-entropy.

# Hyper-parameters.
dropout_rate = 0.5
hidden_layer_size = 128

# Symbolic inputs.
# is_train: int32 scalar; a non-zero value selects the dropout branch of the
#           T.switch expressions below.
# x_indexes: int32 vector; not used by the graph built in this cell.
# x_image: float32 4-D tensor; the commented-out checks below feed it arrays of
#          shape (batch, 3, 80, 80).
# y_image: int32 vector of target classes, consumed by the cost.
is_train = T.iscalar()
x_indexes = T.ivector()
x_image = T.ftensor4()
y_image = T.ivector()

# 2 convolutions
# NOTE(review): the Conv2DLayer signature is not visible here; the arguments are
# presumably (n_filters, n_input_channels, filter_height, filter_width,
# border_mode, stride, name) - confirm against convolution.py.
conv_layer1 = Conv2DLayer(20, 3, 8, 8, 'valid', (4, 4), 'conv_layer1')
conv_layer2 = Conv2DLayer(64, 20, 8, 8, 'valid', (2, 2), 'conv_layer2')
# Flatten to one row of 64 * 6 * 6 values per sample; this hard-coded size only
# holds if the second convolution emits exactly that many values per sample.
conv_output = conv_layer2.link(conv_layer1.link(x_image)).reshape((x_image.shape[0], 64 * 6 * 6))
# print('conv_output', conv_output.eval({x_image: np.random.rand(2, 3, 80, 80).astype(np.float32)}).shape)

# dropout layer
# Training (is_train != 0) uses the DropoutLayer output; otherwise the
# activations are scaled by (1 - dropout_rate).
# NOTE(review): this is only consistent if DropoutLayer.link does not itself
# rescale the kept units - confirm in layer.py.
dropout_layer1 = DropoutLayer(p=dropout_rate)
conv_output = T.switch(T.neq(is_train, 0), dropout_layer1.link(conv_output), (1 - dropout_rate) * conv_output)

# hidden layer
# Presumably HiddenLayer(input_dim, output_dim) - TODO confirm in layer.py.
hidden_layer = HiddenLayer(64 * 6 * 6, hidden_layer_size)
hidden_output = hidden_layer.link(conv_output)
# print('hidden_output', hidden_output.eval({is_train: 0, x_image: np.random.rand(2, 3, 80, 80).astype(np.float32)}).shape)

# dropout layer
# Same train / eval switch as above, applied to the hidden activations.
dropout_layer2 = DropoutLayer(p=dropout_rate)
hidden_output = T.switch(T.neq(is_train, 0), dropout_layer2.link(hidden_output), (1 - dropout_rate) * hidden_output)

# final layer
# Softmax layer with 4 outputs.
final_layer = HiddenLayer(hidden_layer_size, 4, activation='softmax')
final_output = final_layer.link(hidden_output)
# print('final_output', final_output.eval({is_train: 0, x_image: np.random.rand(2, 3, 80, 80).astype(np.float32)}).shape)

# cost
# Categorical cross-entropy between the softmax output and y_image, averaged
# over the batch.
cost = T.nnet.categorical_crossentropy(final_output, y_image).mean()
# print('cost', cost.eval({is_train: 1, x_image: np.random.rand(2, 3, 80, 80).astype(np.float32), y_image: np.random.randint(0, 4, (2,)).astype(np.int32)}))

In [9]:
# Parameters handed to the optimiser: those of the two convolutions, the
# hidden layer and the final layer, concatenated in that order.
params = conv_layer1.params + conv_layer2.params
params = params + hidden_layer.params + final_layer.params
# Extra keyword arguments forwarded to LearningMethod.get_updates (none here).
lr_method_parameters = dict()

# Prediction function: image batch -> final layer output, with is_train fixed to 0.
f_eval = theano.function(
    [x_image],
    final_output,
    givens={is_train: np.cast['int32'](0)}
)

# Training function: (image batch, labels) -> cost, applying the 'sgd' updates
# with is_train fixed to 1.
sgd_updates = LearningMethod(clip=5.0).get_updates('sgd', cost, params, **lr_method_parameters)
f_train = theano.function(
    [x_image, y_image],
    cost,
    updates=sgd_updates,
    givens={is_train: np.cast['int32'](1)}
)

In [10]:
def evaluate(x, y, batch_size=100):
    """Return the accuracy of `f_eval` on the samples `x` with labels `y`.

    The samples are passed to `f_eval` in consecutive slices of at most
    `batch_size` elements; the predicted class of each sample is the argmax
    of its output row. The result is the number of correct predictions
    divided by `len(x)`.
    """
    n_correct = 0
    for lo in xrange(0, len(x), batch_size):
        hi = lo + batch_size
        predicted = f_eval(x[lo:hi]).argmax(axis=1)
        n_correct += np.sum(predicted == y[lo:hi])
    return n_correct * 1.0 / len(x)

In [11]:
# Hold-out split: the first 6000 samples train the model, the rest validate it.
x_train, x_valid = x_data[:6000], x_data[6000:]
y_train, y_valid = y_data[:6000], y_data[6000:]

batch_size = 1
n_epochs = 1000000
best_accuracy = -1
count = 0          # number of training batches processed so far
last_costs = []    # costs accumulated since the last progress line
start = time.time()

for n_epoch in xrange(n_epochs):
    print('Starting epoch %i...' % n_epoch)
    # Apply one random permutation to inputs and labels so they stay aligned.
    perm = np.random.permutation(len(x_train))
    x_train = [x_train[src] for src in perm]
    y_train = [y_train[src] for src in perm]
    for j in xrange(0, len(x_train), batch_size):
        count += 1
        batch = slice(j, j + batch_size)
        new_cost = f_train(x_train[batch], y_train[batch])
        last_costs.append(new_cost)
        if count % 100 != 0:
            continue
        # Every 100 batches: report the mean cost over that window and reset it.
        print('{0:>6} - {1}'.format(count, np.mean(last_costs)))
        last_costs = []
    # End of epoch: measure validation accuracy and remember the best one seen.
    new_accuracy = evaluate(x_valid, y_valid)
    best_accuracy = max(best_accuracy, new_accuracy)
    print('Epoch %i done.' % n_epoch)
    elapsed = time.time() - start
    print('Time: %.5f - New accuracy: %.5f - Best: %.5f' % (elapsed, new_accuracy, best_accuracy))

Starting epoch 0...
   100 - 1.34168386459
   200 - 1.33930647373
   300 - 1.31409776211
   400 - 1.36630141735
   500 - 1.24309694767
   600 - 1.30089902878
   700 - 1.20569980145
   800 - 1.2770011425
   900 - 1.30267369747
  1000 - 1.27682614326
  1100 - 1.31822216511
  1200 - 1.37841916084
  1300 - 1.19296646118
  1400 - 1.29016053677
  1500 - 1.35383450985
  1600 - 1.36804139614
  1700 - 1.32413160801
  1800 - 1.38103473186
  1900 - 1.34330058098
  2000 - 1.36027741432
  2100 - 1.28671419621
  2200 - 1.30860304832
  2300 - 1.34086728096
  2400 - 1.29853236675
  2500 - 1.30052947998
  2600 - 1.32884657383
  2700 - 1.20669269562
  2800 - 1.26778256893
  2900 - 1.38209676743
  3000 - 1.29605662823
  3100 - 1.23615825176
  3200 - 1.3115196228
  3300 - 1.288220644
  3400 - 1.20944833755
  3500 - 1.29487192631
  3600 - 1.24922406673
  3700 - 1.32376587391
  3800 - 1.28202617168
  3900 - 1.18968498707
  4000 - 1.28105258942
  4100 - 1.26003086567
  4200 - 1.17896854877
  4300 - 1.1318304

KeyboardInterrupt: 

In [None]:
# Look at the different image shapes.
# The height / width ratio can be very large or very small, so resizing every
# image to a square may hurt a lot.

shapes = [v['image'].shape for v in id_to_img.values()]
# Every image is expected to be (H, W, 3); the length check makes a 2-D image
# fail the assert instead of raising an IndexError.
assert all(len(x) == 3 and x[2] == 3 for x in shapes)
shapes = np.array([x[:2] for x in shapes])

heights, widths = shapes[:, 0], shapes[:, 1]
# Cast to float32 before dividing so the ratio is not an integer division;
# computed once and reused for the three statistics.
ratios = heights.astype(np.float32) / widths.astype(np.float32)

# Print min / max / mean of heights, widths and ratios, one group after the
# other, separated by a blank line. print('') is used instead of a bare
# `print` so the separator is also emitted when print is a function.
for n_stat, values in enumerate((heights, widths, ratios)):
    if n_stat > 0:
        print('')
    print(values.min())
    print(values.max())
    print(values.mean())

In [None]:
#########
#########    ignore this
#########    GPU VERSION (store the dataset on GPU, not much faster though)
#########


# Materialise the 6000 / rest split as dense arrays (float32 images, int32
# labels) and wrap each array in a Theano shared variable.
x_train = np.array(x_data[:6000], dtype=np.float32)
y_train = np.array(y_data[:6000], dtype=np.int32)
x_valid = np.array(x_data[6000:], dtype=np.float32)
y_valid = np.array(y_data[6000:], dtype=np.int32)

x_train_shared, y_train_shared = theano.shared(x_train), theano.shared(y_train)
x_valid_shared, y_valid_shared = theano.shared(x_valid), theano.shared(y_valid)








# Parameters handed to the optimiser: those of the two convolutions, the
# hidden layer and the final layer.
params = conv_layer1.params + conv_layer2.params + hidden_layer.params + final_layer.params
# Extra keyword arguments forwarded to LearningMethod.get_updates (none here).
lr_method_parameters = {}

# Prediction function: takes a vector of sample indexes and evaluates the
# network on the matching rows of the validation shared variable, with
# is_train fixed to 0.
f_eval = theano.function(
    inputs=[x_indexes],
    outputs=final_output,
    givens={is_train: np.cast['int32'](0), x_image: x_valid_shared[x_indexes]}
)

# Training function: takes a vector of sample indexes, reads the matching
# images and labels from the training shared variables, returns the cost and
# applies the 'sgd' updates, with is_train fixed to 1.
# The clip value is passed by keyword, exactly as in the non-GPU cell, so both
# versions configure LearningMethod identically whatever its positional order.
f_train = theano.function(
    inputs=[x_indexes],
    outputs=cost,
    updates=LearningMethod(clip=5.0).get_updates('sgd', cost, params, **lr_method_parameters),
    givens={is_train: np.cast['int32'](1), x_image: x_train_shared[x_indexes], y_image: y_train_shared[x_indexes]}
)










def evaluate(batch_size=100):
    """Return the accuracy of `f_eval` on the validation set.

    `f_eval` is called with consecutive ranges of int32 sample indexes, at
    most `batch_size` per call; the predicted class of each sample is the
    argmax of its output row, compared against `y_valid`. The result is the
    number of correct predictions divided by `len(x_valid)`.
    """
    n_valid = len(x_valid)
    n_hits = 0
    for first in xrange(0, n_valid, batch_size):
        # Clamp the last batch so no index goes past the validation set.
        last = min(first + batch_size, n_valid)
        batch_indexes = np.arange(first, last, dtype=np.int32)
        predicted = f_eval(batch_indexes).argmax(axis=1)
        n_hits += np.sum(predicted == y_valid[first:first + batch_size])
    return n_hits * 1.0 / n_valid









batch_size = 1
n_epochs = 1000000
best_accuracy = -1
count = 0          # number of training batches processed so far
last_costs = []    # costs accumulated since the last progress line
start = time.time()

for i in xrange(n_epochs):
    # Only the index vector is shuffled; it is cast to int32 because f_train
    # takes an int32 index vector as input.
    perm = np.random.permutation(len(x_train)).astype(np.int32)
    for j in xrange(0, len(perm), batch_size):
        count += 1
        new_cost = f_train(perm[j:j + batch_size])
        last_costs.append(new_cost)
        if count % 500 == 0:
            # Same progress format as the non-GPU loop; print(a, b) would
            # display a tuple repr when print is a statement.
            print('{0:>6} - {1}'.format(count, np.mean(last_costs)))
            last_costs = []
    # End of epoch: measure validation accuracy and remember the best one seen.
    new_accuracy = evaluate()
    if new_accuracy > best_accuracy:
        best_accuracy = new_accuracy
    print('%f - New accuracy: %f - Best: %f' % (time.time() - start, new_accuracy, best_accuracy))