### Creating .csv files

In [60]:
import os
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [61]:
# converts matrix (n, m), representing an image, into a vector (n*m, )
def img_to_row(img, index):
    data = img.flatten().astype(float)
    return data

# returns a matrix representing a directory of images (row = image, column = pixel number)
def img_dir_to_matrix(root_path, extension = '', need_names = False):
    index = 0
    data = None
    filenames = []
    for (path, dirs, files) in os.walk(root_path):
        for filename in files:
            if (filename.endswith(extension)):
                fullpath = os.path.join(path, filename)
                img = cv2.imread(fullpath, 0)
                row = img_to_row(img, index)
                if index == 0:
                    data = row.reshape(1, -1)
                else:
                    data = np.vstack([data, row])
                if need_names:
                    filenames.append(filename)
#                 if index == 5:
#                     return (data, filenames) if need_names else data
                index += 1
                if index % 5000 == 0:
                    print index
    return (data, filenames) if need_names else data

def export_to_csv(path, array, head = None):
    frame = pd.DataFrame(array, columns=head)
    frame.to_csv(path, sep = ',', header = (head != None), index = None)

In [62]:
def generate_train_class(root_path, extension, class_value):
    train = img_dir_to_matrix(root_path, extension)
    train_y = np.array([class_value] * train.shape[0])
    return (train, train_y)
    
def generate_train_sample(root_path, extension, class_true_dir, class_false_dir):
    if not root_path.endswith('/'):
        root_path += '/'
    print 'Generating training sample for True class'
    train_true, train_true_y = generate_train_class(root_path + class_true_dir, extension, 1)
    print 'Generating training sample for False class'
    train_false, train_false_y = generate_train_class(root_path + class_false_dir, extension, 0)
    
    print 'Merging samples'
    trainX = np.concatenate((train_true, train_false), 0)
    trainY = np.concatenate((train_true_y, train_false_y), 0)
    return (trainX, trainY)

def generate_test_sample(root_path, extension):
    print 'Generating test sample'
    if not root_path.endswith('/'):
        root_path += '/'
    (data, filenames) = img_dir_to_matrix(root_path, extension, need_names=True)
    test_ids = np.array([int(filename.split('.')[0]) for filename in filenames])
    frame = pd.DataFrame(data)
    frame['id'] = test_ids
    return frame

In [64]:
%time trainX, trainY = generate_train_sample('data/train/', '.tif', 'Hieroglyph', 'Other')
%time test_frame = generate_test_sample('data/test/', '.tif')

export_to_csv('data/train/trainX.csv', trainX)
export_to_csv('data/train/trainY.csv', trainY)
test_frame.to_csv('data/test/testX.csv', index=False)

Generating training sample for True class
5000
10000
15000
20000
25000
30000
35000
40000
Generating training sample for False class
5000
10000
15000
20000
25000
30000
35000
40000
Merging samples
CPU times: user 10min 54s, sys: 4min 16s, total: 15min 11s
Wall time: 15min 13s
5000
10000
15000
20000
25000
30000
CPU times: user 3min 54s, sys: 1min 35s, total: 5min 29s
Wall time: 5min 30s


### Чтение данных

In [82]:
%time trainX = pd.read_csv('data/train/trainX.csv', header=None, dtype=np.float32).values.reshape((-1, 1, 20, 20))
%time trainY = pd.read_csv('data/train/trainY.csv', header=None, dtype=np.int32).values.reshape((-1))
%time test_info = pd.read_csv('data/test/testX.csv')
testIds = test_info['id'].values.astype(int)
testX = test_info.drop('id', 1).values.astype(np.float32).reshape((-1, 1, 20, 20))

CPU times: user 2.67 s, sys: 45.5 ms, total: 2.72 s
Wall time: 2.72 s
CPU times: user 4.7 ms, sys: 0 ns, total: 4.7 ms
Wall time: 4.75 ms
CPU times: user 1.18 s, sys: 12.6 ms, total: 1.19 s
Wall time: 1.19 s


### Построение нейронной сети

In [83]:
import lasagne
import lasagne.layers as layers
from lasagne.nonlinearities import softmax
from lasagne.updates import adam
from lasagne.updates import nesterov_momentum

from nolearn.lasagne import NeuralNet

In [84]:
layers0 = [
    # layer dealing with the input data
    (layers.InputLayer, {'shape': (None, 1, 20, 20)}),

    # first stage of our convolutional layers
    (layers.Conv2DLayer, {'num_filters': 16, 'filter_size': 5}),
    (layers.Conv2DLayer, {'num_filters': 32, 'filter_size': 3}),
    (layers.Conv2DLayer, {'num_filters': 32, 'filter_size': 3}),
    (layers.MaxPool2DLayer, {'pool_size': 2}),

    # second stage of our convolutional layers
    (layers.Conv2DLayer, {'num_filters': 32, 'filter_size': 3}),
    (layers.Conv2DLayer, {'num_filters': 32, 'filter_size': 3}),
    (layers.MaxPool2DLayer, {'pool_size': 2}),

    # two dense layers with dropout
    (layers.DenseLayer, {'num_units': 16}),
    (layers.DropoutLayer, {}),
    (layers.DenseLayer, {'num_units': 16}),

    # the output layer
    (layers.DenseLayer, {'num_units': 2, 'nonlinearity': softmax}),
]

In [85]:
net0 = NeuralNet(
    layers = layers0,
    max_epochs = 100,
    update = adam,
    update_learning_rate = 0.01,
    verbose = 2
)

In [86]:
random_indices = np.arange(np.size(trainY))
np.random.shuffle(random_indices)
# print trainY[random_indices]
net0.fit(trainX[random_indices], trainY[random_indices])

# Neural Network with 33634 learnable parameters

## Layer information

name        size        total    cap.Y    cap.X    cov.Y    cov.X
----------  --------  -------  -------  -------  -------  -------
input0      1x20x20       400   100.00   100.00   100.00   100.00
conv2d1     16x16x16     4096   100.00   100.00    25.00    25.00
conv2d2     32x14x14     6272    42.86    42.86    35.00    35.00
conv2d3     32x12x12     4608    33.33    33.33    45.00    45.00
maxpool2d4  32x6x6       1152    33.33    33.33    45.00    45.00
conv2d5     32x4x4        512    46.15    46.15    65.00    65.00
conv2d6     32x2x2        128    35.29    35.29    85.00    85.00
maxpool2d7  32x1x1         32    35.29    35.29    85.00    85.00
dense8      16             16   100.00   100.00   100.00   100.00
dropout9    16             16   100.00   100.00   100.00   100.00
dense10     16             16   100.00   100.00   100.00   100.00
dense11     2               2   100.00   100.00   100.00   100.00

Exp

NeuralNet(X_tensor_type=None,
     batch_iterator_test=<nolearn.lasagne.base.BatchIterator object at 0x7f3a7b25bf90>,
     batch_iterator_train=<nolearn.lasagne.base.BatchIterator object at 0x7f3a7b25be90>,
     check_input=True, custom_scores=None,
     layers=[(<class 'lasagne.layers.input.InputLayer'>, {'shape': (None, 1, 20, 20)}), (<class 'lasagne.layers.conv.Conv2DLayer'>, {'filter_size': 5, 'num_filters': 16}), (<class 'lasagne.layers.conv.Conv2DLayer'>, {'filter_size': 3, 'num_filters': 32}), (<class 'lasagne.layers.conv.Conv2DLayer'>, {'fil....layers.dense.DenseLayer'>, {'num_units': 2, 'nonlinearity': <function softmax at 0x7f3a7be1b320>})],
     loss=None, max_epochs=100, more_params={},
     objective=<function objective at 0x7f3a7b25d668>,
     objective_loss_function=<function categorical_crossentropy at 0x7f3a7bcaa758>,
     on_batch_finished=[],
     on_epoch_finished=[<nolearn.lasagne.handlers.PrintLog instance at 0x7f3a80855248>],
     on_training_finished=[],
     on

### Result
No progress in learning