## Imports

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
from __future__ import division, print_function

import h5py

# Import TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

import warnings
warnings.filterwarnings('ignore')

#%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
sns.set(rc={'figure.figsize' : (12, 6)})
sns.set_style("darkgrid", {'axes.grid' : True})

import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score

# Display all columns of dataframe
pd.set_option('display.max_columns', None)

## Explore the data
### About Dataset
**The main dataset (letters.zip)**
* 1650 (50x33) color images (32x32x3) with 33 letters and the file with labels letters.txt.
* Photo files are in the .png format and the labels are integers and values.
* Additional letters.csv file.
* The file LetterColorImages.h5 consists of preprocessing images of this set: image tensors and targets (labels).

**The additional dataset (letters2.zip)**
* 5940 (180x33) color images (32x32x3) with 33 letters and the file with labels letters2.txt.
* Photo files are in the .png format and the labels are integers and values.
* Additional letters2.csv file.
* The file LetterColorImages2.h5 consists of preprocessing images of this set: image tensors and targets (labels).

**The additional dataset (letters3.zip)**
* 6600 (200x33) color images (32x32x3) with 33 letters and the file with labels letters3.txt.
* Photo files are in the .png format and the labels are integers and values.
* Additional letters3.csv file.
* The file LetterColorImages3.h5 consists of preprocessing images of this set: image tensors and targets (labels).

Letter Symbols => Letter Labels:   
а=>1, б=>2, в=>3, г=>4, д=>5, е=>6, ё=>7, ж=>8, з=>9, и=>10, й=>11, к=>12, л=>13, м=>14, н=>15, о=>16, п=>17, р=>18, с=>19, т=>20, у=>21, ф=>22, х=>23, ц=>24, ч=>25, ш=>26, щ=>27, ъ=>28, ы=>29, ь=>30, э=>31, ю=>32, я=>33

Image Backgrounds => Background Labels:   
striped=>0, gridded=>1, no background=>2, graph paper=>3

## Load Data

In [None]:
# Load the label table that accompanies each of the three image sets
csv_paths = ("../input/letters.csv", "../input/letters2.csv", "../input/letters3.csv")
data1, data2, data3 = (pd.read_csv(path) for path in csv_paths)
shape_report = "First dataset shape: {0}, \nSecond dataset shape: {1}, \nThird dataset shape: {2}"
print(shape_report.format(data1.shape, data2.shape, data3.shape))

In [None]:
data1.head()

In [None]:
# Open the HDF5 file read-only; the handle stays open because the next cell
# reads its datasets through `f`
f = h5py.File('../input/LetterColorImages_123.h5', mode='r')
# Collect the names of the top-level entries stored in the file
keys = [name for name in f.keys()]
keys

In [None]:
# Copy the first three entries of the file into in-memory NumPy arrays.
# NOTE(review): this relies on `keys` being ordered backgrounds, images, labels —
# confirm against the key list printed by the previous cell.
backgrounds_key, images_key, labels_key = keys[0], keys[1], keys[2]
img_backgrounds = np.array(f[backgrounds_key])
img_tensors = np.array(f[images_key])
targets = np.array(f[labels_key])
for caption, loaded in (('Tensor shape:', img_tensors),
                        ('Target shape', targets),
                        ('Background shape:', img_backgrounds)):
    print(caption, loaded.shape)

In [None]:
# Stack the "letter" columns of the three label tables into a single series
# with a fresh 0..n-1 index
letter_columns = [frame["letter"] for frame in (data1, data2, data3)]
letters = pd.concat(letter_columns, axis=0, ignore_index=True)
len(letters)

In [None]:
# Normalize the tensors to the [0, 1] range.
# The guard keeps this cell idempotent: because the result is stored back into
# `img_tensors`, re-running the cell would otherwise divide by 255 a second time.
if img_tensors.max() > 1:
    img_tensors = img_tensors / 255
img_tensors[0][0][0][0]

## Visualization
Let's look at the distributions of the data in the dataset to find out whether any classes are unbalanced:

In [None]:
sns.countplot(x="label", data=data1)

In [None]:
sns.countplot(x="background", data=data1)

In [None]:
# Show one sample image together with its letter (grid lines switched off for images)
sns.set_style("darkgrid", {'axes.grid' : False})
sample_index = 100
print('Label: ', letters[sample_index])
plt.figure(figsize=(3, 3))
plt.imshow(img_tensors[sample_index]);

In [None]:
type(img_tensors[0])

In [None]:
img_tensors[0].shape

In [None]:
# Display the first image of each label.
def display_images_and_labels(images, labels):
    """Plot the first image found for every distinct label.

    Each subplot is titled "Label <label> (<count>)", where <count> is the
    number of times the label occurs in ``labels``.

    Args:
        images: Indexable collection of images; ``images[i]`` is drawn with
            ``plt.imshow``.
        labels: One-dimensional NumPy array (or sequence) of labels aligned
            with ``images``.
    """
    # np.unique yields the labels in sorted order together with the index of
    # their first occurrence and their frequency, so the panel order is
    # deterministic and the label list is not rescanned for every label.
    unique_labels, first_indices, counts = np.unique(
        np.asarray(labels), return_index=True, return_counts=True)

    # Keep the original 8-column layout; add rows only if 64 cells are not enough.
    n_cols = 8
    n_rows = max(8, -(-len(unique_labels) // n_cols))

    plt.figure(figsize=(15, 15))
    panels = zip(unique_labels.tolist(), first_indices.tolist(), counts.tolist())
    for position, (label, first_index, count) in enumerate(panels, start=1):
        plt.subplot(n_rows, n_cols, position)
        plt.axis('off')
        plt.title("Label {0} ({1})".format(label, count))
        _ = plt.imshow(images[first_index])
    plt.show()
display_images_and_labels(img_tensors, targets)

As we can see, the 21st letter is hard to make out in its first picture in the dataset.
So let's look at other handwritten variants of letter 21:

In [None]:
# Display images of a specific label.
def display_label_images(images, labels, label):
    """Plot up to 24 images whose label equals ``label``.

    Images are selected by comparing every entry of ``labels`` with ``label``,
    so the result is correct even when samples of the same label are not
    stored contiguously.

    Args:
        images: Indexable collection of images aligned with ``labels``.
        labels: One-dimensional NumPy array (or sequence) of labels.
        label: The label value to display.

    Raises:
        ValueError: If ``label`` does not occur in ``labels``.
    """
    limit = 24  # show a max of 24 images (3 rows x 8 columns)
    matching = np.flatnonzero(np.asarray(labels) == label)
    if matching.size == 0:
        raise ValueError("{0} is not in labels".format(label))

    plt.figure(figsize=(15, 5))
    for position, index in enumerate(matching[:limit], start=1):
        plt.subplot(3, 8, position)  # 3 rows, 8 per row
        plt.axis('off')
        plt.imshow(images[index])
    plt.show()

display_label_images(img_tensors, targets, 21)

In [None]:
# Display grayscale images of a specific label.
def display_images_grayscale(images, labels, label):
    """Plot up to 24 images of ``label`` using a gray colormap.

    Images are selected by comparing every entry of ``labels`` with ``label``,
    so samples of the same label do not need to be stored contiguously.
    A trailing single-channel axis is squeezed away before drawing, matching
    how the prediction grid later in the notebook renders grayscale samples.

    Args:
        images: Indexable collection of images aligned with ``labels``.
        labels: One-dimensional NumPy array (or sequence) of labels.
        label: The label value to display.

    Raises:
        ValueError: If ``label`` does not occur in ``labels``.
    """
    limit = 24  # show a max of 24 images (3 rows x 8 columns)
    matching = np.flatnonzero(np.asarray(labels) == label)
    if matching.size == 0:
        raise ValueError("{0} is not in labels".format(label))

    plt.figure(figsize=(15, 5))
    for position, index in enumerate(matching[:limit], start=1):
        plt.subplot(3, 8, position)  # 3 rows, 8 per row
        plt.axis('off')
        plt.imshow(np.squeeze(images[index]), cmap="gray")
    plt.show()

## Pre-processing data

In [None]:
# Build a lookup table that decodes a 1-based label number into its letter
alphabet = 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя'
dictionary = {'num': list(range(1, len(alphabet) + 1)),
              'letter': list(alphabet)}
letter_dict = pd.DataFrame.from_dict(dictionary).set_index("num")
letter_dict.head()

In [None]:
# One-hot encode the targets; labels start at 1, so shift them to start at 0 first
from keras.utils import to_categorical
zero_based_targets = np.array(targets - 1)
coded_targets = to_categorical(zero_based_targets, num_classes=33)
coded_targets.shape

Let's convert the RGB images to grayscale to simplify the structure of the neural network model.

In [None]:
from tensorflow import image
from tensorflow.image import rgb_to_grayscale
# Graph op that converts the RGB tensors to single-channel grayscale
img_tensors_gs_tf = rgb_to_grayscale(img_tensors)

# Evaluate the op exactly once, inside a session that is closed when the
# `with` block exits (a bare tf.Session() would keep its resources allocated).
with tf.Session() as sess:
    arr_img_tensors_gs_tf = sess.run(img_tensors_gs_tf)
print(arr_img_tensors_gs_tf.shape)

In [None]:
# Use the evaluated grayscale array under a shorter name and report
# shape / min / max for the first five images.
img_tensors_gs = arr_img_tensors_gs_tf
# The loop variable is deliberately not called `image`, so it does not
# overwrite the `tensorflow.image` module imported in the previous cell.
for gs_image in img_tensors_gs[:5]:
    print("shape: {0}, min: {1}, max: {2}".format(gs_image.shape, gs_image.min(), gs_image.max()))

In [None]:
# from skimage.color import rgb2grey
# # img_tensors_gs = np.asarray(images32)
# img_tensors_gs = rgb2grey(img_tensors)
# for image in img_tensors_gs[:5]:
#     print("shape: {0}, min: {1}, max: {2}".format(image.shape, image.min(), image.max()))

In [None]:
img_tensors_gs.shape

In [None]:
# Hold out 20% of the samples; the remaining 80% form the training set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    img_tensors_gs,
    coded_targets,
    test_size=0.2,
    random_state=1,
)
print("Train dataset shape: {0}, \nTest dataset shape: {1}".format(X_train.shape, X_test.shape))

In [None]:
# Split the held-out data evenly into a validation set and a test set.
# NOTE(review): X_test / y_test are rebound here, so re-running only this cell
# splits the already-halved test set again.
holdout_split = train_test_split(X_test, y_test, test_size=0.5, random_state=1)
X_valid, X_test, y_valid, y_test = holdout_split
print("Validation dataset shape: {0}, \nTest dataset shape: {1}".format(X_valid.shape, X_test.shape))

## Build the model
The neural network is created by stacking layers—this requires two main architectural decisions:

1. How many layers to use in the model?
2. How many hidden units to use for each layer?

In this example, the input data consists of tensors with images of each letter. The labels to predict are numbers from 1 to 33.   
Let's build a model for this problem with few different architectures:

*  Simple Convolutional Neural Network
*  A Convolutional Neural Network using Estimators API

In [None]:
from keras.preprocessing import image as keras_image
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.metrics import top_k_categorical_accuracy, categorical_accuracy

from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM, GlobalAveragePooling1D, GlobalAveragePooling2D
from keras.layers.advanced_activations import PReLU, LeakyReLU, Softmax
from keras.layers import Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D

def top_3_categorical_accuracy(y_true, y_pred):
    """Top-3 accuracy metric: delegates to ``top_k_categorical_accuracy`` with ``k=3``.

    Args:
        y_true: Ground-truth targets, forwarded unchanged.
        y_pred: Model predictions, forwarded unchanged.

    Returns:
        Whatever ``top_k_categorical_accuracy`` returns for ``k=3``.
    """
    return top_k_categorical_accuracy(y_true, y_pred, k=3)

### Simple Convolutional Neural Network

The tf.layers module contains methods to create each of the three layer types above:
* **conv2d()**. Constructs a two-dimensional convolutional layer. Takes **number of filters, filter kernel size, padding, activation function, kernel, bias regularizers and input shape** as arguments.   
*'Same' padding* means the output feature maps have the same size as the input feature maps (under the assumption of stride=1). For instance, if the input has $n_{in}$ channels with feature maps of size 28×28, then in the output you expect to get $n_{out}$ feature maps, each of size 28×28 as well.
* **max_pooling2d()**. Constructs a two-dimensional pooling layer using the max-pooling algorithm. Takes **pooling filter size and stride** as arguments.
* **dense().** Constructs a dense layer. Takes **number of neurons and activation function** as arguments.   

Each of these methods accepts a tensor as input and returns a transformed tensor as output. This makes it easy to connect one layer to another: just take the output from one layer-creation method and supply it as input to another.

### Compile the model
Before the model is ready for training, it needs a few more settings. These are added during the model's compile step:

* **Loss function** —This measures how accurate the model is during training. We want to minimize this function to "steer" the model in the right direction.
* **Optimizer** —This is how the model is updated based on the data it sees and its loss function.
* **Metrics** —Used to monitor the training and testing steps. The following example uses accuracy, the fraction of the images that are correctly classified.

In [None]:
def build_model(input_shape=None, num_classes=33):
    """Build and compile the convolutional classifier.

    The builder has its own name so that the module-level ``model`` variable
    below can hold the network instance without overwriting the function;
    this keeps the cell re-runnable.

    Args:
        input_shape: Shape of a single input sample. Defaults to
            ``X_train.shape[1:]``.
        num_classes: Number of units in the softmax output layer
            (33 by default, one per letter).

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy, the
        Adam optimizer, and categorical / top-3 categorical accuracy metrics.
    """
    if input_shape is None:
        input_shape = X_train.shape[1:]

    net = Sequential()

    # Convolutional block 1
    # NOTE(review): unlike the two blocks below, this convolution is not
    # followed by an activation layer — confirm that this is intentional.
    net.add(Conv2D(32, (5, 5), padding='same', input_shape=input_shape))
    net.add(MaxPooling2D(pool_size=(2, 2)))
    net.add(Dropout(0.2))

    # Convolutional blocks 2 and 3: conv -> LeakyReLU -> max pooling -> dropout
    for filters in (64, 128):
        net.add(Conv2D(filters, (3, 3), padding='same'))
        net.add(LeakyReLU(alpha=0.02))
        net.add(MaxPooling2D(pool_size=(2, 2)))
        net.add(Dropout(0.2))

    # Global max pooling acts like a max pooling layer whose pool size equals
    # the size of its input.
    net.add(GlobalMaxPooling2D())

    # Fully connected head
    net.add(Dense(1024))
    net.add(LeakyReLU(alpha=0.02))
    net.add(Dropout(0.2))

    net.add(Dense(num_classes))
    net.add(Activation('softmax'))

    # Compile the model
    net.compile(loss='categorical_crossentropy', optimizer='adam',
                metrics=[categorical_accuracy, top_3_categorical_accuracy])
    return net

model = build_model()
model.summary()

### Create callbacks
A callback is a set of functions to be applied at given stages of the training procedure. You can use callbacks to get a view on internal states and statistics of the model during training.
* **ModelCheckpoint** - Save the model after every epoch.
* **ReduceLROnPlateau** - Reduce the learning rate when a metric has stopped improving. Models often benefit from reducing the learning rate by a factor of 2-10 once learning stagnates. This callback monitors a quantity, and if no improvement is seen for a 'patience' number of epochs, the learning rate is reduced.
* **EarlyStopping** - Stop training when a monitored quantity has stopped improving.

In [None]:
# Create callbacks
# Checkpoint: with save_best_only=True only the best weights seen so far are kept on disk
checkpointer = ModelCheckpoint(
    filepath='weights.best.model.hdf5',
    verbose=1,
    save_best_only=True,
)
# Multiply the learning rate by 0.75 after 5 epochs without val_loss improvement
lr_reduction = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.75,
    patience=5,
    verbose=1,
)
# Early stopping is currently disabled:
# early_stoping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)
callbacks = [checkpointer, lr_reduction]

## Train the model

In [None]:
# Train the model
# batch size = 473 as it divides X_train size, X_test and X_valid sizes evenly
history = model.fit(
    X_train,
    y_train,
    epochs=200,
    batch_size=473,
    verbose=1,
    validation_data=(X_valid, y_valid),
    callbacks=callbacks,
)

In [None]:
# Plot the Neural network fitting history
def history_plot(fit_history, n):
    """Plot training and validation curves for loss and categorical accuracy.

    Args:
        fit_history: Object returned by ``model.fit``; its ``history`` dict
            must contain ``loss``, ``val_loss``, ``categorical_accuracy`` and
            ``val_categorical_accuracy``.
        n: Index of the first epoch to show (the series are sliced with
            ``[n:]``).
    """
    # (subplot position, train key, validation key, y-axis label, title)
    panels = (
        (211, 'loss', 'val_loss', "Loss", 'Loss Function'),
        (212, 'categorical_accuracy', 'val_categorical_accuracy', "Accuracy", 'Accuracy'),
    )

    plt.figure(figsize=(18, 12))
    for position, train_key, valid_key, y_label, title in panels:
        train_values = fit_history.history[train_key]
        valid_values = fit_history.history[valid_key]
        # Plot against the real epoch indices so that skipping the first n
        # epochs does not relabel epoch n as epoch 0 on the x axis.
        train_epochs = list(range(len(train_values))[n:])
        valid_epochs = list(range(len(valid_values))[n:])

        plt.subplot(position)
        plt.plot(train_epochs, train_values[n:], color='slategray', label='train')
        plt.plot(valid_epochs, valid_values[n:], color='#4876ff', label='valid')
        plt.xlabel("Epochs")
        plt.ylabel(y_label)
        plt.legend()
        plt.title(title)

In [None]:
# Plot the training history
history_plot(history, 0)

In [None]:
# Load the weights written by the ModelCheckpoint callback. With save_best_only=True and
# no explicit `monitor`, that callback tracks its default quantity (val_loss), so these
# are the weights with the lowest validation loss, not the best validation accuracy.
model.load_weights('weights.best.model.hdf5')
# Evaluate on the held-out test set: loss followed by the metrics passed to compile()
score = model.evaluate(X_test, y_test)
score

## Make & Display Predictions

In [None]:
# Model predictions for the testing dataset.
# Take the argmax over the predicted class probabilities; for a softmax output
# this is what Sequential.predict_classes returns, without depending on that
# method, which is deprecated and absent from newer Keras releases.
y_test_predict = np.argmax(model.predict(X_test), axis=-1)

In [None]:
# Create a list of symbols: the alphabet in order, so position i holds the
# letter for zero-based class index i
symbols = list('абвгдеёжзийклмнопрстуфхцчшщъыьэюя')

In [None]:
# Display true labels and predictions for 16 random test images.
# Each title reads "predicted (true)": blue when they agree, dark red otherwise.
fig = plt.figure(figsize=(14, 14))
sample_ids = np.random.choice(X_test.shape[0], size=16, replace=False)
for position, idx in enumerate(sample_ids, start=1):
    ax = fig.add_subplot(4, 4, position, xticks=[], yticks=[])
    ax.imshow(np.squeeze(X_test[idx]), cmap="gray")
    pred_idx = y_test_predict[idx]
    true_idx = np.argmax(y_test[idx])
    title_color = "#4876ff" if pred_idx == true_idx else "darkred"
    ax.set_title("{} ({})".format(symbols[pred_idx], symbols[true_idx]),
                 color=title_color)

## Save the final model
Then we can use model.save(filepath) to save a Keras model into a single HDF5 file which will contain:

* the architecture of the model, allowing to re-create the model
* the weights of the model
* the training configuration (loss, optimizer)
* the state of the optimizer, allowing to resume training exactly where you left off.

In [None]:
# Save model
model.save('HW_best_model.h5')