This notebook classifies handwritten Chinese digits or Russian letters with a graph convolutional network: every pixel is a graph node whose feature is its intensity value, and the edges connect neighbouring pixels.


References: this notebook was built using the following resources:

https://github.com/danielegrattarola/spektral/blob/master/spektral/datasets/mnist.py

https://github.com/danielegrattarola/spektral/blob/master/examples/other/graph_signal_classification_mnist.py

https://www.kaggle.com/kmader/mnist-based-graphs

https://www.kaggle.com/alincijov/schedulers-for-beginners-using-chinesse-mnist

https://www.kaggle.com/alincijov/cnn-pytorch-russian-letters/data

In [None]:
!pip install --upgrade pip

In [None]:
!pip install spektral

In [None]:
import os
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
from tqdm.notebook import tqdm
import gc
gc.enable()
import cv2
from PIL import Image
import scipy.sparse as sp

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy
from tensorflow.keras.optimizers import Adam, SGD

from sklearn.utils import shuffle
from sklearn.neighbors import kneighbors_graph

import seaborn as sns
import networkx as nx

In [None]:
from spektral.layers.ops import sp_matrix_to_sp_tensor
from spektral.layers import GCNConv, GlobalSumPool
from spektral.data import Dataset, Graph
from spektral.data.loaders import PackedBatchLoader
from spektral.layers import GCNConv, GCSConv, GlobalAvgPool

In [None]:
def get_strategy():
    """Detect the available hardware and return a matching distribution strategy.

    A TPU is tried first. If resolving it raises ``ValueError``, the process is
    restricted to GPU 0 via ``CUDA_VISIBLE_DEVICES`` and a single-device GPU
    strategy is used when exactly one GPU is visible. Otherwise TensorFlow's
    current default strategy is returned.

    :return: a ``tf.distribute`` strategy object.
    """
    visible_gpus = []
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', resolver.master())
    except ValueError:
        # No TPU could be resolved: fall back to looking for a GPU.
        resolver = None
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        visible_gpus = tf.config.list_physical_devices("GPU")
        if len(visible_gpus) == 1:
            print('Running on GPU ', visible_gpus)

    has_single_gpu = len(visible_gpus) == 1
    if resolver:
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        chosen = tf.distribute.experimental.TPUStrategy(resolver)
    elif has_single_gpu:
        chosen = tf.distribute.OneDeviceStrategy(device="/gpu:0")
        # Let the graph optimizer apply automatic mixed precision on the GPU.
        tf.config.optimizer.set_experimental_options({"auto_mixed_precision":True})
    else:
        chosen = tf.distribute.get_strategy()

    print("REPLICAS: ", chosen.num_replicas_in_sync)
    return chosen


# Distribution strategy shared by the model-creation cell further down.
strategy = get_strategy()

In [None]:
# Dataset selector used throughout the notebook.
LANGUAGE = 0  # 0 = Chinese handwritten digits, 1 = Russian handwritten letters
# When True, the parameters cell subsamples the dataframe and lowers the epoch count.
DEBUG    = False

In [None]:
# Pick the image folder, the metadata table, the class names and the image
# side length (MNIST_SIZE) for the selected language.
if LANGUAGE != 0:
    # Russian handwritten letters.
    MNIST_SIZE = 32
    path = '../input/russian-handwritten-letters/all_letters_image/all_letters_image/'
    df = pd.read_csv('../input/russian-handwritten-letters/all_letters_info.csv')
    df['label'] = df['label'] - 1  # make the label index start from 0
    label_values = np.unique(df['label'].values)
    char_values = np.unique(df['letter'].values)
else:
    # Chinese handwritten digits.
    MNIST_SIZE = 64
    path = '../input/chinese-mnist/data/data/'
    df = pd.read_csv('../input/chinese-mnist/chinese_mnist.csv')
    char_values = np.unique(df['character'].values)

In [None]:
# Preview the first rows of the metadata table.
df.head()

In [None]:
# Two-way lookup between the class index and the character it stands for.
idx_to_character = dict(enumerate(char_values))
character_to_idx = {char: idx for idx, char in idx_to_character.items()}

In [None]:
# Parameters
batch_size = 128     # Batch size (alternative: 16 * strategy.num_replicas_in_sync)
epochs = 50         # Number of training epochs
patience = epochs    # Patience for early stopping (same as the epoch count set above)
l2_reg = 5e-4        # Regularization rate for l2

if DEBUG:
    # Debug mode: keep every 15th row (the 0:-1 slice excludes the last row)
    # and train for fewer epochs. `patience` keeps the value assigned above.
    df = df[0:-1:15]  #debug
    epochs = 20

In [None]:
def decode_image(df):
    """Load every image listed in ``df`` as grayscale and build the label vector.

    For the Chinese dataset (``LANGUAGE == 0``) the file name is built from the
    first three columns of ``df`` (suite_id, sample_id, code), e.g.
    ``input_1_3_4.jpg``, and the label comes from the ``character`` column.
    For the Russian dataset the file name is the ``file`` column, the image is
    resized to ``MNIST_SIZE x MNIST_SIZE`` and the label comes from ``letter``.

    :param df: pandas DataFrame describing the images of the selected dataset.
    :return: tuple ``(features, labels)``; ``features`` holds the pixel values
        divided by 255, ``labels`` holds the class indices from
        ``character_to_idx``.
    :raises FileNotFoundError: if an image cannot be read.
    """
    features = []
    labels = []

    def _read_gray(image_path):
        # cv2.imread reports failure by returning None instead of raising;
        # fail early with the offending path rather than later with an
        # obscure error.
        image = cv2.imread(image_path, 0)  # flag 0 = grayscale
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        return image

    if LANGUAGE == 0:
        # index extracted: suite_id: 1, sample_id: 3, code: 4
        # resulted file name: input_1_3_4.jpg
        # Columns are taken by position in a single pass instead of a
        # per-row df.iloc[i][j] lookup.
        rows = zip(df.iloc[:, 0], df.iloc[:, 1], df.iloc[:, 2], df['character'])
        for suite_id, sample_id, code, character in tqdm(rows, total=len(df)):
            image_path = f'{path}input_{suite_id}_{sample_id}_{code}.jpg'
            features.append(_read_gray(image_path))
            labels.append(character_to_idx[character])
    else:
        rows = zip(df['file'].values, df['letter'].values)
        for file, letter in tqdm(rows, total=len(df)):
            image_arr = _read_gray(path + file)
            features.append(cv2.resize(image_arr, (MNIST_SIZE, MNIST_SIZE)))
            labels.append(character_to_idx[letter])

    features = np.array(features) / 255.  # scale by 1/255
    labels = np.array(labels)
    return features, labels

In [None]:
# Decode all images and labels; n_out is the number of output classes.
features, labels = decode_image(df)
n_out = len(char_values)
# shuffle images and labels together with a fixed seed
features, labels = shuffle(features, labels, random_state=0)

# Displayed as the cell output: array shapes and the value range of the features.
features.shape, labels.shape, features.min(), features.max()

In [None]:
# Print the "index:character" legend on a single line.
for i, char in enumerate(char_values):
    print(f'{i}:{char}  ', end=' ')

In [None]:
# Show the first num_plt x num_plt images in a square grid, titled with their label index.
num_plt = 5   
num_fig = num_plt**2

plt.figure(figsize=(12,10))
for i in range(num_fig):
    plt.subplot(num_plt,num_plt,i+1)
    # Hide ticks and grid so that only the image is visible.
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    # Features were divided by 255 in decode_image, hence the fixed 0..1 colour range.
    plt.imshow(features[i], cmap=plt.get_cmap('gray'), vmin=0, vmax=1)
    plt.title(labels[i])             #True label


In [None]:
class MNIST_CH(Dataset):
    """
    Graph-signal dataset built from the decoded character images.

    All samples share one pixel-grid adjacency matrix, which is kept in the
    `a` attribute of the dataset; each Graph only carries the node features
    (one intensity value per pixel) and its label.

    **Arguments**
    - `features`: array of images, reshaped to `MNIST_SIZE ** 2` nodes each.
    - `labels`: class index of each image.
    - `p_flip`: if >0, then edges are randomly flipped from 0 to 1 or vice versa
    with that probability.
    - `k`: number of neighbours of each node.
    """

    def __init__(self, features=features, labels=labels, p_flip=0., k=8, **kwargs):
        # NOTE(review): the defaults above are bound once, when this class
        # statement runs — pass `features`/`labels` explicitly to be safe.
        self.a = None
        self.features = features
        self.labels = labels
        self.k = k
        self.p_flip = p_flip
        # NOTE(review): presumably Dataset.__init__ calls read(); the cells
        # below use `data.a` right after construction — confirm.
        super().__init__(**kwargs)

    def read(self):
        """Build the shared adjacency matrix and return one Graph per image."""
        grid_adjacency = _mnist_grid_graph(self.k)
        self.a = grid_adjacency
        self.a = _flip_random_edges(grid_adjacency, self.p_flip)

        node_features = self.features.reshape(-1, MNIST_SIZE ** 2, 1)
        graphs = []
        for sample, target in zip(node_features, self.labels):
            graphs.append(Graph(x=sample, y=target))
        return graphs

    
def _grid_coordinates(side):
    """
    Build the 2D coordinates of a square grid of equally spaced nodes.

    Coordinates span [0, 1] on both axes and the x coordinate varies fastest.
    :param side: int, number of nodes per side (the grid has side * side nodes).
    :return: float32 np.array of shape (side * side, 2) holding (x, y) pairs.
    """
    ticks = np.linspace(0, 1, side, dtype=np.float32)
    grid_x, grid_y = np.meshgrid(ticks, ticks)
    return np.stack([grid_x.ravel(), grid_y.ravel()], axis=1)


def _get_adj_from_data(X, k, **kwargs):
    """
    Computes the symmetric adjacency matrix of a K-NN graph from the given data.
    :param X: np.array of shape (n_points, n_dims), e.g. the 2D coordinates of
    pixels on the grid.
    :param k: int, number of neighbours for each point.
    :param kwargs: kwargs for sklearn.neighbors.kneighbors_graph (see docs
    [here](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.kneighbors_graph.html)).
    :return: scipy sparse (CSR) matrix of shape (n_points, n_points).
    """
    A = kneighbors_graph(X, k, **kwargs)
    # The K-NN relation is not symmetric: keep an edge when it exists in either
    # direction. The element-wise maximum is taken on the sparse matrices so
    # the (n_points x n_points) graph is never densified.
    A = sp.csr_matrix(A.maximum(A.T))

    return A


def _mnist_grid_graph(k):
    """
    Build the adjacency matrix of the K-NN graph over the full pixel grid.

    The grid has MNIST_SIZE x MNIST_SIZE nodes; neighbours are found with the
    euclidean distance and a node is never its own neighbour.
    :param k: int, number of neighbours for each node.
    :return: the sparse adjacency matrix returned by _get_adj_from_data.
    """
    knn_options = dict(mode='connectivity',
                       metric='euclidean',
                       include_self=False)
    return _get_adj_from_data(_grid_coordinates(MNIST_SIZE), k, **knn_options)


def _flip_random_edges(A, percent):
    """
    Flips randomly chosen entries of A (0 -> 1 and 1 -> 0).
    :param A: binary scipy sparse matrix; must be square.
    :param percent: fraction (between 0 and 1) of ALL entries of A to flip;
    round(percent * A.shape[0] ** 2) distinct entries are selected.
    :return: binary scipy sparse (CSR) matrix with the dtype of A.
    :raises ValueError: if A is not square.
    """
    if not A.shape[0] == A.shape[1]:
        raise ValueError('A must be a square matrix.')

    dtype = A.dtype
    # LIL format makes single-entry assignment cheap. The builtin `bool` is
    # used because the `np.bool` alias was removed from NumPy.
    A = sp.lil_matrix(A).astype(bool)

    side = A.shape[0]
    n_elem = side ** 2
    n_elem_to_flip = round(percent * n_elem)
    # Draw distinct flat indices, then unravel them into (row, col) pairs.
    unique_idx = np.random.choice(n_elem, replace=False, size=n_elem_to_flip)
    row_idx = unique_idx // side
    col_idx = unique_idx % side

    for r, c in zip(row_idx, col_idx):
        A[r, c] = np.logical_not(A[r, c])

    A = A.tocsr().astype(dtype)
    A.eliminate_zeros()  # drop entries that were flipped to 0
    return A


def _grid_coordinates_from_img(in_img, threshold):
    """
    Returns the grid coordinates and intensity of the pixels above a threshold.

    x runs along the columns and y along the rows of the image, both scaled to
    [0, 1]. Non-square images are supported.
    :param in_img: 2D np.array of pixel intensities.
    :param threshold: only pixels with intensity strictly greater than this
    value are kept.
    :return: np.array of shape (n_kept, 3) with columns (x, y, intensity),
    sorted by decreasing intensity.
    """
    n_rows, n_cols = in_img.shape[0], in_img.shape[1]
    # meshgrid(x, y) yields arrays of shape (len(y), len(x)), so x must span
    # the columns and y the rows for the boolean mask below to line up.
    x = np.linspace(0, 1, n_cols, dtype=np.float32)
    y = np.linspace(0, 1, n_rows, dtype=np.float32)
    xx, yy = np.meshgrid(x, y)
    keep = in_img > threshold
    z = np.stack([xx[keep], yy[keep], in_img[keep]], -1)
    z = z[np.argsort(-z[:, 2]), :] # sort by pixel value
    return z

def _mnist_img_grid_graph(in_img, k, threshold=0.6):
    """
    Build a distance-weighted K-NN graph over the bright pixels of one image.
    :param in_img: 2D np.array, the image.
    :param k: int, number of neighbours for each node;
    :param threshold: pixels must exceed this intensity to become nodes.
    :return: tuple (A, X): the sparse adjacency matrix over the kept pixels and
    the per-node array returned by _grid_coordinates_from_img.
    """
    nodes = _grid_coordinates_from_img(in_img, threshold=threshold)
    # Only the first two columns (the coordinates) are used to find neighbours.
    xy = nodes[:, :2]
    adjacency = _get_adj_from_data(xy, k,
                                   mode='distance',
                                   metric='euclidean',
                                   include_self=False)
    return adjacency, nodes



# Global plotting defaults for the graph visualisations.
# NOTE(review): plt.style.use and sns.set_style run AFTER the rcParams
# assignments and may override some of them (e.g. font settings) — confirm
# the intended order.
plt.rcParams["figure.figsize"] = (6, 6)
plt.rcParams["figure.dpi"] = 125
plt.rcParams["font.size"] = 14
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['DejaVu Sans']
plt.style.use('ggplot')
sns.set_style("whitegrid", {'axes.grid': False})
plt.rcParams['image.cmap'] = 'gray' # grayscale looks better

def draw_graph_mpl(g, pos=None, ax=None, layout_func=nx.drawing.layout.kamada_kawai_layout, draw_labels=True):
    """Draw a networkx graph with matplotlib.

    Node colours/labels and edge colours/widths are read from the optional
    'color', 'label' and 'width' attributes stored on the graph; defaults are
    used when an attribute is missing.

    :param g: networkx graph to draw.
    :param pos: node positions indexable by node; computed with `layout_func`
        when None.
    :param ax: matplotlib axes to draw on; a new 12x12 figure is created when None.
    :param layout_func: callable computing positions from `g` when `pos` is None.
    :param draw_labels: whether node labels are drawn.
    :return: tuple (fig, ax, pos); `fig` is None when `ax` was supplied.
    """
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(12, 12))
    else:
        fig = None
    if pos is None:
        pos = layout_func(g)

    node_color = [g.nodes[n].get('color', 'green') for n in g]
    node_labels = {n: g.nodes[n].get('label', n) for n in g}
    # Label positions as a plain dict, whatever container `pos` is.
    label_pos = {n: [pos[n][0], pos[n][1]] for n in g}

    edge_color = [g.edges[e].get('color', 'black') for e in g.edges()]
    edge_width = [g.edges[e].get('width', 0.5) for e in g.edges()]

    # Only keyword arguments the drawing functions actually define are passed:
    # `font_weight` is not an edge option and `arrows` is not a label option,
    # and recent NetworkX releases reject unknown keywords.
    nx.draw_networkx_edges(g, pos, edge_color=edge_color, width=edge_width, alpha=0.5, ax=ax)
    nx.draw_networkx_nodes(g, pos, node_color=node_color, node_shape='p', node_size=300, alpha=0.75, ax=ax)
    if draw_labels:
        nx.draw_networkx_labels(g, label_pos, labels=node_labels, ax=ax)
    ax.autoscale()
    return fig, ax, pos

In [None]:
# Build the dataset on the plain k-NN pixel grid (no random edge flips).
data = MNIST_CH(features, labels, k=8, p_flip=0 )

# Shared adjacency matrix -> GCN filter -> sparse tensor.
adj = data.a
# NOTE(review): despite its name, adj_dense is presumably still a SciPy sparse
# matrix, since it is passed to sp_matrix_to_sp_tensor below — confirm.
adj_dense = GCNConv.preprocess(adj)
adj = sp_matrix_to_sp_tensor(adj_dense)

np.random.seed(seed=2020)
data_tr  = data[:]
loader_tr = PackedBatchLoader(data_tr, batch_size=batch_size, epochs=1)

# Materialise one epoch of batches and keep the last one for the visualisation cells.
batches = [b for b in loader_tr]
x,y = batches[-1]

# Displayed as the cell output.
x.shape, y.shape

In [None]:
# Build a k-NN graph (k=8) over the pixels of the second image of the batch;
# threshold=0.0 keeps the pixels whose intensity is strictly greater than 0.
# Note that this reassigns `adj` to the per-image adjacency matrix.
adj, vec = _mnist_img_grid_graph(x[1].reshape(MNIST_SIZE, MNIST_SIZE), 8, threshold=0.0)
print('Label:', y[1], idx_to_character[y[1]])

#visualize network in unraveled 2D array with threshold=0
plt.matshow(adj.todense(), cmap='viridis')

In [None]:
# nx.from_scipy_sparse_matrix was removed in NetworkX 3.0; use its replacement
# when it exists and fall back to the old name on older releases.
_from_sparse = getattr(nx, 'from_scipy_sparse_array', None) or nx.from_scipy_sparse_matrix
G = _from_sparse(adj)

# Colour every node by its pixel intensity (third column of vec).
for k, (xval, yval, cval) in zip(G.nodes, vec):
    G.nodes[k]['color'] = plt.cm.jet(cval)
# Edge weights are distances: shorter edges are drawn thicker and brighter.
for e in G.edges():
    G.edges[e]['width'] = 0.1/G.edges[e]['weight']
    G.edges[e]['color'] = plt.cm.magma(0.025/G.edges[e]['weight'])
    
#visualize network in X, Y coordinates with threshold=0
draw_graph_mpl(G, pos=vec[:, :2]);

In [None]:
# Number of nodes of the pixel grid.
MNIST_SQ = MNIST_SIZE**2
# The adjacency matrix has MNIST_SQ**2 entries, so this p_flip asks
# _flip_random_edges to flip about 15 of them.
data = MNIST_CH(features, labels, k=8, p_flip=15/MNIST_SQ**2 )

# Shared adjacency matrix -> GCN filter -> sparse tensor.
adj = data.a
adj_dense = GCNConv.preprocess(adj)
adj = sp_matrix_to_sp_tensor(adj_dense)

np.random.seed(seed=2020)
data_tr  = data[:]
loader_tr = PackedBatchLoader(data_tr, batch_size=batch_size, epochs=1)

# Materialise one epoch of batches and keep the last one for the visualisation cells.
batches = [b for b in loader_tr]
x,y = batches[-1]

# Displayed as the cell output.
data_tr, x.shape, y.shape

In [None]:
# Same per-image graph as before, but only pixels with intensity strictly
# greater than 0.1 become nodes. This reassigns `adj` to the per-image matrix.
adj, vec = _mnist_img_grid_graph(x[1].reshape(MNIST_SIZE, MNIST_SIZE), 8, threshold=0.1)
print('Label:', y[1], idx_to_character[y[1]])

#visualize network in unraveled 2D array with p_flip, threshold
plt.matshow(adj.todense(), cmap='viridis')

In [None]:
# nx.from_scipy_sparse_matrix was removed in NetworkX 3.0; use its replacement
# when it exists and fall back to the old name on older releases.
_from_sparse = getattr(nx, 'from_scipy_sparse_array', None) or nx.from_scipy_sparse_matrix
G = _from_sparse(adj)

# Colour every node by its pixel intensity (third column of vec).
for k, (xval, yval, cval) in zip(G.nodes, vec):
    G.nodes[k]['color'] = plt.cm.jet(cval)
# Edge weights are distances: shorter edges are drawn thicker and brighter.
for e in G.edges():
    G.edges[e]['width'] = 0.1/G.edges[e]['weight']
    G.edges[e]['color'] = plt.cm.magma(0.025/G.edges[e]['weight'])

#visualize network in X, Y coordinates with p_flip, threshold
draw_graph_mpl(G, pos=vec[:, :2]);

In [None]:
# Load data: rebuild the dataset used for training on the unmodified grid graph (p_flip=0).
data = MNIST_CH(features, labels, k=8, p_flip=0 )

In [None]:
# The adjacency matrix is stored as an attribute of the dataset.
# Create filter for GCN and convert to sparse tensor.
adj = data.a
adj = GCNConv.preprocess(adj)
adj = sp_matrix_to_sp_tensor(adj)

# Train/valid/test split: 15 percent each for validation and test, the rest
# for training. The three subsets are disjoint, so the test metrics are
# measured on samples used neither for training nor for model selection.
p_split = int(0.15 * len(data))  #15 percent split
n_train = len(data) - 2 * p_split

np.random.seed(seed=2020)
np.random.shuffle(data)

# Explicit start/stop indices (rather than negative ones) stay correct even
# when p_split is 0.
data_tr = data[:n_train]
data_va = data[n_train:n_train + p_split]
data_te = data[n_train + p_split:]

len(data_tr), len(data_va), len(data_te)

In [None]:
# Build model
class Net(Model):
    """Two GCN layers followed by a dense classifier head.

    The model takes `[x, a]` (node features and graph filter) and returns the
    softmax class probabilities over `n_out` classes.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.conv1 = GCNConv(32, activation='elu', kernel_regularizer=l2(l2_reg))
        self.conv2 = GCNConv(32, activation='elu', kernel_regularizer=l2(l2_reg))
        self.flatten = Flatten()
        self.fc1 = Dense(512, activation='relu')
        # Output layer: one unit per class (Chinese=15, Russian=33 classes).
        self.fc2 = Dense(n_out, activation='softmax')

    def call(self, inputs):
        node_features, graph_filter = inputs
        # Both graph convolutions share the same filter.
        hidden = self.conv1([node_features, graph_filter])
        hidden = self.conv2([hidden, graph_filter])
        # Flatten the per-node features before the dense layers.
        return self.fc2(self.fc1(self.flatten(hidden)))
    
# Build model
class Net_Russian(Model):
    """Variant of the GCN classifier with one extra 512-unit dense layer.

    The model takes `[x, a]` (node features and graph filter) and returns the
    softmax class probabilities over `n_out` classes.
    """
    # Recorded run:
    #epcho  50/50   | Train loss: 3.5015, acc: 0.0302 | Valid loss: 3.4991, acc: 0.0302 | Test loss: 3.4643, acc: 0.0356
    #GPU Total Epoch:50, time elapse:62.82459878921509

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.conv1 = GCNConv(32, activation='elu', kernel_regularizer=l2(l2_reg))
        self.conv2 = GCNConv(32, activation='elu', kernel_regularizer=l2(l2_reg))
        self.flatten = Flatten()
        self.fc1 = Dense(512, activation='relu')
        self.fc2 = Dense(512, activation='relu')
        # Output layer: one unit per class (Chinese=15, Russian=33 classes).
        self.fc3 = Dense(n_out, activation='softmax')

    def call(self, inputs):
        node_features, graph_filter = inputs
        # Both graph convolutions share the same filter.
        hidden = self.conv1([node_features, graph_filter])
        hidden = self.conv2([hidden, graph_filter])
        # Flatten the per-node features, then apply the dense layers in order.
        dense_out = self.flatten(hidden)
        for layer in (self.fc1, self.fc2, self.fc3):
            dense_out = layer(dense_out)
        return dense_out

In [None]:
# Create model, optimizer, loss and metric inside the distribution strategy scope.
with strategy.scope():
    if LANGUAGE==0:
        model = Net()
    else:
        model = Net_Russian()  #need further hyperparams tuning
        
    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by recent Keras releases.
    optimizer = Adam(learning_rate=0.001)
    loss_fn = SparseCategoricalCrossentropy()
    acc_fn = SparseCategoricalAccuracy()

In [None]:
# Training function
@tf.function
def train_on_batch(inputs, target):
    """Run one optimisation step on a batch.

    :param inputs: model inputs, `[x, adj]`.
    :param target: integer class labels of the batch.
    :return: tuple (loss, acc) for THIS batch; the loss includes the model's
        regularisation losses.
    """
    with tf.GradientTape() as tape:
        predictions = model(inputs, training=True)
        loss = loss_fn(target, predictions) + sum(model.losses)

    # Stateless per-batch accuracy. A stateful Keras metric object would keep
    # accumulating across every call and report a running average instead.
    acc = tf.reduce_mean(
        tf.keras.metrics.sparse_categorical_accuracy(target, predictions))

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss, acc


# Evaluation function
def evaluate(loader):
    """Evaluate the model over one epoch of `loader`.

    :param loader: batch loader yielding `(x, target)` pairs and exposing
        `steps_per_epoch`.
    :return: np.array `[loss, acc]`, averaged over the batches with the batch
        sizes as weights; None if the loader ends before `steps_per_epoch`
        batches were produced.
    """
    step = 0
    results = []
    for batch in loader:
        step += 1
        x, target = batch
        predictions = model([x, adj], training=False)
        loss = loss_fn(target, predictions)
        # Stateless per-batch accuracy. A stateful Keras metric object would
        # mix in every batch it has ever seen (training ones included).
        acc = tf.reduce_mean(
            tf.keras.metrics.sparse_categorical_accuracy(target, predictions))
        results.append((loss, acc, len(target)))  # Keep track of batch size
        if step == loader.steps_per_epoch:
            results = np.array(results)
            return np.average(results[:, :-1], 0, weights=results[:, -1])

In [None]:
# Setup training
best_val_loss = 99999          # lowest validation loss seen so far
current_patience = patience    # epochs left without improvement before early stopping
step = 0                       # batch counter within the current epoch

# We can use PackedBatchLoader because we only need to create batches of node
# features with the same dimensions.
# The training loader is created for `epochs` epochs; the training loop
# consumes it as a single stream of batches.
loader_tr = PackedBatchLoader(data_tr, batch_size=batch_size, epochs=epochs)
loader_va = PackedBatchLoader(data_va, batch_size=batch_size)
loader_te = PackedBatchLoader(data_te, batch_size=batch_size)

In [None]:
# Training loop
results_tr = []      # (loss, acc, batch size) of every batch of the current epoch
loss_history = []    # one row of train/valid/test metrics per finished epoch
# Placeholder so that reporting cannot hit an undefined name if the very first
# validation loss does not beat the initial best (e.g. a NaN loss).
results_te = np.array([np.nan, np.nan])

epoch_cnt = 0
epoch_mod = 1        # report every epoch_mod-th epoch

if 100 < epochs < 1000:
    epoch_mod = 10
elif epochs >= 1000:
    epoch_mod = 20

start_time = time.time()

for batch in loader_tr:

    step += 1

    # Training step
    x, y = batch
    loss, acc = train_on_batch([x, adj], y)
    results_tr.append((loss, acc, len(y)))

    # End of an epoch: validate, test on improvement, report and reset.
    if step == loader_tr.steps_per_epoch:
        results_va = evaluate(loader_va)       #VALIDATE
        if results_va[0] < best_val_loss:
            best_val_loss = results_va[0]
            current_patience = patience
            results_te = evaluate(loader_te)   #TEST
        else:
            current_patience -= 1
            if current_patience == 0:
                print('Early stopping')
                break

        # Print results (batch metrics averaged with the batch sizes as weights)
        results_tr = np.array(results_tr)
        results_tr = np.average(results_tr[:, :-1], 0, weights=results_tr[:, -1])
        epoch_cnt += 1
        if epoch_cnt % epoch_mod == 0:
            # Current epoch first, total number of epochs second.
            print('epcho{:4}/{:<4} | '
                  'Train loss: {:.4f}, acc: {:.4f} | '
                  'Valid loss: {:.4f}, acc: {:.4f} | '
                  'Test loss: {:.4f}, acc: {:.4f}'
                  .format(epoch_cnt, epochs, *results_tr, *results_va, *results_te))

        loss_history.append((results_tr[0], results_tr[1],
                             results_va[0], results_va[1],
                             results_te[0], results_te[1]))

        # Reset epoch
        results_tr = []
        step = 0

loss_history = np.array(loss_history)
end_time = time.time()

print(f'Total Epoch:{epoch_cnt}, time elapse:{end_time - start_time}')

CHINESE Handwritten Digit

GPU: epcho  50/50   | Train loss: 0.0060, acc: 0.9581 | Valid loss: 0.3431, acc: 0.9583 | Test loss: 0.3431, acc: 0.9583
#Total Epoch:50, time elapse:235.3305425643921

8TPUs: epcho  50/50   | Train loss: 0.0061, acc: 0.9579 | Valid loss: 0.3526, acc: 0.9582 | Test loss: 0.3502, acc: 0.9569
#Total Epoch:50, time elapse:1778.945945739746

In [None]:
# Print the layers and parameter counts of the trained model.
model.summary()

In [None]:
# Plots: per-epoch loss (left) and accuracy (right).
# loss_history columns: 0/1 = train loss/acc, 2/3 = valid loss/acc, 4/5 = test loss/acc.
plt.figure(figsize=(15, 4))

plt.subplot(121)
plt.plot(loss_history[:, 0], label='Train loss')
plt.plot(loss_history[:, 2], label='Val loss')
plt.plot(loss_history[:, 4], label='Test loss')
plt.legend()
plt.ylabel('Loss')
plt.xlabel('Epoch')

plt.subplot(122)
plt.plot(loss_history[:, 1], label='Train acc')
plt.plot(loss_history[:, 3], label='Val acc')
plt.plot(loss_history[:, 5], label='Test acc')
plt.legend()
plt.ylabel('Acc')
plt.xlabel('Epoch')

plt.show()

In [None]:
print('Validate model')
# Predicted and true labels, one list entry per batch.
y_pred=[]
y_true=[]

# A fresh single-epoch loader so the loop below ends after one pass over the data.
loader_va = PackedBatchLoader(data_va, batch_size=batch_size, epochs=1)
#batches = [b for b in loader_va]
#(x, a, e), y = batches[-1]

#x,y = batches[-1]
for batch in loader_va:
    x_va, y_va = batch   
    p_va = model([x_va, adj], training=False)  #predict label per batch  
    # Predicted class = index of the largest output of each sample.
    y_pred.append([np.argmax(g) for g in p_va])   
    y_va = np.vstack(y_va)                     #True label per batch
    y_true.append(y_va)
# x_va, y_va and p_va keep the values of the LAST batch; the cells below use them.

In [None]:
# Displayed as the cell output: shapes of the last training and validation batches.
x.shape, y.shape, np.shape(y_true),np.shape(y_pred), y_va.shape, p_va.shape

In [None]:
#Last batch of Validate images

# NOTE(review): np.squeeze drops every axis of length 1, so a last batch with
# a single sample would presumably lose its batch axis too — confirm.
val_images = np.squeeze(x_va)     #(batch,IMG_SIZE**2) <- (batch,MNIST_SIZE**2,1)
val_images = np.reshape(val_images,[val_images.shape[0], MNIST_SIZE,-1])
y_va = np.squeeze(y_va)

# Largest square grid that fits the batch, capped at 5 x 5 images.
num_plt = int(np.sqrt(len(val_images)))
num_plt = np.amin([5,num_plt])
num_fig = num_plt**2

plt.figure(figsize=(10,12))
for i in range(num_fig):
    plt.subplot(num_plt,num_plt,i+1)
    # Hide ticks and grid so that only the image is visible.
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(val_images[i], cmap=plt.cm.binary)
    plt.xlabel(np.argmax(p_va[i])) #Predict label
    plt.title(y_va[i])             #True label 
plt.show()

In [None]:
# Print the "index:character" legend on a single line.
for i, char in enumerate(char_values):
    print(f'{i}:{char}  ', end=' ')

In [None]:
print('Testing model')

#loader_te = PackedBatchLoader(data_te, batch_size=data_te.n_graphs, epochs=1)
# A fresh single-epoch loader so the loop below ends after one pass over the data.
loader_te = PackedBatchLoader(data_te, batch_size=batch_size, epochs=1)
# Predicted and true labels, one column-vector entry per batch.
y_pred=[]
y_true=[]
for batch in loader_te:
    x_te, y_te = batch   
    p_te = model([x_te, adj], training=False)  #predict label per batch     
    # Predicted class = index of the largest output of each sample.
    pp= np.vstack([np.argmax(g) for g in p_te])
    y_pred.append(pp)
    y_te = np.vstack(y_te)
    y_true.append(y_te)
# x_te, y_te and p_te keep the values of the LAST batch; the next cell uses them.


# Displayed as the cell output: number of test samples.
len(data_te)

In [None]:
#Plot last batch of Test images

# NOTE(review): np.squeeze drops every axis of length 1, so a last batch with
# a single sample would presumably lose its batch axis too — confirm.
test_images = np.squeeze(x_te)
test_images = np.reshape(test_images,[test_images.shape[0], MNIST_SIZE,-1])
y_te = np.squeeze(y_te)

#Plot Test images
# Largest square grid that fits the batch, capped at 5 x 5 images.
num_plt = int(np.sqrt(len(test_images)))
num_plt = np.amin([5,num_plt])
num_fig = num_plt**2

plt.figure(figsize=(10,12))
for i in range(num_fig):
    plt.subplot(num_plt,num_plt,i+1)
    # Hide ticks and grid so that only the image is visible.
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(test_images[i], cmap=plt.cm.binary)
    plt.xlabel(np.argmax(p_te[i])) #Predict label
    plt.title(y_te[i])             #True label
plt.show()
