## Import necessary libraries

In [None]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from catboost import CatBoostClassifier
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

## Utilities

In [None]:
def sample_images(images, row_count, column_count):
    """Display the first ``row_count * column_count`` images in a grid.

    Args:
        images: Indexable collection of images; entry ``k`` is drawn with
            ``imshow`` in row ``k // column_count``, column
            ``k % column_count``. Must hold at least
            ``row_count * column_count`` entries.
        row_count: Number of grid rows.
        column_count: Number of grid columns.
    """
    # squeeze=False keeps ``axs`` two-dimensional even when the grid has a
    # single row or column; otherwise ``axs[i, j]`` fails for those shapes.
    _, axs = plt.subplots(
        row_count, column_count, figsize=(10, 10), squeeze=False
    )
    for i in range(row_count):
        for j in range(column_count):
            ax = axs[i, j]
            ax.imshow(images[i * column_count + j])
            ax.axis('off')
    plt.show()

## Load data

In [None]:
# Read the competition's training and test CSV files into DataFrames.
train, test = map(
    pd.read_csv,
    (
        '/kaggle/input/digit-recognizer/train.csv',
        '/kaggle/input/digit-recognizer/test.csv',
    ),
)

In [None]:
# Preview the first rows of the training frame.
train.head()

In [None]:
# Separate the target: pop() removes the "label" column from `train` in place
# and returns it, so `train` keeps only the remaining columns afterwards.
# Re-running this cell without reloading `train` raises KeyError.
train_labels = train.pop("label")

In [None]:
# Preview the first rows of the test frame.
test.head()

In [None]:
# Draw a random set of training rows and show them as a 15 x 15 grid.
row_count, col_count = 15, 15
# np.random.choice samples with replacement by default, so the same row can
# appear more than once in the grid.
picked_rows = np.random.choice(train.shape[0], row_count * col_count)
# Keep the first 784 columns of each row and view them as 28 x 28 images.
pixels = train.iloc[picked_rows].to_numpy()[:, :784]
images = pixels.reshape((-1, 28, 28))
sample_images(images, row_count, col_count)

## Model Development & Evaluation

In [None]:
def evaluate(y_true, y_pred):
    """Print the accuracy and a classification report for predictions.

    Args:
        y_true: Ground-truth labels; must expose ``.shape``.
        y_pred: Predicted labels aligned with ``y_true``; must expose
            ``.shape``.
    """
    # Import the submodule explicitly: a bare ``import sklearn`` does not
    # guarantee that ``sklearn.metrics`` has been loaded, so attribute access
    # would only work if some other import happened to pull it in.
    from sklearn.metrics import accuracy_score, classification_report

    print(y_true.shape, y_pred.shape)
    acc = accuracy_score(y_true, y_pred)
    print("Accuracy score: %.2f" % acc)
    print("Classification report:")
    print(classification_report(y_true, y_pred))

In [None]:
# Stratified 5-fold cross-validation: one CatBoost classifier is fitted per
# fold, evaluated on that fold's held-out part, and collected in `models`.
kfold = sklearn.model_selection.StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
models = []
# Train Validation Split is regarded as calling K-Fold algorithm once.
is_validation_split = False
# The hyper-parameters are the same for every fold, so they are built once.
cat_params = {
    'iterations': 10000,
    'depth': 6,
    'task_type': 'GPU',
    'use_best_model': True,
    'early_stopping_rounds': 1000,
    'learning_rate': 0.03,
    'verbose': 1000,
}
for fold, (train_indices, val_indices) in enumerate(kfold.split(train, train_labels)):
    print("Training with fold %d"%(fold + 1))
    X_train, y_train = train.iloc[train_indices], train_labels.iloc[train_indices]
    X_val, y_val = train.iloc[val_indices], train_labels.iloc[val_indices]
    cat = CatBoostClassifier(**cat_params)
    # The held-out fold doubles as the eval set used for early stopping and
    # best-model selection.
    cat.fit(X_train, y_train, eval_set=[(X_val, y_val)])
    models.append(cat)
    # Flatten the predictions to 1-D before scoring.
    val_predictions = cat.predict(X_val).reshape(-1)
    evaluate(y_val, val_predictions)
    if is_validation_split:
        # Single train/validation split requested: stop after the first fold.
        break

## Submission

In [None]:
# Ensemble the fold models: average their class probabilities and take the
# index of the highest mean probability for each test row.
fold_probabilities = [model.predict_proba(test) for model in models]
test_labels = np.mean(fold_probabilities, axis=0).argmax(axis=-1)
# Image ids are 1-based and follow the row order of `test`.
image_ids = np.arange(1, test_labels.shape[0] + 1)
# Two-column table: id in the first column, predicted label in the second.
result = np.column_stack((image_ids, test_labels))
df = pd.DataFrame(result, columns=["ImageId", "Label"], dtype='int')
df.to_csv("submission.csv", index=False)