In [None]:
from caffe2.python import workspace
from caffe2.python import model_helper
from caffe2.python import brew, core

import numpy as np
import time

import pandas as pd
# Labelled training table (its 'label' column is read later in the notebook)
# and the table of rows to predict on.
df = pd.read_csv(filepath_or_buffer='../digit-recognizer/train.csv')
df_test = pd.read_csv(filepath_or_buffer='../digit-recognizer/test.csv')

In [None]:
from sklearn.model_selection import train_test_split

# Separate the target column from the remaining feature columns. Features are
# divided by 255.0 (presumably 8-bit pixel intensities being scaled into
# [0, 1] -- confirm against the dataset).
labels_np = df['label'].values
features_np = df.drop('label', axis=1).values / 255.0

# Hold out 20% of the labelled rows for validation; the fixed random_state
# makes the split repeatable.
X_train, X_valid, y_train, y_valid = train_test_split(
    features_np,
    labels_np,
    test_size=0.2,
    random_state=23,
)

# The unlabelled table receives the same scaling as the training features.
X_test = df_test.values / 255.0

In [None]:
# On-disk locations of the three databases (train / validation / test).
train_db_path, valid_db_path, test_db_path = (
    "/tmp/train.db",
    "/tmp/valid.db",
    "/tmp/test.db",
)

In [None]:
from caffe2.proto import caffe2_pb2

def create_database(db_name, features, labels=None):
    """Serialize a feature matrix (and optional labels) into a new minidb file.

    One record is written per row of ``features``. Each record is a
    ``TensorProtos`` message holding two tensors, in this order:

    1. the row itself as a FLOAT tensor whose dims are the row's shape;
    2. a dimensionless INT32 tensor holding the row's label, or ``-1`` as a
       placeholder when ``labels`` is ``None``.

    Args:
        db_name: Path of the database file to create (opened with
            ``Mode.new``).
        features: NumPy array, iterated along its first axis.
        labels: Optional sequence of integer labels, one per row of
            ``features``.

    Raises:
        ValueError: If ``labels`` is given but its length differs from the
            number of rows in ``features``.
    """
    num_rows = features.shape[0]
    # Validate before touching the disk so a bad call does not leave a
    # half-written database behind.
    if labels is not None and len(labels) != num_rows:
        raise ValueError(
            f"labels has {len(labels)} entries but features has {num_rows} rows"
        )

    db = core.C.create_db("minidb", db_name, core.C.Mode.new)
    tx = db.new_transaction()
    try:
        for i in range(num_rows):
            tensor_protos = caffe2_pb2.TensorProtos()

            feature_proto = tensor_protos.protos.add()
            feature_proto.dims.extend(features[i].shape)
            feature_proto.data_type = caffe2_pb2.TensorProto.DataType.FLOAT
            feature_proto.float_data.extend(features[i].reshape(-1))

            label_proto = tensor_protos.protos.add()
            label_proto.data_type = caffe2_pb2.TensorProto.DataType.INT32
            # int() turns NumPy integer scalars into plain Python ints, which
            # every protobuf implementation accepts for an int32 field.
            label_proto.int32_data.append(int(labels[i]) if labels is not None else -1)

            # Give every record its own zero-padded key so records stay
            # distinct (and ordered) on backends that index by key.
            tx.put(f"{i:08d}", tensor_protos.SerializeToString())
    finally:
        # The write is finished by dropping the references, transaction first
        # and database second (presumably their destructors commit and close
        # -- confirm). Doing it in `finally` releases them on errors as well.
        del tx
        del db

# Materialize each split as its own database. The test split has no labels,
# so create_database falls back to its placeholder label for those records.
create_database(db_name=train_db_path, features=X_train, labels=y_train)
create_database(db_name=valid_db_path, features=X_valid, labels=y_valid)
create_database(db_name=test_db_path, features=X_test)

In [None]:
def db_input(model, blobs_out, batch_size, db, db_type):
    """Attach a database-backed input to ``model``.

    A reader named ``dbreader_<db>`` is created through the model's
    ``param_init_net``; a ``TensorProtosDBInput`` op that consumes this reader
    is then added to the model's main net.

    Args:
        model: Object exposing ``param_init_net`` and ``net``.
        blobs_out: Names of the output blobs the input op should produce.
        batch_size: Forwarded to ``TensorProtosDBInput`` as ``batch_size``.
        db: Database location; also used as the suffix of the reader name.
        db_type: Database backend identifier, forwarded to ``CreateDB``.

    Returns:
        The result of ``model.net.TensorProtosDBInput``.
    """
    reader_blob = model.param_init_net.CreateDB(
        [], f"dbreader_{db}", db=db, db_type=db_type
    )
    input_blobs = model.net.TensorProtosDBInput(
        reader_blob,
        blobs_out,
        batch_size=batch_size,
    )
    return input_blobs

In [None]:
def create_model(name, db_name, batch_size=100, hidden_dim=150, train=True, accuracy=True,
                 input_dim=28 * 28, num_classes=10, learning_rate=0.02):
    """Build a four-layer fully connected classifier fed from a minidb file.

    Layout: input -> fc1 -> ReLU -> fc2 -> Tanh -> fc3 -> ELU -> fc4 -> Softmax.
    Blob names are fixed string literals ("data", "label", "fc1", ...,
    "softmax", "loss", "accuracy"), so every model built by this function uses
    the same names (presumably sharing those blobs when run in one workspace
    -- confirm).

    Args:
        name: Name given to the ``ModelHelper``.
        db_name: Path of the minidb file that supplies "data" and "label".
        batch_size: Number of records read per net run.
        hidden_dim: Width of the three hidden layers.
        train: When true, add cross-entropy loss, gradient operators and a
            plain SGD update for every parameter.
        accuracy: When true, add an ``Accuracy`` op writing to "accuracy".
        input_dim: Size of one flattened input record.
        num_classes: Size of the softmax output.
        learning_rate: Positive step size for the SGD update.

    Returns:
        The populated ``ModelHelper``.
    """
    model = model_helper.ModelHelper(name)

    data, label = db_input(
        model,
        ["data", "label"],
        batch_size=batch_size,
        db=db_name,
        db_type="minidb",
    )

    fc1 = brew.fc(model, data, "fc1", dim_in=input_dim, dim_out=hidden_dim)
    relu1 = model.Relu(fc1, "relu1")

    fc2 = brew.fc(model, relu1, "fc2", dim_in=hidden_dim, dim_out=hidden_dim)
    tanh2 = model.Tanh(fc2, "tanh2")

    # Wire the last two layers through the returned blob references, the same
    # way as the earlier layers, instead of repeating the names as strings.
    fc3 = brew.fc(model, tanh2, "fc3", dim_in=hidden_dim, dim_out=hidden_dim)
    elu3 = model.Elu(fc3, "elu3")

    fc4 = brew.fc(model, elu3, "fc4", dim_in=hidden_dim, dim_out=num_classes)
    softmax = model.Softmax(fc4, "softmax")

    if train:
        xent = model.LabelCrossEntropy([softmax, label], "xent")
        loss = model.AveragedLoss(xent, "loss")

        model.AddGradientOperators([loss])

        # The update below computes param = 1.0 * param + LR * grad, so LR
        # holds the *negated* learning rate to step against the gradient.
        LR = model.param_init_net.ConstantFill(
            [], "LR", shape=[1], value=-float(learning_rate)
        )
        ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)

        for param in model.params:
            param_grad = model.param_to_grad[param]
            model.WeightedSum([param, ONE, param_grad, LR], param)
    if accuracy:
        model.Accuracy([softmax, label], "accuracy")
    return model

# One model per split: only the training model gets loss/gradient/update ops,
# and the test model skips the accuracy op.
train_model = create_model(name="train_model", db_name=train_db_path, train=True, accuracy=True)
valid_model = create_model(name="valid_model", db_name=valid_db_path, train=False, accuracy=True)
test_model = create_model(name="test_model", db_name=test_db_path, train=False, accuracy=False)

In [None]:
# Run each model's one-off initialization net (this is where db_input placed
# the DB reader creation). The order is deliberately left as written: all
# three models were built by create_model with identical blob names, so
# NOTE(review): assuming they share one workspace, the net that runs last
# (the training model's) decides the starting values of the common blobs.
workspace.RunNetOnce(test_model.param_init_net)
workspace.RunNetOnce(valid_model.param_init_net)
workspace.RunNetOnce(train_model.param_init_net)

In [None]:
def calc_valid_metrics():
    """Return the mean "accuracy" blob value over a sweep of validation batches.

    The validation net is (re)created, then run ``X_valid.shape[0] // 100``
    times; the "accuracy" blob is fetched after every run and the fetched
    values are averaged. The 100 equals the default ``batch_size`` of
    ``create_model``, which is what ``valid_model`` was built with.
    """
    workspace.CreateNet(valid_model.net, overwrite=True)
    net_name = valid_model.net.Name()

    def _accuracy_of_next_batch():
        # Run one validation batch and read back its accuracy.
        workspace.RunNet(net_name)
        return workspace.FetchBlob("accuracy")

    num_batches = X_valid.shape[0] // 100
    per_batch = [_accuracy_of_next_batch() for _ in range(num_batches)]
    return np.array(per_batch).mean()

workspace.CreateNet(train_model.net, overwrite=True)

# NOTE(review): each "epoch" here is a single RunNet call on the training net
# -- presumably one mini-batch step rather than a full pass over the data.
NUM_EPOCHS = 10000
all_train_loss, all_train_accuracy, all_valid_accuracy = [], [], []
for i in range(NUM_EPOCHS):
    # The timer restarts on every iteration, so the duration printed below
    # covers the reporting iteration plus its metric collection only.
    start_ts = time.time()
    workspace.RunNet(train_model.net.Name())

    # Metrics are collected on every 100th iteration; skip the rest.
    if (i + 1) % 100 != 0:
        continue

    # Read the training metrics before validation runs: calc_valid_metrics
    # fetches a blob that is also named "accuracy".
    train_loss = workspace.FetchBlob("loss")
    train_accuracy = workspace.FetchBlob("accuracy")
    valid_accuracy = calc_valid_metrics()

    all_train_loss.append(train_loss)
    all_train_accuracy.append(train_accuracy)
    all_valid_accuracy.append(valid_accuracy)

    epoch_time = time.time() - start_ts
    print(
        f"Epoch #{i + 1}/{NUM_EPOCHS}: time elapsed for this epoch {epoch_time}"
        f", train_loss {train_loss}, train accuracy {train_accuracy}, valid accuracy {valid_accuracy}"
    )

In [None]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
# The training loop records one point every 100 iterations; convert the
# record index into the iteration count so the x axis shows real progress.
xlist = [100 * (k + 1) for k in range(len(all_train_loss))]
ax.plot(xlist, all_train_loss, label="train_loss")
ax.plot(xlist, all_train_accuracy, label="train_accuracy")
ax.plot(xlist, all_valid_accuracy, label="valid_accuracy")
# Label the figure so it can be read without the surrounding code.
ax.set_xlabel("training iteration")
ax.set_ylabel("loss / accuracy")
ax.set_title("Training progress")
# Stay on the object-oriented API rather than the pyplot state machine.
ax.legend()

In [None]:
workspace.CreateNet(test_model.net, overwrite=True)

# Must equal the batch size test_model was built with (create_model's default).
test_batch_size = 100
num_test = X_test.shape[0]
# Round up so a trailing partial batch is predicted instead of being dropped.
num_batches = (num_test + test_batch_size - 1) // test_batch_size

predicted_labels = []
for i in range(num_batches):
    workspace.RunNet(test_model.net.Name())
    # One row of class probabilities per record in the batch.
    batch_pred = workspace.FetchBlob("softmax")
    if (i + 1) % 20 == 0:
        print(f"Prediction #{i + 1}/{num_batches}...")
    # The predicted class is the index of the largest probability in each row.
    predicted_labels.extend(np.argmax(batch_pred, axis=1).tolist())

# NOTE(review): assumes the DB reader wraps around to the first record at the
# end of the data, so a final partial batch is padded with rows from the
# start -- confirm. Those padding predictions are discarded here; when the
# row count is a multiple of the batch size this slice changes nothing.
predicted_labels = predicted_labels[:num_test]

In [None]:
# Two-column result table: 1-based row ids next to the predicted labels,
# written without the DataFrame index.
out_df = pd.DataFrame(
    {
        "ImageId": list(range(1, len(predicted_labels) + 1)),
        "Label": predicted_labels,
    },
    columns=["ImageId", "Label"],
)
out_df.to_csv(path_or_buf="/tmp/c2_ref.ans", index=False)