In [1]:
from caffe2.python import workspace
from caffe2.python import model_helper
from caffe2.python import brew, core, cnn
from caffe2.proto import caffe2_pb2
import numpy as np
import time



In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the labelled training table and the unlabelled test table.
df = pd.read_csv('digit-recognizer/train.csv')
df_test = pd.read_csv('digit-recognizer/test.csv')

# Separate targets from features. Every feature value is divided by 255.0
# (presumably 8-bit pixel intensities being mapped to [0, 1] -- confirm).
labels_numpy = df['label'].values
features_numpy = df.drop(columns='label').values / 255.0

# Hold out 20% of the labelled rows for validation, with a fixed seed so the
# split is reproducible.
X_train, X_valid, y_train, y_valid = train_test_split(
    features_numpy,
    labels_numpy,
    test_size=0.2,
    random_state=42,
)

# The test table has no label column, so all of it is used as features and
# gets the same scaling as the training features.
X_test = df_test.values / 255.0

In [3]:
def create_database(db_name, images, labels=None):
    """Write images (and optional labels) into a new minidb database.

    Every record is a serialized ``TensorProtos`` message holding two tensors:
    the image as FLOAT data together with its original dims, followed by a
    single INT32 label. Records are keyed by the zero-padded row index.

    Args:
        db_name: Path of the database to create.
        images: Array whose first axis indexes the samples.
        labels: Optional sequence with one integer label per image. When it is
            omitted, every record gets the placeholder label -1.

    Raises:
        ValueError: If ``labels`` is given but its length differs from the
            number of images.
    """
    num_images = images.shape[0]
    if labels is not None and len(labels) != num_images:
        # Fail before anything is written instead of raising IndexError
        # half-way through (or silently ignoring surplus labels).
        raise ValueError(
            'labels has %d entries but images has %d rows'
            % (len(labels), num_images))

    # TODO: a 'leveldb' database could not be created here, so 'minidb' is
    # used instead; the cause is still unknown.
    db = core.C.create_db('minidb', db_name, core.C.Mode.new)
    transaction = db.new_transaction()
    try:
        for i in range(num_images):
            tensor_protos = caffe2_pb2.TensorProtos()

            # First tensor: the image, flattened, with its dims recorded.
            image = images[i]
            img_tensor = tensor_protos.protos.add()
            img_tensor.dims.extend(image.shape)
            img_tensor.data_type = caffe2_pb2.TensorProto.FLOAT
            img_tensor.float_data.extend(image.reshape(-1))

            # Second tensor: the label, or -1 when no labels were supplied.
            label_tensor = tensor_protos.protos.add()
            label_tensor.data_type = caffe2_pb2.TensorProto.INT32
            label_tensor.int32_data.append(
                int(labels[i]) if labels is not None else -1)

            transaction.put('%0.6d' % i, tensor_protos.SerializeToString())
    finally:
        # Dropping the references closes the transaction and then the
        # database; do it even when writing a record failed.
        del transaction
        del db

# Write one minidb per data split; the test split has no labels, so its
# records get the placeholder label.
for db_path, split_images, split_labels in (
    ('/tmp/db_train', X_train, y_train),
    ('/tmp/db_validation', X_valid, y_valid),
    ('/tmp/db_test', X_test, None),
):
    create_database(db_path, split_images, split_labels)

In [4]:
def db_input(model, blobs_out, batch_size, db, db_type):
    """Attach a database reader and a batched input op to ``model``.

    A ``CreateDB`` op whose output is named ``"dbreader_" + db`` is added to
    ``model.param_init_net``, and a ``TensorProtosDBInput`` op that consumes
    this reader is added to ``model.net``.

    Args:
        model: Object exposing ``param_init_net`` and ``net``.
        blobs_out: Output blob names for the input op.
        batch_size: Batch size passed to the input op.
        db: Database path; also used to build the reader's name.
        db_type: Database backend name passed to ``CreateDB``.

    Returns:
        Whatever ``model.net.TensorProtosDBInput`` returns for ``blobs_out``.
    """
    reader = model.param_init_net.CreateDB(
        [], "dbreader_" + db, db=db, db_type=db_type)
    return model.net.TensorProtosDBInput(
        reader, blobs_out, batch_size=batch_size)

In [5]:
def create_model(name, db_name, batch_size=100, hidden_dim=150, output_dim=10,
                 train=True, accuracy=True, input_dim=28 * 28,
                 learning_rate=0.02):
    """Build a four-layer fully connected classifier fed from a minidb.

    Layers: fc1 -> Relu -> fc2 -> Tanh -> fc3 -> Elu -> fc4 -> Softmax. Blob
    names are fixed ("fc1", "softmax", "loss", "accuracy", ...), so every model
    built by this function refers to the same blob names.

    Args:
        name: Name given to the ``ModelHelper``.
        db_name: Path of the minidb the input op reads from.
        batch_size: Number of records fetched per net run.
        hidden_dim: Width of the three hidden layers.
        output_dim: Number of output classes.
        train: When true, add the loss, gradient and parameter-update ops.
        accuracy: When true, add an "accuracy" blob computed from the softmax
            output and the labels.
        input_dim: Number of features per sample (defaults to 28 * 28).
        learning_rate: Positive step size of the plain SGD update (defaults to
            0.02); only used when ``train`` is true.

    Returns:
        The populated ``ModelHelper``.
    """
    model = model_helper.ModelHelper(name=name)

    # Input op that fetches (data, label) batches from the database.
    data, label = db_input(
        model,
        ['data', 'label'],
        batch_size=batch_size,
        db=db_name,
        db_type='minidb')
    # The input is not something to differentiate with respect to.
    data = model.StopGradient(data, data)

    fc1 = brew.fc(model, data, "fc1", dim_in=input_dim, dim_out=hidden_dim)
    relu1 = model.Relu(fc1, "relu1")

    fc2 = brew.fc(model, relu1, "fc2", dim_in=hidden_dim, dim_out=hidden_dim)
    tanh2 = model.Tanh(fc2, "tanh2")

    fc3 = brew.fc(model, tanh2, "fc3", dim_in=hidden_dim, dim_out=hidden_dim)
    elu3 = model.Elu(fc3, "elu3")

    fc4 = brew.fc(model, elu3, "fc4", dim_in=hidden_dim, dim_out=output_dim)
    softmax = model.Softmax(fc4, 'softmax')

    if train:
        # Cross-entropy loss averaged over the batch.
        xent = model.LabelCrossEntropy([softmax, label], 'xent')
        loss = model.AveragedLoss(xent, "loss")

        # Add every gradient operator needed to back-propagate the loss.
        model.AddGradientOperators([loss])

        # Iteration counter. The constant learning rate below does not consume
        # it; it stays in the net so a LearningRate schedule could use it.
        model.Iter([], "iter")

        # The update is a weighted sum, param = 1 * param + LR * grad, so the
        # step size is stored negated.
        LR = model.param_init_net.ConstantFill(
            [], "LR", shape=[1], value=-learning_rate)
        ONE = model.param_init_net.ConstantFill(
            [], "ONE", shape=[1], value=1.0)

        for param in model.params:
            # The model helper keeps track of each parameter's gradient blob.
            param_grad = model.param_to_grad[param]
            model.WeightedSum([param, ONE, param_grad, LR], param)

    if accuracy:
        model.Accuracy([softmax, label], "accuracy")

    return model


In [6]:
# One model per data split. Only the training model gets loss/update ops, and
# the test model additionally skips the accuracy op.
train_model = create_model("train", "/tmp/db_train")
validation_model = create_model("validation", "/tmp/db_validation", train=False)
test_model = create_model('test_model', '/tmp/db_test', train=False, accuracy=False)

# Run each init net once, in the order test, validation, train.
for built_model in (test_model, validation_model, train_model):
    workspace.RunNetOnce(built_model.param_init_net)

# Dump the text form of the nets to /tmp for inspection.
for proto_path, dumped_net in (
    ('/tmp/proto', train_model.net),
    ('/tmp/proto.init', train_model.param_init_net),
    ('/tmp/proto.init.val', validation_model.param_init_net),
):
    with open(proto_path, 'w') as f:
        f.write(str(dumped_net.Proto()))



In [7]:
def calculate_validation_accuracy():
    """Return the mean per-batch accuracy of ``validation_model``.

    The validation net is (re)created in the workspace without re-running its
    init net, then run once for every full group of 100 rows in ``X_valid``.
    The 'accuracy' blob is read after each run and the readings are averaged.
    """
    workspace.CreateNet(validation_model.net, overwrite=True)
    net_name = validation_model.net.Proto().name

    def _batch_accuracy():
        # One net run consumes one batch and refreshes the 'accuracy' blob.
        workspace.RunNet(net_name)
        return workspace.FetchBlob('accuracy')

    num_batches = X_valid.shape[0] // 100
    return np.array([_batch_accuracy() for _ in range(num_batches)]).mean()

In [8]:
def inspect():
    """Print the current value of the "fc1_b" blob held in the workspace."""
    print(workspace.FetchBlob("fc1_b"))

In [9]:
# (Re)run the training init net and instantiate the training net.
workspace.RunNetOnce(train_model.param_init_net)
workspace.CreateNet(train_model.net, overwrite=True)

# Each loop iteration runs the training net once, i.e. processes one batch.
NUMBER_OF_EPOCHS = 10000
for i in range(NUMBER_OF_EPOCHS):
    start_time = time.time()
    workspace.RunNet(train_model.net.Proto().name)

    # Only every 500th iteration is validated and reported.
    if (i + 1) % 500 != 0:
        continue

    # Read the training blobs first: the validation net writes a blob named
    # 'accuracy' as well, so fetching afterwards would return its value.
    train_loss = workspace.FetchBlob('loss')
    train_accuracy = workspace.FetchBlob('accuracy')
    val_accuracy = calculate_validation_accuracy()

    # The elapsed time spans this iteration's training run plus the
    # validation pass above.
    epoch_time = time.time() - start_time
    report = (
        'Epoch #%d/%d TIME_per_epoch: %.3fs '
        'TRAIN_Loss: %.4f TRAIN_Acc: %.4f '
        'VAL_Acc: %.4f'
    ) % (i + 1, NUMBER_OF_EPOCHS, epoch_time, train_loss, train_accuracy, val_accuracy)
    print(report)
        

Epoch #500/10000 TIME_per_epoch: 0.120s TRAIN_Loss: 0.3850 TRAIN_Acc: 0.9100 VAL_Acc: 0.8923
Epoch #1000/10000 TIME_per_epoch: 0.099s TRAIN_Loss: 0.2861 TRAIN_Acc: 0.8800 VAL_Acc: 0.9143
Epoch #1500/10000 TIME_per_epoch: 0.101s TRAIN_Loss: 0.1244 TRAIN_Acc: 0.9800 VAL_Acc: 0.9239
Epoch #2000/10000 TIME_per_epoch: 0.097s TRAIN_Loss: 0.1886 TRAIN_Acc: 0.9600 VAL_Acc: 0.9307
Epoch #2500/10000 TIME_per_epoch: 0.098s TRAIN_Loss: 0.2135 TRAIN_Acc: 0.9400 VAL_Acc: 0.9393
Epoch #3000/10000 TIME_per_epoch: 0.097s TRAIN_Loss: 0.0904 TRAIN_Acc: 0.9700 VAL_Acc: 0.9445
Epoch #3500/10000 TIME_per_epoch: 0.145s TRAIN_Loss: 0.1791 TRAIN_Acc: 0.9400 VAL_Acc: 0.9486
Epoch #4000/10000 TIME_per_epoch: 0.096s TRAIN_Loss: 0.0442 TRAIN_Acc: 0.9900 VAL_Acc: 0.9537
Epoch #4500/10000 TIME_per_epoch: 0.097s TRAIN_Loss: 0.2320 TRAIN_Acc: 0.9100 VAL_Acc: 0.9540
Epoch #5000/10000 TIME_per_epoch: 0.097s TRAIN_Loss: 0.0880 TRAIN_Acc: 0.9700 VAL_Acc: 0.9565
Epoch #5500/10000 TIME_per_epoch: 0.105s TRAIN_Loss: 0.1856 T

In [10]:
# Instantiate the prediction net. Its init net is deliberately not re-run
# here, so the net uses whatever parameter values are in the workspace.
workspace.CreateNet(test_model.net, overwrite=True)

# 100 is the batch size the test net was built with (create_model's default).
# NOTE(review): rows beyond the last full batch are not predicted; this is
# only harmless when the test set size is a multiple of 100.
num_test_batches = X_test.shape[0] // 100

predicted_labels = []
for i in range(num_test_batches):
    # Each run reads the next batch from the test database.
    workspace.RunNet(test_model.net.Proto().name)
    batch_prediction = workspace.FetchBlob('softmax')
    if (i+1) % 20 == 0:
        # Integer batch count, so the progress reads "20/280", not "20/280.0".
        print('Predicting #{}/{}...'.format(i+1, num_test_batches))

    # Label = index of the largest softmax output in each row.
    predicted_labels.extend(np.argmax(batch_prediction, axis=1))

Predicting #20/280.0...
Predicting #40/280.0...
Predicting #60/280.0...
Predicting #80/280.0...
Predicting #100/280.0...
Predicting #120/280.0...
Predicting #140/280.0...
Predicting #160/280.0...
Predicting #180/280.0...
Predicting #200/280.0...
Predicting #220/280.0...
Predicting #240/280.0...
Predicting #260/280.0...
Predicting #280/280.0...


In [11]:
# Map each prediction to its 1-based position, which serves as the ImageId.
raw_dict = dict(enumerate(predicted_labels, start=1))

# Two-column table (ImageId, Label) written without the DataFrame index.
submission_columns = {
    "ImageId": raw_dict.keys(),
    "Label": raw_dict.values(),
}
out_df = pd.DataFrame(data=submission_columns)
out_df.to_csv('/tmp/caffe2_nn_2', index=False)