In [1]:
from caffe2.python import workspace
from caffe2.python import model_helper
from caffe2.python import brew, core, cnn
from caffe2.proto import caffe2_pb2
import numpy as np
import time



In [2]:
import pandas as pd
df = pd.read_csv('digit-recognizer/train.csv')
df_test = pd.read_csv('digit-recognizer/test.csv')

from sklearn.model_selection import train_test_split

# prepare data
labels_numpy = df.label.values
features_numpy = df.loc[:, df.columns != 'label'].values / 255.0

X_train, X_valid, y_train, y_valid = train_test_split(
    features_numpy, labels_numpy, test_size=0.2, random_state=42)

X_test = df_test.values / 255.0

In [3]:
def create_database(db_name, images, labels=None):
    # Create empty leveldb database
    # TODO why can not create leveldb
    db = core.C.create_db('minidb', db_name, core.C.Mode.write)
    transaction = db.new_transaction()
    
    # Move all data to the database
    for i in range(images.shape[0]):
        tensor_protos = caffe2_pb2.TensorProtos()
        
        # Copy image with MNIST number
        img_tensor = tensor_protos.protos.add()
        img_tensor.dims.extend(images[i].shape)
        img_tensor.data_type = 1
        flatten_img = images[i].reshape(np.prod(images[i].shape))
        img_tensor.float_data.extend(flatten_img)

        # Copy label for each number
        label_tensor = tensor_protos.protos.add()
        label_tensor.data_type = 2
        if labels is not None:
            label_tensor.int32_data.append(labels[i])
        else:
            label_tensor.int32_data.append(-1)

        # Add data in transaction
        transaction.put('%0.6d' % i, tensor_protos.SerializeToString())

    # Close the transaction and close the database
    del transaction
    del db

create_database('db_train', X_train, y_train)
create_database('db_validation', X_valid, y_valid)
create_database('db_test', X_test)

In [4]:
def db_input(model, blobs_out, batch_size, db, db_type):
    dbreader_name = "dbreader_" + db
    dbreader = model.param_init_net.CreateDB(
        [],
        dbreader_name,
        db=db,
        db_type=db_type,
    )
    return model.net.TensorProtosDBInput(
        dbreader, blobs_out, batch_size=batch_size)

In [5]:
def create_model(name, db_name, batch_size=100, hidden_dim=150, output_dim=10, train=True, accuracy=True):
    model = model_helper.ModelHelper(name=name)

    # Prepare data input operator that will fetch data from DB
    data, label = db_input(
        model,
        ['data', 'label'],
        batch_size=batch_size,
        db=db_name,
        # db_type='leveldb')
        db_type='minidb')
    data = model.StopGradient(data, data)
    fc1 = brew.fc(model, data, "fc1", dim_in=28 * 28, dim_out=hidden_dim)
    relu1 = model.Relu(fc1, "relu1")
    
    fc2 = brew.fc(model, relu1, "fc2", dim_in=hidden_dim, dim_out=hidden_dim)
    tanh2 = model.Tanh(fc2, "tanh2")
    
    fc3 = brew.fc(model, tanh2, "fc3", dim_in=hidden_dim, dim_out=hidden_dim)
    elu3 = model.Elu("fc3", "elu3")
    
    fc4 = brew.fc(model, "elu3", "fc4", dim_in=hidden_dim, dim_out=output_dim)
    
    softmax = model.Softmax(fc4, 'softmax')

    # Check if we need to add training operators
    if train:
        # Prepare Cross Entropy operators with loss
        xent = model.LabelCrossEntropy([softmax, label], 'xent')
        loss = model.AveragedLoss(xent, "loss")

        # Add all gradient operators that will be needed to calculate our loss and train our model
        model.AddGradientOperators([loss])
        
        # Prepare variables for SGD
        ITER = model.Iter([], "iter")
        # LR = model.LearningRate(ITER, "LR", base_lr=-0.1, policy="step", stepsize=1, gamma=0.999)
        LR = model.param_init_net.ConstantFill([], "LR", shape=[1], value=-0.02)
        ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
        
        # Update all gradients for each params
        for param in model.params:
            # Note how we get the gradient of each parameter - CNNModelHelper keeps
            # track of that
            param_grad = model.param_to_grad[param]
            
            # The update is a simple weighted sum: param = param + param_grad * LR
            model.WeightedSum([param, ONE, param_grad, LR], param)

    # Add accuracy metrics if needed
    if accuracy:
        model.Accuracy([softmax, label], "accuracy")
    
    return model


In [6]:
train_model = create_model("train", "db_train")
validation_model = create_model("validation", "db_validation", train=False)



In [11]:
train_model.net.Proto()

name: "train"
op {
  input: "dbreader_db_train"
  output: "data"
  output: "label"
  name: ""
  type: "TensorProtosDBInput"
  arg {
    name: "batch_size"
    i: 100
  }
}
op {
  input: "data"
  output: "data"
  name: ""
  type: "StopGradient"
}
op {
  input: "data"
  input: "fc1_w"
  input: "fc1_b"
  output: "fc1"
  name: ""
  type: "FC"
  arg {
    name: "order"
    s: "NCHW"
  }
  arg {
    name: "use_cudnn"
    i: 1
  }
  arg {
    name: "cudnn_exhaustive_search"
    i: 0
  }
}
op {
  input: "fc1"
  output: "relu1"
  name: ""
  type: "Relu"
}
op {
  input: "relu1"
  input: "fc2_w"
  input: "fc2_b"
  output: "fc2"
  name: ""
  type: "FC"
  arg {
    name: "order"
    s: "NCHW"
  }
  arg {
    name: "use_cudnn"
    i: 1
  }
  arg {
    name: "cudnn_exhaustive_search"
    i: 0
  }
}
op {
  input: "fc2"
  output: "tanh2"
  name: ""
  type: "Tanh"
}
op {
  input: "tanh2"
  input: "fc3_w"
  input: "fc3_b"
  output: "fc3"
  name: ""
  type: "FC"
  arg {
    name: "order"
    s: "NCHW"
  }

In [7]:
def calculate_validation_accuracy():
    # Initialize our model
    workspace.RunNetOnce(validation_model.param_init_net)
    workspace.CreateNet(validation_model.net, overwrite=True)
    
    # Iterate over all validation dataset
    all_accuracy = []
    for i in range(X_valid.shape[0]//100):
        workspace.RunNet(validation_model.net.Proto().name)
        all_accuracy.append(workspace.FetchBlob('accuracy'))
    
    # Return mean accuracy for validation dataset
    return np.array(all_accuracy).mean()

In [8]:
# Initialize out training model
workspace.RunNetOnce(train_model.param_init_net)
workspace.CreateNet(train_model.net, overwrite=True)

# Iterate over all epochs
# NUMBER_OF_EPOCHS = 10000
NUMBER_OF_EPOCHS = 100
for i in range(NUMBER_OF_EPOCHS):
    # Train our model
    start_time = time.time()
    workspace.RunNet(train_model.net.Proto().name)
    
    # Once per 20 epochs let's run validation and print results
    if (i+1) % 50 == 0:
        train_loss = workspace.FetchBlob('loss')
        train_accuracy = workspace.FetchBlob('accuracy')
        val_accuracy = calculate_validation_accuracy()
        epoch_time = time.time()-start_time
        print(('Epoch #%d/%d TIME_per_epoch: %.3fs '+
               'TRAIN_Loss: %.4f TRAIN_Acc: %.4f '+
               'VAL_Acc: %.4f') % (i+1, NUMBER_OF_EPOCHS, epoch_time, train_loss, train_accuracy, val_accuracy))

Epoch #50/100 TIME_per_epoch: 0.126s TRAIN_Loss: 1.7594 TRAIN_Acc: 0.6100 VAL_Acc: 0.0857
Epoch #100/100 TIME_per_epoch: 0.113s TRAIN_Loss: 1.6859 TRAIN_Acc: 0.7400 VAL_Acc: 0.0839


In [9]:
# Initialize out prediction model
test_model = create_model('test_model', 'db_test', train=False, accuracy=False)
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)

# Iterate over all test dataset
predicted_labels = []
for i in range(X_test.shape[0]//100):
    # Run our model for predicting labels
    workspace.RunNet(test_model.net.Proto().name)
    batch_prediction = workspace.FetchBlob('softmax')
    if (i+1) % 20 == 0:
        print('Predicting #{}/{}...'.format(i+1, X_test.shape[0]/100))
    
    # Retrieve labels
    for prediction in batch_prediction:
        predicted_labels.append(np.argmax(prediction))  # Label = index of max argument



Predicting #20/280.0...
Predicting #40/280.0...
Predicting #60/280.0...
Predicting #80/280.0...
Predicting #100/280.0...
Predicting #120/280.0...
Predicting #140/280.0...
Predicting #160/280.0...
Predicting #180/280.0...
Predicting #200/280.0...
Predicting #220/280.0...
Predicting #240/280.0...
Predicting #260/280.0...
Predicting #280/280.0...


In [10]:
len(predicted_labels)

28000