Kaiming Initialization (#14718)
Summary:
/cc goldsborough

Working on #14582

The corresponding python implementations are at: [pytorch/torch/nn/init.py](https://github.com/pytorch/pytorch/blob/6302e4001ab54b3ddeca2b608d337fe7077e801c/torch/nn/init.py#L261-L327)

Here is my initial implementation of Kaiming Initialization. I have not been able to figure out how to successfully run the tests locally, so I haven't added any yet.
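
For context, a minimal sketch of the math the Kaiming initializers perform, following the linked Python implementation (the helper below is illustrative only, not the actual C++ signature added in this PR):

```cpp
#include <cmath>
#include <torch/torch.h>

// Illustrative sketch: Kaiming uniform uses std = gain / sqrt(fan), with
// gain = sqrt(2 / (1 + a^2)) for leaky_relu (a = negative slope), then
// samples from U(-bound, bound) where bound = sqrt(3) * std so the variance
// matches. The normal variant instead fills the tensor from N(0, std).
torch::Tensor kaiming_uniform_sketch(torch::Tensor tensor, double a = 0.0) {
  const auto fan_in = tensor.size(1);  // fan-in of a 2D weight ([out, in])
  const double gain = std::sqrt(2.0 / (1.0 + a * a));
  const double stddev = gain / std::sqrt(static_cast<double>(fan_in));
  const double bound = std::sqrt(3.0) * stddev;
  torch::NoGradGuard no_grad;  // initialization should not be recorded by autograd
  return tensor.uniform_(-bound, bound);
}
```

For fan-out mode, `fan_in` above would be replaced by `tensor.size(0)`.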

A couple questions:
- Are the enums defined in the right place? I copied their names from Python, but do you prefer different naming conventions for C++?
- To run tests locally, do I use `python setup.py test`? Can I run just a subset of the tests somehow?
- Should I add my tests at [test/cpp/api/misc.cpp](https://github.com/pytorch/pytorch/blob/master/test/cpp/api/misc.cpp#L47-L54)?
Pull Request resolved: #14718

Differential Revision: D14049159

Pulled By: goldsborough

fbshipit-source-id: 966ac5126875936e69b185b5041f16476ed4cf70
JoshVarty authored and facebook-github-bot committed Feb 15, 2019
1 parent 5eee067 commit 1cdcdd7
Showing 7 changed files with 475 additions and 11 deletions.
1 change: 1 addition & 0 deletions test/cpp/api/CMakeLists.txt
@@ -5,6 +5,7 @@ set(TORCH_API_TEST_SOURCES
  ${TORCH_API_TEST_DIR}/dataloader.cpp
  ${TORCH_API_TEST_DIR}/expanding-array.cpp
  ${TORCH_API_TEST_DIR}/integration.cpp
  ${TORCH_API_TEST_DIR}/init.cpp
  ${TORCH_API_TEST_DIR}/jit.cpp
  ${TORCH_API_TEST_DIR}/memory.cpp
  ${TORCH_API_TEST_DIR}/misc.cpp
126 changes: 126 additions & 0 deletions test/cpp/api/init.cpp
@@ -0,0 +1,126 @@
#include <gtest/gtest.h>

#include <torch/nn/init.h>
#include <torch/nn/modules/linear.h>

#include <test/cpp/api/init_baseline.h>
#include <test/cpp/api/support.h>

#include <cmath>
#include <functional>
#include <iostream>
#include <vector>

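// Compares each actual parameter tensor element-wise against the corresponding
// expected baseline tensor, within the rtol/atol tolerances used below.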
void check_exact_values(
    const std::vector<torch::Tensor>& parameters,
    const std::vector<std::vector<torch::Tensor>>& expected_parameters) {
  ASSERT_EQ(parameters.size(), expected_parameters.size());

  for (size_t i = 0; i < parameters.size(); i++) {
    auto layerParameters = parameters[i];
    auto expectedLayerParameters = expected_parameters[i];

    if (layerParameters.size(0) != expectedLayerParameters.size()) {
      std::cout << "layer #" << i
                << " layerParameters size: " << layerParameters.size(0)
                << " != "
                << " expectedLayerParameters size: "
                << expectedLayerParameters.size() << std::endl;
      ASSERT_TRUE(false);
    }

    for (size_t p = 0; p < layerParameters.size(0); p++) {
      auto tensor = layerParameters[p];
      auto expectedTensor = expectedLayerParameters[p];

      if (!tensor.allclose(expectedTensor, /*rtol=*/1e-3, /*atol=*/5e-4)) {
        std::cout << "layer " << i << ": " << tensor << " != " << expectedTensor
                  << " (parameter " << p << ")" << std::endl;
        ASSERT_TRUE(false);
      }
    }
  }
}

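// Seeds the RNG, builds three Linear layers, applies the initializer under
// test to each weight, and compares the results against the expected baselines.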
void check_initializer_against_baseline(
    std::function<void(torch::Tensor)> initializer,
    std::vector<std::vector<torch::Tensor>> expected) {
  torch::manual_seed(0);

  auto layer1 = torch::nn::Linear(7, 15);
  initializer(layer1->weight);
  layer1->to(torch::kFloat64);

  auto layer2 = torch::nn::Linear(15, 15);
  initializer(layer2->weight);
  layer2->to(torch::kFloat64);

  auto layer3 = torch::nn::Linear(15, 2);
  initializer(layer3->weight);
  layer3->to(torch::kFloat64);

  auto parameters = std::vector<torch::Tensor>{
      layer1->weight,
      layer2->weight,
      layer3->weight,
  };

  check_exact_values(parameters, expected);
}

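// The tests below check the C++ initializers against the baseline values in
// init_baseline.h (expected_parameters::*), i.e. the values the Python
// torch.nn.init implementation produces for the same seed.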
TEST(InitTest, ProducesPyTorchValues_XavierUniform) {
  auto expected = expected_parameters::Xavier_Uniform();
  auto initializer = [](torch::Tensor tensor) {
    torch::nn::init::xavier_uniform_(tensor);
  };
  check_initializer_against_baseline(initializer, expected);
}

TEST(InitTest, ProducesPyTorchValues_XavierNormal) {
  auto expected = expected_parameters::Xavier_Normal();
  auto initializer = [](torch::Tensor tensor) {
    torch::nn::init::xavier_normal_(tensor);
  };
  check_initializer_against_baseline(initializer, expected);
}

TEST(InitTest, ProducesPyTorchValues_KaimingNormal) {
  auto expected = expected_parameters::Kaiming_Normal();
  auto initializer = [](torch::Tensor tensor) {
    torch::nn::init::kaiming_normal_(tensor);
  };
  check_initializer_against_baseline(initializer, expected);
}

TEST(InitTest, ProducesPyTorchValues_KaimingUniform) {
  auto expected = expected_parameters::Kaiming_Uniform();
  auto initializer = [](torch::Tensor tensor) {
    torch::nn::init::kaiming_uniform_(tensor);
  };
  check_initializer_against_baseline(initializer, expected);
}

TEST(InitTest, CanInitializeTensorThatRequiresGrad) {
  auto tensor = torch::empty({3, 4}, torch::requires_grad());
  ASSERT_THROWS_WITH(
      tensor.fill_(1),
      "a leaf Variable that requires grad "
      "has been used in an in-place operation");
  ASSERT_EQ(torch::nn::init::ones_(tensor).sum().item<int32_t>(), 12);
}

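// calculate_gain should match torch.nn.init.calculate_gain:
// tanh -> 5/3, relu -> sqrt(2), and
// leaky_relu -> sqrt(2 / (1 + negative_slope^2)) with the default slope 0.01.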
TEST(InitTest, CalculateGainWithTanh) {
  double gain =
      torch::nn::init::calculate_gain(torch::nn::init::Nonlinearity::Tanh);
  ASSERT_DOUBLE_EQ(gain, 5.0 / 3.0);
}

TEST(InitTest, CalculateGainWithRelu) {
  double gain =
      torch::nn::init::calculate_gain(torch::nn::init::Nonlinearity::ReLU);
  ASSERT_DOUBLE_EQ(gain, std::sqrt(2.0));
}

TEST(InitTest, CalculateGainWithLeakyRelu) {
  double gain =
      torch::nn::init::calculate_gain(torch::nn::init::Nonlinearity::LeakyReLU);
  ASSERT_DOUBLE_EQ(gain, std::sqrt(2.0 / (1 + pow(0.01, 2))));
}
