Commit
Summary:
/cc goldsborough

Working on #14582. The corresponding Python implementations are at [pytorch/torch/nn/init.py](https://github.com/pytorch/pytorch/blob/6302e4001ab54b3ddeca2b608d337fe7077e801c/torch/nn/init.py#L261-L327). Here is my initial implementation of Kaiming initialization. I have not been able to figure out how to run tests locally, so I haven't added any yet.

A couple of questions:
- Are the enums defined in the right place? I copied their names from Python, but do you prefer different naming conventions for C++?
- To run tests locally, do I use `python setup.py test`? Can I run just a subset of the tests somehow?
- Should I add my tests at [test/cpp/api/misc.cpp](https://github.com/pytorch/pytorch/blob/master/test/cpp/api/misc.cpp#L47-L54)?

Pull Request resolved: #14718
Differential Revision: D14049159
Pulled By: goldsborough
fbshipit-source-id: 966ac5126875936e69b185b5041f16476ed4cf70
1 parent 5eee067 · commit 1cdcdd7
Showing 7 changed files with 475 additions and 11 deletions.
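For context, here is a minimal sketch of how the new initializers are invoked from C++, mirroring the calls exercised by the tests below (the tensor shape and the single-header include are assumptions for illustration, not part of this diff):

#include <torch/torch.h>

int main() {
  // Allocate an uninitialized weight tensor and fill it in place with
  // Kaiming (He) initialization, as the baseline tests below do.
  auto weight = torch::empty({15, 7});
  torch::nn::init::kaiming_normal_(weight);

  // The recommended scaling factor for a given nonlinearity can also be
  // queried directly.
  double gain =
      torch::nn::init::calculate_gain(torch::nn::init::Nonlinearity::ReLU);
  (void)gain; // sqrt(2.0) for ReLU
}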
@@ -0,0 +1,126 @@
#include <gtest/gtest.h>

#include <torch/nn/init.h>
#include <torch/nn/modules/linear.h>

#include <test/cpp/api/init_baseline.h>
#include <test/cpp/api/support.h>

#include <cmath>
#include <functional>
#include <vector>

// Asserts that every parameter tensor matches the expected values recorded
// from the Python implementation, printing a diagnostic before failing.
void check_exact_values(
    const std::vector<torch::Tensor>& parameters,
    const std::vector<std::vector<torch::Tensor>>& expected_parameters) {
  ASSERT_EQ(parameters.size(), expected_parameters.size());

  for (size_t i = 0; i < parameters.size(); i++) {
    auto layerParameters = parameters[i];
    auto expectedLayerParameters = expected_parameters[i];

    if (layerParameters.size(0) != expectedLayerParameters.size()) {
      std::cout << "layer #" << i
                << " layerParameters size: " << layerParameters.size(0)
                << " != "
                << " expectedLayerParameters size: "
                << expectedLayerParameters.size() << std::endl;
      ASSERT_TRUE(false);
    }

    for (size_t p = 0; p < layerParameters.size(0); p++) {
      auto tensor = layerParameters[p];
      auto expectedTensor = expectedLayerParameters[p];

      if (!tensor.allclose(expectedTensor, /*rtol=*/1e-3, /*atol=*/5e-4)) {
        std::cout << "layer " << i << ": " << tensor << " != " << expectedTensor
                  << " (parameter " << p << ")" << std::endl;
        ASSERT_TRUE(false);
      }
    }
  }
}

// Seeds the global RNG, applies `initializer` in place to the weights of
// three Linear layers of different shapes, and compares the results against
// the recorded baseline values.
void check_initializer_against_baseline(
    std::function<void(torch::Tensor)> initializer,
    std::vector<std::vector<torch::Tensor>> expected) {
  torch::manual_seed(0);

  auto layer1 = torch::nn::Linear(7, 15);
  initializer(layer1->weight);
  layer1->to(torch::kFloat64);

  auto layer2 = torch::nn::Linear(15, 15);
  initializer(layer2->weight);
  layer2->to(torch::kFloat64);

  auto layer3 = torch::nn::Linear(15, 2);
  initializer(layer3->weight);
  layer3->to(torch::kFloat64);

  auto parameters = std::vector<torch::Tensor>{
      layer1->weight,
      layer2->weight,
      layer3->weight,
  };

  check_exact_values(parameters, expected);
}

TEST(InitTest, ProducesPyTorchValues_XavierUniform) {
  auto expected = expected_parameters::Xavier_Uniform();
  auto initializer = [](torch::Tensor tensor) {
    torch::nn::init::xavier_uniform_(tensor);
  };
  check_initializer_against_baseline(initializer, expected);
}

TEST(InitTest, ProducesPyTorchValues_XavierNormal) {
  auto expected = expected_parameters::Xavier_Normal();
  auto initializer = [](torch::Tensor tensor) {
    torch::nn::init::xavier_normal_(tensor);
  };
  check_initializer_against_baseline(initializer, expected);
}

TEST(InitTest, ProducesPyTorchValues_KaimingNormal) {
  auto expected = expected_parameters::Kaiming_Normal();
  auto initializer = [](torch::Tensor tensor) {
    torch::nn::init::kaiming_normal_(tensor);
  };
  check_initializer_against_baseline(initializer, expected);
}

TEST(InitTest, ProducesPyTorchValues_KaimingUniform) {
  auto expected = expected_parameters::Kaiming_Uniform();
  auto initializer = [](torch::Tensor tensor) {
    torch::nn::init::kaiming_uniform_(tensor);
  };
  check_initializer_against_baseline(initializer, expected);
}

TEST(InitTest, CanInitializeTensorThatRequiresGrad) {
  auto tensor = torch::empty({3, 4}, torch::requires_grad());
  // A direct in-place op on a leaf tensor that requires grad throws...
  ASSERT_THROWS_WITH(
      tensor.fill_(1),
      "a leaf Variable that requires grad "
      "has been used in an in-place operation");
  // ...but the init functions succeed, since they suspend gradient
  // tracking internally.
  ASSERT_EQ(torch::nn::init::ones_(tensor).sum().item<int32_t>(), 12);
}

TEST(InitTest, CalculateGainWithTanh) {
  double gain =
      torch::nn::init::calculate_gain(torch::nn::init::Nonlinearity::Tanh);
  ASSERT_DOUBLE_EQ(gain, 5.0 / 3.0);
}

TEST(InitTest, CalculateGainWithRelu) {
  double gain =
      torch::nn::init::calculate_gain(torch::nn::init::Nonlinearity::ReLU);
  ASSERT_DOUBLE_EQ(gain, std::sqrt(2.0));
}

TEST(InitTest, CalculateGainWithLeakyRelu) {
  double gain =
      torch::nn::init::calculate_gain(torch::nn::init::Nonlinearity::LeakyReLU);
  // Uses the default negative slope of 0.01: gain = sqrt(2 / (1 + 0.01^2)).
  ASSERT_DOUBLE_EQ(gain, std::sqrt(2.0 / (1 + std::pow(0.01, 2))));
}