Merge pull request #361 from sony/feature/20211015-fix-dropout-output-mask

Remove the argument "output_mask" from Dropout
TomonobuTsujikawa committed Dec 17, 2021
2 parents b08ba11 + 1b09606 commit 90206c2
Showing 2 changed files with 35 additions and 16 deletions.
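For context before the diffs: DropoutCuda implements inverted dropout, i.e. a per-element random value is binarized against the drop probability p and the kept activations are rescaled so the expected output equals the input. A minimal standalone sketch of that arithmetic (plain C++, not the NNabla API; the assumption scale = 1 / (1 - p) follows the usual inverted-dropout convention):

#include <cstddef>
#include <cstdlib>
#include <vector>

// Illustration only (not NNabla code): inverted dropout as computed by the
// kernels in the diff below. The mask is binarized once against p; "scale"
// is assumed to be 1 / (1 - p) so that E[y] == x.
void dropout_forward_sketch(const std::vector<float> &x, std::vector<float> &y,
                            std::vector<float> &mask, float p) {
  const float scale = 1.0f / (1.0f - p);
  y.resize(x.size());
  mask.resize(x.size());
  for (std::size_t s = 0; s < x.size(); ++s) {
    const float u = static_cast<float>(std::rand()) / RAND_MAX; // uniform in [0, 1]
    mask[s] = (u > p) ? 1.0f : 0.0f; // keep with probability 1 - p
    y[s] = x[s] * mask[s] * scale;
  }
}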
8 changes: 5 additions & 3 deletions include/nbla/cuda/function/dropout.hpp
@@ -28,9 +28,8 @@ template <typename T> class DropoutCuda : public Dropout<T> {
 public:
   typedef typename CudaType<T>::type Tc;
 
-  explicit DropoutCuda(const Context &ctx, double p, int seed = -1,
-                       bool output_mask = false)
-      : Dropout<T>(ctx, T(p), seed, output_mask) {
+  explicit DropoutCuda(const Context &ctx, double p, int seed = -1)
+      : Dropout<T>(ctx, T(p), seed) {
     cuda_set_device(std::stoi(ctx.device_id));
     NBLA_CHECK(this->p_ >= 0., error_code::value,
                "p must be between 0.0 and 1.0");
@@ -55,11 +54,14 @@ template <typename T> class DropoutCuda : public Dropout<T> {
 
 protected:
   curandGenerator_t curand_generator_;
+  bool store_mask_for_recompute_ = false;
   virtual void setup_impl(const Variables &inputs, const Variables &outputs);
   virtual void forward_impl(const Variables &inputs, const Variables &outputs);
   virtual void backward_impl(const Variables &inputs, const Variables &outputs,
                              const vector<bool> &propagate_down,
                              const vector<bool> &accum);
+  virtual void setup_recompute_impl(const Variables &inputs,
+                                    const Variables &outputs);
   virtual void recompute_impl(const Variables &inputs,
                               const Variables &outputs);
 
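The new store_mask_for_recompute_ flag and the setup_recompute_impl/recompute_impl overrides added above exist because recomputation must reproduce the original forward output exactly, so the random mask is stored and reused rather than re-drawn. A hypothetical host-side sketch of that contract (illustrative names only, not the NNabla recompute API):

#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical sketch: recomputation reuses the mask saved by the original
// forward pass; drawing a fresh random mask here would change the output.
std::vector<float> dropout_recompute_sketch(const std::vector<float> &x,
                                            const std::vector<float> &stored_mask,
                                            float scale /* assumed 1 / (1 - p) */) {
  assert(stored_mask.size() == x.size() && "forward must have stored the mask");
  std::vector<float> y(x.size());
  for (std::size_t s = 0; s < x.size(); ++s)
    y[s] = x[s] * stored_mask[s] * scale; // mask already binarized in forward
  return y;
}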
43 changes: 30 additions & 13 deletions src/nbla/cuda/function/generic/dropout.cu
@@ -31,6 +31,16 @@ __global__ void kernel_dropout_forward(const int size, const float scale,
   }
 }
 
+template <typename T>
+__global__ void kernel_dropout_recompute(const int size, const float scale,
+                                         const float p, const T *x, T *y,
+                                         const float *m) {
+  NBLA_CUDA_KERNEL_LOOP(s, size) {
+    // This operation was already done in forward: m[s] = (m[s] > p) ? 1 : 0;
+    y[s] = x[s] * m[s] * scale;
+  }
+}
+
 template <typename T, bool accum>
 __global__ void kernel_dropout_backward(const int size, const float scale,
                                         const T *dy, const float *m, T *dx) {
@@ -42,12 +52,13 @@ __global__ void kernel_dropout_backward(const int size, const float scale,
 template <typename T>
 void DropoutCuda<T>::setup_impl(const Variables &inputs,
                                 const Variables &outputs) {
-  outputs[0]->reshape(inputs[0]->shape(), true);
-  if (this->output_mask_) {
-    outputs[1]->reshape(inputs[0]->shape(), true);
-  } else {
-    this->mask_.reshape(inputs[0]->shape(), true);
-  }
+  Dropout<T>::setup_impl(inputs, outputs);
+}
+
+template <typename T>
+void DropoutCuda<T>::setup_recompute_impl(const Variables &inputs,
+                                          const Variables &outputs) {
+  store_mask_for_recompute_ = true;
 }
 
 template <class T>
@@ -56,8 +67,8 @@ void DropoutCuda<T>::forward_impl(const Variables &inputs,
   cuda_set_device(std::stoi(this->ctx_.device_id));
   const Tc *x = inputs[0]->get_data_pointer<Tc>(this->ctx_);
   Tc *y = outputs[0]->cast_data_and_get_pointer<Tc>(this->ctx_, true);
-  Variable &mask = this->output_mask_ ? *outputs[1] : this->mask_;
-  float *m = mask.cast_data_and_get_pointer<float>(this->ctx_, true);
+  VariablePtr mask = this->mask_;
+  float *m = mask->cast_data_and_get_pointer<float>(this->ctx_, true);
   curandGenerator_t &gen =
       this->seed_ == -1 ? SingletonManager::get<Cuda>()->curand_generator()
                         : curand_generator_;
@@ -69,12 +80,16 @@
 template <class T>
 void DropoutCuda<T>::recompute_impl(const Variables &inputs,
                                     const Variables &outputs) {
+  NBLA_CHECK(this->mask_->data()->array()->get_num_arrays(),
+             error_code::unclassified,
+             "The mask of Dropout must be stored in mask_ for recomputation. "
+             "Please report this error to the NNabla developer team.");
   cuda_set_device(std::stoi(this->ctx_.device_id));
   const Tc *x = inputs[0]->get_data_pointer<Tc>(this->ctx_);
   Tc *y = outputs[0]->cast_data_and_get_pointer<Tc>(this->ctx_, true);
-  Variable &mask = this->output_mask_ ? *outputs[1] : this->mask_;
-  float *m = mask.cast_data_and_get_pointer<float>(this->ctx_, true);
-  NBLA_CUDA_LAUNCH_KERNEL_SIMPLE(kernel_dropout_forward, inputs[0]->size(),
+  VariablePtr mask = this->mask_;
+  const float *m = mask->get_data_pointer<float>(this->ctx_);
+  NBLA_CUDA_LAUNCH_KERNEL_SIMPLE(kernel_dropout_recompute, inputs[0]->size(),
                                  this->scale_, this->p_, x, y, m);
 }
 
@@ -89,14 +104,16 @@ void DropoutCuda<T>::backward_impl(const Variables &inputs,
   cuda_set_device(std::stoi(this->ctx_.device_id));
   Tc *dx = inputs[0]->cast_grad_and_get_pointer<Tc>(this->ctx_, !accum[0]);
   const Tc *dy = outputs[0]->get_grad_pointer<Tc>(this->ctx_);
-  Variable &mask = this->output_mask_ ? *outputs[1] : this->mask_;
-  const float *m = mask.get_data_pointer<float>(this->ctx_);
+  VariablePtr mask = this->mask_;
+  const float *m = mask->get_data_pointer<float>(this->ctx_);
   if (accum[0]) {
     NBLA_CUDA_LAUNCH_KERNEL_SIMPLE((kernel_dropout_backward<Tc, true>),
                                    inputs[0]->size(), this->scale_, dy, m, dx);
   } else {
     NBLA_CUDA_LAUNCH_KERNEL_SIMPLE((kernel_dropout_backward<Tc, false>),
                                    inputs[0]->size(), this->scale_, dy, m, dx);
   }
+
+  this->clear_buffer();
 }
 }
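For completeness, the backward path (whose kernel is not changed by this commit) applies the same stored mask to the incoming gradient, presumably dx = dy * mask * scale, optionally accumulating into an existing gradient buffer as the kernel's accum template flag suggests. A standalone sketch under those assumptions:

#include <cstddef>
#include <vector>

// Standalone sketch of the dropout gradient rule assumed above:
// dx = dy * mask * scale, optionally accumulated into dx.
void dropout_backward_sketch(const std::vector<float> &dy,
                             const std::vector<float> &mask,
                             std::vector<float> &dx, float scale, bool accum) {
  dx.resize(dy.size());
  for (std::size_t s = 0; s < dy.size(); ++s) {
    const float g = dy[s] * mask[s] * scale;
    dx[s] = accum ? dx[s] + g : g;
  }
}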
