Skip to content

Commit

Permalink
OpenCL Winograd F(4x4, 3x3)
Browse files Browse the repository at this point in the history
and OpenCL batching support.
  • Loading branch information
Ttl committed Jul 23, 2018
1 parent 952fc0a commit 0b6f6c7
Show file tree
Hide file tree
Showing 7 changed files with 319 additions and 203 deletions.
40 changes: 17 additions & 23 deletions src/CPUPipe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,6 @@
#include "Network.h"
#include "Im2Col.h"

// Square root of 2
#define SQ2 (1.4142135623730951f)

void CPUPipe::initialize(int channels) {
m_input_channels = channels;
}
Expand All @@ -47,8 +44,8 @@ void CPUPipe::winograd_transform_in(const std::vector<float>& in,
const int C) {
constexpr auto W = BOARD_SIZE;
constexpr auto H = BOARD_SIZE;
constexpr auto WTILES = W / WINOGRAD_M + (W % WINOGRAD_M != 0);
constexpr auto P = WTILES * WTILES;
constexpr auto WTILES = WINOGRAD_WTILES;
constexpr auto P = WINOGRAD_P;

constexpr auto Wpad = 2 + WINOGRAD_M * WTILES;

Expand All @@ -75,13 +72,13 @@ void CPUPipe::winograd_transform_in(const std::vector<float>& in,
std::array<std::array<float, WINOGRAD_ALPHA>, WINOGRAD_ALPHA>;
WinogradTile T1, T2;

const auto Bt = std::array<float, WINOGRAD_TILE>\
{ 1., 0., -5./2., 0., 1., 0.,
0., -SQ2, -2., SQ2/2, 1., 0.,
0., SQ2, -2., -SQ2/2., 1., 0.,
0., -SQ2/2., -1./2., SQ2, 1., 0.,
0., SQ2/2., -1./2., -SQ2, 1., 0.,
0., 1., 0., -5./2., 0., 1.};
const auto Bt = std::array<float, WINOGRAD_TILE>
{1.0f, 0.0f, -5.0f/2.0f, 0.0f, 1.0f, 0.0f,
0.0f, -SQ2, -2.0f, SQ2/2.0f, 1.0f, 0.0f,
0.0f, SQ2, -2.0f, -SQ2/2.0f, 1.0f, 0.0f,
0.0f, -SQ2/2.0f, -1.0f/2.0f, SQ2, 1.0f, 0.0f,
0.0f, SQ2/2.0f, -1.0f/2.0f, -SQ2, 1.0f, 0.0f,
0.0f, 1.0f, 0.0f, -5.0f/2.0f, 0.0f, 1.0f};

// Calculates transpose(B).x.B
for (auto i = 0; i < WINOGRAD_ALPHA; i++){
Expand Down Expand Up @@ -120,9 +117,7 @@ void CPUPipe::winograd_sgemm(const std::vector<float>& U,
const std::vector<float>& V,
std::vector<float>& M,
const int C, const int K) {
constexpr auto W = BOARD_SIZE;
constexpr auto WTILES = W / WINOGRAD_M + (W % WINOGRAD_M != 0);
constexpr auto P = WTILES * WTILES;
constexpr auto P = WINOGRAD_P;

for (auto b = 0; b < WINOGRAD_TILE; b++) {
const auto offset_u = b * K * C;
Expand All @@ -144,8 +139,8 @@ void CPUPipe::winograd_transform_out(const std::vector<float>& M,
const int K) {
constexpr auto W = BOARD_SIZE;
constexpr auto H = BOARD_SIZE;
constexpr auto WTILES = W / WINOGRAD_M + (W % WINOGRAD_M != 0);
constexpr auto P = WTILES * WTILES;
constexpr auto WTILES = WINOGRAD_WTILES;
constexpr auto P = WINOGRAD_P;

for (auto k = 0; k < K; k++) {
for (auto block_x = 0; block_x < WTILES; block_x++) {
Expand All @@ -165,10 +160,10 @@ void CPUPipe::winograd_transform_out(const std::vector<float>& M,
}

const auto At = std::array<float, WINOGRAD_ALPHA * WINOGRAD_M>
{ 1., 1., 1., 1., 1., 0.,
0., SQ2/2., -SQ2/2., SQ2, -SQ2, 0.,
0., 1./2., 1./2., 2., 2., 0.,
0., SQ2/4., -SQ2/4., 2.*SQ2, -2.*SQ2, 1.};
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f,
0.0f, SQ2/2.0f, -SQ2/2.0f, SQ2, -SQ2, 0.0f,
0.0f, 1.0f/2.0f, 1.0f/2.0f, 2.0f, 2.0f, 0.0f,
0.0f, SQ2/4.0f, -SQ2/4.0f, 2.0f*SQ2, -2.0f*SQ2, 1.0f};

std::array<std::array<float, WINOGRAD_ALPHA>, WINOGRAD_M> temp;
std::array<std::array<float, WINOGRAD_M>, WINOGRAD_M> o;
Expand Down Expand Up @@ -298,8 +293,7 @@ void CPUPipe::forward(const std::vector<float>& input,
std::vector<float>& output_pol,
std::vector<float>& output_val) {
// Input convolution
constexpr auto WTILES = BOARD_SIZE / WINOGRAD_M + (BOARD_SIZE % WINOGRAD_M != 0);
constexpr auto P = WTILES * WTILES;
constexpr auto P = WINOGRAD_P;
// Calculate output channels
const auto output_channels = m_input_channels;
// input_channels is the maximum number of input channels of any
Expand Down
16 changes: 7 additions & 9 deletions src/Network.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,6 @@
#include "Timing.h"
#include "Utils.h"

// Square root of 2
#define SQ2 (1.4142135623730951f)

namespace x3 = boost::spirit::x3;
using namespace Utils;

Expand Down Expand Up @@ -104,12 +101,13 @@ std::vector<float> Network::winograd_transform_f(const std::vector<float>& f,
// transpose(G.dot(f).dot(G.transpose()))
// U matrix is transposed for better memory layout in SGEMM
auto U = std::vector<float>(WINOGRAD_TILE * outputs * channels);
const auto G = std::array<float, 3 * WINOGRAD_ALPHA>{ 1.0, 0.0, 0.0,
-2./3., -SQ2/3., -1./3.,
-2./3., SQ2/3., -1./3.,
1./6., SQ2/6., 1./3.,
1./6., -SQ2/6., 1./3.,
0.0, 0.0, 1.0};
const auto G = std::array<float, 3 * WINOGRAD_ALPHA>
{ 1.0f, 0.0f, 0.0f,
-2.0f/3.0f, -SQ2/3.0f, -1.0f/3.0f,
-2.0f/3.0f, SQ2/3.0f, -1.0f/3.0f,
1.0f/6.0f, SQ2/6.0f, 1.0f/3.0f,
1.0f/6.0f, -SQ2/6.0f, 1.0f/3.0f,
0.0f, 0.0f, 1.0f};

auto temp = std::array<float, 3 * WINOGRAD_ALPHA>{};

Expand Down
3 changes: 3 additions & 0 deletions src/Network.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@
// Winograd filter transformation changes 3x3 filters to
// ALPHA x ALPHA tiles, where ALPHA = M + 3 - 1.
constexpr auto WINOGRAD_M = 4;
constexpr auto WINOGRAD_ALPHA = WINOGRAD_M + 3 - 1;
// Number of elements in one ALPHA x ALPHA tile.
constexpr auto WINOGRAD_TILE = WINOGRAD_ALPHA * WINOGRAD_ALPHA;

// BOARD_SIZE / WINOGRAD_M, plus one extra tile when the division
// leaves a remainder.
constexpr auto WINOGRAD_WTILES =
    BOARD_SIZE / WINOGRAD_M + ((BOARD_SIZE % WINOGRAD_M != 0) ? 1 : 0);
// WTILES in each of the two board dimensions.
constexpr auto WINOGRAD_P = WINOGRAD_WTILES * WINOGRAD_WTILES;

// Square root of 2
constexpr auto SQ2 = 1.4142135623730951f;

class Network {
public:
Expand Down

0 comments on commit 0b6f6c7

Please sign in to comment.