Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add CPU only option for Allie by reusing the BLAS backend from Leela.
- Loading branch information
Showing
26 changed files
with
1,778 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
DEFINES += USE_OPENBLAS | ||
LIBS += -lopenblas |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
The files in this directory comprise the BLAS backend of Lc0. | ||
|
||
## License | ||
|
||
Leela Chess is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
|
||
Leela Chess is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
|
||
You should have received a copy of the GNU General Public License | ||
along with Leela Chess. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
**The source files of this directory are not covered by any additional | ||
permission.** | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
/* | ||
This file is part of Leela Chess Zero. | ||
Copyright (C) 2018 The LCZero Authors | ||
Leela Chess is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
Leela Chess is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with Leela Chess. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#pragma once | ||
|
||
// BLAS vendor selection.
//
// Exactly one branch below is taken, in this order of precedence:
//   1. USE_MKL       -> Intel MKL header.
//   2. USE_OPENBLAS  -> generic CBLAS header plus a few OpenBLAS-only symbols.
//   3. __APPLE__     -> Apple's Accelerate framework; USE_ACCELERATE is defined
//                       so other code can test for it.
// When none of the three macros is defined this header includes nothing.

#if defined(USE_MKL)

#include <mkl.h>

#elif defined(USE_OPENBLAS)

#include <cblas.h>

// OpenBLAS-specific routines, declared by hand with C linkage.
extern "C" {
int openblas_get_num_procs(void);
void openblas_set_num_threads(int num_threads);
char* openblas_get_corename(void);
char* openblas_get_config(void);
}

#elif defined(__APPLE__)

#include <Accelerate/Accelerate.h>
#define USE_ACCELERATE

#endif  // BLAS vendor selection
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
/* | ||
This file is part of Leela Chess Zero. | ||
Copyright (C) 2018 The LCZero Authors | ||
Leela Chess is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
Leela Chess is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with Leela Chess. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#include "neural/blas/convolution1.h" | ||
#include "neural/blas/blas.h" | ||
|
||
namespace lczero { | ||
|
||
void Convolution1::Forward(const size_t batch_size, const size_t input_channels, | ||
const size_t output_channels, const float* input, | ||
const float* weights, float* output) { | ||
for (size_t i = 0; i < batch_size; i++) { | ||
// C←αAB + βC | ||
// M Number of rows in matrices A and C. | ||
// N Number of columns in matrices B and C. | ||
// K Number of columns in matrix A; number of rows in matrix B. | ||
// lda The size of the first dimension of matrix A; if you are | ||
// passing a matrix A[m][n], the value should be m. | ||
// cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B, | ||
// ldb, beta, C, N); | ||
|
||
// C A B | ||
// | ||
// outputs := weights x input | ||
// | ||
// cols: kSquares (N) input_channels (K) kSquares(N) | ||
// | ||
// rows: output_channels (M) output_channels (M) input_channels (K) | ||
|
||
const float* batch_input = input + i * kSquares * input_channels; | ||
float* batch_output = output + i * kSquares * output_channels; | ||
|
||
cblas_sgemm(CblasRowMajor, // Row major formar | ||
CblasNoTrans, // A not transposed | ||
CblasNoTrans, // B not transposed | ||
(int)output_channels, // M | ||
kSquares, // N | ||
(int)input_channels, // K | ||
1.0f, // Alpha | ||
weights, // A | ||
(int)input_channels, // lda, leading rank of A | ||
batch_input, // B | ||
kSquares, // ldb, leading rank of B | ||
0.0f, // beta | ||
batch_output, // C | ||
kSquares); // ldc, leading rank of B | ||
} | ||
} | ||
|
||
} // namespace lczero |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
/* | ||
This file is part of Leela Chess Zero. | ||
Copyright (C) 2018 The LCZero Authors | ||
Leela Chess is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
Leela Chess is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with Leela Chess. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <cstddef> | ||
#include <vector> | ||
|
||
namespace lczero { | ||
|
||
// 1x1 convolution over a kWidth x kHeight board.
//
// The class only groups a static function and its constants; it cannot be
// instantiated.
class Convolution1 {
  // Board geometry used to size each sample.
  static constexpr int kWidth = 8;
  static constexpr int kHeight = 8;
  static constexpr int kSquares = kWidth * kHeight;

 public:
  Convolution1() = delete;

  // Runs the convolution for `batch_size` samples.
  //
  // `input`, `weights` and `output` are raw float buffers supplied by the
  // caller; `input_channels` and `output_channels` give the channel counts
  // on each side of the layer.
  static void Forward(const size_t batch_size, const size_t input_channels,
                      const size_t output_channels, const float* input,
                      const float* weights, float* output);
};
} // namespace lczero |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
/* | ||
This file is part of Leela Chess Zero. | ||
Copyright (C) 2018 The LCZero Authors | ||
Leela Chess is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
Leela Chess is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with Leela Chess. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#include "neural/blas/fully_connected_layer.h" | ||
#include "neural/blas/blas.h" | ||
|
||
#include <algorithm> | ||
#include <cassert> | ||
#include <cmath> | ||
|
||
namespace lczero { | ||
|
||
void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size, | ||
const size_t output_size, | ||
const float* inputs, const float* weights, | ||
const float* biases, bool apply_relu, | ||
float* outputs) { | ||
if (batch_size == 1) { | ||
// Just a matrix-vector multiplication | ||
// | ||
// C A B | ||
// | ||
// outputs := weights x inputs | ||
// | ||
// cols: 1 input_size 1 | ||
// | ||
// rows output_size output_size input_size | ||
// | ||
|
||
cblas_sgemv(CblasRowMajor, CblasNoTrans, | ||
// M K | ||
(int)output_size, (int)input_size, 1.0f, weights, | ||
(int)input_size, inputs, 1, 0.0f, outputs, 1); | ||
} else { | ||
// more columns, matrix-matrix multiplication | ||
// | ||
// C A B | ||
// | ||
// outputs := weights x inputs | ||
// | ||
// cols: batch_size (N) input_size (K) batch_size (N) | ||
// | ||
// rows output_size (M) output_size (M) input_size (K) | ||
// | ||
|
||
// C←αAB + βC | ||
// M Number of rows in matrices A and C. | ||
// N Number of columns in matrices B and C. | ||
// K Number of columns in matrix A; number of rows in matrix B. | ||
// lda The size of the first dimension of matrix A; if you are | ||
// passing a matrix A[m][n], the value should be m. | ||
// cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B, | ||
// ldb, beta, C, N); | ||
|
||
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans, | ||
(int)output_size, // M | ||
(int)batch_size, // N | ||
(int)input_size, // K | ||
1.0f, // alpha | ||
weights, // A | ||
(int)input_size, // lda, leading rank of A | ||
inputs, // B | ||
(int)input_size, // ldb, leading rank of B | ||
0.0f, // beta | ||
outputs, // C | ||
(int)output_size); // ldc, leading rank of C | ||
} | ||
if (apply_relu) { | ||
for (size_t i = 0; i < batch_size; i++) { | ||
float* batch_outputs = outputs + i * output_size; | ||
for (size_t o = 0; o < output_size; o++) { | ||
float val = biases[o] + batch_outputs[o]; | ||
batch_outputs[o] = val >= 0 ? val : 0; | ||
} | ||
} | ||
} else { | ||
for (size_t i = 0; i < batch_size; i++) { | ||
float* batch_outputs = outputs + i * output_size; | ||
for (size_t o = 0; o < output_size; o++) { | ||
batch_outputs[o] += biases[o]; | ||
} | ||
} | ||
} | ||
} | ||
|
||
float FullyConnectedLayer::Forward0D(const size_t size, const float* x, | ||
const float* y) { | ||
// A scalar product, also known as a dot-product. | ||
// float cblas_sdot(const int N, const float *X, const int incX, const float | ||
// *Y, | ||
// const int incY); | ||
return cblas_sdot((int)size, x, 1, y, 1); | ||
} | ||
|
||
} // namespace lczero |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
/* | ||
This file is part of Leela Chess Zero. | ||
Copyright (C) 2018 The LCZero Authors | ||
Leela Chess is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
Leela Chess is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with Leela Chess. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <cstddef> | ||
#include <vector> | ||
|
||
namespace lczero { | ||
|
||
// Fully connected (dense) layer helpers.
//
// Only static functions are provided; the class cannot be instantiated.
class FullyConnectedLayer {
 public:
  FullyConnectedLayer() = delete;

  // Batched forward inference mapping vectors of `input_size` floats to
  // vectors of `output_size` floats. `biases` is added to each result and,
  // when `apply_relu` is true, a ReLU is applied afterwards. Results are
  // written to `output`.
  static void Forward1D(const size_t batch_size, const size_t input_size,
                        const size_t output_size, const float* input,
                        const float* weights, const float* biases,
                        bool apply_relu, float* output);

  // Non-batched forward inference reducing a vector of `input_size` floats
  // to a single scalar.
  static float Forward0D(const size_t input_size, const float* input,
                         const float* weights);
};
|
||
} // namespace lczero |
Oops, something went wrong.