This repository was archived by the owner on Feb 20, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathckernel_sfpu_dropout.h
More file actions
73 lines (59 loc) · 1.99 KB
/
ckernel_sfpu_dropout.h
File metadata and controls
73 lines (59 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
//
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "ckernel_defs.h"
#include "ckernel.h"
#include "noc_nonblocking_api.h"
#include "sfpi.h"
using namespace sfpi;
namespace ckernel
{
namespace sfpu
{
// Applies dropout to `iterations` consecutive dst_reg positions using raw SFPU
// instructions. Per the pseudo-code in the loop comments, each pass:
//   1. multiplies the loaded value by `scale`,
//   2. draws a pseudorandom number and clears its sign bit,
//   3. replaces the value with zero where rand < probability,
//   4. stores the result back and advances dst_reg.
//
// Template parameters:
//   APPROXIMATION_MODE, ITERATIONS - not referenced anywhere in this body; the
//                                    loop bound is the runtime `iterations`.
// Parameters:
//   iterations  - number of loop passes (dst_reg is incremented once per pass).
//   probability - drop threshold; should be between 0 - INT_MAX (signed),
//                 because it is compared against a random value whose sign
//                 bit has been cleared.
//   scale       - should be the binary (bit-pattern) representation of a
//                 float32.
//
// NOTE(review): the random source is presumably the PRNG seeded through
// _init_dropout_() -- confirm against the caller / hardware docs.
template <bool APPROXIMATION_MODE, int ITERATIONS>
inline void _calculate_dropout_(const int iterations, uint probability, uint scale)
{
// SFPU microcode
FWLOG1("calculate_dropout() -- probability:%x", probability);
FWLOG1("calculate_dropout() -- scale:%x", scale);
// Loop-invariant setup: place the 32-bit `scale` in LREG1 and the 32-bit
// `probability` in LREG2. Each value is passed as two 16-bit halves: the low
// half (value & 0xFFFF) with instr_mod 10, then the high half (value >> 16)
// with instr_mod 8.
// NOTE(review): presumably mod 10 writes the lower 16 bits and mod 8 the upper
// 16 bits of the destination LREG -- confirm against the SFPLOADI ISA entry.
TT_SFPLOADI(p_sfpu::LREG1, 10, scale&0xFFFF);
TT_SFPLOADI(p_sfpu::LREG1, 8, scale>>16);
TT_SFPLOADI(p_sfpu::LREG2, 10, probability&0xFFFF);
TT_SFPLOADI(p_sfpu::LREG2, 8, probability>>16);
// Per the GCC documentation, an unroll count of 0 blocks unrolling of the
// loop that follows.
#pragma GCC unroll 0
for (int d = 0; d < iterations; d++) {
////////////////////////
// Scale samples
// dst_reg[0] = dst_reg[0] * s2vFloat16b(scale);
//
// The current value is loaded into LREG0 and multiplied by LREG1 (scale);
// LCONST_0 is passed as the third operand and the result goes back to LREG0.
// NOTE(review): presumably SFPMUL computes a * b + c with c = 0 here --
// confirm operand roles against the ISA.
///////////////////////
TTI_SFPLOAD(p_sfpu::LREG0, 0, 3, 0);
TTI_SFPMUL(p_sfpu::LREG0, p_sfpu::LREG1, p_sfpu::LCONST_0, p_sfpu::LREG0, 0);
////////////////////////
// Instruction SFPMOV generates a uint32_t pseudorandom number
// when instr_mod1 = 8 and lreg_c = 9.
// Arguments: (imm12_math, lreg_c, lreg_dest, instr_mod1)
// The random value lands in LREG3; SFPSETSGN then unsets its sign bit
// (in place) for easy comparison with probability.
////////////////////////
TTI_SFPMOV(0, 9, p_sfpu::LREG3, 8);
TTI_SFPSETSGN(0, p_sfpu::LREG3, p_sfpu::LREG3, 1);
////////////////////////
// Drop samples
// v_if (rand < probability)
// dst_reg[0] = vConst0;
//
// NOTE(review): presumably SFPIADD with instr_mod 10 combines LREG2
// (probability) with LREG3 (rand) and sets the per-lane condition code, so
// that the following SFPMOV of LCONST_0 into LREG0 only takes effect on the
// lanes selected for dropping, and SFPENCC then clears the condition state so
// the store applies to every lane. The exact sequence is order-sensitive --
// confirm against the ISA before changing it.
///////////////////////
TTI_SFPIADD(0, p_sfpu::LREG2, p_sfpu::LREG3, 10);
TTI_SFPMOV(0, p_sfpu::LCONST_0, p_sfpu::LREG0, 0);
TTI_SFPENCC(0,0,0,0);
// Write the (scaled or zeroed) value back, using the same mode/address
// arguments (3, 0) as the SFPLOAD above, then step to the next dst_reg
// position.
TTI_SFPSTORE(0,0,3,0);
dst_reg++;
}
}
// One-time initialisation hook for the dropout kernel.
//
// Parameters:
//   seed - value forwarded unchanged to init_prng_seed().
//
// NOTE(review): init_prng_seed() is declared outside this file; presumably it
// seeds the pseudorandom generator that _calculate_dropout_() samples from --
// confirm against its definition.
inline void _init_dropout_(const uint seed)
{
    // The whole job of this wrapper is to hand the seed to the PRNG setup.
    init_prng_seed(seed);
}
} // namespace sfpu
} // namespace ckernel