mlp.py (forked from microsoft/dpu-utils)
"""MLP layer."""
import sys
from typing import Callable, List, Optional, Union

import tensorflow as tf


class MLP(tf.keras.layers.Layer):
    def __init__(
        self,
        out_size: int,
        hidden_layers: Union[List[int], int] = 1,
        use_biases: bool = False,
        activation_fun: Optional[Callable[[tf.Tensor], tf.Tensor]] = tf.nn.relu,
        dropout_rate: float = 0.0,
        name: str = "MLP",
    ):
        """
        Create new MLP with given number of hidden layers.

        Arguments:
            out_size: Dimensionality of the output.
            hidden_layers: Either an integer determining the number of hidden layers, which will
                have out_size units each; or a list of integers whose length determines the number
                of hidden layers and whose entries give the number of units in each layer.
            use_biases: Flag indicating use of bias in the fully connected layers.
            activation_fun: Activation function applied between hidden layers (NB: the output of
                the MLP is always the direct result of a linear transformation).
            dropout_rate: Dropout applied to the inputs of each MLP layer.
            name: Name of the MLP, used in the names of created variables.
        """
        super().__init__()
        if isinstance(hidden_layers, int):
            if out_size == 1:
                print(
                    f"W: In {name}, was asked to use {hidden_layers} layers of size 1, which is most likely wrong."
                    f" Switching to {hidden_layers} layers of size 32; to get hidden layers of size 1,"
                    f" use hidden_layers=[1,...,1] explicitly.",
                    file=sys.stderr,
                )
                self._hidden_layer_sizes = [32] * hidden_layers
            else:
                self._hidden_layer_sizes = [out_size] * hidden_layers
        else:
            self._hidden_layer_sizes = hidden_layers

        if len(self._hidden_layer_sizes) > 1:
            assert (
                activation_fun is not None
            ), "Multiple linear layers without an activation"

        self._out_size = out_size
        self._use_biases = use_biases
        self._activation_fun = activation_fun
        self._dropout_rate = dropout_rate
        self._layers = []  # type: List[tf.keras.layers.Dense]
        self._name = name

    def build(self, input_shape):
        # Eagerly build one Dense layer per requested hidden size, followed by a final linear layer.
        last_shape_dim = input_shape[-1]
        for hidden_layer_idx, hidden_layer_size in enumerate(self._hidden_layer_sizes):
            with tf.name_scope(f"{self._name}_dense_layer_{hidden_layer_idx}"):
                self._layers.append(
                    tf.keras.layers.Dense(
                        units=hidden_layer_size,
                        use_bias=self._use_biases,
                        activation=self._activation_fun,
                        name=f"{self._name}_dense_layer_{hidden_layer_idx}",
                    )
                )
                self._layers[-1].build(tf.TensorShape(input_shape[:-1] + [last_shape_dim]))
            last_shape_dim = hidden_layer_size

        # Output layer:
        with tf.name_scope(f"{self._name}_final_layer"):
            self._layers.append(
                tf.keras.layers.Dense(
                    units=self._out_size,
                    use_bias=self._use_biases,
                    name=f"{self._name}_final_layer",
                )
            )
            self._layers[-1].build(tf.TensorShape(input_shape[:-1] + [last_shape_dim]))
        super().build(input_shape)

    @tf.function(experimental_relax_shapes=True)
    def call(self, input: tf.Tensor, training: bool) -> tf.Tensor:
        activations = input
        for layer in self._layers[:-1]:
            # Dropout is applied to the inputs of each hidden layer, but only during training.
            if training:
                activations = tf.nn.dropout(activations, rate=self._dropout_rate)
            activations = layer(activations)
        # The output is the direct result of the final linear layer (no activation).
        return self._layers[-1](activations)
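

# A minimal usage sketch (assuming TensorFlow 2.x; all sizes below are illustrative).
# Passing hidden_layers as an int creates that many hidden layers of out_size units each;
# passing a list such as [64, 32] sets each hidden layer's size explicitly.
if __name__ == "__main__":
    mlp = MLP(out_size=16, hidden_layers=[64, 32], dropout_rate=0.1)
    inputs = tf.random.normal(shape=(8, 128))  # batch of 8 feature vectors of dimension 128
    outputs = mlp(inputs, training=False)  # dropout is only applied when training=True
    print(outputs.shape)  # expected: (8, 16)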