In [1]:
import torch

In [2]:
# This is the code we found in the Facebook Research paper (TODO: link the paper/repository here).

In [3]:
class ScaledTanh(torch.nn.Module):
    """Element-wise ``tanh(a * x)`` with a fixed slope ``a``.

    Args:
        a: multiplicative factor applied to the input before the tanh.
            It is kept as a plain attribute, so it is neither a parameter
            nor a buffer of the module.
    """

    def __init__(self, a=1):
        super().__init__()
        self.a = a

    def forward(self, x):
        """Return ``tanh(self.a * x)`` computed element-wise on ``x``."""
        scaled = torch.mul(x, self.a)
        return torch.tanh(scaled)


class EmbeddingPerceptron(torch.nn.Module):
    """
    Multilayer ReLU perceptron with learnable inputs.

    The module takes no external input. ``forward()`` always feeds the fixed
    index vector ``0 .. sizes[0] - 1`` through an ``Embedding`` layer (the
    learnable inputs), then through ``Linear`` layers separated by ``ReLU``,
    and finally through a ``ScaledTanh``.

    Args:
        sizes: sequence of layer widths. ``sizes[0]`` is the number of
            embedding rows, ``sizes[1]`` the embedding width, and every
            further entry the output width of one ``Linear`` layer.
        multiplier: not used by this implementation; kept so existing
            call sites that pass it keep working.

    The tanh slope is calibrated once at construction time so that the
    largest absolute output of the freshly initialised network is
    ``tanh(1.7159)`` (about 0.937).
    """

    def __init__(self, sizes, multiplier=3):
        super().__init__()
        # A buffer (rather than a plain attribute) follows the module through
        # .to(device) / .cuda(), so forward() never mixes devices.
        # persistent=False keeps it out of state_dict(), so checkpoint keys
        # are the same as before.
        self.register_buffer(
            "inputs", torch.arange(0, sizes[0]).long(), persistent=False
        )

        layers = [torch.nn.Embedding(sizes[0], sizes[1])]
        for i in range(1, len(sizes) - 1):
            layers.append(torch.nn.Linear(sizes[i], sizes[i + 1]))
            # No ReLU after the last Linear: the ScaledTanh added below is
            # the output non-linearity.
            if i < (len(sizes) - 2):
                layers.append(torch.nn.ReLU())

        self.net = torch.nn.Sequential(*layers)

        # Calibration pass: one forward, no autograd graph needed.
        with torch.no_grad():
            peak = self().abs().max().item()

        # Choose the slope so that the largest pre-activation maps to 1.7159.
        # Fall back to a slope of 1 if the network output is identically zero
        # (avoids a ZeroDivisionError).
        a = 1.7159 / peak if peak > 0 else 1.0
        self.net = torch.nn.Sequential(self.net, ScaledTanh(a))

    def forward(self):
        """Return the network output, shape ``(sizes[0], sizes[-1])``."""
        return self.net(self.inputs)

In [4]:
# Build the reference network: 100 embedding rows of width 128, followed by
# Linear layers 128 -> 128, 128 -> 128 and 128 -> 2.
network = EmbeddingPerceptron(sizes=[100, 128, 128, 128, 2])

In [5]:
# Show the module tree to check the layer stack that was built from `sizes`.
network

EmbeddingPerceptron(
  (net): Sequential(
    (0): Sequential(
      (0): Embedding(100, 128)
      (1): Linear(in_features=128, out_features=128, bias=True)
      (2): ReLU()
      (3): Linear(in_features=128, out_features=128, bias=True)
      (4): ReLU()
      (5): Linear(in_features=128, out_features=2, bias=True)
    )
    (1): ScaledTanh()
  )
)

In [6]:
# Smallest output value; the final tanh keeps every output inside (-1, 1).
network().min()

tensor(-0.7127, grad_fn=<MinBackward1>)

In [7]:
# Largest output value. Right after construction the largest |output| is
# tanh(1.7159) ~= 0.937, because the constructor sets the tanh slope to
# 1.7159 / max|pre-activation|.
network().max()

tensor(0.9374, grad_fn=<MaxBackward1>)

In [8]:
# Next, let's check how our own method behaves when it uses ReLU activations.

In [9]:
import os
from pathlib import Path

# Remember the directory the kernel was in the first time this cell ran.
# Anchoring on it makes the cell idempotent: re-running it no longer climbs
# two more directory levels each time.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = Path.cwd()

# Same destination as `%cd ../..` from the starting directory.
# NOTE(review): presumably this is the repository root, needed so that the
# `deeplifting` package can be imported in the next cell -- confirm.
os.chdir((NOTEBOOK_DIR / ".." / "..").resolve())
print(os.getcwd())

/Users/ryandevera/data-science/umn_environments/Deeplifting


In [10]:
from deeplifting.models import DeepliftingSkipMLP
from deeplifting.utils import initialize_vector

In [45]:
# One (low, high) pair per output coordinate.
# NOTE(review): presumably the box the model's scaling layer maps outputs
# into -- confirm against DeepliftingSkipMLP.
bounds = [(-10, 10) for _ in range(2)]

# Build the skip-connection MLP and cast it to float64 in one expression
# (`.to` returns the module itself).
model = DeepliftingSkipMLP(
    input_size=1,
    hidden_sizes=(128, 128),
    output_size=2,
    bounds=bounds,
    activation='relu',
    output_activation='sine',
    agg_function='sum',
    include_bn=True,
    seed=9,
).to(dtype=torch.double)

In [46]:
# Show the module tree of the model.
# NOTE(review): in the repr the first Linear has in_features=10 although
# input_size=1 was passed, and the attribute called `batch_norm` holds a
# LayerNorm -- confirm both are intended.
model

DeepliftingSkipMLP(
  (layers): ModuleList(
    (0): DeepliftingBlock(
      (activation_layer): ReLU()
      (linear): Linear(in_features=10, out_features=128, bias=True)
      (batch_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    )
    (1-2): 2 x DeepliftingBlock(
      (activation_layer): ReLU()
      (linear): Linear(in_features=128, out_features=128, bias=True)
      (batch_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    )
  )
  (output_layer): DeepliftingBlock(
    (activation_layer): Identity()
    (linear): Linear(in_features=128, out_features=2, bias=True)
    (batch_norm): LayerNorm((2,), eps=1e-05, elementwise_affine=True)
  )
  (scaling_layer): DeepliftingScalingBlock()
)

In [47]:
import torch

device = torch.device('cpu')

# One probe row with 5 * 2 = 10 features.
# The values come from a dedicated, seeded generator so this cell produces
# the same tensor on every run, independent of the global RNG state and of
# the order in which cells were executed.
inputs = torch.randn(1, 5 * 2, generator=torch.Generator().manual_seed(0))
# Match the model's dtype (it was cast to float64) and the chosen device.
inputs = inputs.to(device=device, dtype=torch.double)

In [48]:
# Average the model output over axis 0.
# NOTE(review): `inputs` has a single row, so this presumably only drops the
# leading dimension -- confirm the model returns one row per input row.
model(inputs).mean(axis=0)

tensor([-0.4085, -1.2246], dtype=torch.float64, grad_fn=<MeanBackward1>)

In [49]:
# Sum the model output over axis 0 and apply sin element-wise.
# NOTE(review): the model was already built with output_activation='sine'
# and agg_function='sum'; confirm this is a manual comparison and not an
# unintended second application.
torch.sin(model(inputs).sum(axis=0))

tensor([-0.3972, -0.9407], dtype=torch.float64, grad_fn=<SinBackward0>)

In [16]:
# Fresh 100 -> 100 Linear layer, used below to inspect weight-initialisation ranges.
layer1 = torch.nn.Linear(100, 100)

In [17]:
# Weight range under PyTorch's default Linear initialisation, which samples
# from U(-1/sqrt(in_features), 1/sqrt(in_features)) = U(-0.1, 0.1) here.
layer1.weight.min(), layer1.weight.max()

(tensor(-0.1000, grad_fn=<MinBackward1>),
 tensor(0.1000, grad_fn=<MaxBackward1>))

In [18]:
# Recommended initialisation gain for ReLU, which is sqrt(2).
# `gain` is only displayed; no later cell shown here reads it.
gain = torch.nn.init.calculate_gain('relu')
gain

1.4142135623730951

In [19]:
# Re-initialise layer1's weights in place (trailing underscore) with Kaiming
# uniform init. With fan_in = 100 and the ReLU gain sqrt(2), the sampling
# bound is sqrt(2) * sqrt(3 / 100) ~= 0.2449.
torch.nn.init.kaiming_uniform_(layer1.weight, mode='fan_in', nonlinearity='relu')

Parameter containing:
tensor([[-0.2317,  0.1000,  0.0246,  ..., -0.0946,  0.2201, -0.1472],
        [ 0.2081, -0.0988, -0.0344,  ...,  0.1636,  0.1563, -0.1362],
        [ 0.2400,  0.0476,  0.0263,  ...,  0.1957, -0.1713, -0.1677],
        ...,
        [-0.0692, -0.2448, -0.1584,  ..., -0.1228,  0.1781,  0.0623],
        [ 0.1768, -0.0973,  0.0654,  ..., -0.0235,  0.2174,  0.0276],
        [ 0.2010,  0.0307, -0.1672,  ..., -0.2039, -0.1957, -0.0135]],
       requires_grad=True)

In [20]:
# Weight range of layer1 after the in-place kaiming_uniform_ call; expected to
# stay within +/- sqrt(6 / 100) ~= 0.2449.
layer1.weight.min(), layer1.weight.max()

(tensor(-0.2449, grad_fn=<MinBackward1>),
 tensor(0.2449, grad_fn=<MaxBackward1>))

In [None]:
# FIXME: this import looks truncated -- `pre` is presumably the start of a
# longer name in sklearn.metrics. The cell was never executed (In [None]) and
# nothing shown in this notebook uses sklearn; complete the name or delete the cell.
from sklearn.metrics import pre