# Build the Banana network

First download ex6.pdf (last week's exercise); we will now build the banana model from it using PyTorch.

Ex2 (3) computes the outputs of the network for the first text from Exercise 1. In order to make the computation by
hand feasible, use a smaller network with the following parameters:

In [86]:
import torch

# Layer 1 parameters: a 3 x 4 weight matrix and one bias entry per row.
w1 = torch.Tensor([
    [0, 1, -2, 1],
    [2, 3, 0, -1],
    [1, 0, -3, 0],
])
b1 = torch.Tensor([0, 0, 0])

# Layer 2 parameters: a 2 x 3 weight matrix and one bias entry per row.
w2 = torch.Tensor([
    [0, -2, 1],
    [2, 0, -1],
])
b2 = torch.Tensor([1, 1])

# Layer 3 parameters: w3 is a flat vector of length 2 (not a 1 x 2 matrix).
w3 = torch.Tensor([-1, 1])
b3 = torch.Tensor([1])




**Hint: How to initialize the weights?**

Example code:

In [87]:
import torch
import torch.nn as nn

class SimpleExample(nn.Module):
    """Single linear layer (4 inputs -> 2 outputs) with hand-set parameters.

    Demonstrates how to overwrite the randomly initialised weight and bias
    of an ``nn.Linear`` with fixed values.
    """

    def __init__(self):
        super().__init__()
        self.nn = nn.Linear(in_features=4, out_features=2, bias=True)

        # Fixed values; the weight has one row per output feature.
        fixed_weight = torch.Tensor([[1, 2, 3, 4], [10, 20, 30, 40]])
        fixed_bias = torch.Tensor([42, 84])

        # Copy in place so the tensors stay registered as the layer's
        # parameters; only their contents are replaced.
        for parameter, values in (
            (self.nn.weight, fixed_weight),
            (self.nn.bias, fixed_bias),
        ):
            parameter.data.copy_(values)

# Build the example module and print each registered parameter together
# with its qualified name.
mdl = SimpleExample()
for entry in mdl.named_parameters():
    print(*entry)  # entry is a (name, parameter) pair

nn.weight Parameter containing:
tensor([[ 1.,  2.,  3.,  4.],
        [10., 20., 30., 40.]], requires_grad=True)
nn.bias Parameter containing:
tensor([42., 84.], requires_grad=True)


Requirements:
- Your banana network should have the name "Banana".
- The Banana network has three layers.
- For the first two layers, we use ReLU. For the last layer, we use Sigmoid.

In [88]:
import torch
import torch.nn as nn

# Given parameters for the Banana network.
# Layer 1: a 3 x 4 weight matrix and one bias entry per row.
w1 = torch.Tensor([
    [0, 1, -2, 1],
    [2, 3, 0, -1],
    [1, 0, -3, 0],
])
b1 = torch.Tensor([0, 0, 0])

# Layer 2: a 2 x 3 weight matrix and one bias entry per row.
w2 = torch.Tensor([
    [0, -2, 1],
    [2, 0, -1],
])
b2 = torch.Tensor([1, 1])

# Layer 3: w3 is a flat vector of length 2 (not a 1 x 2 matrix).
w3 = torch.Tensor([-1, 1])
b3 = torch.Tensor([1])

class Banana(nn.Module):
    """Three-layer feed-forward network (4 -> 3 -> 2 -> 1).

    The first two linear layers are followed by ReLU and the last one by
    Sigmoid. All weights and biases are overwritten with the module-level
    tensors ``w1``/``b1``, ``w2``/``b2`` and ``w3``/``b3``.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(in_features=4, out_features=3)
        self.ac1 = nn.ReLU()
        self.layer2 = nn.Linear(in_features=3, out_features=2)
        self.ac2 = nn.ReLU()
        self.layer3 = nn.Linear(in_features=2, out_features=1)
        self.ac3 = nn.Sigmoid()

        # Replace the random initialisation with the given values. The copy
        # is done in place so the tensors remain registered parameters.
        given_values = (
            (self.layer1, w1, b1),
            (self.layer2, w2, b2),
            (self.layer3, w3, b3),
        )
        for layer, weight, bias in given_values:
            layer.weight.data.copy_(weight)
            layer.bias.data.copy_(bias)

    def forward(self, x):
        """Apply the three linear layers, each followed by its activation.

        Args:
            x: input tensor; ``layer1`` expects 4 input features.

        Returns:
            The Sigmoid output of the last layer.
        """
        hidden = self.ac1(self.layer1(x))
        hidden = self.ac2(self.layer2(hidden))
        return self.ac3(self.layer3(hidden))

In [89]:
# After finishing your impl, run the following code, 
# check the parameter tensors are the same as the given weights
# Instantiate the network and print each registered parameter together with
# its qualified name, for comparison against the given weights.
banana = Banana()
for entry in banana.named_parameters():
    print(*entry)  # entry is a (name, parameter) pair

layer1.weight Parameter containing:
tensor([[ 0.,  1., -2.,  1.],
        [ 2.,  3.,  0., -1.],
        [ 1.,  0., -3.,  0.]], requires_grad=True)
layer1.bias Parameter containing:
tensor([0., 0., 0.], requires_grad=True)
layer2.weight Parameter containing:
tensor([[ 0., -2.,  1.],
        [ 2.,  0., -1.]], requires_grad=True)
layer2.bias Parameter containing:
tensor([1., 1.], requires_grad=True)
layer3.weight Parameter containing:
tensor([[-1.,  1.]], requires_grad=True)
layer3.bias Parameter containing:
tensor([1.], requires_grad=True)


Q: 
- Use bag-of-words to represent text1, text2, and text3
- Input text1, text2, and text3 to your banana network

In [90]:
# complete the code
# Bag-of-words vectors for the three texts, four entries each.
text1, text2, text3 = (
    torch.Tensor(counts)
    for counts in (
        [0, 0, 1, 0],
        [1, 1, 0, 1],
        [0, 0, 3, 0],
    )
)

In [91]:
# get the out1, out2, out3. e.g., out1 = banana(text1)
# Run the three texts through the network in order, then print one result
# per line.
out1, out2, out3 = (banana(text) for text in (text1, text2, text3))
print(out1, out2, out3, sep="\n")

tensor([0.7311], grad_fn=<SigmoidBackward0>)
tensor([0.9933], grad_fn=<SigmoidBackward0>)
tensor([0.7311], grad_fn=<SigmoidBackward0>)


# Multi-class Classification 

Build a new neural net named "BananaCLS". 
- It has three layers, activation functions for the three layers are: 
    1. ReLU
    2. Sigmoid
    3. Softmax `nn.Softmax()`
- For each layer, the weights should be initialized using the given weights

In [92]:
# Given parameters for the multi-class network.
# Layer 1: a 3 x 4 weight matrix and one bias entry per row.
w11 = torch.Tensor([
    [0, 1, -2, 1],
    [2, 3, 0, -1],
    [1, 0, -3, 0],
])
b11 = torch.Tensor([0, 0, 0])

# Layer 2: a 2 x 3 weight matrix and one bias entry per row.
w22 = torch.Tensor([
    [0, -2, 1],
    [2, 0, -1],
])
b22 = torch.Tensor([1, 1])

# Layer 3: a 3 x 2 weight matrix and one bias entry per row.
w33 = torch.Tensor([
    [1, 1],
    [-1, 2],
    [0, -1],
])
b33 = torch.Tensor([0, 0, 0])

In [93]:
# Build your BananaCLS network
class BananaCLS(nn.Module):
    """Three-layer multi-class network (4 -> 3 -> 2 -> 3).

    Activations are ReLU, Sigmoid and Softmax, in that order. All weights
    and biases are overwritten with the module-level tensors ``w11``/``b11``,
    ``w22``/``b22`` and ``w33``/``b33``.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(in_features=4, out_features=3)
        self.ac1 = nn.ReLU()
        self.layer2 = nn.Linear(in_features=3, out_features=2)
        self.ac2 = nn.Sigmoid()
        self.layer3 = nn.Linear(in_features=2, out_features=3)
        # Normalise over the last dimension, which holds the class scores
        # produced by layer3. Passing dim explicitly avoids the deprecated
        # implicit-dimension choice (and its warning); for 1-D and 2-D inputs
        # the result is the same as before.
        self.ac3 = nn.Softmax(dim=-1)

        # Replace the random initialisation with the given values. The copy
        # is done in place so the tensors remain registered parameters.
        self.layer1.weight.data.copy_(w11)
        self.layer1.bias.data.copy_(b11)
        self.layer2.weight.data.copy_(w22)
        self.layer2.bias.data.copy_(b22)
        self.layer3.weight.data.copy_(w33)
        self.layer3.bias.data.copy_(b33)

    def forward(self, x):
        """Apply the three linear layers, each followed by its activation.

        Args:
            x: input tensor; ``layer1`` expects 4 input features.

        Returns:
            The Softmax output of the last layer; entries along the last
            dimension sum to 1.
        """
        nn1out = self.ac1(self.layer1(x))
        nn2out = self.ac2(self.layer2(nn1out))
        nn3out = self.ac3(self.layer3(nn2out))
        return nn3out


In [94]:
# After building your class
# Print the name and params of your network
# Instantiate the multi-class network and print each registered parameter
# together with its qualified name.
banana_cls = BananaCLS()
for entry in banana_cls.named_parameters():
    print(*entry)  # entry is a (name, parameter) pair

layer1.weight Parameter containing:
tensor([[ 0.,  1., -2.,  1.],
        [ 2.,  3.,  0., -1.],
        [ 1.,  0., -3.,  0.]], requires_grad=True)
layer1.bias Parameter containing:
tensor([0., 0., 0.], requires_grad=True)
layer2.weight Parameter containing:
tensor([[ 0., -2.,  1.],
        [ 2.,  0., -1.]], requires_grad=True)
layer2.bias Parameter containing:
tensor([1., 1.], requires_grad=True)
layer3.weight Parameter containing:
tensor([[ 1.,  1.],
        [-1.,  2.],
        [ 0., -1.]], requires_grad=True)
layer3.bias Parameter containing:
tensor([0., 0., 0.], requires_grad=True)


- Run the forward pass for text1

In [95]:
# Run the forward pass of the BananaCLS instance on text1 and print the
# resulting output of its final Softmax layer.
out_cls1 = banana_cls(text1)
print(out_cls1)

tensor([0.6278, 0.3022, 0.0700], grad_fn=<SoftmaxBackward0>)


  return self._call_impl(*args, **kwargs)


# Other questions:

The following code is meant to build a two-layer feedforward net: the input dimension is 30, the hidden dimension is 50, and the output dimension is also 50.

Find all bugs in the following code 

(Similar questions will be asked in the exam)

```
class MyModel:

    def __init__(self):
        super().init()
        self.nn1 = nn.Linear(in_features=30, out_features=50)
        self.ac1 = nn.ReLU()
        self.nn2 = nn.Linear(in_features=40, out_features=50)
        self.ac2 = nn.Sigmoid()

    def forward(input):
        out1 = self.nn1(input)
        out2 = self.ac1(out1)
        out3 = self.nn2(out2)
        out4 = self.ac2(out2)
        return out3
```

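- `super().init()` is not correct --> it must be `super().__init__()`.
- The output and input features of the two layers don't match: `nn1` outputs 50 features, but `nn2` expects 40 (its `in_features` must be 50).
- `forward` is missing the `self` parameter and does not return the 4th output (`out4`).
- The 4th output should take `out3`, not `out2`, as its argument.
- `MyModel` needs to inherit from `nn.Module`.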

In [98]:
class MyModel(nn.Module):
    """Two-layer feed-forward network: 30 -> 50 (ReLU) -> 50 (Sigmoid)."""

    def __init__(self):
        super().__init__()
        self.nn1 = nn.Linear(in_features=30, out_features=50)
        self.ac1 = nn.ReLU()
        # nn2 consumes the 50 features produced by nn1, so its in_features
        # must be 50; a different value makes forward() fail with a shape
        # mismatch.
        self.nn2 = nn.Linear(in_features=50, out_features=50)
        self.ac2 = nn.Sigmoid()

    def forward(self, input):
        """Run the input through both layers and their activations.

        Args:
            input: tensor with 30 input features, as expected by ``nn1``.

        Returns:
            The Sigmoid output of the second layer (50 features).
        """
        out1 = self.nn1(input)
        out2 = self.ac1(out1)
        out3 = self.nn2(out2)
        out4 = self.ac2(out3)
        return out4
    

# Instantiate the corrected model, print each registered parameter together
# with its qualified name, then print the module's layer summary.
mdl = MyModel()
for entry in mdl.named_parameters():
    print(*entry)  # entry is a (name, parameter) pair
print(mdl)

nn1.weight Parameter containing:
tensor([[ 0.1638,  0.1130,  0.0777,  ..., -0.1450,  0.0264,  0.0414],
        [-0.1532,  0.1423,  0.0822,  ...,  0.1496, -0.0890, -0.0589],
        [-0.0275,  0.1242,  0.1715,  ..., -0.0166, -0.1351,  0.0024],
        ...,
        [ 0.1523,  0.1523,  0.0721,  ...,  0.1017, -0.1700, -0.1116],
        [ 0.0122, -0.0666,  0.0574,  ..., -0.0922, -0.1756, -0.1260],
        [-0.0290,  0.0057, -0.0362,  ...,  0.1034, -0.0074,  0.0293]],
       requires_grad=True)
nn1.bias Parameter containing:
tensor([-0.0390, -0.1122, -0.1777, -0.0783, -0.1006, -0.1554, -0.1019,  0.1466,
         0.0926,  0.1215,  0.0045,  0.1327,  0.1741, -0.1153, -0.1660,  0.0404,
        -0.1143, -0.0048,  0.0797,  0.0940,  0.0099, -0.1209,  0.0059,  0.0263,
         0.1617,  0.0583,  0.0450, -0.0159,  0.0928,  0.0288,  0.1676, -0.1284,
        -0.0198, -0.0053,  0.1492,  0.0688,  0.0286, -0.0404,  0.1671,  0.1780,
         0.0138,  0.0364,  0.1632, -0.1303, -0.1084, -0.1654,  0.0995,  0.0