# HWK 5 PROBLEM 1

In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F

### The binary hinge loss criterion is not implemented in pytorch, so we have to implement it ourselves. Try to understand the code below:

In [20]:
def HingeLossCriterion(scores,labels):
    """
    scores is a bs-by-1 FloatTensor that contains a batch of scores
    labels is a LongTensor of size bs that contains a batch of labels
    """
    return F.relu(1-scores.view(labels.size())*labels.float()).mean()

### Let's check that the Criterion works properly:

In [21]:
scores = torch.Tensor([0.7,-0.1]).view(2,1)
labels = torch.LongTensor([+1,-1])
print(scores)
print(labels)

tensor([[ 0.7000],
        [-0.1000]])
tensor([ 1, -1])


### Note that the loss on this batch of scores should be: $ \frac{1}{2} \left( 0.3 + 0.9 \right) =0.6 $ and indeed, our criterion compute it correctly:

In [22]:
HingeLossCriterion(scores,labels)

tensor(0.6000)

### Make the 2 layer net described in the hwk:

In [23]:
class two_layer_net(nn.Module):

    def __init__(self, input_size, hidden_size,  output_size):
        super().__init__()
        
        self.layer1 = nn.Linear(input_size, hidden_size, bias=False)
        self.layer2 = nn.Linear(hidden_size, output_size, bias=False)
        
    def forward(self, x): 
        
        x = self.layer1(x)
        x = F.relu(x)
        scores = self.layer2(x)
        
        
        return scores
    
    
net = two_layer_net(2, 2, 1)
print(net)

two_layer_net(
  (layer1): Linear(in_features=2, out_features=2, bias=False)
  (layer2): Linear(in_features=2, out_features=1, bias=False)
)


### Set up the weights manually so that they are:
 $$
U = \begin{bmatrix}
0 & 1 \\
1 & 0
\end{bmatrix} \qquad  \text{and} \qquad
V = \begin{bmatrix}
\frac{1}{4} & -\frac{1}{2} 
\end{bmatrix}
$$

In [30]:
net.layer1.weight.data = torch.Tensor([[0,1], [1,0]])
net.layer2.weight.data = torch.Tensor([[0.25, -0.5]])
print(net.layer1.weight)
print(net.layer2.weight)

Parameter containing:
tensor([[0., 1.],
        [1., 0.]], grad_fn=<CopySlices>)
Parameter containing:
tensor([[ 0.2500, -0.5000]], requires_grad=True)


In [31]:
net.layer1.weight.data

tensor([[0., 1.],
        [1., 0.]])

### Make a tensor train\_data and a tensor train\_label containing the training set. Make sure that your tensors have the right sizes and the right type! Remember that in pytorch, dimension 0 is the batch dimension. 

In [32]:
train_data = torch.Tensor([[-1,2],[-1,5],[-1,-1],[1,1]])
print(train_data)
train_label= torch.Tensor([1,1,-1,-1])
print(train_label)

tensor([[-1.,  2.],
        [-1.,  5.],
        [-1., -1.],
        [ 1.,  1.]])
tensor([ 1.,  1., -1., -1.])


### Compute the loss on the minibatch that consists of the first and last data point. To do this you need to slice the tensors train\_data and train\_label at the correct indices (and in order to do this you need to create a longtensor that contains the indices of the data points you want to extract)

In [33]:
# reset the weights so that the results of the next part are not affected by the previous
indices = [0,3]
minibatch_data = train_data[indices]
minibatch_label = train_label[indices]
# forward the minibatch through the net  
scores = net(minibatch_data)
# Compute the average of the losses of the data points in the minibatch
loss = HingeLossCriterion(scores, minibatch_label)
print(loss)

tensor(0.6250, grad_fn=<MeanBackward1>)


### Compute the average loss on the full training set

In [34]:
scores2 = net(train_data)
# Compute the average of the losses of the data points in entire set
loss = HingeLossCriterion(scores2, train_label)
print(loss)


tensor(0.5625, grad_fn=<MeanBackward1>)
