# Problem 5: Non-CE loss function

In [4]:
import torch
import torch.nn as nn
from torch.optim import Optimizer
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import transforms
import matplotlib.pyplot as plt
from random import shuffle
'''
Step 1: Data
'''
# Binary task: keep only the digits 4 and 9 (described by the author as the
# hardest pair to classify).
label_1, label_2 = 4, 9

# MNIST training split (download=True lets torchvision fetch the files into
# ./mnist_data/) followed by the MNIST test split read from the same root.
train_set = datasets.MNIST(root='./mnist_data/', train=True, transform=transforms.ToTensor(), download=True)
test_set = datasets.MNIST(root='./mnist_data/', train=False, transform=transforms.ToTensor())

# Both splits get the same treatment: drop every sample that is not one of the
# two chosen digits, then re-encode label_1 as -1 and label_2 as +1.
for split in (train_set, test_set):
    idx = (split.targets == label_1) | (split.targets == label_2)
    split.data = split.data[idx]
    split.targets = split.targets[idx]
    split.targets[split.targets == label_1] = -1
    split.targets[split.targets == label_2] = 1
    

'''
Step 2: (same step)
'''
class Linear(nn.Module):
    """Bias-free linear model that maps a flattened input to one scalar score.

    Args:
        input_dim: Number of features per sample after flattening.
            Defaults to 28*28.
    """

    def __init__(self, input_dim=28*28):
        super().__init__()
        self.linear = nn.Linear(input_dim, 1, bias=False)

    def forward(self, x):
        """Return a tensor of shape (N, 1) holding one score per sample.

        The input is cast to float and reshaped to (-1, input_dim), so any
        tensor whose element count is a multiple of input_dim is accepted.
        """
        # Flatten to the width the layer was built with, so that a
        # non-default input_dim works as well as the 28*28 default.
        return self.linear(x.float().view(-1, self.linear.in_features))

'''
Step 3: Create the model, specify loss function and optimizer. (LOOK HERE)
'''
# One independent linear classifier per loss function under comparison.
model_CE, model_MSE = Linear(), Linear()

def CE_loss(output, target):
    """Return the batch-mean logistic (cross-entropy) loss.

    Both arguments are flattened to 1-D before use. The value computed is the
    mean over the batch of -log(sigmoid(target * output)); this file feeds it
    targets encoded as -1 / +1.
    """
    signs = target.reshape(-1)
    scores = output.reshape(-1)
    log_likelihood = torch.nn.functional.logsigmoid(signs * scores)
    return torch.mean(-log_likelihood)

def MSE_loss(output, target):
    """Return the batch-mean squared error between sigmoid probabilities and labels.

    For a score z the two class probabilities are taken as sigmoid(-z) and
    sigmoid(z). Per sample, with label y:

        0.5*(1-y) * ((1 - sigmoid(-z))**2 + sigmoid(z)**2)
      + 0.5*(1+y) * (sigmoid(-z)**2 + (1 - sigmoid(z))**2)

    so y = -1 selects the first term and y = +1 selects the second.

    Args:
        output: Tensor of scores; flattened to 1-D.
        target: Tensor of labels with the same number of elements; flattened
            to 1-D.

    Returns:
        A 0-dim tensor with the mean per-sample loss (NaN for an empty batch).
    """
    z = output.reshape(-1)
    y = target.reshape(-1)

    # Evaluate each sigmoid once for the whole batch instead of looping over
    # samples in Python; the per-sample formula is unchanged.
    p_pos = torch.sigmoid(z)
    p_neg = torch.sigmoid(-z)

    loss_if_neg = (1 - p_neg) ** 2 + p_pos ** 2
    loss_if_pos = p_neg ** 2 + (1 - p_pos) ** 2
    per_sample = 0.5 * (1 - y) * loss_if_neg + 0.5 * (1 + y) * loss_if_pos
    return per_sample.mean()


# Plain SGD for both models with the same step size, so the two training runs
# differ only in the loss being minimised.
# NOTE(review): the factor 255 presumably relates to 8-bit pixel scaling — confirm.
learning_rate = 255*1e-4
optimizer_CE = torch.optim.SGD(model_CE.parameters(), lr=learning_rate)
optimizer_MSE = torch.optim.SGD(model_MSE.parameters(), lr=learning_rate)


'''
Step 4: Train model with SGD (LOOK HERE)
'''
# Mini-batches of 64 samples, reshuffled every epoch.
train_loader = DataLoader(dataset=train_set, batch_size=64, shuffle=True)

# Three passes over the training data. Both models see exactly the same
# batches; each one is updated with its own loss and its own optimizer.
for epoch in range(3):
    for images, labels in train_loader:
        targets = labels.float()

        # Cross-entropy model: reset gradients, forward, backward, update.
        optimizer_CE.zero_grad()
        train_loss_CE = CE_loss(model_CE(images), targets)
        train_loss_CE.backward()
        optimizer_CE.step()

        # Squared-error model: same sequence on the same batch.
        optimizer_MSE.zero_grad()
        train_loss_MSE = MSE_loss(model_MSE(images), targets)
        train_loss_MSE.backward()
        optimizer_MSE.step()

'''
Step 5: (same step)
'''
# Running totals of the per-sample test loss and the number of correct
# predictions for each of the two models.
test_loss_CE, correct_CE = 0, 0
test_loss_MSE, correct_MSE = 0, 0

# batch_size=1, so the loader yields one test sample per iteration.
test_loader = DataLoader(dataset=test_set, batch_size=1, shuffle=False)

# Evaluation needs no gradients; disabling autograd avoids building a
# computation graph for every test sample.
with torch.no_grad():
    for image, label in test_loader:
        output_CE = model_CE(image)
        output_MSE = model_MSE(image)
        test_loss_CE += CE_loss(output_CE, label.float()).item()
        test_loss_MSE += MSE_loss(output_MSE, label.float()).item()

        # Make a prediction: it counts as correct when the score and the
        # label have the same sign (a score of exactly 0 also counts).
        if output_CE.item() * label.item() >= 0:
            correct_CE += 1
        if output_MSE.item() * label.item() >= 0:
            correct_MSE += 1

# Print out the results
# With batch_size=1 the loader length is the number of test samples.
num_test = len(test_loader)
print("Cross entropy loss")
print('[Test set] Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss_CE / num_test, correct_CE, num_test,
        100. * correct_CE / num_test))
print("Mean squared loss")
print('[Test set] Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss_MSE / num_test, correct_MSE, num_test,
        100. * correct_MSE / num_test))



Cross entropy loss
[Test set] Average loss: 0.1749, Accuracy: 1894/1991 (95.13%)

Mean squared loss
[Test set] Average loss: 0.1007, Accuracy: 1896/1991 (95.23%)



> 두 손실 함수의 테스트 정확도(95.13% vs. 95.23%)에는 큰 차이가 없는 것으로 보인다.