In [1]:
from itertools import islice

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as dsets
import torchvision.transforms as transforms

In [2]:
# MNIST train and test splits stored under ./data; every sample goes through
# transforms.ToTensor().  Only the training call asks for a download.
train_dataset = dsets.MNIST(
    root='./data', train=True, download=True, transform=transforms.ToTensor()
)
test_dataset = dsets.MNIST(
    root='./data', train=False, transform=transforms.ToTensor()
)

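Create a training set and a test set, each with 500 examples of class 5 followed by 500 examples of class 8, and relabel 5 → 0 and 8 → 1. In the training set only, flip the labels of the first 500//3 = 166 examples of each class (about one third, i.e. roughly 30% label noise); the test labels are left unchanged.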

In [3]:
def first_n_of_class(dataset, digit, n=500):
    """Return the first `n` (image, label) examples of `dataset` labelled `digit`.

    The dataset is scanned in order and the scan stops as soon as `n` matches
    have been collected, so the remaining samples are never loaded or
    transformed.  If fewer than `n` matches exist, all of them are returned.
    The result is the same list as `[ex for ex in dataset if ex[1]==digit][:n]`.
    """
    matches = (ex for ex in dataset if ex[1] == digit)
    return list(islice(matches, n))


# Each split: 500 fives (positions 0-499) followed by 500 eights (positions 500-999).
train_set = first_n_of_class(train_dataset, 5) + first_n_of_class(train_dataset, 8)
test_set = first_n_of_class(test_dataset, 5) + first_n_of_class(test_dataset, 8)

In [4]:
# Number of training examples PER CLASS whose label is flipped
# (500//3 = 166, i.e. about one third).  Set num_flips=0 to have no flips.
num_flips = 500//3


def to_binary_tensors(examples, flips_per_class=0):
    """Turn (image, digit) pairs into a flattened image tensor and a 0/1 label tensor.

    Digit 5 becomes label 0; any other digit (here: 8) becomes label 1.
    The first `flips_per_class` examples encountered for each class receive
    the opposite label (label noise).  Flips are counted per class instead of
    comparing the list position against a fixed offset, so the result does
    not depend on the class-5 block being exactly 500 examples long.

    Args:
        examples: iterable of (image_tensor, digit) pairs.
        flips_per_class: how many leading examples of each class to mislabel.

    Returns:
        (images, labels): `images` is the stacked batch reshaped to
        (-1, 28*28); `labels` is a 1-D tensor of 0/1 values in the same order.
    """
    seen = {0: 0, 1: 0}  # how many examples of each clean label were processed so far
    images, labels = [], []
    for image, digit in examples:
        clean = 0 if digit == 5 else 1
        flip = seen[clean] < flips_per_class
        seen[clean] += 1
        images.append(image)
        labels.append(1 - clean if flip else clean)
    # torch.stack turns the list of per-image tensors into one batch tensor,
    # which is then flattened to one row of 28*28 values per image.
    return torch.stack(images).view(-1, 28*28), torch.tensor(labels)


# Noisy labels for training, clean labels for testing.
train_set_01_images, train_set_01_labels = to_binary_tensors(train_set, num_flips)
test_set_01_images, test_set_01_labels = to_binary_tensors(test_set)

# shuffle training set (fixed seed so the order is the same on every run)
n_sample = len(train_set_01_images)
np.random.seed(0)
order = np.random.permutation(n_sample)
train_set_01_images = train_set_01_images[order]
train_set_01_labels = train_set_01_labels[order]

In [5]:
# Report the (num_examples, num_features) size of both flattened image tensors.
print("size train set:", train_set_01_images.size())
print("size test set:", test_set_01_images.size())

size train set: torch.Size([1000, 784])
size test set: torch.Size([1000, 784])


## Logistic regression

In [6]:
# Disabled hand-written logistic regression (sigmoid, model f, loss, gradient).
# Everything in this cell is commented out, so nothing here executes.

# def sigmoid(X):
#     return 1/(1+torch.exp(-X))

# def f(X,theta): # x 1*784, theta 784*1
#     return sigmoid(torch.matmul(X,theta))

# def loss(X,y,theta):
#     epsilon = 1e-8 # to avoid nan
#     X = X.type(torch.float)
#     y = y.type(torch.float)
#     loss1 = -torch.matmul(y,torch.log(f(X,theta)+epsilon)) - torch.matmul((1-y),torch.log(1-f(X,theta)+epsilon))
#     return loss1

# NOTE(review): with theta of shape (784, 1), f(X,theta) is (N, 1); if y is a
# 1-D tensor of length N, `f(X,theta)-y` broadcasts to (N, N), so the value
# returned below is not the usual X^T (f - y) gradient.  Reshaping y with
# .view(-1,1) (and dropping .sum(1)) would be needed - confirm before re-enabling.
# def loss_grad(X,y,theta):
#     X = X.type(torch.float)
#     y = y.type(torch.float)
#     return torch.matmul(X.T, f(X,theta)-y).sum(1).view(-1,1)


In [7]:
# Disabled plain gradient-descent loop for the hand-written loss/loss_grad
# functions (also commented out).  Nothing in this cell executes.
# NOTE(review): `tmp` is assigned but never read in this loop.
# max_epochs = 10000
# step_size = 0.0000001
# theta1 = torch.ones(28*28,1)

# for i in range(max_epochs): 
#     grad = loss_grad(train_set_01_images, train_set_01_labels, theta1)
#     tmp = theta1
#     theta1 = theta1 - step_size * grad
#     print(loss(train_set_01_images, train_set_01_labels, theta1))


In [8]:

class Net(nn.Module):
    """Logistic-regression model: a single linear unit over 28*28 inputs followed by a sigmoid."""

    def __init__(self):
        """Create the linear layer (`fc1`) and the sigmoid activation (`sigmoid`)."""
        super().__init__()
        self.fc1 = nn.Linear(in_features=28 * 28, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid(fc1(x)): one value in (0, 1) per input row."""
        return self.sigmoid(self.fc1(x))
    
# Seed torch's RNG so the random initialisation of the linear layer, and
# therefore the printed loss curve, is repeatable across kernel restarts.
torch.manual_seed(0)
net = Net()

# Binary cross-entropy on the sigmoid output, minimised by SGD with momentum.
criterion = nn.BCELoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

# Number of full-batch gradient steps.  This is the count the loop actually
# ran; it is now driven by the variable instead of a separate literal.
max_epochs = 100
X = train_set_01_images.type(torch.float)
y = train_set_01_labels.type(torch.float).view(-1,1)  # (N,) -> (N, 1), matching the network output
for epochs in range(max_epochs):
    optimizer.zero_grad()
    outputs = net(X)  # Linear(784, 1) already yields shape (N, 1); no reshape needed
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()
    print(loss.item())


0.6960861682891846
0.6947778463363647
0.6925340890884399
0.6897932887077332
0.686898410320282
0.6840081214904785
0.6811043620109558
0.678074836730957
0.6748166680335999
0.6713123917579651
0.6676486134529114
0.6639828085899353
0.6604799628257751
0.6572562456130981
0.6543488502502441
0.6517247557640076
0.6493131518363953
0.6470455527305603
0.6448848247528076
0.6428319811820984
0.6409149169921875
0.6391669511795044
0.6376073956489563
0.6362307667732239
0.6350095272064209
0.6339043378829956
0.63287752866745
0.6319031715393066
0.6309708952903748
0.6300820708274841
0.629244327545166
0.6284630298614502
0.6277372241020203
0.6270591616630554
0.6264159679412842
0.6257942914962769
0.6251840591430664
0.6245806813240051
0.6239843368530273
0.62339848279953
0.6228272318840027
0.6222730875015259
0.6217358112335205
0.6212129592895508
0.6207011938095093
0.6201969981193542
0.6196988821029663
0.6192066073417664
0.6187218427658081
0.6182464361190796
0.6177821755409241
0.6173298358917236
0.6168890595436096
