In [1]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

from torchvision import datasets
from torchvision.transforms import ToTensor
train_data = datasets.MNIST(
    root = 'data',
    train = True,
    transform = ToTensor(),
    download = True,
)
test_data = datasets.MNIST(
    root = 'data',
    train = False,
    transform = ToTensor()
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [2]:
from torch.utils.data import DataLoader
loaders = {
    'train' : torch.utils.data.DataLoader(train_data,
                                          batch_size=100,
                                          shuffle=True,
                                          num_workers=1),

    'test'  : torch.utils.data.DataLoader(test_data,
                                          batch_size=100,
                                          shuffle=True,
                                          num_workers=1),
}
loaders

{'train': <torch.utils.data.dataloader.DataLoader at 0x7f05d1e2b450>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x7f05d1e91a90>}

In [3]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784,64)
        self.fc2 = nn.Linear(64,64)
        self.out = nn.Linear(64,10)

    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.out(x)
        return x

In [None]:

# loss_func = nn.MSELoss()
# from torch import optim


In [4]:
class Lowdim():
    def __init__(self,loss_func):
        super(Lowdim, self).__init__()
        
        self.proj = nn.Linear(1000,55050,bias=False)
        for param in self.proj.parameters():
          param.requires_grad= True

        self.low_dim = nn.Parameter(torch.rand([1000],requires_grad=False))
    
        self.orig = Net()
        self.base = torch.nn.utils.parameters_to_vector(self.orig.parameters())
        self.loss = loss_func
        for param in self.orig.parameters():
          param.requires_grad= True

    def forward(self, x):
        x = self.orig(x)
        x = F.softmax(x)
        return x

    def backward(self, x, y):
        pred = self.forward(x)
        lossval = (self.loss(y,pred))
        lossval.backward()
        
        grads = []
        for param in self.orig.parameters():
          grads.append(param.grad.view(-1))
        grads = torch.cat(grads)
        
        compute = self.proj(self.low_dim)
        diff = loss_func(grads,compute)
        diff.backward
        
        self.low_dim = self.low_dim - 0.01*self.low_dim.grad
        
        projection = torch.matmul(self.proj,self.low_dim)
        tmp = self.base + projection
        torch.nn.utils.vector_to_parameters(tmp, self.orig.parameters())
        
        return pred

In [None]:
low = Lowdim(nn.MSELoss())


In [5]:
class Lowdim():
    def __init__(self,loss_func):
        super(Lowdim, self).__init__()
        self.proj = (torch.rand([55050,1000],requires_grad=False))/55050
        self.low_dim = nn.Parameter(torch.rand([1000],requires_grad=True))
        self.orig = Net()
        self.base = torch.nn.utils.parameters_to_vector(self.orig.parameters())
        self.loss = loss_func
        for param in self.orig.parameters():
          param.requires_grad= True
        self.inverse = torch.linalg.pinv(self.proj)

    def forward(self, x):
        x = self.orig(x)
        x = F.softmax(x)
        return x

    def backward(self, x, y):
        pred = self.forward(x)
        lossval = (self.loss(y,pred))
        lossval.backward()
        
        grads = []
        for param in self.orig.parameters():
          grads.append(param.grad.view(-1))
        grads = torch.cat(grads)
        
        diff = torch.matmul(self.inverse,grads)
        self.low_dim = self.low_dim - 0.01*diff
        projection = torch.matmul(self.proj,self.low_dim)
        tmp = self.base + projection
        torch.nn.utils.vector_to_parameters(tmp, self.orig.parameters())
        
        return pred

In [6]:
low = Lowdim(nn.MSELoss())
loss_func = nn.CrossEntropyLoss()


In [7]:
low.inverse.shape

torch.Size([1000, 55050])

In [8]:
for epoch in range(10): 
    running_loss = 0.0
    for i, data in enumerate(loaders["train"], 0):
        inputs, labels = data
        inputs = torch.flatten(inputs,start_dim=2)
        inputs = torch.squeeze(inputs)
        output = low.backward(inputs,F.one_hot(labels,num_classes= 10).float())
        loss = loss_func(output, F.one_hot(labels,num_classes= 10).float())
        running_loss += loss
    print(running_loss)

  from ipykernel import kernelapp as app


tensor(1380.4620, grad_fn=<AddBackward0>)
tensor(1367.6637, grad_fn=<AddBackward0>)
tensor(1318.6346, grad_fn=<AddBackward0>)
tensor(1135.2133, grad_fn=<AddBackward0>)
tensor(1010.9172, grad_fn=<AddBackward0>)
tensor(990.2319, grad_fn=<AddBackward0>)
tensor(983.3428, grad_fn=<AddBackward0>)
tensor(977.1933, grad_fn=<AddBackward0>)
tensor(975.5359, grad_fn=<AddBackward0>)
tensor(976.5643, grad_fn=<AddBackward0>)


In [10]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in loaders["test"]:
        images, labels = data
        images = torch.flatten(images,start_dim=2)
        images = torch.squeeze(images)
        # calculate outputs by running images through the network
        outputs = low.forward(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

  from ipykernel import kernelapp as app


In [11]:
print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

Accuracy of the network on the 10000 test images: 84 %


In [12]:
predicted

tensor([9, 2, 7, 3, 5, 2, 0, 7, 8, 7, 5, 4, 5, 8, 9, 3, 7, 0, 4, 0, 8, 7, 0, 0,
        6, 2, 2, 8, 0, 4, 4, 5, 1, 6, 8, 7, 9, 7, 0, 2, 6, 4, 9, 9, 7, 7, 3, 1,
        8, 7, 0, 1, 6, 7, 3, 2, 9, 8, 2, 6, 6, 6, 9, 1, 5, 6, 6, 6, 9, 1, 7, 1,
        6, 0, 1, 8, 3, 8, 6, 2, 8, 3, 8, 8, 4, 3, 3, 6, 0, 6, 5, 0, 3, 1, 2, 4,
        8, 9, 8, 1])

In [None]:
outputs