In [2]:
import torch, math, copy
import numpy as np
from torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F

In [11]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

# Replace with path to MNIST on your machine
train_dataset = datasets.MNIST("/Users/lucastucker/REU-2023/archive/", train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=256, shuffle=True)

test_dataset = datasets.MNIST("/Users/lucastucker/REU-2023/archive/", train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=256, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /Users/lucastucker/REU-2023/archive/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting /Users/lucastucker/REU-2023/archive/MNIST/raw/train-images-idx3-ubyte.gz to /Users/lucastucker/REU-2023/archive/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /Users/lucastucker/REU-2023/archive/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting /Users/lucastucker/REU-2023/archive/MNIST/raw/train-labels-idx1-ubyte.gz to /Users/lucastucker/REU-2023/archive/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /Users/lucastucker/REU-2023/archive/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting /Users/lucastucker/REU-2023/archive/MNIST/raw/t10k-images-idx3-ubyte.gz to /Users/lucastucker/REU-2023/archive/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /Users/lucastucker/REU-2023/archive/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting /Users/lucastucker/REU-2023/archive/MNIST/raw/t10k-labels-idx1-ubyte.gz to /Users/lucastucker/REU-2023/archive/MNIST/raw



In [29]:
def train(epochs, model, criterion, optimizer, train_loader, test_loader, reduced_dim, t_nearest):
    for epoch in range(epochs):
        train_err = train_epoch(model, criterion, optimizer, train_loader, reduced_dim, t_nearest)
        test_err = test(model, test_loader, reduced_dim, t_nearest)
        print('Epoch {:03d}/{:03d}, Train Error {:.2f}% || Test Error {:.2f}%'.format(epoch, epochs, train_err*100, test_err*100))
    return train_err, test_err

In [54]:
def train_epoch(model, criterion, optimizer, loader, reduced_dim, t_nearest):
    total_correct = 0.
    total_samples = 0.

    for batch_idx, (data, target) in enumerate(loader):
        # NOTE: Uncomment the code below if you are using a GPU
        # if torch.cuda.is_available():
        #    data, target = data.cuda(), target.cuda()

        # NEW
        num_batches = data.shape[0]
        
        data = le_on_loader(data.view(784, -1), reduced_dim ** 2, t_nearest)
        data = data.reshape(num_batches, 1, reduced_dim, reduced_dim)

        output = model(data)

        loss = F.cross_entropy(output, target)
        preds = output.argmax(dim=1, keepdim=True)
        total_correct += preds.eq(target.view_as(preds)).sum().item() # compare preds to target
        total_samples += torch.numel(preds) # numel short for number of elements
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return 1 - total_correct/total_samples

In [46]:
def test(model, loader, reduced_dim, t_nearest):
    total_correct = 0.
    total_samples = 0.
    model.eval()

    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(loader):
            # NOTE: Uncomment the code below if you are using a GPU
            # if torch.cuda.is_available():
            #    data, target = data.cuda(), target.cuda()

            # Problem is that le_on_loader depends on batch size
            num_batches = data.shape[0]
            data = le_on_loader(data.view(784, -1), reduced_dim ** 2, t_nearest).reshape(num_batches, 1, reduced_dim, reduced_dim)

            output = model(data)
            preds = output.argmax(dim=1, keepdim=True)
            total_correct += preds.eq(target.view_as(preds)).sum().item()
            total_samples += preds.numel()

    return 1 - total_correct/total_samples

In [32]:
%run /Users/lucastucker/REU-2023/laplacian_eigenmaps_functions.ipynb

In [51]:
def le_on_loader(X, reduced_dim, t_nearest):
    n, m = X.shape
    sigma = 0.3 # hyperparameter, 0.2 seems to work best
    N = t_nearest_matr(m, t_nearest, X)
    W = weight_matr(m, N, X, sigma)
    P = get_le_reduced(reduced_dim, X, W)
    return torch.FloatTensor(P)

In [49]:
class CNNeluBN(nn.Module):
    def __init__(self, reduced_dim):
        super(CNNeluBN, self).__init__()

        # write code here to instantiate layers
        # for example, self.conv = nn.Conv2d(1, 4, 3, 1, 1)
        # creates a conv layer with 1 input channel, 4 output
        # channels, a 3x3 kernel, and stride=padding=1
        self.layers = nn.ModuleList()

        self.layers.append(nn.Conv2d(1, 4, 3, 1, 1)) # 1 to 4 channels on 1 x red x red input
        self.layers.append(nn.BatchNorm2d(4))
        self.layers.append(nn.ELU())
        self.layers.append(nn.AvgPool2d(2, 2)) # Now size 4 x red // 2 x red // 2
        self.layers.append(nn.Conv2d(4, 8, 3, 1, 1)) # 4 to 8 channels
        self.layers.append(nn.BatchNorm2d(8))
        self.layers.append(nn.ELU())
        self.layers.append(nn.AvgPool2d(2, 2)) # Now size 8 x red // 2 // 2 x red // 2 // 2
        new_dim = (reduced_dim // 2) // 2
        self.layers.append(nn.Linear(8 * new_dim * new_dim, 10))

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                sigma = 1 / (math.sqrt(9 * m.out_channels)) # 9 is k^2
                m.weight.data.normal_(0, sigma)
                m.bias.data.zero_()

    def forward(self, input):
      # print(input.shape)
      u = self.layers[0](input)
      for layer in self.layers[1:-1]:
         u = layer(u)
      num_batches = u.size()[0]
      u = u.view(num_batches, -1)
      return self.layers[-1](u)

In [20]:
criterion = torch.nn.CrossEntropyLoss() # TODO (implement in nn) 

In [55]:
lr = 0.01
reduced_dim = 14
t_nearest = 10
print(f"Training ELU CNN + BN with representation dimension {reduced_dim} x {reduced_dim}")
model = CNNeluBN(reduced_dim) # .cuda() 
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
print(f"train_loader")
train_errs, test_errs = train(3, model, criterion, optimizer, train_loader, test_loader, reduced_dim, t_nearest)
#       X = le_on_loader(input.view(784, -1), reduced_dim, t_nearest)

"""
  for reduced_dim in range(4, 29, 4):
  print("\nTraining ELU CNN + BN with {reduced_dim} layers".format(reduced_dim))
  model = CNNeluBN(reduced_dim).cuda()
  optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
  train_errs, test_errs = train(3, model, criterion, optimizer, train_loader, test_loader)
"""

Training ELU CNN + BN with representation dimension 14 x 14
train_loader


RuntimeError: shape '[96, 1, 14, 14]' is invalid for input of size 9216