# Import tools

In [None]:
# built-in utilities
import copy
import os
import time
import datetime

# data tools
import numpy as np

# pytorch 
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.tensorboard import SummaryWriter
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# tensorflow
!pip install -q tf-nightly-2.0-preview
from tensorflow import summary
%load_ext tensorboard

# visualization
import matplotlib.pyplot as plt
%matplotlib inline

# Load data

In [None]:
# preprocessing pipeline: convert each image to a tensor, then normalize it
# with mean 0.1307 and standard deviation 0.3081
transformation = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

## download data (both splits share the same root folder and transform)
trainDataset = datasets.MNIST(root="../data", train=True, transform=transformation, download=True)
testDataset = datasets.MNIST(root="../data", train=False, transform=transformation, download=True)

## load data: shuffled mini-batches of 128 samples for each split
trainDataLoader = DataLoader(trainDataset, batch_size=128, shuffle=True)
testDataLoader = DataLoader(testDataset, batch_size=128, shuffle=True)

In [None]:
# plot sample image
def plotSample(image):
    """Display a single normalized MNIST image in grayscale.

    The image is de-normalized with the same constants used by the data
    transformer (mean 0.1307, std 0.3081) before it is shown.

    Args:
        image: tensor holding one normalized image; singleton dimensions
            (for example a leading channel axis) are dropped before plotting.
    """
    mean = 0.1307
    std = 0.3081
    # invert Normalize(mean, std): normalized = (x - mean) / std
    image = image.numpy().squeeze() * std + mean
    plt.imshow(image, cmap="gray")

# fetch the first training image through the dataset's transform so that it
# really is normalized before plotSample undoes the normalization
sample, _ = trainDataset[0]
plotSample(sample)

# Question 1

## Question 1, Part 1


Train a fully-connected net for MNIST classification (sorry, no CNN please, yet). It should have 5 hidden layers, each with 1024 hidden units. Feel free to use whatever techniques you learned in class. You should be able to get the test accuracy above 98%.

### Neural network architecture

In [None]:
# set model architecture
class fcNet(nn.Module):
    """Fully-connected classifier: 784 inputs, 1024-unit layers, 10 outputs.

    ``forward`` applies ReLU after every layer except the last one and
    returns log-probabilities (log-softmax over dimension 1).
    """

    def __init__(self):
        super().__init__()
        width = 1024
        # layers are registered in forward order; attribute names are part of
        # the state-dict keys and are used for hooks elsewhere in the notebook
        self.inputLayer = nn.Linear(784, width)
        self.fullyConnected1 = nn.Linear(width, width)
        self.fullyConnected2 = nn.Linear(width, width)
        self.fullyConnected3 = nn.Linear(width, width)
        self.fullyConnected4 = nn.Linear(width, width)
        self.fullyConnected5 = nn.Linear(width, width)
        self.outputLayer = nn.Linear(width, 10)

    def forward(self, x):
        reluStack = (
            self.inputLayer,
            self.fullyConnected1,
            self.fullyConnected2,
            self.fullyConnected3,
            self.fullyConnected4,
            self.fullyConnected5,
        )
        for layer in reluStack:
            x = F.relu(layer(x))
        logits = self.outputLayer(x)
        return F.log_softmax(logits, dim=1)

### Parameter setup

In [None]:
# set input kwargs as object attributes
class ParamConfig:
    """Simple namespace: every keyword argument becomes an instance attribute."""

    def __init__(self, **kwargs):
        # store all settings at once on the instance
        self.__dict__.update(kwargs)

# configure all necessary parameters
modelParams = ParamConfig(
    # what to train and how
    model=fcNet,
    optimizer=optim.Adam,
    criterion=F.nll_loss,
    lr=0.001,
    epochs=50,
    seed=0,
    # data: shuffled mini-batches of 128 samples
    trainDataLoader=DataLoader(trainDataset, batch_size=128, shuffle=True),
    testDataLoader=DataLoader(testDataset, batch_size=128, shuffle=True),
    # hardware: use the GPU when one is available
    cuda=torch.cuda.is_available(),
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    # outputs
    saveModel=True,
    tensorboardFiles=False,
)

### Training loop / test set evaluation process

In [None]:
class PyTorchTrainer:
    """Train and evaluate a classifier described by a configuration object.

    The configuration must expose the attributes read in ``__init__``:
    ``trainDataLoader``, ``testDataLoader``, ``seed``, ``cuda``, ``device``,
    ``model`` (zero-argument callable returning the network), ``lr``,
    ``epochs``, ``optimizer`` (called as ``optimizer(params, lr=lr)``),
    ``criterion`` (called as ``criterion(preds, target)``), ``saveModel`` and
    ``tensorboardFiles``.

    Every batch is flattened to ``(-1, 28*28)`` before being fed to the model.

    Attributes:
        trainLoss: per-batch training losses (Python floats).
        testLoss: per-batch test losses (Python floats).
        testEpochLoss: per-sample average test loss, one entry per ``test`` call.
        testAccuracy: test accuracy in percent, one entry per ``test`` call.
    """
    def __init__(self, config):
        # number of training batches processed so far (tensorboard step)
        self.globaliter = 0

        # data loaders
        self.trainDataLoader = config.trainDataLoader
        self.testDataLoader = config.testDataLoader

        # random seed settings
        self.seed = config.seed
        torch.manual_seed(self.seed)

        # device settings
        self.cuda = config.cuda
        self.device = config.device

        # model training settings
        self.model = config.model().to(self.device)
        self.lr = config.lr
        self.epochs = config.epochs
        self.optimizer = config.optimizer(self.model.parameters(), lr=self.lr)
        self.criterion = config.criterion

        # save model
        self.saveModel = config.saveModel

        # tensorboard
        self.tensorboardFiles = config.tensorboardFiles
        if self.tensorboardFiles:
            current = str(datetime.datetime.now().timestamp())
            trainLogDir = "logs/mnist1/train/" + current
            testLogDir = "logs/mnist1/test/" + current
            self.trainSummaryWriter = SummaryWriter(trainLogDir)
            self.testSummaryWriter = SummaryWriter(testLogDir)
        else:
            self.trainSummaryWriter = None
            self.testSummaryWriter = None

        # statistics
        self.trainLoss = []
        self.testLoss = []
        self.testEpochLoss = []
        self.testAccuracy = []

    def train(self, epoch):
        """Run one pass over the training loader, updating the model weights.

        Args:
            epoch: epoch number, used only in the progress messages.
        """
        # set model to train mode
        self.model.train()
        print("*" * 80)

        # iterate through batches
        for batchIdx, (data, target) in enumerate(self.trainDataLoader):
            self.globaliter += 1

            # reshape data as needed and send data to GPU if available
            data = data.reshape(-1, 28*28).to(self.device)
            target = target.to(self.device)

            # zero out gradients
            self.optimizer.zero_grad()

            # generate predictions
            preds = self.model(data)

            # calculate loss given current predictions vs. ground truth
            loss = self.criterion(preds, target)

            # back propagate error and optimize weights
            loss.backward()
            self.optimizer.step()

            # capture batch loss as a plain float so the history does not keep
            # loss tensors (and device memory) alive
            batchLoss = loss.item()
            self.trainLoss.append(batchLoss)

            if batchIdx % 100 == 0:
                print("Train Epoch: {} | Batch: {} [Processed {}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                    epoch, batchIdx, batchIdx * len(data), len(self.trainDataLoader.dataset),
                    100. * batchIdx / len(self.trainDataLoader), batchLoss))

            # tensorboard
            if self.tensorboardFiles:
                self.trainSummaryWriter.add_scalar('loss', batchLoss, global_step=self.globaliter)
        print()

    def test(self, epoch):
        """Evaluate the model on the test loader and record loss and accuracy.

        Args:
            epoch: epoch number (not used by the evaluation itself).
        """
        # set model to eval mode
        self.model.eval()
        totalLoss = 0.0
        correct = 0
        sampleCount = len(self.testDataLoader.dataset)

        # turn off gradients
        with torch.no_grad():
            # iterate through batches
            for data, target in self.testDataLoader:
                # reshape data as needed and send data to GPU if available
                data = data.reshape(-1, 28*28).to(self.device)
                target = target.to(self.device)

                # generate predictions
                preds = self.model(data)

                # batch loss; weighting by batch size turns the per-batch
                # values into a per-sample total (assumes the criterion
                # returns the batch mean, as F.nll_loss does by default)
                batchLoss = self.criterion(preds, target).item()
                totalLoss += batchLoss * len(data)

                # capture count of correct answers
                correct += preds.argmax(dim=1).eq(target).sum().item()

                # capture batch loss
                self.testLoss.append(batchLoss)

        # overall epoch loss and accuracy
        testLoss = totalLoss / sampleCount
        accuracy = 100. * correct / sampleCount

        # capture epoch statistics
        self.testEpochLoss.append(testLoss)
        self.testAccuracy.append(accuracy)

        print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n\n'.format(
            testLoss, correct, sampleCount, accuracy))

        # tensorboard
        if self.tensorboardFiles:
            self.testSummaryWriter.add_scalar('loss', testLoss, global_step=self.globaliter)
            self.testSummaryWriter.add_scalar('accuracy', accuracy, global_step=self.globaliter)

### Execute

In [None]:
### fit model
# instantiate model object
trainer = PyTorchTrainer(config=modelParams)

# iterate fitting procedure over specified epoch count
for epoch in range(1, trainer.epochs + 1):
    trainer.train(epoch)
    trainer.test(epoch)

# save model weights (state dict only)
if trainer.saveModel:
    PATH = "models/mnist_hw1_q1_2.pt"
    # create the target folder if needed; no error when it already exists
    os.makedirs(os.path.dirname(PATH), exist_ok=True)
    torch.save(trainer.model.state_dict(), PATH)

### Evaluate

In [None]:
# line plot of the per-epoch test accuracy recorded by the trainer
fig, ax = plt.subplots(figsize=(20,10), facecolor="white")
ax.plot(trainer.testAccuracy)
ax.set_title("Test set accuracy")
plt.show()

## Question 1, Part 3

Once you’re done with training, as a starter, do a feedforward step on your test samples, a thousand of them. Capture the output of the softmax layer, which will be a 10-dim probability vector per sample. In other words, each output dimension has 1,000 predictions corresponding to the 1,000 examples. For each 10-d output vector, find the dim with the maximum probability (which will eventually decide the class label). Plot the input image associated with that in a grid of subplots. For example, you can create a 10 × 10 grid of subplots, whose first row plots first ten input images that produced the highest probabilities for the first dim (which corresponds to “0”). Eventually, if your classification was near perfect, you’ll see ten 0’s in the first row, ten 1’s in the second, and so on.

### Reload model

In [None]:
# load model
PATH = "models/mnist_hw1_q1_2.pt"
model = fcNet().to(device)
# map_location remaps the stored tensors onto the current device, so weights
# saved on a GPU can also be loaded on a CPU-only machine
model.load_state_dict(torch.load(PATH, map_location=device))
# switch to evaluation mode for the analysis cells
model.eval()

### Forward hook class

In [None]:
class ForwardHook():
    """Record the input and output of a module during its forward pass.

    After the hooked module has run, ``layerIn`` holds the tuple of positional
    inputs the module received and ``layerOut`` holds its detached output.
    Both are ``None`` until the module has been called.

    The object can be used as a context manager, in which case the hook is
    removed on exit::

        with ForwardHook(layer) as hook:
            model(x)
        activations = hook.layerOut

    Args:
        m: the module to attach the forward hook to.
    """
    activations=None

    def __init__(self, m):
        # defined up front so reading them before a forward pass gives None
        # instead of raising AttributeError
        self.layerIn = None
        self.layerOut = None
        self.hook = m.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        """Store the module's inputs and a detached view of its output."""
        self.layerIn = input
        self.layerOut = output.detach()

    def remove(self):
        """Detach the hook from the module; captured values are kept."""
        self.hook.remove()

    def __enter__(self):
        return self

    def __exit__(self, excType, excValue, traceback):
        self.remove()
        # never suppress exceptions raised inside the with-block
        return False

### Data preparation and layer capture

In [None]:
## gather data
# images: first 1000 training images, flattened and preprocessed the same way
# as the training transform (scale to [0, 1], then normalize with
# mean 0.1307 / std 0.3081) so the model sees inputs like those it was trained on
rawPixels = trainDataset.data[:1000].float()
images = ((rawPixels / 255.0 - 0.1307) / 0.3081).reshape(-1, 28*28).to(device)

# target labels
targets = trainDataset.targets[:1000]

# create hook object on the output layer and run an inference-only forward
# pass; the hook is always removed afterwards so hooks do not accumulate on
# the layer when this cell is re-run
imageActivations = ForwardHook(model.outputLayer)
try:
    with torch.no_grad():
        predictions = model(images)
finally:
    imageActivations.remove()

### Sample visualization grid

In [None]:
# plot grid: one row of subplots per predicted class
imagesPerClass = 10

# predicted class per sample, taken from the captured output-layer activations
softmaxPreds = torch.argmax(F.log_softmax(imageActivations.layerOut, dim=1), dim=1).cpu().numpy()
imagesNew = images.cpu().numpy().reshape(-1, 28, 28)
for label in range(10):
    fig, axes = plt.subplots(1, imagesPerClass, figsize=(20,2))
    rowAxes = axes.flatten()

    # hide frames and ticks everywhere, including slots that stay empty
    for axis in rowAxes:
        axis.axis('off')

    # first samples (in dataset order) whose predicted class is `label`
    classIndices = np.flatnonzero(softmaxPreds == label)[:imagesPerClass]

    # draw directly on the axes created by plt.subplots instead of stacking
    # a second axes on top of each one
    for axis, imageIx in zip(rowAxes, classIndices):
        axis.imshow(imagesNew[imageIx], cmap='gray_r')
        axis.axis('tight')
        axis.axis('off')

## Question 1, Part 4

Repeat the procedure in Problem 1.3 for your second-to-the-last layer output. This time, you should have a 1024-dim vector per sample. Choose 10 random dimensions of interest and repeat the procedure in 1.3 as if the 10 out of 1024 dimensions were your output vectors. Note that some dimensions can have fewer than 10 images associated with them, because they are not popular. In your 10 × 10 grid, there must now be some rows that do not have enough images, or even empty rows. Explain your observation compared with the results from 1.3. What can you see? What would have been the ideal situation for this second-to-the-last layer? Feel free to investigate the other layers if you want, but I wouldn’t care because we have a better way.



### Data preparation and layer capture

In [None]:
## gather data
# images: first 1000 training images, flattened and preprocessed the same way
# as the training transform (scale to [0, 1], then normalize with
# mean 0.1307 / std 0.3081)
rawPixels = trainDataset.data[:1000].float()
images = ((rawPixels / 255.0 - 0.1307) / 0.3081).reshape(-1, 28*28).to(device)

# target labels
targets = trainDataset.targets[:1000]

# hook the output layer: the *input* it receives is the activation of the
# second-to-last layer. The hook is removed right after the forward pass so
# hooks do not accumulate when this cell is re-run.
imageActivations = ForwardHook(model.outputLayer)
try:
    with torch.no_grad():
        predictions = model(images)
finally:
    imageActivations.remove()

## capture second to last layer
# second to last layer data (log-softmax over all units of the layer)
secondToLast = F.log_softmax(imageActivations.layerIn[0], dim=1).cpu().detach()

# draw 10 distinct units at random and treat them as a faux output layer;
# redraw until at least 5 different units "win" the argmax for some sample
minNonZero = 0
while minNonZero < 5:
    # replace=False guarantees the same unit is never picked twice
    randomIndices = np.random.choice(secondToLast.shape[1], size=10, replace=False)
    fauxSoftmaxPreds = torch.argmax(secondToLast[:, randomIndices],dim=1)
    minNonZero = len(torch.unique(fauxSoftmaxPreds))

# image data
imagesNew = images.cpu().numpy().reshape(-1, 28, 28)

### Sample visualization grid

In [None]:
## plot data: one row of subplots per selected unit
imagesPerClass = 10

# winning unit per sample as a plain NumPy array, so the index lookup below
# does not depend on how NumPy functions treat torch tensors
winningUnits = np.asarray(fauxSoftmaxPreds)

for label in range(10):
    fig, axes = plt.subplots(1, imagesPerClass, figsize=(20,2))
    rowAxes = axes.flatten()

    # hide frames and ticks everywhere; rows with too few images stay blank
    for axis in rowAxes:
        axis.axis('off')

    # first samples (in dataset order) for which unit `label` wins; the
    # result is simply empty when the unit never wins
    classIndices = np.flatnonzero(winningUnits == label)[:imagesPerClass]

    # draw directly on the axes created by plt.subplots
    for axis, imageIx in zip(rowAxes, classIndices):
        axis.imshow(imagesNew[imageIx], cmap='gray_r')
        axis.axis('tight')
        axis.axis('off')

## Question 1, Part 6

First, take a thousand test samples from your MNIST dataset. Apply tSNE and PCA on the flattened 784-dim pixels. Now you have a 2 × 1000 (or 1000 × 2 if you transposed the data) matrix from each of the dim reduction algorithms. Scatter plot the data samples. USE THE LABELS OF THE DATA SAMPLES SO THAT EACH SET OF SAMPLES FROM THE SAME CLASS ARE REPRESENTED WITH THE SAME COLOR. OVERLAY THE CLASS LABEL ON TOP OF THE MEAN OF THE CLASS. By doing so, you can examine if your data is easy to classify or not. Do you think these raw image samples are easy to classify? For your information I share my scatter plot of the first two classes in Figure 1. It looks easy because there are only two classes, but with all 10 classes the situation will be different. Your plot should be similar to this but with all 10 classes.

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

import seaborn as sns

In [None]:
# first 1000 training images as flat 784-value float rows, plus a NumPy copy
# for the scikit-learn estimators
images = trainDataset.data[:1000].view(-1, 28*28).float().to(device)
imagesNumpy = images.cpu().numpy()

### PCA (inputs)

In [None]:
# project the flattened input pixels onto their first two principal components
pca = PCA(n_components=2)
pcaData = pca.fit_transform(imagesNumpy)

# labels of the plotted samples as a plain NumPy array
sampleLabels = np.asarray(targets)

plt.figure(figsize=(20,10), facecolor="white")
sns.scatterplot(
    x=pcaData[:,0], y=pcaData[:,1],
    hue=sampleLabels,
    palette="viridis",
    legend="full",
    alpha=0.9
)
plt.title("PCA - inputs")

# overlay each class label at the centroid (mean) of that class's points; the
# boolean mask selects every sample of the class
for classLabel in np.unique(sampleLabels):
    classMask = sampleLabels == classLabel
    x, y = pcaData[classMask].mean(axis=0)
    plt.text(x, y, str(classLabel), color="black", fontsize=30, fontweight="bold")

### tSNE (inputs)

In [None]:
# embed the flattened input pixels in two dimensions with t-SNE
tsne = TSNE(n_components=2)
tsneData = tsne.fit_transform(imagesNumpy)

# labels of the plotted samples as a plain NumPy array
sampleLabels = np.asarray(targets)

plt.figure(figsize=(20,10), facecolor="white")
sns.scatterplot(
    x=tsneData[:,0], y=tsneData[:,1],
    hue=sampleLabels,
    palette="viridis",
    legend="full",
    alpha=0.9
)
plt.title("tSNE - inputs")

# overlay each class label at the centroid (mean) of that class's points; the
# boolean mask selects every sample of the class
for classLabel in np.unique(sampleLabels):
    classMask = sampleLabels == classLabel
    x, y = tsneData[classMask].mean(axis=0)
    plt.text(x, y, str(classLabel), color="black", fontsize=30, fontweight="bold")

## Question 1, Part 7

Do a feedforward using your classifier. Capture the output of your first hidden layer, which will give you 1024 × 1000 matrix. What that means is that you transformed your input data into a 1024-dim space. You may hope that this makes your classification easier. Check it out by doing tSNE and PCA on this matrix, which will once again give you 2 × 1000 matrix. Scatter plot and check out if this layer gives you a better representation in terms of classification.


### Capture data (1st hidden layer output)

In [None]:
# capture input data: first 1000 training images, flattened and preprocessed
# the same way as the training transform (scale to [0, 1], then normalize
# with mean 0.1307 / std 0.3081)
rawPixels = trainDataset.data[:1000].float()
images = ((rawPixels / 255.0 - 0.1307) / 0.3081).reshape(-1, 28*28).to(device)

# create hook object
# NOTE(review): in fcNet.forward, inputLayer runs before fullyConnected1 and
# the ReLU is applied outside the Linear module, so this captures the
# pre-ReLU output of the second Linear layer — confirm that this is the
# intended "1st hidden layer".
imageActivations = ForwardHook(model.fullyConnected1)

# forward pass (inference only); the hook is always removed afterwards so
# hooks do not accumulate on the layer when this cell is re-run
try:
    with torch.no_grad():
        predictions = model(images)
finally:
    imageActivations.remove()

# extract layer output
fcOneOut = imageActivations.layerOut.cpu()
print("Tensor shape: {}".format(fcOneOut.shape))

### PCA (1st hidden layer output)

In [None]:
# project the captured activations onto their first two principal components
pca = PCA(n_components=2)
pcaData = pca.fit_transform(np.asarray(fcOneOut))

# labels of the plotted samples as a plain NumPy array
sampleLabels = np.asarray(targets)

plt.figure(figsize=(20,10), facecolor="white")
sns.scatterplot(
    x=pcaData[:,0], y=pcaData[:,1],
    hue=sampleLabels,
    palette="viridis",
    legend="full",
    alpha=0.9
)
plt.title("PCA - 1st hidden layer output")

# overlay each class label at the centroid (mean) of that class's points; the
# boolean mask selects every sample of the class
for classLabel in np.unique(sampleLabels):
    classMask = sampleLabels == classLabel
    x, y = pcaData[classMask].mean(axis=0)
    plt.text(x, y, str(classLabel), color="black", fontsize=30, fontweight="bold")

### tSNE (1st hidden layer output)


In [None]:
# embed the captured activations in two dimensions with t-SNE
tsne = TSNE(n_components=2)
tsneData = tsne.fit_transform(np.asarray(fcOneOut))

# labels of the plotted samples as a plain NumPy array
sampleLabels = np.asarray(targets)

plt.figure(figsize=(20,10), facecolor="white")
sns.scatterplot(
    x=tsneData[:,0], y=tsneData[:,1],
    hue=sampleLabels,
    palette="viridis",
    legend="full",
    alpha=0.9
)
plt.title("tSNE - 1st hidden layer output")

# overlay each class label at the centroid (mean) of that class's points; the
# boolean mask selects every sample of the class
for classLabel in np.unique(sampleLabels):
    classMask = sampleLabels == classLabel
    x, y = tsneData[classMask].mean(axis=0)
    plt.text(x, y, str(classLabel), color="black", fontsize=30, fontweight="bold")

## Question 1, Part 8

Repeat this procedure for all your layers including the last one. Explain your observation.

### 2nd hidden layer output

#### Capture data (2nd hidden layer output)

In [None]:
# model inputs: first 1000 training images, flattened and preprocessed the
# same way as the training transform (scale to [0, 1], then normalize with
# mean 0.1307 / std 0.3081)
rawPixels = trainDataset.data[:1000].float()
images = ((rawPixels / 255.0 - 0.1307) / 0.3081).reshape(-1, 28*28).to(device)

# create hook object
imageActivations = ForwardHook(model.fullyConnected2)

# forward pass (inference only); the hook is always removed afterwards so
# hooks do not accumulate on the layer when this cell is re-run
try:
    with torch.no_grad():
        predictions = model(images)
finally:
    imageActivations.remove()

# extract layer output
fcTwoOut = imageActivations.layerOut.cpu()
print("Tensor shape: {}".format(fcTwoOut.shape))

#### PCA (2nd hidden layer output)

In [None]:
# project the captured activations onto their first two principal components
pca = PCA(n_components=2)
pcaData = pca.fit_transform(np.asarray(fcTwoOut))

# labels of the plotted samples as a plain NumPy array
sampleLabels = np.asarray(targets)

plt.figure(figsize=(20,10), facecolor="white")
sns.scatterplot(
    x=pcaData[:,0], y=pcaData[:,1],
    hue=sampleLabels,
    palette="viridis",
    legend="full",
    alpha=0.9
)
plt.title("PCA - 2nd hidden layer output")

# overlay each class label at the centroid (mean) of that class's points; the
# boolean mask selects every sample of the class
for classLabel in np.unique(sampleLabels):
    classMask = sampleLabels == classLabel
    x, y = pcaData[classMask].mean(axis=0)
    plt.text(x, y, str(classLabel), color="black", fontsize=30, fontweight="bold")

#### tSNE (2nd hidden layer output)


In [None]:
# embed the captured activations in two dimensions with t-SNE
tsne = TSNE(n_components=2)
tsneData = tsne.fit_transform(np.asarray(fcTwoOut))

# labels of the plotted samples as a plain NumPy array
sampleLabels = np.asarray(targets)

plt.figure(figsize=(20,10), facecolor="white")
sns.scatterplot(
    x=tsneData[:,0], y=tsneData[:,1],
    hue=sampleLabels,
    palette="viridis",
    legend="full",
    alpha=0.9
)
plt.title("tSNE - 2nd hidden layer output")

# overlay each class label at the centroid (mean) of that class's points; the
# boolean mask selects every sample of the class
for classLabel in np.unique(sampleLabels):
    classMask = sampleLabels == classLabel
    x, y = tsneData[classMask].mean(axis=0)
    plt.text(x, y, str(classLabel), color="black", fontsize=30, fontweight="bold")

### 3rd hidden layer output

#### Capture data (3rd hidden layer output)

In [None]:
# model inputs: first 1000 training images, flattened and preprocessed the
# same way as the training transform (scale to [0, 1], then normalize with
# mean 0.1307 / std 0.3081)
rawPixels = trainDataset.data[:1000].float()
images = ((rawPixels / 255.0 - 0.1307) / 0.3081).reshape(-1, 28*28).to(device)

# create hook object
imageActivations = ForwardHook(model.fullyConnected3)

# forward pass (inference only); the hook is always removed afterwards so
# hooks do not accumulate on the layer when this cell is re-run
try:
    with torch.no_grad():
        predictions = model(images)
finally:
    imageActivations.remove()

# extract layer output
fcThreeOut = imageActivations.layerOut.cpu()
print("Tensor shape: {}".format(fcThreeOut.shape))

#### PCA (3rd hidden layer output)

In [None]:
# project the captured activations onto their first two principal components
pca = PCA(n_components=2)
pcaData = pca.fit_transform(np.asarray(fcThreeOut))

# labels of the plotted samples as a plain NumPy array
sampleLabels = np.asarray(targets)

plt.figure(figsize=(20,10), facecolor="white")
sns.scatterplot(
    x=pcaData[:,0], y=pcaData[:,1],
    hue=sampleLabels,
    palette="viridis",
    legend="full",
    alpha=0.9
)
plt.title("PCA - 3rd hidden layer output")

# overlay each class label at the centroid (mean) of that class's points; the
# boolean mask selects every sample of the class
for classLabel in np.unique(sampleLabels):
    classMask = sampleLabels == classLabel
    x, y = pcaData[classMask].mean(axis=0)
    plt.text(x, y, str(classLabel), color="black", fontsize=30, fontweight="bold")

#### tSNE (3rd hidden layer output)


In [None]:
# embed the captured activations in two dimensions with t-SNE
tsne = TSNE(n_components=2)
tsneData = tsne.fit_transform(np.asarray(fcThreeOut))

# labels of the plotted samples as a plain NumPy array
sampleLabels = np.asarray(targets)

plt.figure(figsize=(20,10), facecolor="white")
sns.scatterplot(
    x=tsneData[:,0], y=tsneData[:,1],
    hue=sampleLabels,
    palette="viridis",
    legend="full",
    alpha=0.9
)
plt.title("tSNE - 3rd hidden layer output")

# overlay each class label at the centroid (mean) of that class's points; the
# boolean mask selects every sample of the class
for classLabel in np.unique(sampleLabels):
    classMask = sampleLabels == classLabel
    x, y = tsneData[classMask].mean(axis=0)
    plt.text(x, y, str(classLabel), color="black", fontsize=30, fontweight="bold")

### 4th hidden layer output

#### Capture data (4th hidden layer output)

In [None]:
# model inputs: first 1000 training images, flattened and preprocessed the
# same way as the training transform (scale to [0, 1], then normalize with
# mean 0.1307 / std 0.3081)
rawPixels = trainDataset.data[:1000].float()
images = ((rawPixels / 255.0 - 0.1307) / 0.3081).reshape(-1, 28*28).to(device)

# create hook object
imageActivations = ForwardHook(model.fullyConnected4)

# forward pass (inference only); the hook is always removed afterwards so
# hooks do not accumulate on the layer when this cell is re-run
try:
    with torch.no_grad():
        predictions = model(images)
finally:
    imageActivations.remove()

# extract layer output
fcFourOut = imageActivations.layerOut.cpu()
print("Tensor shape: {}".format(fcFourOut.shape))

#### PCA (4th hidden layer output)

In [None]:
# project the captured activations onto their first two principal components
pca = PCA(n_components=2)
pcaData = pca.fit_transform(np.asarray(fcFourOut))

# labels of the plotted samples as a plain NumPy array
sampleLabels = np.asarray(targets)

plt.figure(figsize=(20,10), facecolor="white")
sns.scatterplot(
    x=pcaData[:,0], y=pcaData[:,1],
    hue=sampleLabels,
    palette="viridis",
    legend="full",
    alpha=0.9
)
plt.title("PCA - 4th hidden layer output")

# overlay each class label at the centroid (mean) of that class's points; the
# boolean mask selects every sample of the class
for classLabel in np.unique(sampleLabels):
    classMask = sampleLabels == classLabel
    x, y = pcaData[classMask].mean(axis=0)
    plt.text(x, y, str(classLabel), color="black", fontsize=30, fontweight="bold")

#### tSNE (4th hidden layer output)


In [None]:
# embed the captured activations in two dimensions with t-SNE
tsne = TSNE(n_components=2)
tsneData = tsne.fit_transform(np.asarray(fcFourOut))

# labels of the plotted samples as a plain NumPy array
sampleLabels = np.asarray(targets)

plt.figure(figsize=(20,10), facecolor="white")
sns.scatterplot(
    x=tsneData[:,0], y=tsneData[:,1],
    hue=sampleLabels,
    palette="viridis",
    legend="full",
    alpha=0.9
)
plt.title("tSNE - 4th hidden layer output")

# overlay each class label at the centroid (mean) of that class's points; the
# boolean mask selects every sample of the class
for classLabel in np.unique(sampleLabels):
    classMask = sampleLabels == classLabel
    x, y = tsneData[classMask].mean(axis=0)
    plt.text(x, y, str(classLabel), color="black", fontsize=30, fontweight="bold")

### 5th hidden layer output

#### Capture data (5th hidden layer output)

In [None]:
# model inputs: first 1000 training images, flattened and preprocessed the
# same way as the training transform (scale to [0, 1], then normalize with
# mean 0.1307 / std 0.3081)
rawPixels = trainDataset.data[:1000].float()
images = ((rawPixels / 255.0 - 0.1307) / 0.3081).reshape(-1, 28*28).to(device)

# create hook object
imageActivations = ForwardHook(model.fullyConnected5)

# forward pass (inference only); the hook is always removed afterwards so
# hooks do not accumulate on the layer when this cell is re-run
try:
    with torch.no_grad():
        predictions = model(images)
finally:
    imageActivations.remove()

# extract layer output
fcFiveOut = imageActivations.layerOut.cpu()
print("Tensor shape: {}".format(fcFiveOut.shape))

#### PCA (5th hidden layer output)

In [None]:
# project the captured activations onto their first two principal components
pca = PCA(n_components=2)
pcaData = pca.fit_transform(np.asarray(fcFiveOut))

# labels of the plotted samples as a plain NumPy array
sampleLabels = np.asarray(targets)

plt.figure(figsize=(20,10), facecolor="white")
sns.scatterplot(
    x=pcaData[:,0], y=pcaData[:,1],
    hue=sampleLabels,
    palette="viridis",
    legend="full",
    alpha=0.9
)
plt.title("PCA - 5th hidden layer output")

# overlay each class label at the centroid (mean) of that class's points; the
# boolean mask selects every sample of the class
for classLabel in np.unique(sampleLabels):
    classMask = sampleLabels == classLabel
    x, y = pcaData[classMask].mean(axis=0)
    plt.text(x, y, str(classLabel), color="black", fontsize=30, fontweight="bold")

#### tSNE (5th hidden layer output)


In [None]:
# embed the captured activations in two dimensions with t-SNE
tsne = TSNE(n_components=2)
tsneData = tsne.fit_transform(np.asarray(fcFiveOut))

# labels of the plotted samples as a plain NumPy array
sampleLabels = np.asarray(targets)

plt.figure(figsize=(20,10), facecolor="white")
sns.scatterplot(
    x=tsneData[:,0], y=tsneData[:,1],
    hue=sampleLabels,
    palette="viridis",
    legend="full",
    alpha=0.9
)
plt.title("tSNE - 5th hidden layer output")

# overlay each class label at the centroid (mean) of that class's points; the
# boolean mask selects every sample of the class
for classLabel in np.unique(sampleLabels):
    classMask = sampleLabels == classLabel
    x, y = tsneData[classMask].mean(axis=0)
    plt.text(x, y, str(classLabel), color="black", fontsize=30, fontweight="bold")

### Output layer output

#### Capture data (Output layer output)

In [None]:
# model inputs: first 1000 training images, flattened and preprocessed the
# same way as the training transform (scale to [0, 1], then normalize with
# mean 0.1307 / std 0.3081)
rawPixels = trainDataset.data[:1000].float()
images = ((rawPixels / 255.0 - 0.1307) / 0.3081).reshape(-1, 28*28).to(device)

# create hook object
imageActivations = ForwardHook(model.outputLayer)

# forward pass (inference only); the hook is always removed afterwards so
# hooks do not accumulate on the layer when this cell is re-run
try:
    with torch.no_grad():
        predictions = model(images)
finally:
    imageActivations.remove()

# extract layer output and apply the same log-softmax as fcNet.forward
outputOut = F.log_softmax(imageActivations.layerOut, dim=1).cpu()
print("Tensor shape: {}".format(outputOut.shape))

#### PCA (Output layer output)

In [None]:
# project the captured outputs onto their first two principal components
pca = PCA(n_components=2)
pcaData = pca.fit_transform(np.asarray(outputOut))

# labels of the plotted samples as a plain NumPy array
sampleLabels = np.asarray(targets)

plt.figure(figsize=(20,10), facecolor="white")
sns.scatterplot(
    x=pcaData[:,0], y=pcaData[:,1],
    hue=sampleLabels,
    palette="viridis",
    legend="full",
    alpha=0.9
)
plt.title("PCA - Output layer output")

# overlay each class label at the centroid (mean) of that class's points; the
# boolean mask selects every sample of the class
for classLabel in np.unique(sampleLabels):
    classMask = sampleLabels == classLabel
    x, y = pcaData[classMask].mean(axis=0)
    plt.text(x, y, str(classLabel), color="black", fontsize=30, fontweight="bold")

#### tSNE (Output layer output)


In [None]:
# embed the captured outputs in two dimensions with t-SNE
tsne = TSNE(n_components=2)
tsneData = tsne.fit_transform(np.asarray(outputOut))

# labels of the plotted samples as a plain NumPy array
sampleLabels = np.asarray(targets)

plt.figure(figsize=(20,10), facecolor="white")
sns.scatterplot(
    x=tsneData[:,0], y=tsneData[:,1],
    hue=sampleLabels,
    palette="viridis",
    legend="full",
    alpha=0.9
)
plt.title("tSNE - Output layer output")

# overlay each class label at the centroid (mean) of that class's points; the
# boolean mask selects every sample of the class
for classLabel in np.unique(sampleLabels):
    classMask = sampleLabels == classLabel
    x, y = tsneData[classMask].mean(axis=0)
    plt.text(x, y, str(classLabel), color="black", fontsize=30, fontweight="bold")