# Exploration of the dataset

Because a simple linear model already reaches very high accuracy on this dataset, we formulate the following hypothesis:

__Hypothesis__: The dataset is close to linearly separable.

We'll explore different visualization techniques to represent the dataset.

#### Import libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
from torchvision import datasets as dt, transforms

from torch.utils.data import DataLoader
import torch.utils.tensorboard as tensorboard

import numpy as np
import matplotlib.pyplot as plt

import helpers.NNUtils as nnu

In [2]:
# Run on the GPU when torch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Fixed seed for torch's RNG. TODO: check whether seeding is actually necessary here.
torch.manual_seed(1976);

#### Import data

In [3]:
# Fetch the three dataset splits from the project helper, then wrap them in
# loaders that yield batches of 1000 samples.
# NOTE(review): the unpack order is (train, test, val) for both calls --
# confirm it matches what helpers.NNUtils actually returns.
train_dataset, test_dataset, val_dataset = nnu.loadMNISTDatasets()
train_loader, test_loader, val_loader = nnu.getMNISTLoaders(
    [train_dataset, test_dataset, val_dataset],
    batch_size=1000,
)

## Visualization using Tensorboard

(Some code taken from: https://pytorch.org/tutorials/intermediate/tensorboard_tutorial.html)

#### Image helper and TensorBoard writer

In [4]:
def matplotlib_imshow(img, one_channel=False):
    """Draw an image tensor with ``plt.imshow``.

    Adapted from the PyTorch TensorBoard tutorial.

    Args:
        img: torch tensor holding a single image. It is treated as
            channel-first: dim 0 is either averaged away or moved last.
            Values are assumed to have been normalized with mean 0.5 and
            std 0.5 -- TODO confirm against the loader's transforms.
        one_channel: if True, average over dim 0 and render the resulting
            array with the "Greys" colormap; otherwise render the tensor
            as a multi-channel image.
    """
    if one_channel:
        img = img.mean(dim=0)
    img = img / 2 + 0.5     # unnormalize: x * 0.5 + 0.5
    # detach() and cpu() let tensors that track gradients or live on the GPU
    # be converted as well; a plain CPU tensor converts exactly as before.
    npimg = img.detach().cpu().numpy()
    if one_channel:
        plt.imshow(npimg, cmap="Greys")
    else:
        # imshow expects the channel axis last: (C, H, W) -> (H, W, C)
        plt.imshow(np.transpose(npimg, (1, 2, 0)))

In [5]:
# TensorBoard writer; everything logged below goes under this run directory.
writer = tensorboard.SummaryWriter(log_dir='runs/digits_exploration_1')

In [6]:
# Take one batch from the training loader. The built-in next() is used because
# DataLoader iterators in recent PyTorch releases no longer have a `.next()`
# method (only `__next__`), so `iter(loader).next()` raises AttributeError.
features, labels = next(iter(train_loader))
# Drop any channel dimension so each sample is a plain 28x28 image.
features = features.view(-1, 28, 28)

# Logs an all-ones 3x3x3 array under the tag 'image' at step 0.
# NOTE(review): looks like a placeholder; its purpose is not evident from this cell.
writer.add_image('image', np.ones((3,3,3)), 0)
print(features.unsqueeze(1).shape)
# Embed each image as a flat 784-vector, tagged with its label; the thumbnails
# are the same images rescaled by x/2 + 0.5 with a channel dimension re-added.
writer.add_embedding(features.view(-1, 28*28), metadata=labels, label_img=(features/2.0+0.5).unsqueeze(1), global_step=0)
writer.close()

torch.Size([1000, 1, 28, 28])
