In [10]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
import torchvision.transforms as transforms

In [7]:
# Fetch the CIFAR-10 training split into ./datasets/CIFAR10 (downloaded if
# missing). No transform is attached to this copy of the dataset.
training_data = datasets.CIFAR10(
    root="./datasets/CIFAR10",
    train=True,
    download=True,
)

100%|██████████| 170498071/170498071 [01:44<00:00, 1639019.46it/s]


Extracting ./datasets/CIFAR10/cifar-10-python.tar.gz to ./datasets/CIFAR10


In [12]:
# Per-image preprocessing: convert the image to a PyTorch tensor, then
# normalise each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
])
transform

Compose(
    ToTensor()
    Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
)

In [14]:
# CIFAR-10 training split with `transform` applied to every image
# (downloaded only if it is not already on disk).
train_dataset = datasets.CIFAR10(
    root='./datasets/CIFAR10',
    train=True,
    download=True,
    transform=transform,
)
# A single batch covering the whole split, kept in dataset order.
train_loader = DataLoader(
    train_dataset,
    batch_size=len(train_dataset),
    shuffle=False,
)

Files already downloaded and verified


In [16]:
# CIFAR-10 test split with `transform` applied to every image
# (downloaded only if it is not already on disk).
test_dataset = datasets.CIFAR10(
    root='./datasets/CIFAR10',
    train=False,
    download=True,
    transform=transform,
)
# A single batch covering the whole split, kept in dataset order.
test_loader = DataLoader(
    test_dataset,
    batch_size=len(test_dataset),
    shuffle=False,
)

Files already downloaded and verified


In [21]:
# Each loader yields exactly one batch that spans its whole split, so taking
# the first batch loads every image and label of both the training and the
# test set into memory.
(Xtr, Ytr), (Xte, Yte) = (
    next(iter(loader)) for loader in (train_loader, test_loader)
)

In [24]:
# Sanity check: report the shape of every tensor, one per line.
print(
    *(
        ' '.join((caption, str(tensor.shape)))
        for caption, tensor in (
            ('Training data shape:', Xtr),
            ('Training labels shape:', Ytr),
            ('Test data shape:', Xte),
            ('Test labels shape:', Yte),
        )
    ),
    sep='\n',
)

Training data shape: torch.Size([50000, 3, 32, 32])
Training labels shape: torch.Size([50000])
Test data shape: torch.Size([10000, 3, 32, 32])
Test labels shape: torch.Size([10000])


In [31]:
# Flatten each 3 x 32 x 32 image into a single row of 3072 values so that
# every example is one row of a 2-D matrix.
Xtr_rows, Xte_rows = (
    images.reshape(images.size(0), 3 * 32 * 32) for images in (Xtr, Xte)
)
Xtr_rows.shape, Xte_rows.shape

(torch.Size([50000, 3072]), torch.Size([10000, 3072]))

In [35]:
import numpy as np

class NearestNeighbor(object):
  """1-nearest-neighbour classifier using the L1 (sum of absolute differences) distance.

  "Training" only stores the examples; all of the work happens in predict(),
  which compares every query row against every stored training row.
  Inputs are expected to be torch tensors.
  """

  def __init__(self):
    # Filled in by train(); None means the classifier has not been trained.
    self.Xtr = None
    self.ytr = None

  def train(self, X, y):
    """Remember the training data.

    Args:
      X: N x D tensor where each row is an example.
      y: 1-dimensional tensor of size N holding the label of each row of X.

    Raises:
      ValueError: if X has no rows, or X and y disagree on N.
    """
    if X.shape[0] == 0:
      raise ValueError("training set must contain at least one example")
    if X.shape[0] != y.shape[0]:
      raise ValueError(
        "X has %d rows but y has %d labels" % (X.shape[0], y.shape[0]))
    # the nearest neighbor classifier simply remembers all the training data
    self.Xtr = X
    self.ytr = y

  def predict(self, X):
    """Predict a label for every row of X.

    Args:
      X: M x D tensor where each row is an example to classify.

    Returns:
      1-dimensional tensor of size M with the same dtype (and device) as the
      training labels; entry i is the label of the training row closest to
      X[i] in L1 distance. On ties the first such training row wins.

    Raises:
      RuntimeError: if train() has not been called yet.
    """
    # getattr keeps this check safe even for instances built without __init__
    if getattr(self, "Xtr", None) is None or getattr(self, "ytr", None) is None:
      raise RuntimeError("train() must be called before predict()")

    num_test = X.shape[0]
    # make sure the output type (and device) matches the training labels
    Ypred = torch.zeros(num_test, dtype=self.ytr.dtype, device=self.ytr.device)

    # loop over all test rows; one row at a time keeps the temporary
    # (N x D) difference matrix as the only large allocation
    for i in range(num_test):
      # L1 distance from the i'th test row to every training row.
      # torch.abs (not np.abs) keeps the computation inside PyTorch, so it
      # also works for tensors that cannot be converted to NumPy.
      distances = torch.sum(torch.abs(self.Xtr - X[i, :]), dim=1)
      min_index = torch.argmin(distances) # index of the smallest distance
      Ypred[i] = self.ytr[min_index] # predict the label of the nearest example

    return Ypred

In [36]:
nn = NearestNeighbor() # create a Nearest Neighbor classifier class
nn.train(Xtr_rows, Ytr) # train the classifier on the training images and labels
Yte_predict = nn.predict(Xte_rows) # predict labels on the test images
# and now print the classification accuracy, which is the fraction of test
# examples that are correctly predicted (i.e. label matches).
# The comparison yields a boolean torch tensor, which np.mean cannot reduce
# (it forwards NumPy-only keyword arguments to Tensor.mean), so the mean is
# taken with torch after casting to float.
print ('accuracy: %f' % ( (Yte_predict == Yte).float().mean().item() ))