# Load the data

You can download the CIFAR10 dataset from the official site:

https://www.cs.toronto.edu/~kriz/cifar.html

or do it using Pytorch:
https://pytorch.org/vision/stable/datasets.html#cifar

or another framework.



In [None]:
# Put your code here
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader, random_split
import numpy as np
from PIL import Image


# Per-image transform: convert the PIL image to a NumPy array and flatten it,
# so a [32, 32, 3] image becomes a single row of 3072 values.
transform = transforms.Compose(
    [transforms.Lambda(lambda image: np.array(image).flatten())]
)

# CIFAR10 training split, stored under the "content" directory, with the
# transform above applied to every image on access.
dataset = datasets.CIFAR10(
    "content",
    train=True,
    transform=transform,
    download=True,
)

# Human-readable (Russian) class names, indexed by class number:
# airplane, automobile, bird, cat, deer, dog, frog, horse, ship, truck.
classes = [
    'Самолет',
    'Автомобиль',
    'Птица',
    'Кошка',
    'Олень',
    'Собака',
    'Лягушка',
    'Лошадь',
    'Корабль',
    'Грузовик',
]

Files already downloaded and verified


## Split data into train and validation parts

Extract subsets for the train and validation parts.
The parts must contain 5000 and 500 samples respectively.

In [None]:
# Put your code here
# Carve the train and validation subsets out of the full training set; the
# remaining samples are discarded. The size of the remainder is derived from
# the dataset length instead of being hard-coded, so the split stays valid
# for a dataset of any size that holds at least train_size + val_size samples.
train_size, val_size = 5000, 500
x_train, x_val, _ = random_split(
    dataset,
    [train_size, val_size, len(dataset) - train_size - val_size],
)

# Integer class number of every training sample, in x_train order.
# DataLoader is used with its default batch size of 1, so each batch carries
# exactly one label; it is taken directly rather than being mapped to a class
# name and searched for again in `classes`.
x_names_indexes = [
    class_nums[0].item() for _images, class_nums in DataLoader(x_train)
]

## Display some images along with their class names
  


In [None]:
# Put your code here
# Preview the data: for every batch of 256 training samples, print the class
# name of the batch's first sample and show that sample as an image.
for batch_images, batch_labels in DataLoader(x_train, batch_size=256):
  first_label = batch_labels[0].item()
  print(classes[first_label])
  # The stored sample is a flat row; restore the 32x32x3 layout for PIL.
  first_pixels = batch_images[0].reshape((32, 32, 3)).numpy()
  display(Image.fromarray(first_pixels))

# Implement NearestNeighbor class

In [None]:
from sklearn.neighbors import NearestNeighbors

class NearestNeighbor:
  """
    1-nearest-neighbour classifier using the L1 (Manhattan) distance.

    Sample collections only need to support ``len()`` and integer indexing.
    Each sample may be either a ``(features, label)`` tuple (the label part is
    ignored) or a bare array of features.
  """

  def __init__(self):
    self.x = None
    self.y = None
    # Training features stacked into one 2-D float64 matrix; built by fit().
    self._train_features = None

  @staticmethod
  def _features(sample):
    """Return the features of one sample as a flat float64 array."""
    if isinstance(sample, tuple):
      # (features, label) pair: only the features take part in the distance.
      sample = sample[0]
    # Convert to float64 *before* any subtraction. Unsigned pixel data
    # (uint8) would otherwise wrap around, e.g. 0 - 10 == 246, and the
    # resulting "distances" would pick the wrong neighbour.
    return np.asarray(sample, dtype=np.float64).ravel()

  def fit(self, x, y):
    """
      Memorize the training data.

      Arguments:
        x  (Tensor or numpy.array): collection of objects from the training set
        y  (Tensor or numpy.array): collection of integers
        representing a class number for objects from x

      Raises:
        ValueError: x and y have different lengths
    """
    if len(x) != len(y):
      raise ValueError(
          "x and y must have the same length, got {} and {}".format(len(x), len(y)))
    self.x = x
    self.y = y
    # Read every training sample exactly once here instead of once per query
    # in predict(); indexing a dataset may re-run its transform each time.
    rows = [self._features(x[i]) for i in range(len(x))]
    self._train_features = np.stack(rows) if rows else np.empty((0, 0))

  def predict(self, x):
    """
      Assign to every object the class of its nearest training object.

      Arguments:
          x  (Tensor or numpy.array): collection of objects to classify (batch)

      Returns:
          class_num (numpy.array) - collection of class numbers (stored as
          floats) for objects from x, in the same order

      Raises:
          RuntimeError: fit() has not been called yet
          ValueError: x is not empty but the training set is
    """
    train_features = getattr(self, "_train_features", None)
    if train_features is None:
      raise RuntimeError("fit() must be called before predict()")

    labels = np.zeros(len(x))
    if len(x) > 0 and len(train_features) == 0:
      raise ValueError("cannot predict with an empty training set")

    for i in range(len(x)):
      query = self._features(x[i])
      # L1 distance from the query to every training object at once.
      distances = np.abs(train_features - query).sum(axis=1)
      # argmin returns the first minimum, so ties go to the earliest sample.
      labels[i] = self.y[np.argmin(distances)]
    return labels


## Perform smoke test

- Create model instance
- Get predictions for a dozen samples


In [None]:
# Model under test.
model_nn = NearestNeighbor()

# Draw 10 random samples from the CIFAR10 test split for the smoke test;
# the other 9990 samples are discarded.
test_dataset = datasets.CIFAR10(
    "content",
    train=False,
    transform=transform,
    download=True,
)
x_test, _ = random_split(test_dataset, [10, 9990])

# True class *names* (not numbers) of the smoke-test samples, in x_test order.
x_test_names_indexes = [
    classes[class_nums[0].item()] for _images, class_nums in DataLoader(x_test)
]

# Fit on the training subset, then print "predicted vs actual" for each sample.
model_nn.fit(x_train, x_names_indexes)
predicted_labels = model_nn.predict(x_test)
for label, true_name in zip(predicted_labels, x_test_names_indexes):
  print(classes[int(label)], "vs", true_name)


Files already downloaded and verified
Корабль vs Грузовик
Автомобиль vs Автомобиль
Самолет vs Птица
Корабль vs Корабль
Корабль vs Собака
Корабль vs Корабль
Птица vs Лошадь
Птица vs Олень
Олень vs Самолет
Птица vs Собака


# Validate your model

## Create validation function

Must calculate Accuracy metric for your model.

In [None]:
def validate(model, x, y):
  """
      Compute the accuracy of ``model`` on objects ``x`` with true labels ``y``.

      Arguments:
          model (Object): fitted classifier exposing ``predict(x)``
          x (Tensor or numpy.array): collection of objects
          y (Tensor or numpy.array): collection of integers representing
          class numbers for objects from x; class-name strings are also
          accepted and are compared through the notebook-level ``classes`` list

      Returns:
          accuracy (float) : fraction (0.0 to 1.0) of correctly classified
          objects; 0.0 when there are no objects

      Raises:
          ValueError: the model returned a different number of predictions
          than there are labels in y
    """
  # Use the arguments that were passed in, not notebook globals, so the
  # function scores whichever model and data the caller asks for.
  predicted_labels = model.predict(x)
  total = len(y)
  if len(predicted_labels) != total:
    raise ValueError(
        "got {} predictions for {} labels".format(len(predicted_labels), total))
  if total == 0:
    # Nothing to score; avoid dividing by zero.
    return 0.0

  correct = 0
  for predicted, expected in zip(predicted_labels, y):
    if isinstance(expected, str):
      # Label given as a class name: translate the predicted class number.
      is_match = classes[int(predicted)] == expected
    else:
      is_match = int(predicted) == int(expected)
    if is_match:
      correct += 1
  return correct / total

 ## Calculate model accuracy on validation data

In [None]:
# Score the model and report the result with three decimal places.
# NOTE(review): x_test is the 10-sample smoke-test subset; the x_val split
# created earlier is not used here - confirm this is intended.
accuracy = validate(model=model_nn, x=x_test, y=x_test_names_indexes)
print("Accuracy {:.3f}".format(accuracy))

Accuracy 0.300


# Place for brief conclusion
Feel free to describe troubles here.


...



# Ideas for extra work

- Implement K-NearestNeighbor
- Test different distance functions
- Find hyperparams using Cross-Validation
- Plot accuracy curve against K
- Evaluate time  