<a href="https://colab.research.google.com/github/thanit456/computervision_playground/blob/master/knn_image_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf
import numpy as np
import tqdm

In [0]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

In [30]:
x_train_rows = x_train.reshape(-1, 32*32*3)
x_test_rows = x_test.reshape(-1, 32*32*3)
print(x_train_rows.shape)
print(x_test_rows.shape)

(50000, 3072)
(10000, 3072)


In [0]:
class NearestNeigbour(object):
  def __init__(self, mode='L1'):
    self.mode = mode
  def train(self, x, y):
    self.x_train = x
    self.y_train = y
  def predict(self, x, k=None):
    if k is None:
      num_test = x.shape[0]
    else:
      num_test = k
    y_pred = np.zeros(num_test, dtype=self.y_train.dtype)
    
    for i in tqdm.tqdm(range(num_test)):
      # using L1 distance
      if self.mode == "L1":
        distances = np.sum(np.abs(self.x_train - x[i, :]), axis=1)
      elif self.mode == "L2":
        distances = np.sqrt(np.sum(np.square(self.x_train - x[i, :]), axis=1))
      min_index = np.argmin(distances)
      y_pred[i] = self.y_train[min_index]
    
    return y_pred

# training set and testing set

In [66]:
nn = NearestNeigbour()
nn.train(x_train_rows, y_train)
y_pred = nn.predict(x_test_rows)
print('accuracy : {%f}'.format(np.mean(y_pred == y_test)))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  3%|▎         | 1416/50000 [05:49<3:21:14,  4.02it/s][A
  3%|▎         | 1417/50000 [05:49<3:20:40,  4.03it/s][A
  3%|▎         | 1418/50000 [05:49<3:19:48,  4.05it/s][A
  3%|▎         | 1419/50000 [05:50<3:19:57,  4.05it/s][A
  3%|▎         | 1420/50000 [05:50<3:20:26,  4.04it/s][A
  3%|▎         | 1421/50000 [05:50<3:19:17,  4.06it/s][A
  3%|▎         | 1422/50000 [05:50<3:18:17,  4.08it/s][A
  3%|▎         | 1423/50000 [05:51<3:19:41,  4.05it/s][A
  3%|▎         | 1424/50000 [05:51<3:18:58,  4.07it/s][A
  3%|▎         | 1425/50000 [05:51<3:22:24,  4.00it/s][A
  3%|▎         | 1426/50000 [05:51<3:21:16,  4.02it/s][A
  3%|▎         | 1427/50000 [05:52<3:21:04,  4.03it/s][A
  3%|▎         | 1428/50000 [05:52<3:20:53,  4.03it/s][A
  3%|▎         | 1429/50000 [05:52<3:19:31,  4.06it/s][A
  3%|▎         | 1430/50000 [05:52<3:20:04,  4.05it/s][A
  3%|▎         | 1431/50000 [05:53<3:20:32,  4.04it/s][A
  3%|▎ 

KeyboardInterrupt: ignored

# training set, validation set and testing set

In [0]:
x_val_rows = x_train_rows[:1000, :]
y_val = y_train[:1000]
x_train_rows = x_train_rows[1000:, :]
y_train = y_train[1000:]

validation_accuracies = []
for i in range(100):
  nn = NearestNeighbour(mode="L2")
  nn.train(x_train_rows, y_train)
  
  if i % 10 == 9:
    y_val_pred = nn.predict(x_val_rows)
    val_acc = np.mean(y_val_pred == y_vals)
    print('validation acc : {%f}'.format(val_acc))
    validation_accuracies.append((i, val_acc)) 

# Stratify train : val : test

In [31]:
x_rows = x_train_rows
y = y_train
print(x_rows.shape)
print(y.shape)

(50000, 3072)
(50000, 1)


In [32]:
from sklearn.model_selection import StratifiedKFold

# 5-fold cross validation
num_fold = 5
folds = []
skf = StratifiedKFold(n_splits=num_fold, random_state=None, shuffle=False)
for train_index, val_test_index in skf.split(x_train_rows, y_train):
  x_train, y_train = x_train_rows[train_index], y_train[train_index]
  x_val, y_val = x_train_rows[val_index], y_train[val_index]
  folds.append(((x_train, y_train), (x_val, y_val)))

for i in range(num_fold):

  (x_train_rows, y_train), (x_val_rows, y_val) = folds[i]

  validation_accuracies = []
  for i in range(100):
    nn = NearestNeighbour(mode="L2")
    nn.train(x_train_rows, y_train)
    
    if i % 10 == 9:
      y_val_pred = nn.predict(x_val)
      val_acc = np.mean(y_val_pred == y_val)
      print('validation acc : {%f}'.format(val_acc))
      validation_accuracies.append((i, val_acc)) 
      
  print('Fold {} contains val_acc : {}'.format(i+1,  validation_accuracies))

40000 10000
40000 10000
40000 10000
40000 10000
40000 10000
