In [1]:
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from PIL import Image

In [22]:
data = [] # data list
labels = [] # label list

# iterate over the training images in folder
for folder in os.listdir('training//'):
    for file in os.listdir('training//'+folder+'//'):
        img = Image.open('training//'+folder+'//'+file)
        img_width, img_height = img.size
        # crop image at center
        img = img.crop(((img_width - 200) // 2,(img_height - 200) // 2,(img_width + 200) // 2,(img_height + 200) // 2))
        # resize image to image with height and width 16
        img = img.resize((16,16))
        # convert to array
        img = np.array(img)
        # standardize images ensuring all pixels values have mean of zero and standard deviation of 1
        img = (img - img.mean()) / 255.0
        # reshape the images to 16x16
        img = img.reshape(16*16)
        # append the final processed image to data list
        data.append(img)
        # append label to label list
        labels.append(folder)

In [50]:
from sklearn.model_selection import train_test_split

# split data into train and test set with 10% data in the test set.
train_data, test_data, train_label, test_label = train_test_split(data, labels, test_size=0.1, random_state = 2020)

In [51]:
from sklearn.neighbors import KNeighborsClassifier

# instantiate k neighbour classifier with 100 clusters
neigh = KNeighborsClassifier(n_neighbors=100)
# fit classifier on training data and train labels
neigh.fit(train_data, train_label)

KNeighborsClassifier(n_neighbors=100)

In [52]:
from sklearn.metrics import accuracy_score
# calculate accuracy of test data
accuracy_score(test_label, neigh.predict(test_data))

0.18

In [53]:
from sklearn.metrics import classification_report

# generate the classification report
print(classification_report(test_label, neigh.predict(test_data)))

              precision    recall  f1-score   support

       Coast       0.20      0.17      0.18        12
      Forest       0.08      0.67      0.14         6
     Highway       0.16      0.79      0.27        14
  Insidecity       0.00      0.00      0.00        10
    Mountain       1.00      0.10      0.18        10
      Office       1.00      0.10      0.18        10
 OpenCountry       0.18      0.17      0.17        12
      Street       0.67      0.15      0.25        13
      Suburb       1.00      0.40      0.57        10
TallBuilding       0.00      0.00      0.00         8
     bedroom       0.00      0.00      0.00         7
  industrial       0.00      0.00      0.00         7
     kitchen       0.00      0.00      0.00        12
  livingroom       0.00      0.00      0.00         8
       store       0.00      0.00      0.00        11

    accuracy                           0.18       150
   macro avg       0.29      0.17      0.13       150
weighted avg       0.31   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [46]:
from sklearn.model_selection import GridSearchCV

parameters = {'n_neighbors':[30, 40, 50, 60, 70, 80, 90, 100,110, 120], 'algorithm' : ['auto', 'ball_tree', 'kd_tree', 'brute'], 
              'leaf_size': [20, 25, 30, 50], 'metric': ['minkowski']}

grid_search = GridSearchCV(KNeighborsClassifier(), parameters)
grid_search.fit(train_data, train_label)

GridSearchCV(estimator=KNeighborsClassifier(),
             param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                         'leaf_size': [20, 25, 30, 50], 'metric': ['minkowski'],
                         'n_neighbors': [30, 40, 50, 60, 70, 80, 90, 100, 110,
                                         120]})

In [47]:
grid_search.best_score_

0.17333333333333334

In [48]:
grid_search.best_params_

{'algorithm': 'auto',
 'leaf_size': 20,
 'metric': 'minkowski',
 'n_neighbors': 30}

In [49]:
y_pred = grid_search.predict(test_data)
print('accuracy:', accuracy_score(test_label, y_pred))
print(classification_report(test_label, y_pred))

accuracy: 0.2
              precision    recall  f1-score   support

       Coast       0.09      0.15      0.11        13
      Forest       0.24      0.57      0.33        14
     Highway       0.24      0.85      0.38        13
  Insidecity       0.00      0.00      0.00         7
    Mountain       0.00      0.00      0.00         5
      Office       0.00      0.00      0.00        11
 OpenCountry       0.15      0.40      0.22        10
      Street       0.33      0.50      0.40         6
      Suburb       1.00      0.22      0.36         9
TallBuilding       0.00      0.00      0.00         5
     bedroom       0.00      0.00      0.00        11
  industrial       0.00      0.00      0.00        13
     kitchen       0.00      0.00      0.00         8
  livingroom       0.00      0.00      0.00        13
       store       0.00      0.00      0.00        12

    accuracy                           0.20       150
   macro avg       0.14      0.18      0.12       150
weighted avg

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
