<a href="https://colab.research.google.com/github/prajachintya/Course_IORS/blob/main/KNN_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**1) Import libraries**


In [None]:
!pip install earthpy

In [290]:
import math

from osgeo import gdal
from gdalconst import *
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sn
from scipy.spatial import distance_matrix
import earthpy.plot as ep
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score

**2) Import image**

In [None]:
# Mount Google Drive at /content/drive (Colab only); the raster and CSV files
# read by later cells live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Switch the working directory to the image folder on the mounted drive.
%cd /content/drive/MyDrive/Colab Notebooks/citra

In [293]:
# Open the source raster in read-only mode.
filename = "/content/drive/MyDrive/Colab Notebooks/citra/citra_new.tif"

# Use the constant exposed by osgeo.gdal rather than relying on a star import.
dataset = gdal.Open(filename, gdal.GA_ReadOnly)

# Without gdal.UseExceptions(), gdal.Open signals failure by returning None;
# fail here with a clear message instead of an AttributeError in a later cell.
if dataset is None:
    raise FileNotFoundError(f"GDAL could not open raster: {filename}")

In [None]:
# Quick look at a single band to confirm the raster was read correctly.
rb = dataset.GetRasterBand(3)
img_array = rb.ReadAsArray()

# Explicit figure/axes so the preview carries a title and axis labels.
fig, ax = plt.subplots()
ax.imshow(img_array)
ax.set_title("Band 3 (default colormap)")
ax.set_xlabel("column (px)")
ax.set_ylabel("row (px)")
plt.show()

In [None]:
# Shape of the single-band array read above.
img_array.shape

In [296]:
# Pull the raster dimensions and the band count out of the GDAL dataset.
rows, columns = dataset.RasterYSize, dataset.RasterXSize
bands = dataset.RasterCount
N = rows * columns  # number of pixels in one band

In [None]:
# Report the raster dimensions gathered from the dataset.
print("image metadata:")
print(" ".join(str(part) for part in (rows, "rows x", columns, "columns x", bands, "bands")))

**3) Retrieve RGB arrays from input image**

In [298]:
# Read bands 1-3 as integer arrays and stack them into one (rows, columns, 3) image.
array_R, array_G, array_B = (
    dataset.GetRasterBand(band_number).ReadAsArray().astype(int)
    for band_number in (1, 2, 3)
)

# The stacked image is stored as uint8.
# NOTE(review): values above 255 would wrap around in this cast — assumes the
# raster is 8-bit; confirm.
array_RGB = np.dstack((array_R, array_G, array_B)).astype(np.uint8)

# Keep an untouched 3-D copy; array_RGB itself is flattened in a later cell.
array_RGB_copy = array_RGB.copy()

In [None]:
# Shape check before flattening: should be (rows, columns, 3).
array_RGB.shape

In [300]:
# Flatten the image to one row per pixel: (rows * columns, 3).
# -1 lets NumPy infer the pixel count instead of hard-coding the image size, so
# the cell works for any raster and is safe to re-run on the flattened array.
array_RGB = array_RGB.reshape(-1, 3)

In [None]:
# Peek at the flattened pixel table (NumPy abbreviates large arrays on display).
array_RGB

**4) Import samples**

Four classes, with 50 sample points per class:
*   Vegetation (1)
*   Water (2)
*   Build area (3)
*   Road (4)

In [302]:
# Load the training sample table and convert it to a plain list of rows.
df = (
    pd.read_csv('/content/drive/MyDrive/Colab Notebooks/citra/class_samples.csv')
    .to_numpy()
    .tolist()
)

In [None]:
# Show only the first few sample rows rather than dumping the whole list.
df[:5]

**5) Distance matrix calculation**

In [304]:
# For every training sample, compute the Euclidean distance in RGB space between
# that sample's pixel and every pixel of the image.
#
# SciPy's function is imported under a private alias because a later cell rebinds
# the name `distance_matrix` to the resulting array; without the alias this cell
# would fail with "'numpy.ndarray' object is not callable" when re-run.
from scipy.spatial import distance_matrix as _pairwise_distances

# Cast once to float so the subtraction inside the distance computation can never
# wrap around in uint8 arithmetic, whatever the SciPy version does internally.
pixels_rgb = array_RGB.astype(np.float64)

distance_mat = []  # one (n_pixels, 1) distance column per sample

for sample in df:
  # NOTE(review): column 0 of the sample table is used as the image column (x)
  # and column 1 as the image row (y) — confirm against the CSV header.
  row_sample = int(sample[1])
  column_sample = int(sample[0])
  # RGB triple of the sample pixel, shaped (1, 3) as the distance function expects.
  rgb = array_RGB_copy[row_sample, column_sample].astype(np.float64).reshape(1, 3)
  distance_mat.append(_pairwise_distances(pixels_rgb, rgb))

  

In [305]:
# Sanity check: one (n_pixels, 1) distance column per training sample.
np.asarray(distance_mat).shape

(200, 311157, 1)

In [306]:
# Stack the per-sample distance columns into a single array.
# NOTE(review): this rebinds `distance_matrix`, shadowing the SciPy function of the
# same name imported at the top of the notebook.
distance_matrix = np.asarray(distance_mat)

In [307]:
# Drop the trailing length-1 axis: (n_samples, n_pixels, 1) -> (n_samples, n_pixels).
n_samples_axis, n_pixels_axis = distance_matrix.shape[:2]
distance_matrix = distance_matrix.reshape((n_samples_axis, n_pixels_axis), order="C")

In [None]:
# Preview the transposed view; the variable itself is not modified by this cell.
distance_matrix.T

In [None]:
# Shape before transposing.
distance_matrix.shape

In [310]:
# Swap the two axes so that each row holds one pixel's distances to all samples.
# Note: running this cell a second time swaps the axes back again.
distance_matrix = np.transpose(distance_matrix)

In [None]:
# Shape after transposing.
distance_matrix.shape

**6) Sorting distances**

In [312]:
# Rank the samples by distance along each row and keep the k nearest ones.
k = 10
sortedDist = distance_matrix.argsort(axis=-1)
tensmallest = sortedDist[:, 0:k]

In [None]:
# Positions (row, rank) in the ranking where the sample index is below 50.
np.nonzero(sortedDist < 50)

In [None]:
# Sample indices for row 20000, ordered from smallest to largest distance.
sortedDist[20000]

In [None]:
# Expect (n_pixels, k).
tensmallest.shape

**7) Reclassify based on class label**

In [316]:
# Translate the neighbour sample indices into class labels.
#
# Assumes the sample table holds SAMPLES_PER_CLASS consecutive rows per class, in
# label order 1..4 — TODO confirm against class_samples.csv.
# argsort yields 0-based indices, so indices 0-49 belong to class 1, 50-99 to
# class 2, and so on. Thresholds of 51/101/151 would put the first sample of each
# following class (indices 50, 100 and 150) into the preceding class.
#
# Integer division handles the whole matrix at once: no hard-coded array shape,
# no Python-level loop over every pixel, and the global `k` is left untouched.
SAMPLES_PER_CLASS = 50
class_matrix = tensmallest // SAMPLES_PER_CLASS + 1

In [None]:
# Make sure the label matrix is integer-typed (np.bincount only accepts integers).
class_matrix = np.array(class_matrix, dtype=int)
class_matrix

In [None]:
# One row of neighbour labels per pixel.
class_matrix.shape

**8) Count the most frequent class**

In [None]:
# Majority vote: for each pixel take the label that occurs most often among its
# nearest samples. Ties resolve to the smallest label because argmax returns the
# first maximum.
b = [[np.bincount(row).argmax()] for row in class_matrix]

# Show only the first few votes; printing the full list would emit one entry per pixel.
b[:10]

In [320]:
# Convert the list of single-label rows into an array with one row per pixel.
classification_array = np.asarray(b)

In [None]:
# Inspect the per-pixel labels (NumPy abbreviates large arrays on display).
classification_array

In [322]:
# Fold the per-pixel labels back into the raster's (rows, columns) layout.
image_classification = classification_array.reshape(rows, columns)

In [None]:
# Should match the raster dimensions (rows, columns).
image_classification.shape


**9) Plot Classification Result**

In [None]:
# Class names and colours, listed in label order 1..4.
lu_cat_names = [
    "Vegetation",
    "Water",
    "Build area",
    "Road",
]
lu_colors = ["darkgreen", "blue", "brown", "gray"]
lu_cmap = ListedColormap(lu_colors)

# Always describe the full label set. Deriving it from the labels that happen to
# be present would mismatch `titles` whenever a class is missing from the result.
classes = list(range(1, len(lu_cat_names) + 1))

# Plot the classified raster.
fig, ax = plt.subplots(figsize=(12, 12))
# Pin the colour scale to the full label range; otherwise imshow rescales to the
# labels actually present and the colours no longer line up with the class names.
im = ax.imshow(
    image_classification,
    cmap=lu_cmap,
    vmin=classes[0],
    vmax=classes[-1],
)

# Legend with one entry per class.
ep.draw_legend(im_ax=im, classes=classes, titles=lu_cat_names)
ax.set_title(
    "Land Cover Classification using KNN",
    fontsize=14,
)
ax.set_axis_off()

# Auto adjust subplot to fit figure size
plt.tight_layout()

**10) Evaluation: calculate the confusion matrix**

In [325]:
# Load the test point table and convert it to a plain list of rows.
test = (
    pd.read_csv('/content/drive/MyDrive/Colab Notebooks/citra/test1.csv')
    .to_numpy()
    .tolist()
)

In [326]:
# Array form of the test table, one row per test point.
tests = np.asarray(test)

In [327]:
# Look up the predicted class at every test location.
# A comprehension is used so the loop variable does not overwrite the global
# `test` list, which the cell that builds `tests` needs if it is re-run.
# NOTE(review): column 0 is treated as the image row and column 1 as the image
# column here, whereas the training-sample loop reads them the other way round —
# confirm the column order of test1.csv.
predict = [
    image_classification[test_point[0], test_point[1]]
    for test_point in tests
]

In [328]:
# Convert the predicted labels to an array for the scikit-learn metrics below.
predict= np.asarray(predict)

In [329]:
# Reference label of every test point (third column of the test table).
test_label = [record[2] for record in tests]

In [330]:
# Confusion matrix. scikit-learn expects (y_true, y_pred), which puts the
# reference labels on the rows and the predicted labels on the columns.
# Fixing `labels` keeps the matrix 4x4 even if a class is missing from the test
# points or from the predictions.
cf_matrix = confusion_matrix(test_label, predict, labels=[1, 2, 3, 4])

In [None]:
# Render the confusion matrix as an annotated heat map.
# seaborn (`sn`) is imported with the other libraries at the top of the notebook.
df_cm = pd.DataFrame(cf_matrix, index=list("1234"), columns=list("1234"))

sn.set(font_scale=1.4) # for label size
fig, ax = plt.subplots()
# fmt="d" prints the counts as plain integers; the default format switches to
# scientific notation once a count needs more than two significant digits.
sn.heatmap(df_cm, annot=True, fmt="d", annot_kws={"size": 16}, ax=ax) # font size
ax.set_title("Confusion matrix")

plt.show()

In [None]:
# Overall accuracy: share of test points whose predicted label matches the
# reference label. Arguments follow scikit-learn's (y_true, y_pred) order.
accuracy_score(test_label, predict)

In [None]:
# Per-class precision. Arguments follow scikit-learn's (y_true, y_pred) order;
# with the two swapped, the call would report per-class recall instead.
precision_score(test_label, predict, average=None)