Load the drone image and the image containing the classification (training) data. The classification image was created at a higher resolution, so it needs to be resampled to the drone image's resolution before classification.

- Note: visualization is currently commented out because it takes a long time and isn't necessary.

In [None]:
#import Python 3's print function and division
from __future__ import print_function, division

#Import GDAL, NumPy, and matplotlib
from osgeo import gdal,gdal_array
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import LeaveOneOut, KFold
from sklearn.model_selection import cross_val_score

#Tell GDAL to throw Python exceptions and register all drivers
gdal.UseExceptions()
gdal.AllRegister()

#read in our image and ROI image
img_ds= gdal.Open("",
                gdal.GA_ReadOnly)
roi_ds = gdal.Open('')

# Need resolutions to match: read the pixel size and the upper-left origin
# of the drone image from its geotransform.
img_reference = img_ds.GetGeoTransform()
x_res = img_reference[1]
# Element 5 of a GDAL geotransform is the row step, normally negative for
# north-up rasters, hence the sign flip to get a positive resolution.
y_res = -img_reference[5]
minx = img_reference[0]
maxy = img_reference[3]

# This line allows us to output the image and use transformation to do the lifting of reprojection
output = ''
kwargs = {'format':'GTiff', 'xRes': x_res, 'yRes':y_res}
# The ROI dataset is passed to Warp directly; the former `input` alias
# shadowed Python's built-in input() for the rest of the notebook.
# NOTE(review): only the pixel size is matched here. The later boolean
# indexing img[roi > 0, :] also needs identical rows/columns -- confirm both
# rasters cover the same extent (minx/maxy above are currently unused).
ds = gdal.Warp(output, roi_ds, **kwargs)
# Dropping the last reference closes the dataset, which is how the GDAL
# Python bindings guarantee the GeoTIFF is fully written before it is
# opened again below.
ds = None

# Now read in that newly reprojected image
# NOTE(review): this path should point at the file written to `output`.
roi_ds = gdal.Open('/content/drive/MyDrive/Drone_Flights/Classification/NV_2_2_class/NV_02_02_trainingReady4.tif')


# Allocate a (rows, cols, bands) array using the dtype of the first band
img = np.zeros((img_ds.RasterYSize, img_ds.RasterXSize,img_ds.RasterCount),
               gdal_array.GDALTypeCodeToNumericTypeCode(img_ds.GetRasterBand(1).DataType))
# Only the first three bands are read; if the raster has more bands, the
# remaining layers allocated above stay zero-filled.
for b in range(3):
  img[:,:,b] = img_ds.GetRasterBand(b+1).ReadAsArray()

# Training labels as float64; later cells treat values > 0 as labelled pixels
roi = roi_ds.GetRasterBand(1).ReadAsArray().astype(np.float64)

# Display them
#plt.subplot(121)
#plt.imshow(img[:,:,1],cmap=plt.cm.Greys_r)
#plt.title('Red')

#plt.subplot(122)
#plt.imshow(roi, cmap=plt.cm.Spectral)
#plt.title('ROI Training Data')

#plt.show()

To save the images as NumPy arrays, use the code chunk below (use the extension .npy).

In [None]:
# Optional: cache both arrays on disk so a later session can load them with
# np.load instead of re-reading the rasters. Fill in the target paths
# (ending in .npy) and uncomment to use.
#np.save('',img)
#np.save('',roi)

In the event of wanting to open images from numpy arrays, use the code chunk below (use extension .npy)

`mmap_mode='r+'` opens the saved array as a memory map (read/write access) instead of loading it fully into RAM.

In [None]:
import numpy as np
# The label raster is loaded into memory in full.
roi = np.load('')
# The image is memory-mapped rather than read into RAM; 'r+' opens the
# existing file for both reading and writing.
img = np.load('', mmap_mode='r+')

Print how many training pixels we have

In [None]:
# Pixels with a value above zero in the ROI raster are the training samples;
# build that mask once and reuse it for the count and the label lookup.
labelled = roi > 0
n_samples = labelled.sum()
print('We have {n} samples'.format(n=n_samples))

# Which classification labels occur among the training pixels?
labels = np.unique(roi[labelled])
summary = "The training data includes {n} classes:{classes}"
print(summary.format(n=labels.size, classes=labels))

Select areas within our image that we have classifications for

In [None]:
# Assemble the training set from the labelled pixels (ROI value > 0):
#   x -- one row per labelled pixel, one column per image band
#   y -- the ROI label of each of those pixels
has_label = roi > 0
x = img[has_label, :]
y = roi[has_label]

x_report = 'Our X matrix is sized: {sz}'.format(sz=x.shape)
y_report = 'Our Y array is sized: {sz}'.format(sz=y.shape)
print(x_report)
print(y_report)

Store classification labels

In [None]:
# Distinct class labels present in the training data. `y` is the label array
# built from the ROI raster; the previous `y_test` is never defined in this
# notebook, so the cell raised a NameError.
y_labels = np.unique(y)

Visualize the data in two dimensions to understand how well classification might work for this dataset.

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Scatter the training pixels in the plane of the second and third image
# bands (columns 1 and 2 of x), one colour per class.
fig = plt.figure()
ax = fig.add_subplot(111)
colors = ['red', 'blue', 'green']
# Iterate over the labels actually present in `y` (the training labels; the
# former x_test/y_test names are never defined in this notebook) instead of
# assuming exactly three classes numbered 1..3. Colours repeat if there are
# more classes than colours.
for c, class_value in enumerate(np.unique(y)):
    mask = (y == class_value)
    ax.scatter(x[mask, 1], x[mask, 2],
               color=colors[c % len(colors)], label=class_value)

ax.legend(loc='upper left')
plt.show()

I also like to look at the data in the first two principal components to see whether the classes separate better in these dimensions.

In [None]:
from sklearn.decomposition import PCA
# Centre the training features column-wise. `x`/`y` are the training arrays;
# the former x_test/y_test names are never defined in this notebook.
X = x - np.mean(x, 0)
pca = PCA()
# Fit once and keep the scores on the first two components. The original
# fitted the same model three times, which only cost time.
pcs = pca.fit_transform(X)[:,0:2]

# Directions of the first two principal components
pv1 = pca.components_[0]
pv2 = pca.components_[1]

fig = plt.figure()
ax = fig.add_subplot(111)
colors = ['red', 'blue', 'green']
# One scatter series per label actually present in y, rather than assuming
# classes 1..3; colours repeat if there are more classes than colours.
for i, class_value in enumerate(np.unique(y)):
    mask = y == class_value
    ax.scatter(pcs[mask,0], pcs[mask,1], alpha=0.8,
               color=colors[i % len(colors)], label=class_value)
ax.legend(loc='upper right')
plt.xlabel('principal component 1')
plt.ylabel('principal component 2')
plt.show()

Build my classification tree

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import LeaveOneOut, KFold

# Random forest of 100 trees, each limited to a depth of 10.
# oob_score=True makes the fitted model expose oob_score_ (printed in a later
# cell); verbose=True turns on progress output while fitting/predicting.
tree = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    oob_score=True,
    verbose=True,
)


Fit classification to training data

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
# Train the forest on the labelled pixels. fit() returns the estimator
# itself, so `rf` and `tree` refer to the same fitted model.
tree.fit(x, y)
rf = tree
# Predictions for the very pixels the model was trained on.
y_pred = rf.predict(x)


Visualize the classification tree

In [None]:
# Tree Visualisation
from sklearn.tree import export_graphviz
from IPython.display import Image
import graphviz
# Render the first (up to) three trees of the fitted forest, truncated to a
# depth of 6 so the graphs stay readable.
# The loop variable is deliberately NOT called `tree`: that name holds the
# RandomForestClassifier, and overwriting it with a single decision tree
# would make a re-run of the fit cell train the wrong model.
for estimator in rf.estimators_[:3]:
    dot_data = export_graphviz(estimator,
                               filled=True,
                               max_depth=6,
                               impurity=False,
                               proportion=True)
    graph = graphviz.Source(dot_data)
    display(graph)

Confusion matrices can be really helpful for seeing where the model might have shortcomings

In [None]:
# Cross-tabulate the true labels against the predictions made in the fit
# cell (those were predicted on the training pixels themselves).
cm = confusion_matrix(y_true=y, y_pred=y_pred)

# Draw the matrix; the trailing semicolon keeps the notebook from echoing
# the returned display object.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm)
cm_display.plot();

View the out-of-bag prediction accuracy

In [None]:
# oob_score_ is a fraction; scale it to a percentage for display.
oob_percent = rf.oob_score_ * 100
print('Our OOB prediction accuracy is : {oob}%'.format(oob=oob_percent))

View the importance of the input bands

In [None]:
# One importance value per feature column the forest was trained on. Number
# the bands from the model itself rather than a fixed [1, 2, 3], so that no
# importance is silently dropped when the image has more than three bands.
importances = rf.feature_importances_
bands = list(range(1, len(importances) + 1))

for b, imp in zip(bands, importances):
    print('Band {b} importance: {imp}'.format(b=b, imp=imp))

**Predict the rest of the image**

In [None]:
# Take our full image, and reshape into long 2d array
# (one row per pixel, one column per band) for classification
new_shape = (img.shape[0] * img.shape[1], img.shape[2])

# Use every band, exactly as the training matrix img[roi > 0, :] does. The
# previous hard-coded [:, :, :4] slice made the reshape fail for images with
# more than four bands.
img_as_array = img.reshape(new_shape)
print('Reshaped from {o} to {n}'.format(o=img.shape,
                                        n=img_as_array.shape))

# Now predict for each pixel
class_prediction = rf.predict(img_as_array)

# Reshape our classification map back to the image's (rows, cols) grid,
# remembering the flat shape so the message reports the real "from" shape
flat_shape = class_prediction.shape
class_prediction = class_prediction.reshape(img.shape[:2])
print('Reshaped from {o} to {n}'.format(o=flat_shape,
                                        n=class_prediction.shape))

**Visualize**

In [None]:
# Highest class id in the map. The predictions carry the dtype of the
# training labels, which may be float, and range() below needs an int.
n = int(class_prediction.max())
#Setup colormap
colors = dict((
    (0, (0,0,0,255)),
    (1, (0,150,0,255)), #dead sage
    (2, (0,0,255,255)), #green plants
    (3, (0,255,0,255))#dead others and soil
))

#Put 0 - 255 as float 0 -1
colors = {k: [_v / 255.0 for _v in v] for k, v in colors.items()}

# One colour per class id 1..n. Ids without an entry above fall back to
# fully transparent white, given on the same 0-1 scale as the other colours
# (the old 0-255 fallback tuple is not a valid matplotlib RGBA value).
transparent_white = (1.0, 1.0, 1.0, 0.0)
index_colors = [colors.get(key, transparent_white) for key in range(1, n + 1)]
# index_colors already holds exactly n entries, so no explicit N is needed.
cmap = plt.matplotlib.colors.ListedColormap(index_colors, 'Classification')



plt.subplot(122)
# Pin the colour scale to 1..n so class k always gets the k-th colour, even
# when the lowest class id is absent from the prediction.
plt.imshow(class_prediction, cmap=cmap, interpolation='none', vmin=1, vmax=n)

plt.show()

Export the classified image as a tif file to Google Drive

In [None]:
# create the output image: single Float32 band, same driver and pixel
# dimensions as the source drone image.
# NOTE(review): requires img_ds from the GDAL loading cell; it is not
# available if the arrays were loaded from .npy files instead.
driver = img_ds.GetDriver()
outDs = driver.Create("", img_ds.RasterXSize, img_ds.RasterYSize, 1, gdal.GDT_Float32)

# Copy the georeferencing. The geotransform alone only places the pixels;
# the projection (CRS) is needed as well for the map to line up in GIS tools.
outDs.SetGeoTransform(img_ds.GetGeoTransform())
outDs.SetProjection(img_ds.GetProjection())

outBand = outDs.GetRasterBand(1)
outBand.SetNoDataValue(15)
outBand.WriteArray(class_prediction)

# Flush and release the band and dataset: the GDAL Python bindings only
# finish writing the file once the dataset object is closed/dereferenced.
outBand.FlushCache()
outBand = None
outDs = None