Developing a classifier:
========================
* Review of image as array
* Filtering data via Boolean masking
* Computing statistics on selected regions
* Using statistics to build an NDVI classifier
* Creating classifiers for water, building, clouds



In [None]:
#read in data
# `lf` is the helper module used throughout this notebook for reading and displaying the scene.
import landsat as lf
# Relative path: the notebook has to be run from the directory that contains data/.
rgbfile = "data/landsat_RGBN.txt" 
# NOTE(review): the "RGBN" file name suggests red/green/blue/near-infrared bands -- confirm
# against lf.landsat_read. The next cell slices the last axis of the result with [..., :3].
rgbn = lf.landsat_read(rgbfile)

In [None]:
#find the clearest image
# Select the interactive notebook backend for the figures that follow
# (a later cell switches to the static inline backend).
%matplotlib notebook
# Hand only the first three entries of the last axis to the display helper
# (presumably the R, G, B bands -- confirm against lf.landsat_read).
# The returned rgb_array is treated as a [row, col, channel] image by the cells below.
rgb_array = lf.rgb_display(rgbn[...,:3])

How do we unpack data?
============================================
![axis](figs/axis.png)

In [None]:
#rgb_array has three axes: [row, col, channel], i.e. axis = 0, 1, 2
#slice the last (channel) axis once per band to pull out the red, green and blue planes
red, green, blue = (rgb_array[..., band] for band in range(3))

In [None]:
#plot each of the individual channels and compare to image
import matplotlib.pyplot as plt
#2x2 grid with shared x/y axes, so zooming or panning one panel moves all four together
fig, axes = plt.subplots(2,2, sharex=True, sharey=True)
rgb = axes[0,0].imshow(rgb_array)
axes[0,0].set_title("RGB composite")
#vmin/vmax pin every channel to the same 0-1 colour scale so the colorbars are comparable
#(assumes the channel values are scaled to [0, 1] -- TODO confirm against lf.rgb_display)
r = axes[0,1].imshow(red, cmap="Reds", vmin=0, vmax=1)
axes[0,1].set_title("red")
fig.colorbar(r, ax=axes[0,1])
g = axes[1,0].imshow(green, cmap="Greens", vmin=0, vmax=1)
axes[1,0].set_title("green")
fig.colorbar(g, ax=axes[1,0])
b = axes[1,1].imshow(blue, cmap="Blues", vmin=0, vmax=1)
axes[1,1].set_title("blue")
fig.colorbar(b, ax=axes[1,1])
for ax in axes.flatten():
    #'box-forced' was deprecated in matplotlib 2.2 and removed in later releases;
    #plain 'box' is now accepted on shared axes and keeps the image aspect ratio
    ax.set_adjustable('box')
#to do: explore the following images and see if you can find links
#between the raw data and each of the color bands

How do we compute statistics?
============================

In [None]:
#mean pixel value of each band, printed on one line in red, green, blue order
print(*(band.mean() for band in (red, green, blue)))

In [None]:
#plot histogram of red channel
%matplotlib inline
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.hist(red.flatten(), color="red")
ax.set_ylabel("# of observations")
ax.set_xlabel("temperature bins")
fig.canvas.draw()

In [None]:
#ToDo: plot histograms for green and blue channels

How do we filter?
=======================================
![boolean mask](figs/masking.png)

In [None]:
#let's build a blue mask: True wherever a pixel's blue value is above the image-wide blue mean
blue_mean = blue.mean()
blue_mask = blue > blue_mean
print(blue_mask)

In [None]:
import numpy as np
import matplotlib.cm as mcm
import matplotlib.colors as mcolors
#what does that mean?
#draw the Boolean mask as a two-colour image to see which pixels it selects

#two bins, [0, 1) and [1, 2): False (0) falls in the first, True (1) in the second
cmap = mcm.bone
norm = mcolors.BoundaryNorm(boundaries=[0, 1, 2], ncolors=cmap.N)
fig, ax = plt.subplots()
im = ax.imshow(blue_mask, cmap=cmap, norm=norm)
cb = fig.colorbar(im, ax=ax)
#ticks sit at the centre of each bin so the labels line up with the colour patches
cb.set_ticks([0.5, 1.5])
cb.set_ticklabels(["not in mask", "in mask"])
fig.canvas.draw()



In [None]:
#plot only the data shown in the map
#work on a copy so rgb_array itself stays intact for the other cells
rgb_copy = rgb_array.copy()
#blank out (set to NaN, not zero) every pixel that is not in the mask
#NOTE(review): NaN can only be stored in a floating-point array -- confirm rgb_array's dtype
rgb_copy[~blue_mask] = np.nan
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
#an RGB image carries its own colours, so no cmap/norm is needed here
im = ax.imshow(rgb_copy)
ax.set_title("pixels inside the blue mask")
fig.canvas.draw()

In [None]:
#combine masks and show which region is in both
#a pixel is kept only when it is above the channel mean in blue AND in red
blue_mask = blue > blue.mean()
red_mask = red > red.mean()
comb_mask = np.logical_and(blue_mask, red_mask)

#two bins, [0, 1) and [1, 2): False (0) falls in the first, True (1) in the second
cmap = mcm.bone
norm = mcolors.BoundaryNorm(boundaries=[0, 1, 2], ncolors=cmap.N)
fig, ax = plt.subplots()
im = ax.imshow(comb_mask, cmap=cmap, norm=norm)
cb = fig.colorbar(im, ax=ax)
#ticks sit at the centre of each bin so the labels line up with the colour patches
cb.set_ticks([0.5, 1.5])
cb.set_ticklabels(["not in mask", "in mask"])
fig.canvas.draw()



ToDo
=====
1) Apply combined mask to data
2) Explore different thresholds for each of the color channels
3) Combine various thresholds
4) See if combination masks can highlight/yield features in the dataset