#### MANUAL CLASSIFICATION SCRIPTS

In [None]:
# load dependencies and allow interactivity
%matplotlib widget

import rioxarray as rxr
import matplotlib.pyplot as plt
import pandas as pd
import glob
import os
import numpy as np

In [None]:
# user definitions
# data_direc is the site folder; its sub-folders whose names start with
# focus_year are treated as the per-scene data record.
data_direc = '/Users/jpflug/Documents/Projects/cubesatReanaly/Data/Meadows/STR/'
focus_year = 2023

# class label attached to every point clicked in this session:
#   0 -> snow-absent gridcells
#   1 -> snow-covered gridcells
#   2 -> glint or other image errors/occlusions
snow_present = 2

# positions (into the sorted scene list built below) of the images to click
# through; previously used selections are kept here, commented out, per site
# 737 snow presence/absence scenes
# set_of_ten = [86,73,68,64,61,57,48,42,27,16]
# 737 image artifacts
# set_of_ten = [47,50,52,69,11,13,30,31,43,46]
# 869 snow presence/absence scenes
# set_of_ten = [56,51,47,45,43,40,35,29,26,24]
# 869 image artifacts
# set_of_ten = [36,29,6,5,4,1]
# 551 snow presence/absence scenes
# set_of_ten = [70,61,55,51,49,45,41,33,26,23]
# 551 image artifacts
# set_of_ten = [38,3,2,1]
# DAN snow presence/absence scenes
# set_of_ten = [90,89,84,89,77,74,66,63,30,28]
# DAN image artifacts
# set_of_ten = [73,65,58,54,52,45]
# DPO snow presence/absence scenes
# set_of_ten = [125,120,115,113,109,103,96,87,78,60]
# DPO image artifacts
# set_of_ten = [74,66,64,58,55]
# GIN snow presence/absence scenes
# set_of_ten = [62,57,55,52,50,42,17,12,8,2]
# GIN image artifacts
# set_of_ten = [49,41,37,34,26]
# STR snow presence/absence scenes
# set_of_ten = [95,93,91,89,86,81,78,63,56,34]
# STR image artifacts
set_of_ten = [53,47,66]

# every directory under data_direc whose name starts with the focus year,
# in sorted order so that the positions in set_of_ten are stable
year_pattern = data_direc + str(focus_year) + '*'
subdirecs = sorted(filter(os.path.isdir, glob.glob(year_pattern)))
print('length of data record: ',len(subdirecs))

In [None]:
#### functions
def calc_rgb(ds):
    """Split a 4-band scene into its bands and build a display RGB image.

    Bands are taken by position along the ``band`` dimension: 0 = blue,
    1 = green, 2 = red, 3 = NIR.

    All three colour channels are rescaled with the *green* band's min/max.
    Wherever the rescaled red value is not <= 1, red, green and blue are all
    set to 1.  No other clipping is applied, so blue may still exceed 1 and
    red/blue may fall below 0.

    Parameters
    ----------
    ds : object exposing ``isel(band=...)``
        The opened raster (the notebook passes the result of
        ``rxr.open_rasterio``).

    Returns
    -------
    tuple
        ``(red, green, blue, nir, rgb_image)`` where the first four are the
        raw band arrays (``.values``) and ``rgb_image`` stacks the rescaled
        red, green and blue channels along a new last axis.
    """
    # pull the four bands out by position
    blue, green, red, nir = (ds.isel(band=idx) for idx in range(4))

    # stretch every colour channel with the green band's range
    hi = green.max().values
    lo = green.min().values
    span = hi - lo
    red_scaled = (red - lo) / span
    green_scaled = (green - lo) / span
    blue_scaled = (blue - lo) / span

    # pixels whose scaled red is above 1 are painted (1, 1, 1)
    keep = red_scaled <= 1
    channels = [chan.where(keep, 1) for chan in (red_scaled, green_scaled, blue_scaled)]

    # last axis of the stacked image is (red, green, blue)
    rgb_image = np.stack(channels, axis=-1)
    return red.values, green.values, blue.values, nir.values, rgb_image


# initialize
# Shared accumulator for every clicked pixel. The onclick handlers in the
# cells below append one tuple per click:
#   (x, y, r, g, b, nir, r_norm, g_norm, b_norm)
# Re-running this cell empties the list, discarding any points not yet saved.
clicked_points = []

In [None]:
# # view all of the plots to make the set of 10 selection
# # only for visualization, so comment out if not using

# for direcCount,direcc in enumerate(subdirecs):
#     try:
#         fname = glob.glob(direcc+'/*/PSScene/*SR_clip.tif')[0]
#         ds = rxr.open_rasterio(fname)
#         red_band, green_band, blue_band, nir_band, rgb_image = calc_rgb(ds)

#         fg,ax = plt.subplots(figsize=(4,4))
#         ax.imshow(rgb_image,cmap='gray')
#         ax.set_title(direcCount)
        
#     except:
#         print('bad data')

In [None]:
# Scene slot 0: show the image at set_of_ten[0] and record left-clicked pixels.
scene_slot = 0
if scene_slot >= len(set_of_ten):
    # Shorter selections (e.g. the image-artifact lists) leave later cells unused.
    print('set_of_ten has no entry at position', scene_slot, '- nothing to classify in this cell')
else:
    direcc = subdirecs[set_of_ten[scene_slot]]
    scene_files = glob.glob(direcc+'/*/PSScene/*SR_clip.tif')
    if not scene_files:
        raise FileNotFoundError('no *SR_clip.tif found under ' + direcc)
    fname = scene_files[0]
    ds = rxr.open_rasterio(fname)
    red_band, green_band, blue_band, nir_band, rgb_image = calc_rgb(ds)

    # Plot the image on its own figure/axes so clicks can be tied to this scene
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(rgb_image, cmap='gray')

    # Function to handle mouse clicks.  The default arguments freeze THIS
    # scene's axes and arrays at definition time; without them the handler
    # would read whichever scene was loaded last into the notebook globals.
    def onclick(event, ax=ax, red_band=red_band, green_band=green_band,
                blue_band=blue_band, nir_band=nir_band, rgb_image=rgb_image):
        # Only left clicks inside this image count (xdata/ydata are None elsewhere)
        if event.button != 1 or event.inaxes is not ax:
            return
        # imshow centres each pixel on integer coordinates, so round to the
        # nearest integer rather than truncating
        x = int(np.floor(event.xdata + 0.5))
        y = int(np.floor(event.ydata + 0.5))
        n_rows, n_cols = rgb_image.shape[:2]
        if not (0 <= x < n_cols and 0 <= y < n_rows):
            return
        clicked_points.append((x, y, red_band[y, x], green_band[y, x], blue_band[y, x], nir_band[y, x],
                               rgb_image[y, x, 0], rgb_image[y, x, 1], rgb_image[y, x, 2]))
        ax.plot(x, y, 'ro')  # Mark clicked point with red dot
        event.canvas.draw_idle()

    # Connect the mouse click event to the onclick function
    fig.canvas.mpl_connect('button_press_event', onclick)
    plt.show()

In [None]:
# Scene slot 1: show the image at set_of_ten[1] and record left-clicked pixels.
scene_slot = 1
if scene_slot >= len(set_of_ten):
    # Shorter selections (e.g. the image-artifact lists) leave later cells unused.
    print('set_of_ten has no entry at position', scene_slot, '- nothing to classify in this cell')
else:
    direcc = subdirecs[set_of_ten[scene_slot]]
    scene_files = glob.glob(direcc+'/*/PSScene/*SR_clip.tif')
    if not scene_files:
        raise FileNotFoundError('no *SR_clip.tif found under ' + direcc)
    fname = scene_files[0]
    ds = rxr.open_rasterio(fname)
    red_band, green_band, blue_band, nir_band, rgb_image = calc_rgb(ds)

    # Plot the image on its own figure/axes so clicks can be tied to this scene
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(rgb_image, cmap='gray')

    # Function to handle mouse clicks.  The default arguments freeze THIS
    # scene's axes and arrays at definition time; without them the handler
    # would read whichever scene was loaded last into the notebook globals.
    def onclick(event, ax=ax, red_band=red_band, green_band=green_band,
                blue_band=blue_band, nir_band=nir_band, rgb_image=rgb_image):
        # Only left clicks inside this image count (xdata/ydata are None elsewhere)
        if event.button != 1 or event.inaxes is not ax:
            return
        # imshow centres each pixel on integer coordinates, so round to the
        # nearest integer rather than truncating
        x = int(np.floor(event.xdata + 0.5))
        y = int(np.floor(event.ydata + 0.5))
        n_rows, n_cols = rgb_image.shape[:2]
        if not (0 <= x < n_cols and 0 <= y < n_rows):
            return
        clicked_points.append((x, y, red_band[y, x], green_band[y, x], blue_band[y, x], nir_band[y, x],
                               rgb_image[y, x, 0], rgb_image[y, x, 1], rgb_image[y, x, 2]))
        ax.plot(x, y, 'ro')  # Mark clicked point with red dot
        event.canvas.draw_idle()

    # Connect the mouse click event to the onclick function
    fig.canvas.mpl_connect('button_press_event', onclick)
    plt.show()

In [None]:
# Scene slot 2: show the image at set_of_ten[2] and record left-clicked pixels.
scene_slot = 2
if scene_slot >= len(set_of_ten):
    # Shorter selections (e.g. the image-artifact lists) leave later cells unused.
    print('set_of_ten has no entry at position', scene_slot, '- nothing to classify in this cell')
else:
    direcc = subdirecs[set_of_ten[scene_slot]]
    scene_files = glob.glob(direcc+'/*/PSScene/*SR_clip.tif')
    if not scene_files:
        raise FileNotFoundError('no *SR_clip.tif found under ' + direcc)
    fname = scene_files[0]
    ds = rxr.open_rasterio(fname)
    red_band, green_band, blue_band, nir_band, rgb_image = calc_rgb(ds)

    # Plot the image on its own figure/axes so clicks can be tied to this scene
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(rgb_image, cmap='gray')

    # Function to handle mouse clicks.  The default arguments freeze THIS
    # scene's axes and arrays at definition time; without them the handler
    # would read whichever scene was loaded last into the notebook globals.
    def onclick(event, ax=ax, red_band=red_band, green_band=green_band,
                blue_band=blue_band, nir_band=nir_band, rgb_image=rgb_image):
        # Only left clicks inside this image count (xdata/ydata are None elsewhere)
        if event.button != 1 or event.inaxes is not ax:
            return
        # imshow centres each pixel on integer coordinates, so round to the
        # nearest integer rather than truncating
        x = int(np.floor(event.xdata + 0.5))
        y = int(np.floor(event.ydata + 0.5))
        n_rows, n_cols = rgb_image.shape[:2]
        if not (0 <= x < n_cols and 0 <= y < n_rows):
            return
        clicked_points.append((x, y, red_band[y, x], green_band[y, x], blue_band[y, x], nir_band[y, x],
                               rgb_image[y, x, 0], rgb_image[y, x, 1], rgb_image[y, x, 2]))
        ax.plot(x, y, 'ro')  # Mark clicked point with red dot
        event.canvas.draw_idle()

    # Connect the mouse click event to the onclick function
    fig.canvas.mpl_connect('button_press_event', onclick)
    plt.show()

In [None]:
# Scene slot 3: show the image at set_of_ten[3] and record left-clicked pixels.
scene_slot = 3
if scene_slot >= len(set_of_ten):
    # Shorter selections (e.g. the image-artifact lists) leave later cells unused.
    print('set_of_ten has no entry at position', scene_slot, '- nothing to classify in this cell')
else:
    direcc = subdirecs[set_of_ten[scene_slot]]
    scene_files = glob.glob(direcc+'/*/PSScene/*SR_clip.tif')
    if not scene_files:
        raise FileNotFoundError('no *SR_clip.tif found under ' + direcc)
    fname = scene_files[0]
    ds = rxr.open_rasterio(fname)
    red_band, green_band, blue_band, nir_band, rgb_image = calc_rgb(ds)

    # Plot the image on its own figure/axes so clicks can be tied to this scene
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(rgb_image, cmap='gray')

    # Function to handle mouse clicks.  The default arguments freeze THIS
    # scene's axes and arrays at definition time; without them the handler
    # would read whichever scene was loaded last into the notebook globals.
    def onclick(event, ax=ax, red_band=red_band, green_band=green_band,
                blue_band=blue_band, nir_band=nir_band, rgb_image=rgb_image):
        # Only left clicks inside this image count (xdata/ydata are None elsewhere)
        if event.button != 1 or event.inaxes is not ax:
            return
        # imshow centres each pixel on integer coordinates, so round to the
        # nearest integer rather than truncating
        x = int(np.floor(event.xdata + 0.5))
        y = int(np.floor(event.ydata + 0.5))
        n_rows, n_cols = rgb_image.shape[:2]
        if not (0 <= x < n_cols and 0 <= y < n_rows):
            return
        clicked_points.append((x, y, red_band[y, x], green_band[y, x], blue_band[y, x], nir_band[y, x],
                               rgb_image[y, x, 0], rgb_image[y, x, 1], rgb_image[y, x, 2]))
        ax.plot(x, y, 'ro')  # Mark clicked point with red dot
        event.canvas.draw_idle()

    # Connect the mouse click event to the onclick function
    fig.canvas.mpl_connect('button_press_event', onclick)
    plt.show()

In [None]:
# Scene slot 4: show the image at set_of_ten[4] and record left-clicked pixels.
scene_slot = 4
if scene_slot >= len(set_of_ten):
    # Shorter selections (e.g. the image-artifact lists) leave later cells unused.
    print('set_of_ten has no entry at position', scene_slot, '- nothing to classify in this cell')
else:
    direcc = subdirecs[set_of_ten[scene_slot]]
    scene_files = glob.glob(direcc+'/*/PSScene/*SR_clip.tif')
    if not scene_files:
        raise FileNotFoundError('no *SR_clip.tif found under ' + direcc)
    fname = scene_files[0]
    ds = rxr.open_rasterio(fname)
    red_band, green_band, blue_band, nir_band, rgb_image = calc_rgb(ds)

    # Plot the image on its own figure/axes so clicks can be tied to this scene
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(rgb_image, cmap='gray')

    # Function to handle mouse clicks.  The default arguments freeze THIS
    # scene's axes and arrays at definition time; without them the handler
    # would read whichever scene was loaded last into the notebook globals.
    def onclick(event, ax=ax, red_band=red_band, green_band=green_band,
                blue_band=blue_band, nir_band=nir_band, rgb_image=rgb_image):
        # Only left clicks inside this image count (xdata/ydata are None elsewhere)
        if event.button != 1 or event.inaxes is not ax:
            return
        # imshow centres each pixel on integer coordinates, so round to the
        # nearest integer rather than truncating
        x = int(np.floor(event.xdata + 0.5))
        y = int(np.floor(event.ydata + 0.5))
        n_rows, n_cols = rgb_image.shape[:2]
        if not (0 <= x < n_cols and 0 <= y < n_rows):
            return
        clicked_points.append((x, y, red_band[y, x], green_band[y, x], blue_band[y, x], nir_band[y, x],
                               rgb_image[y, x, 0], rgb_image[y, x, 1], rgb_image[y, x, 2]))
        ax.plot(x, y, 'ro')  # Mark clicked point with red dot
        event.canvas.draw_idle()

    # Connect the mouse click event to the onclick function
    fig.canvas.mpl_connect('button_press_event', onclick)
    plt.show()

In [None]:
# Scene slot 5: show the image at set_of_ten[5] and record left-clicked pixels.
scene_slot = 5
if scene_slot >= len(set_of_ten):
    # Shorter selections (e.g. the image-artifact lists) leave later cells unused.
    print('set_of_ten has no entry at position', scene_slot, '- nothing to classify in this cell')
else:
    direcc = subdirecs[set_of_ten[scene_slot]]
    scene_files = glob.glob(direcc+'/*/PSScene/*SR_clip.tif')
    if not scene_files:
        raise FileNotFoundError('no *SR_clip.tif found under ' + direcc)
    fname = scene_files[0]
    ds = rxr.open_rasterio(fname)
    red_band, green_band, blue_band, nir_band, rgb_image = calc_rgb(ds)

    # Plot the image on its own figure/axes so clicks can be tied to this scene
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(rgb_image, cmap='gray')

    # Function to handle mouse clicks.  The default arguments freeze THIS
    # scene's axes and arrays at definition time; without them the handler
    # would read whichever scene was loaded last into the notebook globals.
    def onclick(event, ax=ax, red_band=red_band, green_band=green_band,
                blue_band=blue_band, nir_band=nir_band, rgb_image=rgb_image):
        # Only left clicks inside this image count (xdata/ydata are None elsewhere)
        if event.button != 1 or event.inaxes is not ax:
            return
        # imshow centres each pixel on integer coordinates, so round to the
        # nearest integer rather than truncating
        x = int(np.floor(event.xdata + 0.5))
        y = int(np.floor(event.ydata + 0.5))
        n_rows, n_cols = rgb_image.shape[:2]
        if not (0 <= x < n_cols and 0 <= y < n_rows):
            return
        clicked_points.append((x, y, red_band[y, x], green_band[y, x], blue_band[y, x], nir_band[y, x],
                               rgb_image[y, x, 0], rgb_image[y, x, 1], rgb_image[y, x, 2]))
        ax.plot(x, y, 'ro')  # Mark clicked point with red dot
        event.canvas.draw_idle()

    # Connect the mouse click event to the onclick function
    fig.canvas.mpl_connect('button_press_event', onclick)
    plt.show()

In [None]:
# Scene slot 6: show the image at set_of_ten[6] and record left-clicked pixels.
scene_slot = 6
if scene_slot >= len(set_of_ten):
    # Shorter selections (e.g. the image-artifact lists) leave later cells unused.
    print('set_of_ten has no entry at position', scene_slot, '- nothing to classify in this cell')
else:
    direcc = subdirecs[set_of_ten[scene_slot]]
    scene_files = glob.glob(direcc+'/*/PSScene/*SR_clip.tif')
    if not scene_files:
        raise FileNotFoundError('no *SR_clip.tif found under ' + direcc)
    fname = scene_files[0]
    ds = rxr.open_rasterio(fname)
    red_band, green_band, blue_band, nir_band, rgb_image = calc_rgb(ds)

    # Plot the image on its own figure/axes so clicks can be tied to this scene
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(rgb_image, cmap='gray')

    # Function to handle mouse clicks.  The default arguments freeze THIS
    # scene's axes and arrays at definition time; without them the handler
    # would read whichever scene was loaded last into the notebook globals.
    def onclick(event, ax=ax, red_band=red_band, green_band=green_band,
                blue_band=blue_band, nir_band=nir_band, rgb_image=rgb_image):
        # Only left clicks inside this image count (xdata/ydata are None elsewhere)
        if event.button != 1 or event.inaxes is not ax:
            return
        # imshow centres each pixel on integer coordinates, so round to the
        # nearest integer rather than truncating
        x = int(np.floor(event.xdata + 0.5))
        y = int(np.floor(event.ydata + 0.5))
        n_rows, n_cols = rgb_image.shape[:2]
        if not (0 <= x < n_cols and 0 <= y < n_rows):
            return
        clicked_points.append((x, y, red_band[y, x], green_band[y, x], blue_band[y, x], nir_band[y, x],
                               rgb_image[y, x, 0], rgb_image[y, x, 1], rgb_image[y, x, 2]))
        ax.plot(x, y, 'ro')  # Mark clicked point with red dot
        event.canvas.draw_idle()

    # Connect the mouse click event to the onclick function
    fig.canvas.mpl_connect('button_press_event', onclick)
    plt.show()

In [None]:
# Scene slot 7: show the image at set_of_ten[7] and record left-clicked pixels.
scene_slot = 7
if scene_slot >= len(set_of_ten):
    # Shorter selections (e.g. the image-artifact lists) leave later cells unused.
    print('set_of_ten has no entry at position', scene_slot, '- nothing to classify in this cell')
else:
    direcc = subdirecs[set_of_ten[scene_slot]]
    scene_files = glob.glob(direcc+'/*/PSScene/*SR_clip.tif')
    if not scene_files:
        raise FileNotFoundError('no *SR_clip.tif found under ' + direcc)
    fname = scene_files[0]
    ds = rxr.open_rasterio(fname)
    red_band, green_band, blue_band, nir_band, rgb_image = calc_rgb(ds)

    # Plot the image on its own figure/axes so clicks can be tied to this scene
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(rgb_image, cmap='gray')

    # Function to handle mouse clicks.  The default arguments freeze THIS
    # scene's axes and arrays at definition time; without them the handler
    # would read whichever scene was loaded last into the notebook globals.
    def onclick(event, ax=ax, red_band=red_band, green_band=green_band,
                blue_band=blue_band, nir_band=nir_band, rgb_image=rgb_image):
        # Only left clicks inside this image count (xdata/ydata are None elsewhere)
        if event.button != 1 or event.inaxes is not ax:
            return
        # imshow centres each pixel on integer coordinates, so round to the
        # nearest integer rather than truncating
        x = int(np.floor(event.xdata + 0.5))
        y = int(np.floor(event.ydata + 0.5))
        n_rows, n_cols = rgb_image.shape[:2]
        if not (0 <= x < n_cols and 0 <= y < n_rows):
            return
        clicked_points.append((x, y, red_band[y, x], green_band[y, x], blue_band[y, x], nir_band[y, x],
                               rgb_image[y, x, 0], rgb_image[y, x, 1], rgb_image[y, x, 2]))
        ax.plot(x, y, 'ro')  # Mark clicked point with red dot
        event.canvas.draw_idle()

    # Connect the mouse click event to the onclick function
    fig.canvas.mpl_connect('button_press_event', onclick)
    plt.show()

In [None]:
# Scene slot 8: show the image at set_of_ten[8] and record left-clicked pixels.
scene_slot = 8
if scene_slot >= len(set_of_ten):
    # Shorter selections (e.g. the image-artifact lists) leave later cells unused.
    print('set_of_ten has no entry at position', scene_slot, '- nothing to classify in this cell')
else:
    direcc = subdirecs[set_of_ten[scene_slot]]
    scene_files = glob.glob(direcc+'/*/PSScene/*SR_clip.tif')
    if not scene_files:
        raise FileNotFoundError('no *SR_clip.tif found under ' + direcc)
    fname = scene_files[0]
    ds = rxr.open_rasterio(fname)
    red_band, green_band, blue_band, nir_band, rgb_image = calc_rgb(ds)

    # Plot the image on its own figure/axes so clicks can be tied to this scene
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(rgb_image, cmap='gray')

    # Function to handle mouse clicks.  The default arguments freeze THIS
    # scene's axes and arrays at definition time; without them the handler
    # would read whichever scene was loaded last into the notebook globals.
    def onclick(event, ax=ax, red_band=red_band, green_band=green_band,
                blue_band=blue_band, nir_band=nir_band, rgb_image=rgb_image):
        # Only left clicks inside this image count (xdata/ydata are None elsewhere)
        if event.button != 1 or event.inaxes is not ax:
            return
        # imshow centres each pixel on integer coordinates, so round to the
        # nearest integer rather than truncating
        x = int(np.floor(event.xdata + 0.5))
        y = int(np.floor(event.ydata + 0.5))
        n_rows, n_cols = rgb_image.shape[:2]
        if not (0 <= x < n_cols and 0 <= y < n_rows):
            return
        clicked_points.append((x, y, red_band[y, x], green_band[y, x], blue_band[y, x], nir_band[y, x],
                               rgb_image[y, x, 0], rgb_image[y, x, 1], rgb_image[y, x, 2]))
        ax.plot(x, y, 'ro')  # Mark clicked point with red dot
        event.canvas.draw_idle()

    # Connect the mouse click event to the onclick function
    fig.canvas.mpl_connect('button_press_event', onclick)
    plt.show()

In [None]:
# Scene slot 9: show the image at set_of_ten[9] and record left-clicked pixels.
scene_slot = 9
if scene_slot >= len(set_of_ten):
    # Shorter selections (e.g. the image-artifact lists) leave later cells unused.
    print('set_of_ten has no entry at position', scene_slot, '- nothing to classify in this cell')
else:
    direcc = subdirecs[set_of_ten[scene_slot]]
    scene_files = glob.glob(direcc+'/*/PSScene/*SR_clip.tif')
    if not scene_files:
        raise FileNotFoundError('no *SR_clip.tif found under ' + direcc)
    fname = scene_files[0]
    ds = rxr.open_rasterio(fname)
    red_band, green_band, blue_band, nir_band, rgb_image = calc_rgb(ds)

    # Plot the image on its own figure/axes so clicks can be tied to this scene
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(rgb_image, cmap='gray')

    # Function to handle mouse clicks.  The default arguments freeze THIS
    # scene's axes and arrays at definition time; without them the handler
    # would read whichever scene was loaded last into the notebook globals.
    def onclick(event, ax=ax, red_band=red_band, green_band=green_band,
                blue_band=blue_band, nir_band=nir_band, rgb_image=rgb_image):
        # Only left clicks inside this image count (xdata/ydata are None elsewhere)
        if event.button != 1 or event.inaxes is not ax:
            return
        # imshow centres each pixel on integer coordinates, so round to the
        # nearest integer rather than truncating
        x = int(np.floor(event.xdata + 0.5))
        y = int(np.floor(event.ydata + 0.5))
        n_rows, n_cols = rgb_image.shape[:2]
        if not (0 <= x < n_cols and 0 <= y < n_rows):
            return
        clicked_points.append((x, y, red_band[y, x], green_band[y, x], blue_band[y, x], nir_band[y, x],
                               rgb_image[y, x, 0], rgb_image[y, x, 1], rgb_image[y, x, 2]))
        ax.plot(x, y, 'ro')  # Mark clicked point with red dot
        event.canvas.draw_idle()

    # Connect the mouse click event to the onclick function
    fig.canvas.mpl_connect('button_press_event', onclick)
    plt.show()

In [None]:
# Save every point clicked so far as the labelled CSV for this site/year/class.
# Refuse unknown labels up front: previously any value other than 0 or 1 was
# silently written to the class-2 file.
if snow_present not in (0, 1, 2):
    raise ValueError('snow_present must be 0, 1, or 2; got ' + repr(snow_present))

df = pd.DataFrame(clicked_points, columns=['x', 'y','r','g','b','nir','r_norm','g_norm','b_norm'])
print(df)

# Create the output folder if needed so a missing directory cannot cost the
# session's clicks at write time.
out_direc = data_direc+'self_classified/'
os.makedirs(out_direc, exist_ok=True)

# NOTE: the row index is still written as the first CSV column (unchanged
# behaviour); an existing file for the same site/year/class is overwritten.
df.to_csv(out_direc+'self_classified_'+str(focus_year)+'_class%d.csv' % snow_present)

#### MODEL TRAINING SCRIPTS

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
import joblib

# Site directory containing the `self_classified/` folder: the labelled CSVs
# are read from it and the fitted model is written back into it below.
data_direc = '/Users/jpflug/Documents/Projects/cubesatReanaly/Data/Meadows/STR/'

In [2]:
# Read the three hand-labelled point sets and tag every row with its class id
# (the class id is the number in the file name).
X0, X1, X2 = (
    pd.read_csv(data_direc+'self_classified/self_classified_2023_class'+str(class_id)+'.csv').assign(label=class_id)
    for class_id in (0, 1, 2)
)

# Stack the classes into one table; each file's own row index is kept, so
# index values repeat across classes.
X = pd.concat([X0, X1, X2])

# 1-D array of class labels aligned with the rows of X
y = X['label'].to_numpy()
X

Unnamed: 0.1,Unnamed: 0,x,y,r,g,b,nir,r_norm,g_norm,b_norm,label
0,0,114,227,1652,1410,988,2303,0.284288,0.242643,0.170022,0
1,1,131,220,1711,1496,1006,2477,0.294442,0.257443,0.173120,0
2,2,151,213,2013,1745,1359,2578,0.346412,0.300293,0.233867,0
3,3,188,186,749,559,252,2139,0.128893,0.096197,0.043366,0
4,4,213,196,552,505,191,1947,0.094992,0.086904,0.032869,0
...,...,...,...,...,...,...,...,...,...,...,...
62,62,180,8,3904,7111,3107,4097,0.533771,0.972245,0.424802,2
63,63,120,34,4865,7068,5453,5296,0.665163,0.966366,0.745556,2
64,64,133,42,4834,7078,5472,5371,0.660924,0.967733,0.748154,2
65,65,20,243,3870,6183,5298,4394,0.529122,0.845365,0.724364,2


In [3]:
# Repeated stratified 10-fold cross-validation of the 3-class random forest.
# Both metrics are scored in ONE pass over the folds with cross_validate;
# calling cross_val_score once per metric refits the same 10,000 models twice.
from sklearn.model_selection import cross_validate

# the four raw band values are the only predictors
feature_cols = ['b','g','r','nir']

model = RandomForestClassifier(n_estimators=10, max_depth=10, max_features=3, random_state=1)
# fixed random_state -> the same fold assignment on every run
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=1000, random_state=1)
cv_scores = cross_validate(model, X[feature_cols], y, scoring=['accuracy', 'balanced_accuracy'],
                           cv=cv, n_jobs=-1, error_score='raise')

# one score per fold (n_splits * n_repeats entries each)
n_accuracy = cv_scores['test_accuracy']
# n_f1 = cross_val_score(model,X[['b','g','r','nir']], y, scoring='f1', cv=cv, n_jobs=-1, error_score='raise')
n_balanced_accuracy = cv_scores['test_balanced_accuracy']

In [4]:
# report performance: number of per-fold scores, then mean (std) of each metric
# plt.hist(n_f1)
print('Repeat times:', len(n_accuracy))
# print('F1-score: %.5f (%.5f)' % (n_f1.mean(), n_f1.std()))
balanced_mean, balanced_std = n_balanced_accuracy.mean(), n_balanced_accuracy.std()
print('Balanced Accuracy: {:.5f} ({:.5f})'.format(balanced_mean, balanced_std))
accuracy_mean, accuracy_std = n_accuracy.mean(), n_accuracy.std()
print('Accuracy: {:.5f} ({:.5f})'.format(accuracy_mean, accuracy_std))

Repeat times: 10000
Balanced Accuracy: 0.97847 (0.03308)
Accuracy: 0.99654 (0.00527)


In [5]:
# fit model with all observations (no hold-out) on the four band columns,
# then serialise it next to the labelled CSVs
training_features = X[['b','g','r','nir']]
model.fit(training_features, y)
model_path = data_direc+'self_classified/3class_model.joblib'
joblib.dump(model, model_path)

['/Users/jpflug/Documents/Projects/cubesatReanaly/Data/Meadows/STR/self_classified/3class_model.joblib']