# In [None]:
# Classify a Sentinel-2 scene using available spectral libraries for soil and vegetation.

import numpy as np
import pandas as pd
import rasterio as rio
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt #for imshow

# Define spectral libraries: one reference spectrum per land-cover class.
# NOTE(review): the values are assumed to be ordered [red, green, blue, nir]
# (matching BAND_NAMES below) and to be on the same scale as the raster's
# pixel values -- confirm against the source of the library.
spectral_libraries = {
    'vegetation': [2.8, 3.9, 2.1, 1.5],
    'bare_soil': [1.3, 1.6, 1.0, 0.8],
    'water': [0.2, 0.3, 0.4, 0.5]
}

# Feature columns shared by the training spectra and the per-pixel table, so
# the classifier is fitted and applied on exactly the same features.
BAND_NAMES = ['red', 'green', 'blue', 'nir']

# Open Sentinel-2 raster file.
with rio.open('sentinel-2.tif') as src:
    # Copy the profile while the dataset is still open; it is reused for the
    # output file after this block has closed the source.
    profile = src.profile.copy()

    # Read the raster bands into numpy arrays.
    red = src.read(3)
    green = src.read(2)
    blue = src.read(1)
    nir = src.read(4)

# Reshape the band arrays into a 2D array (pixels as rows, bands as columns).
bands = np.column_stack(
    [red.ravel(), green.ravel(), blue.ravel(), nir.ravel()]
)

# Convert numpy array into a pandas DataFrame.
df = pd.DataFrame(bands, columns=BAND_NAMES)

# Calculate veg index in floating point: unsigned integer bands would wrap
# around on `nir - red`. Pixels where nir + red == 0 get NaN.
nir_f = df['nir'].astype('float64')
red_f = df['red'].astype('float64')
ndvi_denominator = nir_f + red_f
df['ndvi'] = ((nir_f - red_f) / ndvi_denominator).where(ndvi_denominator != 0)

# Training data (spectral libraries) and labels.
# Labels are encoded as integer codes (position in class_names) so that the
# predictions can be written to a uint8 raster and displayed as an image.
X_train = pd.DataFrame.from_dict(
    spectral_libraries, orient='index', columns=BAND_NAMES
)
class_names = list(X_train.index)
y_train = np.arange(len(class_names), dtype=np.uint8)

# Train Random Forest classifier.
clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(X_train, y_train)

# Use clf to predict the class code of each pixel. Only the band columns are
# passed: the classifier was not trained on NDVI, so including it would not
# match the fitted feature set.
y_pred = clf.predict(df[BAND_NAMES])

# Reshape predictions back into the original raster shape.
pred_raster = y_pred.reshape(red.shape).astype(rio.uint8)

# Save the predicted raster as a single-band uint8 GeoTIFF. The source profile
# describes a multi-band raster, so count and dtype are overridden; nodata is
# cleared because the source value may not be representable as uint8.
profile.update(count=1, dtype=rio.uint8, nodata=None)
with rio.open('vegetation_classification.tif', 'w', **profile) as dst:
    dst.write(pred_raster, 1)

# Plot the predicted classes of the pixels (2D class-code raster).
plt.figure()
# Fix the colour limits so each class code always maps to the same colour,
# even when a class is absent from the prediction.
plt.imshow(pred_raster, cmap='tab10', vmin=-0.5, vmax=len(class_names) - 0.5)
cbar = plt.colorbar(ticks=range(len(class_names)))
cbar.ax.set_yticklabels(class_names)
plt.show()