# Using the Final Random Forest Model To Predict for Years 2020-2023

In [None]:
# importing packages and initializing drive
# https://joblib.readthedocs.io/en/stable/
# https://rasterio.readthedocs.io/en/stable/
# https://gdal.org/index.html

from google.colab import drive
from osgeo import gdal  # reads the bands of the input GeoTIFF
import numpy as np  # array stacking, masking and reshaping
# Mount Google Drive so the raster, model and output paths under
# /content/drive used in the next cell can be resolved.
drive.mount('/content/drive')
import joblib  # loads the saved random forest model from disk
# rasterio is installed with pip at runtime, right before it is imported.
!pip install rasterio
import rasterio  # copies the source raster's profile and writes the output GeoTIFF

In [None]:
# Open the raster to predict for and turn it into a (pixels, bands) feature
# matrix that can be run through the RF model.
file_path = '/content/drive/MyDrive/CS 131 final project/tif files/cs131_costa_rica_2020_30m-0.tif'

tif_dataset = gdal.Open(file_path)
# gdal.Open returns None instead of raising unless gdal.UseExceptions() is
# active, so fail here with a readable message rather than an AttributeError
# on the next line.
if tif_dataset is None:
    raise RuntimeError(f"GDAL could not open raster: {file_path}")

rows = tif_dataset.RasterYSize
cols = tif_dataset.RasterXSize

# GDAL band indices are 1-based; every band is read as a (rows, cols) array.
bands_data = [
    tif_dataset.GetRasterBand(i).ReadAsArray()
    for i in range(1, tif_dataset.RasterCount + 1)
]

# One row per pixel (row-major order), one column per band, in file order.
stacked_data = np.stack(bands_data, axis=-1).reshape(rows * cols, len(bands_data))

# Band labels.
# NOTE(review): presumably the feature order the model expects, i.e. the
# column order produced by the reindexing below -- confirm against training.
column_names = ['B2', 'B3', 'B4', 'B8', 'B11', 'NDVI', 'SAVI', 'NDMI', 'NDWI']

# Column k of the reordered matrix is taken from source band new_order[k].
new_order = [4, 0, 1, 2, 3, 7, 5, 8, 6]

# Report a raster with too few bands clearly instead of an IndexError.
if max(new_order) >= stacked_data.shape[1]:
    raise ValueError(
        f"Raster has {stacked_data.shape[1]} band(s) but the column reorder "
        f"needs at least {max(new_order) + 1}: {file_path}"
    )

stacked_data = stacked_data[:, new_order]

# Flag every pixel that cannot be scored: a pixel is masked when any of its
# bands is NaN or infinite. Infinite values are masked as well as NaN so they
# never reach predict().
# NOTE(review): rf_final is presumably a scikit-learn forest, which rejects
# non-finite input -- confirm.
mask = ~np.isfinite(stacked_data).all(axis=1)

# The mask is already one-dimensional (one entry per pixel); this alias keeps
# the original variable name available.
mask_flat = mask

# Flat pixel indices of the masked (null) pixels
null_indices = np.flatnonzero(mask_flat)

# Load in the final model.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
rf_final = joblib.load('/content/drive/MyDrive/CS 131 final project/random_forest_model.pkl')

# Make predictions only on the pixels without nulls. When the whole tile is
# masked there is nothing to predict, so skip the call instead of handing the
# model an empty array.
valid = ~mask_flat
if valid.any():
    predictions = rf_final.predict(stacked_data[valid])
else:
    predictions = np.empty(0)

# Start from an all-NaN vector with one slot per pixel ...
# NOTE(review): assumes the model's predictions are numeric so they fit in a
# float array -- confirm.
predictions_with_nulls = np.full(rows * cols, np.nan)

# ... and fill in the predictions at the valid pixel positions
predictions_with_nulls[valid] = predictions

# Reshape the flat predictions back to the raster's (rows, cols) grid
predictions_reshaped = predictions_with_nulls.reshape(rows, cols)

# Define output file path
output_file_path = '/content/drive/My Drive/rf_predicted_output_30m_2020_0.tif'

# Open the original GeoTIFF file to copy its metadata
with rasterio.open(file_path) as src:
    profile = src.profile
    # Not needed for the write below: the profile already carries the
    # transform. The variable is kept so the name stays defined.
    transform = src.transform

# Update the metadata for the predicted data: a single float32 band.
# Unpredicted pixels are stored as NaN, so declare NaN as the nodata value;
# otherwise the output inherits the source raster's nodata setting and the
# NaN pixels are not flagged as missing.
profile.update(dtype=rasterio.float32, count=1, nodata=np.nan)

# Write the predicted data to a new GeoTIFF file
with rasterio.open(output_file_path, 'w', **profile) as dst:
    # Band 1 holds the predictions; georeferencing comes from the profile
    dst.write(predictions_reshaped.astype(rasterio.float32), 1)