In [None]:
#Load DeepForest Prediction Shapefiles (Hemlock & Not Hemlock) and create UTM coordinate CSV

In [9]:
import os
import geopandas as gpd

# Load the shapefile
hemlock_shp_path = r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\QGIS Shapefiles\Hemlocks from DeepForest Predictions.shp"
hemlock_gdf = gpd.read_file(hemlock_shp_path)

other_trees_shp_path = r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\QGIS Shapefiles\Not Hemlocks from DeepForest Predicitons.shp"
other_trees_gdf = gpd.read_file(other_trees_shp_path)

# Extract geographic bounding boxes from the geometry
hemlock_gdf["xmin"] = hemlock_gdf.geometry.bounds.minx
hemlock_gdf["xmax"] = hemlock_gdf.geometry.bounds.maxx
hemlock_gdf["ymin"] = hemlock_gdf.geometry.bounds.miny
hemlock_gdf["ymax"] = hemlock_gdf.geometry.bounds.maxy

# Extract geographic bounding boxes from the geometry
other_trees_gdf["xmin"] = other_trees_gdf.geometry.bounds.minx
other_trees_gdf["xmax"] = other_trees_gdf.geometry.bounds.maxx
other_trees_gdf["ymin"] = other_trees_gdf.geometry.bounds.miny
other_trees_gdf["ymax"] = other_trees_gdf.geometry.bounds.maxy

# Optional: Keep only relevant columns
other_trees_gdf = other_trees_gdf[["xmin", "ymin", "xmax", "ymax", "image_path"]]


# Optional: Keep only relevant columns
hemlock_gdf = hemlock_gdf[["xmin", "ymin", "xmax", "ymax", "image_path"]]

# Save to CSV
hemlock_csv_path = r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\CSVs\Hemlocks from DeepForest\UTM Coordinates.csv"
other_trees_csv_path = r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\CSVs\Not Hemlocks from DeepForest\UTM Coordinates.csv"

hemlock_gdf.to_csv(hemlock_csv_path, index=False)
other_trees_gdf.to_csv(other_trees_csv_path, index=False)


print(f"✅ Hemlock CSV saved with corrected geographic coordinates at: {hemlock_csv_path}")
print(f"✅ Other Trees CSV saved with corrected geographic coordinates at: {other_trees_csv_path}")


✅ Hemlock CSV saved with corrected geographic coordinates at: C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\CSVs\Hemlocks from DeepForest\UTM Coordinates.csv
✅ Other Trees CSV saved with corrected geographic coordinates at: C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\CSVs\Not Hemlocks from DeepForest\UTM Coordinates.csv


In [None]:
#Hemlock from deepforest prediction: convert UTM TO PIXEL COORDINATES

In [10]:
import rasterio
import pandas as pd
from rasterio.transform import Affine

# --- Set file paths ---
raster_path = r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Raster Layers\Fixed-Fiery-Gizzard-Trail-5-10-2025-orthophoto.tif"
output_csv_path = r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\CSVs\Hemlocks from DeepForest\Pixel Coordinates.csv"

# --- Load raster to get affine transform ---
with rasterio.open(raster_path) as dataset:
    raster_crs = dataset.crs
    transform_matrix = dataset.transform

# --- Load CSV file with UTM bounding boxes ---
df = pd.read_csv(hemlock_csv_path)
gdf = df.copy()

# --- Drop rows with missing UTM coords ---
gdf = gdf.dropna(subset=["xmin", "xmax", "ymin", "ymax"])

# --- Define transformation function ---
def utm_to_pixel(x_utm, y_utm, transform_matrix):
    col, row = ~transform_matrix * (x_utm, y_utm)
    return int(col), int(row)

# --- Apply coordinate conversion ---
gdf["xmin_px"], gdf["ymin_px"] = zip(*gdf.apply(lambda row: utm_to_pixel(row["xmin"], row["ymax"], transform_matrix), axis=1))
gdf["xmax_px"], gdf["ymax_px"] = zip(*gdf.apply(lambda row: utm_to_pixel(row["xmax"], row["ymin"], transform_matrix), axis=1))

# --- Fill the image_path column ---
raster_filename = "Fixed-Fiery-Gizzard-Trail-5-10-2025-orthophoto.tif"
gdf["image_path"] = raster_filename

# --- Ensure the label column exists and is filled with 1 ---
gdf["label"] = 1

# --- Save result ---
gdf.to_csv(output_csv_path, index=False)

print(f"✅ Pixel coordinates (with label column) saved to: {output_csv_path}")


✅ Pixel coordinates (with label column) saved to: C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\CSVs\Hemlocks from DeepForest\Pixel Coordinates.csv


In [None]:
#Crop deepforest hemlocks from pixel coordinates using PIL

In [8]:
import os
import pandas as pd
import numpy as np
import rasterio
from PIL import Image

# Inputs
annotations_csv = r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\CSVs\Hemlocks from DeepForest\Pixel Coordinates.csv"
output_dir = r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\Image Datasets\Hemlocks from DeepForest"

os.makedirs(output_dir, exist_ok=True)

# Load the CSV
df = pd.read_csv(annotations_csv)

# Ensure labels are string
df["label"] = df["label"].astype(str)

# List to store metadata
metadata = []
source_name = "DeepForest"

# Crop each box manually using rasterio and PIL
with rasterio.open(raster_path) as src:
    for i, row in df.iterrows():
        xmin, ymin = int(row["xmin_px"]), int(row["ymin_px"])
        xmax, ymax = int(row["xmax_px"]), int(row["ymax_px"])
        label = row["label"]

        # Ensure subfolder exists
        label_dir = os.path.join(output_dir, label)
        os.makedirs(label_dir, exist_ok=True)

        try:
            # Crop window
            window = rasterio.windows.Window.from_slices((ymin, ymax), (xmin, xmax))
            crop = src.read(window=window)  # [bands, height, width]

            if crop.shape[0] < 3:
                print(f"⚠️ Skipping {i}: Not enough bands.")
                continue

            # Reformat bands
            crop = np.transpose(crop[:3], (1, 2, 0))  # (rows, cols, bands)

            # Clip safely, cast to uint8
            crop = np.clip(crop, 0, 255).astype(np.uint8)

            # Create filename
            filename = f"crop_{source_name}_{i:04d}_{label}.png"
            out_path = os.path.join(label_dir, filename)

            # ✅ Save with PIL (RGB order preserved)
            im = Image.fromarray(crop)
            im.save(out_path)

            # Save metadata
            metadata.append({
                "filename": filename,
                "xmin_px": xmin,
                "ymin_px": ymin,
                "xmax_px": xmax,
                "ymax_px": ymax,
                "label": label,
                "source": source_name
            })

        except Exception as e:
            print(f"❌ Crop failed at {i}: {e}")

# Save metadata
metadata_df = pd.DataFrame(metadata)
metadata_csv_path = os.path.join(output_dir, "crop_metadata.csv")
metadata_df.to_csv(metadata_csv_path, index=False)

print(f"✅ Crops and metadata saved in: {output_dir}")
print(f"✅ Metadata saved at: {metadata_csv_path}")

✅ Crops and metadata saved in: C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\Image Datasets\Hemlocks from DeepForest
✅ Metadata saved at: C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\Image Datasets\Hemlocks from DeepForest\crop_metadata.csv


In [None]:
#Non hemlock from deepforest: convert UTM to Pixel Coordinates

In [11]:

# Paths
output_csv_path = r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\CSVs\Not Hemlocks from DeepForest\Pixel Coordinates.csv"

# --- Load raster to get affine transform ---
with rasterio.open(raster_path) as dataset:
    raster_crs = dataset.crs
    transform_matrix = dataset.transform

# --- Load CSV file with UTM bounding boxes ---
df = pd.read_csv(other_trees_csv_path)
gdf = df.copy()

# --- Drop rows with missing UTM coords ---
gdf = gdf.dropna(subset=["xmin", "xmax", "ymin", "ymax"])

# --- Define transformation function ---
def utm_to_pixel(x_utm, y_utm, transform_matrix):
    col, row = ~transform_matrix * (x_utm, y_utm)
    return int(col), int(row)

# --- Apply coordinate conversion ---
gdf["xmin_px"], gdf["ymin_px"] = zip(*gdf.apply(lambda row: utm_to_pixel(row["xmin"], row["ymax"], transform_matrix), axis=1))
gdf["xmax_px"], gdf["ymax_px"] = zip(*gdf.apply(lambda row: utm_to_pixel(row["xmax"], row["ymin"], transform_matrix), axis=1))

# --- Fill the image_path column ---
raster_filename = "Fixed-Fiery-Gizzard-Trail-5-10-2025-orthophoto.tif"
gdf["image_path"] = raster_filename

# --- Ensure the label column exists and is filled with 1 ---
gdf["label"] = 0

# --- Save result ---
gdf.to_csv(output_csv_path, index=False)

print(f"✅ Pixel coordinates (with label column) saved to: {output_csv_path}")


✅ Pixel coordinates (with label column) saved to: C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\CSVs\Not Hemlocks from DeepForest\Pixel Coordinates.csv


In [None]:
#crop non hemlock images using custom function from pixel coordinates using PIL

In [31]:

# Inputs
annotations_csv = r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\CSVs\Not Hemlocks from DeepForest\Pixel Coordinates.csv"
output_dir = r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\Image Datasets\Not Hemlocks from DeepForest"

os.makedirs(output_dir, exist_ok=True)

# Load the CSV
df = pd.read_csv(annotations_csv)

# Ensure labels are string
df["label"] = df["label"].astype(str)

# List to store metadata
metadata = []
source_name = "DeepForest"

# Crop each box manually using rasterio and PIL
with rasterio.open(raster_path) as src:
    for i, row in df.iterrows():
        xmin, ymin = int(row["xmin_px"]), int(row["ymin_px"])
        xmax, ymax = int(row["xmax_px"]), int(row["ymax_px"])
        label = row["label"]

        # Ensure subfolder exists
        label_dir = os.path.join(output_dir, label)
        os.makedirs(label_dir, exist_ok=True)

        try:
            # Crop window
            window = rasterio.windows.Window.from_slices((ymin, ymax), (xmin, xmax))
            crop = src.read(window=window)  # [bands, height, width]

            if crop.shape[0] < 3:
                print(f"⚠️ Skipping {i}: Not enough bands.")
                continue

            # Reformat bands
            crop = np.transpose(crop[:3], (1, 2, 0))  # (rows, cols, bands)

            # Clip safely, cast to uint8
            crop = np.clip(crop, 0, 255).astype(np.uint8)

            # Create filename
            filename = f"crop_{source_name}_{i:04d}_{label}.png"
            out_path = os.path.join(label_dir, filename)

            # ✅ Save with PIL (RGB order preserved)
            im = Image.fromarray(crop)
            im.save(out_path)

            # Save metadata
            metadata.append({
                "filename": filename,
                "xmin_px": xmin,
                "ymin_px": ymin,
                "xmax_px": xmax,
                "ymax_px": ymax,
                "label": label,
                "source": source_name
            })

        except Exception as e:
            print(f"❌ Crop failed at {i}: {e}")

# Save metadata
metadata_df = pd.DataFrame(metadata)
metadata_csv_path = os.path.join(output_dir, "crop_metadata.csv")
metadata_df.to_csv(metadata_csv_path, index=False)

print(f"✅ Crops and metadata saved in: {output_dir}")
print(f"✅ Metadata saved at: {metadata_csv_path}")



✅ Crops and metadata saved in: C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\Image Datasets\Not Hemlocks from DeepForest
✅ Metadata saved at: C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\Image Datasets\Not Hemlocks from DeepForest\crop_metadata.csv


In [None]:
#custom haemlock annotations 

In [None]:
#shapefile to csv with geographical coordinates(UTM) using deepforest shapefile_to_annotations function

In [17]:
from deepforest.utilities import shapefile_to_annotations

# Define your shapefile paths
hemlock_shp_path = r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\QGIS Shapefiles\Hemlocks Custom Annotations (QGIS ).shp"

# Convert to annotations (bounding boxes)
hemlock_df = shapefile_to_annotations(
    shapefile=hemlock_shp_path,
    rgb= raster_path
,
    buffer_size=0.15
)

# Save as CSV
hemlock_df.to_csv(r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\CSVs\Hemlocks from QGIS Annotations\UTM Coordinates.csv", index=False)

print("✅ DataFrames saved successfully.")



Geometry type of shapefile is Polygon
CRS of shapefile is EPSG:32616
✅ DataFrames saved successfully.


In [None]:
#convert UTM coordinates to pixel coordinates

In [19]:
from rasterio.transform import Affine
from shapely import wkt

# Load the raster to check CRS
with rasterio.open(raster_path) as dataset:
    raster_crs = dataset.crs  # Get raster CRS
    transform_matrix = dataset.transform  # Affine transformation for pixel conversion

# Load the CSV dataset 
df = pd.read_csv(r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\CSVs\Hemlocks from QGIS Annotations\UTM Coordinates.csv")

# Convert 'geometry' column from WKT string to shapely objects
df["geometry"] = df["geometry"].apply(wkt.loads)

# Convert to GeoDataFrame using parsed geometry
gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:32616")

# Extract bounding box coordinates from geometry
gdf["xmin"] = gdf.geometry.bounds["minx"]
gdf["ymin"] = gdf.geometry.bounds["miny"]
gdf["xmax"] = gdf.geometry.bounds["maxx"]
gdf["ymax"] = gdf.geometry.bounds["maxy"]

# Ensure CRS matches the raster CRS, reproject if necessary
if gdf.crs != raster_crs:
    print(f"Reprojecting from {gdf.crs} to {raster_crs}")
    gdf = gdf.to_crs(raster_crs)

# Drop rows with any missing bounding box values
gdf = gdf.dropna(subset=["xmin", "xmax", "ymin", "ymax"])

# Function to convert UTM to Pixel Coordinates
def utm_to_pixel(x_utm, y_utm, transform_matrix):
    """Convert UTM coordinates to pixel coordinates using raster transformation."""
    col, row = ~transform_matrix * (x_utm, y_utm)
    return int(col), int(row)

# Apply transformation to all bounding boxes
gdf["xmin_px"], gdf["ymin_px"] = zip(*gdf.apply(lambda row: utm_to_pixel(row["xmin"], row["ymax"], transform_matrix), axis=1))
gdf["xmax_px"], gdf["ymax_px"] = zip(*gdf.apply(lambda row: utm_to_pixel(row["xmax"], row["ymin"], transform_matrix), axis=1))

# Save updated dataset with pixel coordinates
output_csv_path = r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\CSVs\Hemlocks from QGIS Annotations\Pixel Coordinates.csv"
gdf.to_csv(output_csv_path, index=False)

print(f"Conversion complete! Bounding boxes are now in pixel coordinates and saved to {output_csv_path}")


Conversion complete! Bounding boxes are now in pixel coordinates and saved to C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\CSVs\Hemlocks from QGIS Annotations\Pixel Coordinates.csv


In [21]:
from deepforest import model

# Inputs
annotations_csv = r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\CSVs\Hemlocks from QGIS Annotations\Pixel Coordinates.csv"

# Define the output folder for cropped images
output_dir = r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\Image Datasets\Hemlocks from QGIS Annotations"

# Label folder name (e.g., "1" for hemlocks)
label_folder = "1"

# Final output directory (inside '1')
final_output_dir = os.path.join(output_dir, label_folder)
os.makedirs(final_output_dir, exist_ok=True)

# Load dataset with pixel coordinates
df = pd.read_csv(annotations_csv)

# Ensure required columns exist
required_cols = ["xmin", "ymin", "xmax", "ymax"]
if not all(col in df.columns for col in required_cols):
    raise ValueError("CSV is missing required columns")

# Convert to list format for DeepForest
boxes = df[['xmin', 'ymin', 'xmax', 'ymax']].values.tolist()

# Labels: hard-coded label "1" because these are all hemlocks
source_name = "QGIS"
fixed_label = "1"  

# Extract the filename from raster_path
raster_filename = os.path.basename(raster_path)
raster_directory = os.path.dirname(raster_path)

# Ensure `images` is a list containing just the raster filename
images = [raster_filename] * len(boxes)

# Initialize CropModel
crop_model = model.CropModel(num_classes=2)

# Open raster manually
with rasterio.open(raster_path) as src:
    # Collect metadata
    metadata = []

    for idx, box in enumerate(boxes):
        xmin, ymin, xmax, ymax = map(int, box)

        try:
            # Crop
            window = src.read(window=((ymin, ymax), (xmin, xmax)))  # [bands, rows, cols]
            if window.shape[0] < 3:
                print(f"⚠️ Skipping {idx}: not enough bands.")
                continue

            window = window.transpose(1, 2, 0)  # [rows, cols, bands]
            window = (window.clip(0, 255)).astype('uint8')

            # Create a smart filename
            filename = f"crop_{source_name}_{idx:04d}_{fixed_label}.png"
            out_path = os.path.join(final_output_dir, filename)

            # Save crop
            im = Image.fromarray(window)
            im.save(out_path)

            # Save metadata
            metadata.append({
                "filename": filename, 
                "xmin_px": xmin,
                "ymin_px": ymin,
                "xmax_px": xmax,
                "ymax_px": ymax,
                "label": fixed_label,
                "source": source_name
            })

        except Exception as e:
            print(f"❌ Crop failed at {idx}: {e}")

# Save metadata to CSV
metadata_df = pd.DataFrame(metadata)
metadata_csv_path = os.path.join(output_dir, "crop_metadata.csv")
metadata_df.to_csv(metadata_csv_path, index=False)

print(f"✅ Image crops and metadata saved in: {final_output_dir}")
print(f"✅ Metadata CSV saved at: {metadata_csv_path}")


✅ Image crops and metadata saved in: C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\Image Datasets\Hemlocks from QGIS Annotations\1
✅ Metadata CSV saved at: C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\Image Datasets\Hemlocks from QGIS Annotations\crop_metadata.csv


In [None]:
#Filter out detections from DeepForest that are smaller than your target class. 
#DeepForest may produce predictions that are smaller than the target itself. 
#e.g. the model may draw boundin boxes around brachnes instead of the entire cannop. This can produce excesive amounts of data.
#use the code below to filter each image dataset based on minimum pixel size. Sort your target dataset (label = 1) by size. 
#Starting with the smallest images, work your way up unti you begin to see full canopy detections. calculate pixel size and use it as a threshold to filter incomplete detections
#Note training datasets (1 and 0) must be balanced (similar in size) for the classification model to train well. 


In [11]:
import os
from PIL import Image

folder_path = r"C:\Remote Sensing\Remote Sensing Projects\Fiery Gizzard Trail\Image Extraction\Image Datasets\Hemlocks from QGIS Annotations"
min_pixels = 20000
deleted_count = 0

#loop through images
for filename in os.listdir(folder_path):
    if filename.lower().endswith(".png"):
        file_path = os.path.join(folder_path, filename)
        try:
            with Image.open(file_path) as img:
                if img.width * img.height < min_pixels:
                    img.close()  # Ensure file is fully released
                    os.remove(file_path)
                    deleted_count += 1
        except:
            pass  # Skip problematic files silently

print(f"✅ Done. {deleted_count} images were deleted for being under {min_pixels} pixels.")

✅ Done. 0 images were deleted for being under 20000 pixels.


In [12]:
remaining_files = [f for f in os.listdir(folder_path) if f.lower().endswith(".png")]
print(len(remaining_files))

0
