In [3]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
import os

# Paths to the input files
# (alternate dataset, kept commented out so it can be switched back in)
# shapefile_path = 'h:\\Yehmh\\DNDF\\101_1_focus\\202404_101_seg_shp_labeled\\DNDF101_clip_seg_labeled.shp'
# tif_path = 'h:\\Yehmh\\DNDF\\101_1_focus\\DNDF101_clip.tif'
# output_folder = 'h:\\Yehmh\\DNDF\\101_1_focus\\202404_101_seg_images_labeled'

shapefile_path = 'h:\\Yehmh\\BCI\\BCI_50ha_2020_08_01_crownmap_improved_ExportFeatures.shp'
tif_path = 'h:\\Yehmh\\BCI\\BCI_50ha_2020_08_01_global.tif'
output_folder = 'h:\\Yehmh\\BCI\\2020_08_01_seg_images_labeled'

# Attribute column whose value names the output sub-folder and the file prefix.
# label_column = 'sp'
label_column = 'Mnemonic'

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Read the shapefile
gdf = gpd.read_file(shapefile_path)

# Next file index to use for each label (indices start at 1).
# Keyed by the string form of the label because that is what names the folder;
# this also keeps NaN labels (which never compare equal) on a single counter.
counters = {}

# Number of polygons that could not be cropped and were skipped.
skipped = 0

# Read the raster file
with rasterio.open(tif_path) as src:
    # mask() interprets the shape coordinates in the raster's coordinate system,
    # so bring the polygons into the raster CRS first. to_crs() returns a new
    # frame, leaving `gdf` untouched.
    if gdf.crs is not None and src.crs is not None:
        polygons = gdf.to_crs(src.crs.to_wkt())
    else:
        polygons = gdf

    # Loop through each polygon in the shapefile
    for geom, sp_value in zip(polygons['geometry'], polygons[label_column]):
        # Rows without a usable geometry cannot be cropped.
        if geom is None or geom.is_empty:
            skipped += 1
            continue

        sp_name = str(sp_value)

        # Use the geometry to crop the raster. rasterio raises ValueError when
        # the shape does not overlap the raster; skip that polygon instead of
        # aborting the whole run.
        try:
            out_image, out_transform = mask(src, [geom], crop=True)
        except ValueError as err:
            print(f"Skipping polygon with label {sp_name}: {err}")
            skipped += 1
            continue

        # Update the metadata to reflect the new shape
        # (out_image is indexed as bands, rows, columns)
        out_meta = src.meta.copy()
        out_meta.update({
            "driver": "GTiff",
            "height": out_image.shape[1],
            "width": out_image.shape[2],
            "transform": out_transform
        })

        # Create the folder for this label only once there is something to write
        sp_folder = os.path.join(output_folder, sp_name)
        os.makedirs(sp_folder, exist_ok=True)

        # Define the output file path: <label>/<label>_<index>.tif
        file_index = counters.setdefault(sp_name, 1)
        output_file = os.path.join(sp_folder, f'{sp_name}_{file_index}.tif')

        # Save the cropped image
        with rasterio.open(output_file, "w", **out_meta) as dest:
            dest.write(out_image)

        # Increment the counter for this label
        counters[sp_name] = file_index + 1

if skipped:
    print(f"Skipped {skipped} polygon(s) that could not be cropped")
print(f"Cropped images saved in {output_folder}")


Cropped images saved in h:\Yehmh\BCI\2020_08_01_seg_images_labeled


In [2]:
import os
output_folder = 'h:\\Yehmh\\DNDF\\101_1_focus\\202404_101_seg_images_labeled'


def _numeric_suffix_key(file_name):
    """Sort key that orders '<name>_<number>.tif' files by their number.

    Plain string sorting puts 'x_10.tif' before 'x_2.tif'. Files whose stem
    does not end in '_<digits>' sort after all numbered files, by name.
    """
    stem = os.path.splitext(file_name)[0]
    suffix = stem.rsplit('_', 1)[-1]
    if suffix.isdecimal():
        return (0, int(suffix), file_name)
    return (1, 0, file_name)


# Renumber the .tif files in every sub-folder to <folder>_1.tif ... <folder>_N.tif.
for sp_folder in os.listdir(output_folder):
    full_sp_folder = os.path.join(output_folder, sp_folder)
    if not os.path.isdir(full_sp_folder):
        continue

    current_names = sorted(
        (f for f in os.listdir(full_sp_folder) if f.endswith('.tif')),
        key=_numeric_suffix_key,
    )
    final_names = [f'{sp_folder}_{count}.tif' for count in range(1, len(current_names) + 1)]

    # Nothing to do when the folder is already numbered consecutively.
    if current_names == final_names:
        continue

    # Phase 1: move every file to a staging name. Renaming straight to the
    # final name can hit a file that has not been moved yet, which raises
    # FileExistsError on Windows and silently overwrites the file on POSIX.
    # Staging names still end in '.tif' so an interrupted run can be repeated.
    staged_names = []
    for count, current_name in enumerate(current_names, start=1):
        staged_name = f'__renumbering_{count}__{current_name}'
        staged_path = os.path.join(full_sp_folder, staged_name)
        if os.path.exists(staged_path):
            raise FileExistsError(f'Staging name already in use: {staged_path}')
        os.rename(os.path.join(full_sp_folder, current_name), staged_path)
        staged_names.append(staged_name)

    # Phase 2: every .tif now has a staging name, so the final names are free.
    for staged_name, final_name in zip(staged_names, final_names):
        os.rename(os.path.join(full_sp_folder, staged_name),
                  os.path.join(full_sp_folder, final_name))

In [4]:
# For every labeled image, save the square crops that contain the fewest black pixels

import os
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Side length, in pixels, of the square crops
crop_size = 64

# Input root (one sub-folder per label) and the root the crops are written to
data_dir = 'h:\\Yehmh\\DNDF\\101_1_focus\\202404_101_seg_images_labeled'
output_dir = 'h:\\Yehmh\\DNDF\\101_1_focus\\202404_101_seg_images_labeled_cropped'

# Make sure the output root exists before anything is written into it
os.makedirs(output_dir, exist_ok=True)

# X collects the crops (features), y the matching folder names (labels)
X, y = [], []

def find_best_crops(image, crop_size, num_crops=3):
    """Return the square crops of ``image`` that contain the fewest black pixels.

    A ``crop_size`` x ``crop_size`` window is slid over the image with a step
    of half the crop size, so neighbouring windows overlap by half. A pixel
    counts as black when all of its channels are 0.

    Parameters
    ----------
    image : numpy.ndarray
        Array of shape (height, width, channels) or (height, width).
    crop_size : int
        Side length of the square crops, in pixels. Must be at least 1.
    num_crops : int, optional
        Maximum number of crops to return (default 3).

    Returns
    -------
    list of numpy.ndarray
        Up to ``num_crops`` crops, ordered from fewest to most black pixels;
        ties keep scan order (top-to-bottom, then left-to-right). The crops
        are views into ``image``, not copies. The list is empty when the
        image is smaller than ``crop_size`` in either dimension.

    Raises
    ------
    ValueError
        If ``crop_size`` is less than 1 or ``num_crops`` is negative.
    """
    if crop_size < 1:
        raise ValueError(f"crop_size must be at least 1, got {crop_size}")
    if num_crops < 0:
        raise ValueError(f"num_crops must not be negative, got {num_crops}")

    height, width = image.shape[:2]
    # Half-window step; never 0, otherwise range() would reject it.
    step_size = max(1, crop_size // 2)

    # Keep only (black_pixel_count, top, left) per window; the pixel data is
    # sliced again for the few windows that are actually returned.
    candidates = []
    for top in range(0, height - crop_size + 1, step_size):
        for left in range(0, width - crop_size + 1, step_size):
            window = image[top:top + crop_size, left:left + crop_size]
            if window.ndim == 2:
                black = window == 0
            else:
                black = np.all(window == 0, axis=-1)
            candidates.append((int(np.count_nonzero(black)), top, left))

    # list.sort is stable, so windows with equal counts stay in scan order.
    candidates.sort(key=lambda candidate: candidate[0])
    return [
        image[top:top + crop_size, left:left + crop_size]
        for _, top, left in candidates[:num_crops]
    ]

# Walk every label folder, crop each .tif and save the crops under
# output_dir/<folder>/<original stem>_<crop index>.tif.
# Entries are sorted so that the order of X and y is the same on every run.
for folder in sorted(os.listdir(data_dir)):
    folder_path = os.path.join(data_dir, folder)
    # Only sub-folders hold images; stray files in data_dir are ignored.
    if not os.path.isdir(folder_path):
        continue

    output_path = os.path.join(output_dir, folder)

    for file in sorted(os.listdir(folder_path)):
        if not file.endswith(".tif"):
            continue

        image_path = os.path.join(folder_path, file)
        # cv2.imread returns None instead of raising when a file cannot be read.
        image = cv2.imread(image_path)
        if image is None:
            print(f"Skipping unreadable image: {image_path}")
            continue

        # Find the best crops with the least number of black pixels
        best_crops = find_best_crops(image, crop_size)
        if not best_crops:
            # No crop fits, e.g. the image is smaller than crop_size.
            continue

        os.makedirs(output_path, exist_ok=True)
        file_stem = os.path.splitext(file)[0]

        for idx, crop in enumerate(best_crops):
            X.append(crop)
            y.append(folder)

            # Save the cropped image with the new naming convention
            save_path = os.path.join(output_path, f"{file_stem}_{idx}.tif")

            # cv2.imwrite reports failure through its return value.
            if not cv2.imwrite(save_path, crop):
                print(f"Failed to write crop: {save_path}")

            # Optionally display the cropped image
            # plt.imshow(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
            # plt.title(f"Cropped Image from folder: {folder}")
            # plt.axis('off')
            # plt.show()