In [17]:
import pickle
import geopandas as gpd
from shapely.geometry import mapping
import rasterio
from rasterio.features import geometry_mask
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

def crop_polygon_to_tile(polygon, tile_bounds):
    """Crop the polygon to the tile bounds."""
    tile_box = box(*tile_bounds)  # Create a bounding box for the tile
    cropped_polygon = polygon.intersection(tile_box)  # Intersect with the tile box
    return cropped_polygon

def extract_polygon_patch(polygon, image_data, src_transform):
    """Extract image patch within a polygon."""
    # Get the bounds of the polygon
    tile_bounds = polygon.bounds  # Automatically get bounds from the polygon
    cropped_polygon = crop_polygon_to_tile(polygon, tile_bounds)
    
    # Check if the cropped polygon is valid before proceeding
    if not cropped_polygon.is_empty:
        mask = geometry_mask([mapping(cropped_polygon)], transform=src_transform, invert=True, out_shape=(image_data.shape[1], image_data.shape[2]))
        masked_data = np.zeros_like(image_data[:3])
        
        for i in range(3):
            masked_data[i] = image_data[i] * mask
            
        bounds = cropped_polygon.bounds
        window = rasterio.windows.from_bounds(*bounds, transform=src_transform)
        row_off = int(window.row_off)
        col_off = int(window.col_off)
        height = int(window.height)
        width = int(window.width)
        patch = masked_data[:, row_off:row_off + height, col_off:col_off + width]
        patch = np.moveaxis(patch, 0, -1)
        patch = Image.fromarray(patch.astype(np.uint8))
        
        # Plot the image and mask side by side
        plt.figure(figsize=(15, 10))
        
        # Raster image with cropped polygon overlay
        plt.subplot(1, 2, 1)
        plt.imshow(image_data[0], cmap='gray', alpha=0.5)  # Show the first band of the image
        plt.gca().add_patch(plt.Polygon(list(cropped_polygon.exterior.coords), fill=None, edgecolor='red', linewidth=2, label='Cropped Polygon'))
        plt.title("Raster Image with Cropped Polygon Overlay")
        plt.legend()
        
        # Mask visualization
        plt.subplot(1, 2, 2)
        plt.imshow(mask, cmap='gray', alpha=0.5)  # Show the mask
        plt.title("Mask Visualization")
        plt.colorbar(label='Mask Value')

        plt.tight_layout()
        plt.show()
        
        return patch
    else:
        # Return None or handle the case where the polygon is empty
        print("Cropped polygon is empty.")
        return None

def process_shapefile(shapefile_path, tif_file_path):
    """Process shapefile to extract patches, labels, and FIDs."""
    polygons = gpd.read_file(shapefile_path)
    patches_and_labels = []
    
    with rasterio.open(tif_file_path) as src:
        image_data = src.read()
        src_transform = src.transform

    for idx, row in polygons.iterrows():
        polygon = row.geometry
        label = row['label']
        fid = int(row['FID'])  # Extract FID value
        transect = int(row['transect'])
        ID = f"{transect}_{fid}"
        print(label, fid, ID)

        patch = extract_polygon_patch(polygon, image_data, src_transform)
        if patch is not None:
            patches_and_labels.append((np.array(patch), label, ID))  # Store patch, label, and FID
        
    return patches_and_labels

# Example usage
shapefile_path = 'h:\\Yehmh\\DNDF\\202404_DNDF\\transects_seg_labeled\\transects_seg_species6.shp'
tif_file_path = 'h:\\Yehmh\\DNDF\\202404_DNDF\\DNDF_merge.tif'
pickle_file_path = 'h:\\Yehmh\\DNDF\\202404_DNDF\\pickles\\transects_seg_species6_patches_and_labels_5.pkl'

# Process and save patches_and_labels to a pickle file
patches_and_labels = process_shapefile(shapefile_path, tif_file_path)
with open(pickle_file_path, 'wb') as f:
    pickle.dump(patches_and_labels, f)

print("patches_and_labels saved to patches_and_labels.pkl")


Liqu_fo 0 101_0


NameError: name 'box' is not defined

In [1]:
import geopandas as gpd

# Load the shapefile
shapefile_path = 'h:\\Yehmh\\DNDF\\202404_DNDF\\seg_merged\\merged_seg_masks_name.shp'
gdf = gpd.read_file(shapefile_path)

# Print the attribute table
print(gdf.head())  # Prints the first 5 rows of the attribute table

   FID transect                                           geometry
0  0.0      101  POLYGON ((292240.215 2612874.048, 292240.677 2...
1  1.0      101  POLYGON ((292185.804 2612874.915, 292185.862 2...
2  2.0      101  POLYGON ((292245.356 2612864.694, 292245.472 2...
3  3.0      101  POLYGON ((292239.753 2612910.368, 292240.331 2...
4  4.0      101  POLYGON ((292193.948 2612866.715, 292194.006 2...


In [1]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point, Polygon
from shapely.ops import nearest_points

# Load the polygons shapefile
polygons = gpd.read_file('h:\\Yehmh\\DNDF\\202404_DNDF\\seg_merged\\merged_seg_masks_name.shp')

# Load the CSV file
csv_data = pd.read_csv('h:\\Yehmh\\DNDF\\DNDF_tree_survey_2023_rotated.csv')

# Filter out points where the "Stem" value is not 0
csv_data = csv_data[csv_data['Stem'] == 0]

# Convert the filtered CSV data to a GeoDataFrame using TWD97_X and TWD97_Y
csv_gdf = gpd.GeoDataFrame(csv_data, geometry=gpd.points_from_xy(csv_data.TWD97_X, csv_data.TWD97_Y))

# Ensure the coordinate system matches between polygons and points (TWD97)
csv_gdf.set_crs(epsg=3826, inplace=True)  # EPSG:3826 is the code for TWD97 TM2
polygons.to_crs(epsg=3826, inplace=True)

# Buffer polygons by 1 meter for the surrounding area check
polygons_buffered = polygons.copy()
polygons_buffered['geometry'] = polygons_buffered.geometry.buffer(1)

# Function to determine the label for each polygon
def label_polygon(polygon, buffered_polygon, points):
    within_polygon = points[points.within(polygon)]
    around_polygon = points[points.within(buffered_polygon)]
    
    # Combine points within and around the polygon
    combined_points = pd.concat([within_polygon, around_polygon]).drop_duplicates()
    
    if combined_points.empty:
        return None
    
    species_count = combined_points['CSP'].value_counts()
    top_species = species_count.index[0]
    max_dbh_species = combined_points.loc[combined_points['DBH'].idxmax()]['CSP']
    
    # If one species is the majority and also the largest DBH, label it as such
    if species_count.index[0] == max_dbh_species:
        return max_dbh_species
    
    # If counts are equal and max_dbh_species is among the top two, return max_dbh_species
    if len(species_count) > 1 and species_count.iloc[0] == species_count.iloc[1]:
        if max_dbh_species in top_species:
            return max_dbh_species

    # Return the top species and the max DBH species
    return ','.join([top_species, max_dbh_species])

# Apply the labeling function to each polygon
polygons['label'] = polygons.apply(lambda row: label_polygon(row['geometry'], 
                                                              polygons_buffered.loc[row.name, 'geometry'], 
                                                              csv_gdf), axis=1)

# Drop polygons without labels
labeled_polygons = polygons.dropna(subset=['label'])

# Save the labeled polygons to a new shapefile
labeled_polygons.to_file('h:\\Yehmh\\DNDF\\202404_DNDF\\transects_seg_labeled\\transects_seg_labeled_name.shp')


In [2]:
import geopandas as gpd

# Load the labeled polygons shapefile
labeled_polygons = gpd.read_file('h:\\Yehmh\\DNDF\\202404_DNDF\\transects_seg_labeled\\transects_seg_labeled_name.shp')

# Filter polygons that have only one species in the label
single_species_polygons = labeled_polygons[labeled_polygons['label'].apply(lambda x: ',' not in x)]

# Filter polygons that have two species in the label
two_species_polygons = labeled_polygons[labeled_polygons['label'].apply(lambda x: ',' in x)]

# Save the single species polygons to a new shapefile
single_species_polygons.to_file('h:\\Yehmh\\DNDF\\202404_DNDF\\transects_seg_labeled\\transects_seg_labeled_sure_name.shp')

# Save the two species polygons to a new shapefile
two_species_polygons.to_file('h:\\Yehmh\\DNDF\\202404_DNDF\\transects_seg_labeled\\transects_seg_labeled_unsure_name.shp')


In [20]:
import geopandas as gpd

# Load the labeled polygons shapefile
labeled_polygons = gpd.read_file('h:\\Yehmh\\DNDF\\202404_DNDF\\transects_seg_labeled\\transects_seg_labeled_name.shp')
gdf = gpd.read_file(shapefile_path)

# Define the labels you want to keep
labels_to_keep = ['Bisc_ja', 'Liqu_fo', 'Cinn_bu', 'Zelk_se', 'Mach_zu', 'Frax_gr']

# Filter the GeoDataFrame to only include the specified labels
filtered_gdf = gdf[gdf['label'].isin(labels_to_keep)]

# Save the filtered GeoDataFrame to a new shapefile
filtered_shapefile_path = 'h:\\Yehmh\\DNDF\\202404_DNDF\\transects_seg_labeled\\transects_seg_species6.shp'  # Update this with the desired output path
filtered_gdf.to_file(filtered_shapefile_path)

print(f"Filtered shapefile saved to {filtered_shapefile_path}")

Filtered shapefile saved to h:\Yehmh\DNDF\202404_DNDF\transects_seg_labeled\transects_seg_species6.shp


In [19]:
import geopandas as gpd
import pandas as pd

# File paths
shapefile_path = 'h:\\Yehmh\\DNDF\\202404_DNDF\\transects_seg_labeled\\transects_seg_labeled_name.shp'
csv_data_path = 'h:\\Yehmh\\DNDF\\DNDF_tree_survey_2023_rotated.csv'
output_csv_path = 'h:\\Yehmh\\DNDF\\202404_DNDF\\transects_seg_labeled\\transects_polygon_counts.csv'

# Read files
merged_gdf = gpd.read_file(shapefile_path)
csv_data = pd.read_csv(csv_data_path)

# Calculate polygon areas and filter by area > 2
merged_gdf['area'] = merged_gdf.geometry.area
filtered_gdf = merged_gdf[merged_gdf['area'] > 2]

# Count total and filtered polygons by transect
polygon_counts = merged_gdf['transect'].value_counts().rename("total_count").to_frame()
polygon_counts['filtered_count'] = filtered_gdf['transect'].value_counts()

# Load and filter tree data
csv_data = csv_data[csv_data['Stem'] == 0]
tree_counts = csv_data['Plot'].value_counts().rename('tree_counts').to_frame()
tree_counts['tree_counts_dbh10up'] = csv_data[csv_data['DBH'] > 10]['Plot'].value_counts()

# Ensure the 'Plot' column is of type string in both DataFrames
tree_counts.index = tree_counts.index.astype(str)
polygon_counts.index = polygon_counts.index.astype(str)

# Rename 'transect' column to 'Plot' in polygon_counts to match tree_counts DataFrame
polygon_counts = polygon_counts.rename_axis('Plot').reset_index()

# Merge DataFrames
merged_data = tree_counts.merge(polygon_counts, on='Plot', how='outer').fillna(0)
print(merged_data)

# Save the result to a CSV file
# merged_data.to_csv(output_csv_path, index=False)

print("Files merged successfully!")


   Plot  tree_counts  tree_counts_dbh10up  total_count  filtered_count
0    68          350                  154           67              64
1    81          246                  169           59              56
2   126          243                  106           42              42
3    70          241                  103           35              31
4    79          226                  127           49              47
5    41          221                  119           45              42
6    12          207                  156           24              23
7   101          182                  131           66              65
8    94          164                  117           55              54
9    16          151                   83           43              42
10   46          147                  106           35              35
11  112          146                   81           49              48
12   98          128                   84           54              53
13   8