In [None]:
import rasterio
from rasterio.windows import from_bounds
import geopandas as gpd
from shapely.geometry import box

### Remove files from the output folders

In [None]:
# Print the current working directory; the relative paths used in this notebook resolve against it.
!pwd

In [None]:
import os

# Folders whose files are deleted by this cell (subfolders are left untouched)
folder_paths = ['output/feature_image','output/feature_importance','output/model','output/prediction' ]

for folder_path in folder_paths:
    # A folder that does not exist has nothing to clean; skip it instead of
    # letting os.listdir raise FileNotFoundError and abort the whole cell.
    if not os.path.isdir(folder_path):
        print(f"Skipped missing folder: {folder_path}")
        continue

    # Loop through all entries in the folder
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)

        # Remove only files (not subfolders)
        if os.path.isfile(file_path):
            os.remove(file_path)
            print(f"Removed file: {file_path}")

In [None]:
# # Read the shapefile
# aoi_path = '../../study_area/palisades_aoi.shp'
# aoi_gdf = gpd.read_file(aoi_path)

# # Extract the bounding box (minx, miny, maxx, maxy)
# bbox = aoi_gdf.total_bounds  # This gives the bounding box as a list [minx, miny, maxx, maxy]

# print("Bounding Box:", bbox)

In [None]:
# Site configuration. Keep exactly one site block active: later cells read
# minx/miny/maxx/maxy, sample_data_path and clip_sample_output_path, so they
# must be defined here for the notebook to run from a fresh kernel.
# NOTE(review): bounds are presumably in the rasters' projected CRS (the SAR
# file names suggest EPSG:32611) -- confirm.

# Palisades (active)
minx, miny, maxx, maxy=337276.20835215, 3762751.49928493,  370706.59898943, 3781200.70468214
prefix="palisades"
sample_data_path= "../../sample_collection/palisades_random_sample.shp"
clip_sample_output_path='sample/palisades_random_sample.shp'


# Eaton (inactive): to switch sites, comment out the Palisades block above and
# uncomment the four lines below.
# minx, miny, maxx, maxy=390284.9643946937, 3778493.0553008374, 407654.9643946937, 3791083.0553008374
# prefix="eaton"
# sample_data_path= "../../sample_collection/eaton_random_sample.shp"
# clip_sample_output_path='sample/eaton_random_sample.shp'

In [None]:

# Load the sample features for the active site
sample_gdf = gpd.read_file(sample_data_path)

# Bounding box (minx, miny, maxx, maxy) taken from the configuration cell
bbox = (minx, miny, maxx, maxy)

# Turn the bounding box into a one-polygon GeoDataFrame. It is labelled with
# the samples' CRS, so the bounds are assumed to be expressed in that CRS
# -- TODO confirm.
bbox_polygon = box(*bbox)
bbox_gdf = gpd.GeoDataFrame({'geometry': [bbox_polygon]}, crs=sample_gdf.crs)

# Keep only the samples that intersect the bounding box
clipped_gdf = gpd.overlay(sample_gdf, bbox_gdf, how='intersection')

# Name of the column holding the class label
class_column_name='class'

# Statistics of the clipped samples, taken BEFORE the per-class sampling below
total_rows = clipped_gdf.shape[0]
unique_classes = clipped_gdf[class_column_name].nunique()
class_counts = clipped_gdf[class_column_name].value_counts()

# Per-class target: the number of clipped samples labelled 1
samples_per_class = clipped_gdf[clipped_gdf[class_column_name] == 1].shape[0]
print(samples_per_class)

# Draw up to `samples_per_class` rows from every class. The cap at the class
# size avoids the ValueError that sample() raises when more rows are requested
# than the class contains. Rows are selected by index rather than through
# groupby.apply, so the class column and the GeoDataFrame type are preserved.
keep_index = []
for _, class_rows in clipped_gdf.groupby(class_column_name):
    n_take = min(samples_per_class, len(class_rows))
    keep_index.extend(class_rows.sample(n=n_take, random_state=42).index)

# Reset index after selecting the sampled rows
clipped_gdf = clipped_gdf.loc[keep_index].reset_index(drop=True)

# Display the results
print(f"Total number of rows: {total_rows}")
print(f"Number of unique classes: {unique_classes}")
print(f"Class distribution:\n{class_counts}")

# Save the sampled data to a new shapefile
clipped_gdf.to_file(clip_sample_output_path)

# Optional: Display the result
clipped_gdf.plot()

In [None]:

# SAR input raster. The active path is the same file that the commented-out
# band-reordering cell further down writes to (output_ordered_image_path).
# NOTE(review): in the visible cells this variable is only referenced by
# commented-out images_dict entries.
# Alternative SAR inputs, kept for reference:
# input_sar_image_path  = "../../Asc_SAR_Data_Processing/10_projection/subset_Asc_spksigma_TC_32611.tif"
# input_sar_image_path  = "intermediate/sar_asc_avg.tif"
input_sar_image_path  = "../../Desc_SAR_Data_Processing/11_ordered/subset_S1A_Desc_partial_Cal_Stack_Spk_TC_32611_ordered.tif"


# Other optional inputs, currently disabled (matching images_dict entries are
# commented out as well):
# glcm_raster_path="input/dGLCM.tif"
# glcm_pca_raster_path="input/dGLCM_pca.tif"
# thermal_image_path="../../ThermalData/03_diff/dTRAD.tif"
# dnbr_image_path="../../S2_Data_Processing/Indices/04_CLIP/dNBR_bilinear_10_clip.tif"

In [None]:
# Print the current working directory again before the relative raster paths below are opened.
!pwd

In [None]:

# Rasters to inspect and clip, keyed by output name: the clipping cell writes
# each entry to input/<name>.tif, reading from the mapped source path.
images_dict = {
    "palisades_dnbr_label_clip": "gt/palisades_dnbr_label.tif",

    # Disabled entries, kept for reference:
    # f"{prefix}_s1_desc_ready": input_sar_image_path,
    # f"{prefix}_s1_avg_ready": input_sar_image_path,
    # f"{prefix}_glcm_ready": glcm_raster_path,
    # f"{prefix}_glcm_pca_ready": glcm_pca_raster_path,
    # f"{prefix}_thermal_ready": thermal_image_path,
    # f"{prefix}_dnbr_ready": dnbr_image_path,
}




### Save SAR bands in order: VH_PRE, VV_PRE, VH_POST, VV_POST

In [None]:

# # save in order: VH_PRE, VV_PRE, VH_POST, VV_POST
# input_sar_image_path="../../Desc_SAR_Data_Processing/10_projection/subset_S1A_Desc_partial_Cal_Stack_Spk_TC_32611.tif"
# output_ordered_image_path="../../Desc_SAR_Data_Processing/11_ordered/subset_S1A_Desc_partial_Cal_Stack_Spk_TC_32611_ordered.tif"
# with rasterio.open(input_sar_image_path) as dataset:
#     vh_post_band=dataset.read(1)
#     vv_post_band=dataset.read(2)
#     vh_pre_band=dataset.read(3)
#     vv_pre_band=dataset.read(4)
#     meta=dataset.meta.copy()

# # Write the clipped data to a new file
# with rasterio.open(output_ordered_image_path, 'w', **meta) as dest:
#     dest.write(vh_pre_band, 1)  
#     dest.write(vv_pre_band, 2)  
#     dest.write(vh_post_band, 3)  
#     dest.write(vv_post_band, 4)  


In [None]:
# Report the basic georeferencing of every raster registered in images_dict.
for image_key, image_path in images_dict.items():
    print(image_key)

    with rasterio.open(image_path) as raster:
        # CRS, pixel dimensions, spatial extent and pixel size of the source
        print(f"CRS: {raster.crs}")
        print(f"Width: {raster.width}")
        print(f"Height: {raster.height}")
        print(f"Extent: {raster.bounds}")
        print(f"Resolution: {raster.res}")
        print("----------------------------------------")

## Clipping data by extent

In [None]:
# Clip extent as (minx, miny, maxx, maxy); the clipping cell unpacks it into
# rasterio.windows.from_bounds ahead of the dataset transform.
clip_extent = (minx, miny, maxx, maxy)

In [None]:
# Clip every raster in images_dict to clip_extent and write it to input/<key>.tif
for image_key in list(images_dict.keys()):

    # Open the source raster
    with rasterio.open(images_dict[image_key]) as dataset:

        # Pixel window covering the requested extent
        window = from_bounds(*clip_extent, dataset.transform)

        # Restrict the window to the raster itself. A non-boundless read cannot
        # return pixels outside the dataset, so the transform has to be derived
        # from the cropped window or the output would be georeferenced with an
        # origin that lies outside the data actually written.
        window = window.crop(dataset.height, dataset.width)

        # Affine transform of the clipped raster
        clipped_transform = dataset.window_transform(window)

        # Read every band once; the array is shaped (bands, rows, cols)
        clipped_data = dataset.read(window=window)

        # Reuse the source metadata, overriding only what the clip changes
        clipped_meta = dataset.meta.copy()
        clipped_meta.update({
            'height': clipped_data.shape[1],
            'width': clipped_data.shape[2],
            'transform': clipped_transform
        })

        # Write all clipped bands to the new file, keeping the source band order
        with rasterio.open(f'input/{image_key}.tif', 'w', **clipped_meta) as dest:
            dest.write(clipped_data)


## Determining Threshold


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import rasterio
from skimage.filters import threshold_otsu

# Read the dNBR TIFF image using rasterio
file_path = 'input/palisades_dnbr_ready.tif'  # Update with your dNBR image file path
output_file_path = 'gt/palisades_label_0_17.tif'

# Sentinel that this cell has always treated as no-data (it is the lowest
# float32 value); still honoured in case the file declares no nodata value.
FALLBACK_NODATA = -3.4028235e+38

with rasterio.open(file_path) as src:
    dNBR_data = src.read(1)  # Read the first band (assuming it's a single-band image)
    print(np.min(dNBR_data),"min")
    # Capture the georeferencing while the dataset is still open instead of
    # reading attributes from a closed dataset later on.
    src_crs = src.crs
    src_transform = src.transform
    src_nodata = src.nodata

# Pixels that carry a real dNBR value: finite, and neither the declared nodata
# value nor the fallback sentinel.
valid_mask = np.isfinite(dNBR_data) & (dNBR_data != FALLBACK_NODATA)
if src_nodata is not None:
    valid_mask &= dNBR_data != src_nodata
if not valid_mask.any():
    raise ValueError(f"No valid dNBR pixels found in {file_path}")

# Invalid pixels are set to 0 so the array can be displayed and classified
dNBR_data[~valid_mask] = 0

# 1D array of the valid values only; filled no-data pixels would otherwise add
# an artificial spike at 0 to the histogram and bias the Otsu threshold.
dNBR_flat = dNBR_data[valid_mask]

# Plot histogram of dNBR values
plt.hist(dNBR_flat, bins=50, range=(dNBR_flat.min(), dNBR_flat.max()), alpha=0.75, color='blue')
plt.title("Histogram of dNBR Values")
plt.xlabel("dNBR values")
plt.ylabel("Frequency")
plt.show()

# Apply Otsu's thresholding method to find the optimal threshold
threshold = threshold_otsu(dNBR_flat)
print(f"Optimal threshold based on Otsu's method: {threshold}")

# Classify the dNBR image using the threshold (Burnt vs Non-burnt);
# no-data pixels are always labelled non-burnt.
classified_image = (dNBR_data > threshold) & valid_mask

# Plot the classified image
plt.imshow(classified_image, cmap='gray')
plt.title("Classified Burnt Area (Burnt = White, Non-Burnt = Black)")
plt.colorbar(label="Class (0 = Non-Burnt, 1 = Burnt)")
plt.show()

# Save the classified image as a new single-band TIFF. The declared dtype
# matches the uint8 label array that is written (the original declared the
# float dtype of the dNBR input).
with rasterio.open(output_file_path, 'w', driver='GTiff', height=dNBR_data.shape[0], width=dNBR_data.shape[1],
                   count=1, dtype='uint8', crs=src_crs, transform=src_transform) as dst:
    dst.write(classified_image.astype(np.uint8), 1)
