In [1]:
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
import os
import cv2

# Utility functions

In [2]:
def process_gvi_threshold(image_path, fixed_threshold=True):
    """
    Process RGB image to calculate green view index with Otsu thresholding.
    Following the proposed method in 10.1016/j.ufug.2015.06.006.
    """
    # Read RGB image
    img = cv2.imread(image_path)
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
    
    # Split into RGB bands
    red = img_rgb[:, :, 0].astype(np.float32)
    green = img_rgb[:, :, 1].astype(np.float32)
    blue = img_rgb[:, :, 2].astype(np.float32)
    
    # Calculate differences
    diff1 = green - red    # green - red difference
    diff2 = green - blue   # green - blue difference
    
    # Multiply differences
    diff = diff1 * diff2

    # Compute mask based on threshold
    if fixed_threshold:
        # 1. diff1 > 0
        # 2. diff > 0
        mask = (diff1 > 0) & (diff > 0)
        adaptive_threshold = 0 
    else: 
        # Apply 75th percentile threshold to diff values where green dominates red
        # Only consider pixels where diff1 > 0 (green > red)
        valid_diff = diff[diff1 > 0]
        
        if len(valid_diff) > 0:
            # Use 75th percentile as adaptive threshold
            adaptive_threshold = np.percentile(valid_diff, 75)
            # Create final mask: green dominates red AND diff exceeds percentile threshold
            mask = (diff1 > 0) & (diff > adaptive_threshold)
        else:
            # No pixels where green dominates red
            adaptive_threshold = 0
            mask = np.zeros_like(diff1, dtype=bool) 
    
    # Calculate Green View Index as percentage of pixels meeting criteria
    total_pixels = mask.size
    green_pixels = np.sum(mask)
    green_view_index = (green_pixels / total_pixels)
    
    return green_view_index, mask, adaptive_threshold

# Green View Index computation with value thresholding

In [3]:
data_folder = "data/svi/all/"
all_files = os.listdir(data_folder)
end_string= ".jpeg"
all_files = [f for f in all_files if end_string in f]

df_all_gvi_threshold = pd.DataFrame(columns=[
    "uuid", 
    "green_view_index_threshold_75", 
    "green_view_index_threshold_pos", 
    "otsu_threshold"
])

# takes some time to run on CPU
for file_name in tqdm(all_files, desc="Processing images"):
    # compute gvi 
    gvi_threshold_1, mask_1, threshold_75 = process_gvi_threshold(data_folder + file_name, fixed_threshold = False)
    gvi_threshold_2, mask_2, _ = process_gvi_threshold(data_folder + file_name, fixed_threshold = True)

    # Save masks as PNG
    masks_location = "data/segmented_threshold/"
    mask_image_1 = (mask_1.astype(np.uint8) * 255)
    mask_image_2 = (mask_2.astype(np.uint8) * 255)
    mask_path_1 = masks_location + file_name.replace('.jpeg', '') + '_segmented_75.png'
    mask_path_2 = masks_location + file_name.replace('.jpeg', '') + '_segmented_pos.png'
    cv2.imwrite(mask_path_1, mask_image_1)
    cv2.imwrite(mask_path_2, mask_image_2)

    # keep track of uuid (file_name) and their gvi 
    df_all_gvi_threshold.loc[len(df_all_gvi_threshold)] = {
        "uuid": file_name.replace(end_string, ""),
        "green_view_index_threshold_75": gvi_threshold_1,
        "green_view_index_threshold_pos": gvi_threshold_2,
        "otsu_threshold": threshold_75
    } 
    # end for loop

# save the file
df_all_gvi_threshold.to_csv("data/labels/processed/all_imgs_gvi_threshold.csv", index=False)
df_all_gvi_threshold.head()

Processing images:   0%|          | 0/436 [00:00<?, ?it/s]

Unnamed: 0,uuid,green_view_index_threshold_75,green_view_index_threshold_pos,otsu_threshold
0,d63c7ab6-5b2d-48a5-ab56-940829b51ddd,0.10145,0.119933,5.0
1,7c3d4231-ecfa-45f3-9994-803f9d48a5cf,0.221751,0.193442,-16.0
2,2df50d9d-3919-4e2c-9295-a933e9d073ab,0.142892,0.288867,68.0
3,6be29618-8235-4eae-b0ab-fe7e9fa5327e,0.188556,0.042613,-15.0
4,8185f428-a9be-4a0b-a66c-61d925dca0a1,0.077121,0.194082,56.0
