In [1]:
# === Imports ===
import os
import csv
import pandas as pd
import numpy as np
from PIL import Image
from roboflow import Roboflow
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import ret

In [2]:
# === 0. CONFIGURE PATHS ===
# Folder that is scanned for metadata CSV files (every *.csv in it is loaded in step 1).
METADATA_FOLDER = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\Testing data\metadata"
# Folder with one sub-folder per class; each sub-folder holds the test patch images (steps 2 and 3).
TEST_PATCH_DIR = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\Testing data\test"
# Destination CSV for the metadata merged with the brightness measurements (step 4).
OUTPUT_METADATA = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\metadata_with_brightness.csv"

In [3]:
# === Thresholds per class (chosen manually) ===
# Maps a class folder name to a mean-grayscale cut-off: a patch whose mean pixel
# value is below the cut-off is labelled "shadow", otherwise "sun".
# The lookup is by exact folder name (case-sensitive); classes missing from this
# dict fall back to a cut-off of 100 where the thresholds are applied.
brightness_thresholds = {
    "Clear Water": 31,
    "Common reed": 70,
    "Duckweed": 92,
    "Other": 56,
    "Water-starwort": 61
}

In [4]:
# === 1. COMBINE METADATA FILES AND SORT BY SEGMENT ===

import re  # Import the re module for regular expressions

# Load every CSV found in METADATA_FOLDER. The directory listing is sorted so the
# concatenation order (and therefore the row order) is the same on every run.
metadata_dfs = []
for file in sorted(os.listdir(METADATA_FOLDER)):
    # Case-insensitive extension check, consistent with the image-extension checks
    # used for the patch folders.
    if not file.lower().endswith(".csv"):
        continue
    df = pd.read_csv(os.path.join(METADATA_FOLDER, file))
    # Drop a pre-existing 'light_condition' column when present; errors="ignore"
    # makes this a no-op for files that do not have it.
    df = df.drop(columns=['light_condition'], errors='ignore')
    metadata_dfs.append(df)

# pd.concat raises an unhelpful "No objects to concatenate" on an empty list;
# fail early with a message that names the folder instead.
if not metadata_dfs:
    raise FileNotFoundError(f"No metadata CSV files found in '{METADATA_FOLDER}'")

combined_metadata = pd.concat(metadata_dfs, ignore_index=True)

def extract_segment_number(segment):
    """Return the integer that follows the first 'F' in a segment label.

    Parameters
    ----------
    segment : str or any
        Segment label such as ``"F12"``. Non-string values (for example the
        NaN that pandas produces for an empty CSV cell) are treated as
        "no segment number" instead of raising ``TypeError``.

    Returns
    -------
    int
        The digits following the first ``F`` as an integer, or 0 when the
        label contains no ``F<digits>`` part or is not a string.
    """
    if not isinstance(segment, str):
        return 0
    match = re.search(r'F(\d+)', segment)
    return int(match.group(1)) if match else 0

# Sort numerically by segment (so F2 comes before F10) through a temporary helper
# column that is dropped again afterwards.
# kind="stable" keeps rows that share a segment number in their concatenation
# order; the default quicksort gives no such guarantee, so the within-segment
# order could otherwise differ between runs or pandas versions.
combined_metadata['segment_number'] = combined_metadata['segment'].apply(extract_segment_number)
combined_metadata = (
    combined_metadata
    .sort_values(by='segment_number', kind='stable')
    .drop(columns=['segment_number'])
    .reset_index(drop=True)
)

print(f"Metadata gecombineerd: {len(combined_metadata)} rijen")

Metadata gecombineerd: 2640 rijen


In [5]:
# === 2. FILTER METADATA DOWN TO THE PATCHES ACTUALLY PRESENT ===
# Collect the image filenames found in every class sub-folder of TEST_PATCH_DIR
# (entries that are not directories are skipped).
actual_patches = [
    entry
    for folder in os.listdir(TEST_PATCH_DIR)
    if os.path.isdir(os.path.join(TEST_PATCH_DIR, folder))
    for entry in os.listdir(os.path.join(TEST_PATCH_DIR, folder))
    if entry.lower().endswith((".jpg", ".jpeg", ".png"))
]

# Names of the form "<name>_jpg.rf.<suffix>.jpg" are reduced to "<name>.jpg";
# any other name is kept unchanged.
roboflow_name = re.compile(r"^(.*)_jpg\.rf\..*\.jpg$")
actual_patches_cleaned = []
for patch in actual_patches:
    hit = roboflow_name.match(patch)
    actual_patches_cleaned.append(hit.group(1) + ".jpg" if hit else patch)

# Compare case-insensitively: both sides are lower-cased before the membership test.
# The helper column is added to combined_metadata and removed from the filtered result.
combined_metadata['patch_filename_lower'] = combined_metadata['patch_filename'].str.lower()
actual_patches_cleaned_lower = [name.lower() for name in actual_patches_cleaned]

is_present = combined_metadata['patch_filename_lower'].isin(actual_patches_cleaned_lower)
metadata_filtered = combined_metadata.loc[is_present].drop(columns=['patch_filename_lower'])

print(f"Metadata gefilterd op echte patches: {len(metadata_filtered)} rijen")

Metadata gefilterd op echte patches: 2330 rijen


In [6]:
# === 3. MEASURE BRIGHTNESS AND ADD LIGHT CONDITION ===
# For every image in every class sub-folder of TEST_PATCH_DIR: convert to
# grayscale, take the mean pixel value, and label the patch "shadow" or "sun"
# by comparing that mean with the class threshold.
brightness_records = []

for class_name in os.listdir(TEST_PATCH_DIR):
    class_dir = os.path.join(TEST_PATCH_DIR, class_name)
    if not os.path.isdir(class_dir):
        continue
    for img_file in os.listdir(class_dir):
        if not img_file.lower().endswith((".jpg", ".jpeg", ".png")):
            continue
        img_path = os.path.join(class_dir, img_file)
        # The context manager releases the file handle deterministically, also
        # when decoding fails part-way through.
        with Image.open(img_path) as img:
            bright = np.array(img.convert("L")).mean()

        # Classes without an entry in brightness_thresholds fall back to 100.
        threshold = brightness_thresholds.get(class_name, 100)
        light_cond = "shadow" if bright < threshold else "sun"

        # Reduce "<name>_jpg.rf.<suffix>.jpg" to "<name>.jpg" so the name can be
        # joined against the metadata; other names are kept as they are.
        match = re.match(r"^(.*)_jpg\.rf\..*\.jpg$", img_file)
        cleaned_name = match.group(1) + ".jpg" if match else img_file

        brightness_records.append({
            "patch_filename": cleaned_name,
            "true_label": class_name,      # the class folder name is used as the label
            "brightness": bright,
            "light_condition_new": light_cond
        })

df_brightness = pd.DataFrame(brightness_records)

print(f"Brightness gemeten: {len(df_brightness)} patches")

# Merge brightness info into the metadata (left join keeps every metadata row).
# NOTE(review): step 4 performs this same merge again before saving.
final_metadata = pd.merge(metadata_filtered, df_brightness, on="patch_filename", how="left")


Brightness gemeten: 2330 patches


In [7]:
# === 4. MERGE METADATA AND SAVE ===
# Left join on the patch filename: every row of metadata_filtered is kept and
# receives the brightness columns where a matching filename exists.
final_metadata = metadata_filtered.merge(df_brightness, how="left", on="patch_filename")

# Write the result without the DataFrame index.
final_metadata.to_csv(OUTPUT_METADATA, index=False)

print(f"Metadata met brightness succesvol opgeslagen als '{OUTPUT_METADATA}'")

Metadata met brightness succesvol opgeslagen als 'C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\metadata_with_brightness.csv'


In [None]:
# === 5. Create sanity-check plots per class ===

import matplotlib.pyplot as plt

# One histogram of mean brightness per class present in df_brightness.
classes = df_brightness['true_label'].unique()

for class_name in classes:
    subset = df_brightness[df_brightness['true_label'] == class_name]

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.hist(subset['brightness'], bins=30, alpha=0.7, edgecolor='black')

    # Draw the threshold line (and its legend) only for classes that have a
    # threshold; calling legend() with no labelled artist emits a warning.
    class_threshold = brightness_thresholds.get(class_name)
    if class_threshold is not None:
        ax.axvline(class_threshold, color='red', linestyle='--', label=f"Threshold ({class_threshold})")
        ax.legend()

    ax.set_title(f"Brightness Distribution - {class_name}")
    ax.set_xlabel("Brightness (Mean Pixel Value)")
    ax.set_ylabel("Aantal patches")
    ax.grid(True)
    fig.tight_layout()
    plt.show()
    # Release the figure so a long class list does not accumulate open figures.
    plt.close(fig)


In [3]:
import os
import pandas as pd
import re

# === Inputs ===
csv_path = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\metadata_with_brightness.csv"
test_dir = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\Definitief_29_04\Test_dataset\test_v5_resize" # This is the folder with subfolders per class

# === Load CSV filenames ===
df = pd.read_csv(csv_path)
# Only the basename is compared, so a directory part in 'patch_filename' is ignored.
csv_filenames = set(df['patch_filename'].apply(lambda x: os.path.basename(x)))  # e.g., F2_0087_patch_37.jpg

# === Extract and normalize filenames from test folder ===
test_filenames = set()

for root, _, files in os.walk(test_dir):
    for f in files:
        # Only process image files. The check is case-insensitive so that files
        # such as "X.JPG" are not silently skipped (the other cells of this
        # notebook lower-case the name before testing the extension as well).
        if f.lower().endswith(('.jpg', '.jpeg', '.png')):
            # Normalize filename by stripping Roboflow-style suffix
            # Example: F9_0064_patch_126_jpg.rf.1ddde84dfb1f4b752e75c71f0158c010 → F9_0064_patch_126.jpg
            # IGNORECASE keeps the pattern in line with the case-insensitive
            # extension check above.
            match = re.match(r'^(.*?)(_jpg)?\.rf\..*\.(jpg|jpeg|png)$', f, flags=re.IGNORECASE)
            if match:
                base = match.group(1) + '.jpg'
                test_filenames.add(base)
            else:
                # If no RF hash, still normalize to .jpg
                test_filenames.add(os.path.splitext(f)[0] + '.jpg')

# === Compare sets ===
missing_patches = csv_filenames - test_filenames  # In CSV but not in folder
extra_patches = test_filenames - csv_filenames    # In folder but not in CSV

# === Output results ===
print(f"🔍 Missing patches (in CSV, not in test folder): {len(missing_patches)}")
for f in sorted(missing_patches):
    print(f"  - {f}")

print(f"\nExtra patches (in test folder, not in CSV): {len(extra_patches)}")
for f in sorted(extra_patches):
    print(f"  - {f}")


🔍 Missing patches (in CSV, not in test folder): 132
  - F16_0172_patch_1.jpg
  - F16_0172_patch_10.jpg
  - F16_0172_patch_100.jpg
  - F16_0172_patch_101.jpg
  - F16_0172_patch_102.jpg
  - F16_0172_patch_105.jpg
  - F16_0172_patch_106.jpg
  - F16_0172_patch_107.jpg
  - F16_0172_patch_108.jpg
  - F16_0172_patch_109.jpg
  - F16_0172_patch_11.jpg
  - F16_0172_patch_110.jpg
  - F16_0172_patch_113.jpg
  - F16_0172_patch_114.jpg
  - F16_0172_patch_115.jpg
  - F16_0172_patch_116.jpg
  - F16_0172_patch_117.jpg
  - F16_0172_patch_12.jpg
  - F16_0172_patch_121.jpg
  - F16_0172_patch_122.jpg
  - F16_0172_patch_123.jpg
  - F16_0172_patch_124.jpg
  - F16_0172_patch_125.jpg
  - F16_0172_patch_13.jpg
  - F16_0172_patch_130.jpg
  - F16_0172_patch_131.jpg
  - F16_0172_patch_132.jpg
  - F16_0172_patch_133.jpg
  - F16_0172_patch_139.jpg
  - F16_0172_patch_14.jpg
  - F16_0172_patch_140.jpg
  - F16_0172_patch_141.jpg
  - F16_0172_patch_148.jpg
  - F16_0172_patch_149.jpg
  - F16_0172_patch_156.jpg
  - F16_01

In [4]:
import os
import pandas as pd
import re

# === Inputs ===
csv_path = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\metadata_with_brightness.csv"
test_dir = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\Definitief_29_04\Test_dataset\test_v5_resize" # This is the folder with subfolders per class

# === Load CSV filenames ===
df = pd.read_csv(csv_path)
# Basename of every CSV entry, kept as a Series aligned with df so it can be used
# directly as the filter key below.
csv_basenames = df['patch_filename'].apply(os.path.basename)  # e.g., F2_0087_patch_37.jpg
csv_filenames = set(csv_basenames)

# === Get normalized test filenames from folder ===
test_filenames = set()

for root, _, files in os.walk(test_dir):
    for f in files:
        # Case-insensitive extension check and pattern, so files such as "X.JPG"
        # are not silently skipped.
        if f.lower().endswith(('.jpg', '.jpeg', '.png')):
            # "<name>_jpg.rf.<suffix>.<ext>" -> "<name>.jpg"; any other image
            # name just has its extension replaced by ".jpg".
            match = re.match(r'^(.*?)(_jpg)?\.rf\..*\.(jpg|jpeg|png)$', f, flags=re.IGNORECASE)
            if match:
                normalized = match.group(1) + '.jpg'
            else:
                normalized = os.path.splitext(f)[0] + '.jpg'
            test_filenames.add(normalized)

# === Filter the CSV ===
# Filter on the basename, i.e. on the same key the missing/extra comparison uses;
# filtering on the raw column would drop every row if it ever contained a path.
filtered_df = df[csv_basenames.isin(test_filenames)]

# === Save result ===
filtered_df.to_csv('filtered_test_brightness.csv', index=False)
print(f"✅ Filtered CSV saved: {len(filtered_df)} patches retained out of {len(df)}")

✅ Filtered CSV saved: 2198 patches retained out of 2330


In [10]:
import pandas as pd
import numpy as np

# Load the filtered brightness table written by the previous step
df = pd.read_csv('filtered_test_brightness.csv')


# Two rows that are added by hand. The brightness / light-condition values are
# hard-coded here rather than computed in this cell.
manual_entries = pd.DataFrame([
    {
        'patch_filename': 'F2_0087_patch_2.jpg',
        'segment': 'F2',
        'photo_id': 87,
        'patch_id': 2,
        'x': 2816,
        'y': 0,
        'altitude': 35,
        'true_label': 'Water-starwort',
        'brightness': 22.432258,
        'light_condition_new': 'shadow'
    },
    {
        'patch_filename': 'F21_0182_patch_4.jpg',
        'segment': 'F21',
        'photo_id': 182,
        'patch_id': 4,
        'x': 1920,
        'y': 0,
        'altitude': 20,
        # Must be spelled exactly like the class name used everywhere else
        # ("Clear Water"); the previous "Clear water" created a separate label
        # in any case-sensitive group-by or comparison.
        'true_label': 'Clear Water',
        'brightness': 56.551120,
        'light_condition_new': 'sun'
    }
])

# Append the manual rows, sort by filename (plain string order) and save
df = pd.concat([df, manual_entries], ignore_index=True)
df_sorted = df.sort_values(by="patch_filename").reset_index(drop=True)
# NOTE(review): index=True writes the row index as an extra unnamed first column,
# unlike the other CSV exports in this notebook (index=False) — confirm intended.
df_sorted.to_csv('extended_test_brightness.csv', index=True)
print("✅ Two manual rows added.")


✅ Two manual rows added.


In [12]:
# Extract the numeric part of the segment label for proper numerical sorting
# (so that F2 sorts before F10). expand=False yields a Series; astype(int) raises
# if a segment label has no 'F<digits>' part.
segment_num = df_sorted["segment"].str.extract(r'F(\d+)', expand=False).astype(int)

# Sort by numeric segment, then photo_id, then patch_id. The helper column lives
# only on the temporary frame created by assign(), so df_sorted itself is left
# untouched and this cell can be re-run without side effects.
df_sorted_numeric = (
    df_sorted
    .assign(segment_num=segment_num)
    .sort_values(by=["segment_num", "photo_id", "patch_id"])
    .drop(columns="segment_num")
    .reset_index(drop=True)
)

# NOTE(review): the file name starts with 'extend_' while the unsorted export is
# 'extended_test_brightness.csv' — confirm the spelling is intended.
df_sorted_numeric.to_csv('extend_test_brightness_sorted.csv', index=False)

In [6]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import re
# NOTE(review): this cell re-assigns TEST_PATCH_DIR, brightness_thresholds,
# brightness_records and df_brightness, names that the earlier cells also use;
# re-running earlier cells afterwards will see these values.
TEST_PATCH_DIR = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\Definitief_29_04\Test_dataset\test_v5_resize"

# === Thresholds per class ===
# Mean-grayscale cut-off per class folder name: below -> "shadow", otherwise "sun".
brightness_thresholds = {
    "Clear Water": 31,
    "Common reed": 70,
    "Duckweed": 92,
    "Other": 56,
    "Water-starwort": 61
}


# === Only calculate for these two patch names ===
target_patches = {"F2_0087_patch_2.jpg", "F21_0182_patch_4.jpg"}

# === Initialize output list ===
brightness_records = []

# === Loop over the class sub-folders of TEST_PATCH_DIR (one level deep) ===
for class_name in os.listdir(TEST_PATCH_DIR):
    class_dir = os.path.join(TEST_PATCH_DIR, class_name)
    if not os.path.isdir(class_dir):
        continue

    for img_file in os.listdir(class_dir):
        if not img_file.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        # Clean the Roboflow filename to match the target
        match = re.match(r"^(.*)_jpg\.rf\..*\.jpg$", img_file)
        cleaned_name = match.group(1) + ".jpg" if match else img_file

        if cleaned_name in target_patches:
            img_path = os.path.join(class_dir, img_file)
            # The context manager releases the file handle deterministically,
            # also when decoding fails part-way through.
            with Image.open(img_path) as img:
                bright = np.array(img.convert("L")).mean()

            # Classes without an entry in brightness_thresholds fall back to 100.
            threshold = brightness_thresholds.get(class_name, 100)
            light_cond = "shadow" if bright < threshold else "sun"

            brightness_records.append({
                "patch_filename": cleaned_name,
                "true_label": class_name,
                "brightness": bright,
                "light_condition_new": light_cond
            })

# === Convert to DataFrame and show results ===
df_brightness = pd.DataFrame(brightness_records)
print(df_brightness)

# Optional: Save to CSV
df_brightness.to_csv("new_patch_brightness.csv", index=False)


         patch_filename      true_label  brightness light_condition_new
0  F21_0182_patch_4.jpg     Clear Water   56.551120                 sun
1   F2_0087_patch_2.jpg  Water-starwort   22.432258              shadow


In [None]:
# Sort by segment, then by photo_id, then by patch_id, and renumber the index.
# NOTE(review): `df_combined` is not defined anywhere in the visible notebook —
# confirm which DataFrame is meant before running this cell.
# NOTE(review): "segment" is sorted as stored; if it holds labels such as 'F2' and
# 'F10' the order is lexicographic (F10 before F2), unlike the numeric segment
# sort used earlier in this notebook — verify this is intended.
df_sorted_segment = df_combined.sort_values(by=["segment", "photo_id", "patch_id"]).reset_index(drop=True)

# Save the sorted CSV
# NOTE(review): this is a POSIX-style absolute path, whereas every other path in
# this notebook is a Windows path or relative — confirm the target location.
output_segment_path = "/mnt/data/extended_test_brightness_sorted_by_segment.csv"
df_sorted_segment.to_csv(output_segment_path, index=False)

# Display the first few rows to confirm sorting
df_sorted_segment.head()
