## imports and functions

In [None]:
import os
import pandas as pd
import cv2
import scipy.io as sio
from glob import glob
import shutil


# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Function to delete a generated output folder (default: "new_name") if needed
def delete_new_name_folder(path, folder_name="new_name"):
    """Remove the generated output folder inside ``path`` if it exists.

    Args:
        path: Directory that contains the generated output folder.
        folder_name: Name of the sub-folder to remove. Defaults to
            "new_name"; pass another name (e.g. "new_name_4") when the
            renamed images were written to a differently named folder.

    Returns:
        None. A message is printed only when a folder was actually deleted.
    """
    new_name_folder = os.path.join(path, folder_name)
    # isdir instead of exists: a stray *file* with that name must not be
    # handed to rmtree, which only accepts directories
    if os.path.isdir(new_name_folder):
        shutil.rmtree(new_name_folder)
        print(f"Deleted existing '{folder_name}' folder at: {new_name_folder}")

## 2023 Data

In [None]:
# Read the combined 2023 workbook so its header row can be checked
excel_path = '/content/drive/Othercomputers/My PC/Thesis/raw_data/combined2023.xlsx'
df = pd.read_excel(excel_path)

# Show the exact column labels; the cells below address columns by these names
print("Column names in the Excel file:", list(df.columns))


Column names in the Excel file: ['Tree number', 'Direction north =1. South=2', 'Mandarin number', 'Thermal20230825', 'RGB20230825', 'Thermal20230929', 'RGB20230929', 'Thermal20231103', 'RGB20231103', 'Thermal20231216', 'RGB20231216']


Thermal and RGB images 2023

In [None]:
import os
import shutil
import pandas as pd

# Load the Excel file
excel_path = '/content/drive/Othercomputers/My PC/Thesis/raw_data/combined2023.xlsx'
df = pd.read_excel(excel_path)

# Define the base directory where the raw data images are located
base_dir = '/content/drive/Othercomputers/My PC/Thesis/raw_data'

# Define the paths for each date
dates = ['20230825', '20230929', '20231103', '20231216']

# List to store missing images
missing_images = []

# Cache of directory listings (None = folder does not exist) so every source
# folder is read once instead of once per Excel cell
folder_listings = {}

# Process each image and rename them according to the rules
for index, row in df.iterrows():
    tree_number = row['Tree number']
    direction = row['Direction north =1. South=2'].strip().lower()  # Ensure lower case and remove any whitespace
    mandarin_number = row['Mandarin number']

    # Convert direction value to 'n' or 's' based on the first character
    direction_char = 'n' if direction.startswith('n') else 's'

    # Construct the image prefix based on these columns
    image_prefix = f"{tree_number}_{direction_char}_{mandarin_number}"

    # Process each date column (everything from the 4th column onwards)
    for col in df.columns[3:]:
        image_date = col[-8:]  # Extract the date part from the column name
        image_type = 'RGB' if 'RGB' in col else 'Thermal' if 'Thermal' in col else 'Thermal.MAT'
        cell_value = row[col]

        # Test for NaN on the raw cell: after str() a missing value is the
        # text 'nan', which pd.isna() does not recognise as missing
        if pd.isna(cell_value):
            continue

        # A numeric column with blanks is read as float, so 816 arrives as
        # 816.0; turn it back into "816" so it can match a file name
        if isinstance(cell_value, float) and cell_value.is_integer():
            cell_value = int(cell_value)
        image_id = str(cell_value).strip()

        # Skip blank cells and the "X" placeholder
        if image_id == '' or image_id.upper() == 'X':
            continue

        # Set the folder path based on the date and image type
        if image_date == '20230825':
            subfolder_path = os.path.join(base_dir, image_date, 'Prediction', image_type)
        else:
            subfolder_path = os.path.join(base_dir, image_date, 'prediction', 'raw data', image_type)

        # List the folder on first use; sorted so the chosen match is deterministic
        if subfolder_path not in folder_listings:
            folder_listings[subfolder_path] = (
                sorted(os.listdir(subfolder_path)) if os.path.isdir(subfolder_path) else None
            )
        filenames = folder_listings[subfolder_path]

        # Check if the subfolder path exists
        if filenames is None:
            print(f"Directory does not exist: {subfolder_path}")
            continue

        # The first file whose name contains the ID is taken as the match
        # NOTE(review): substring matching means "816" also matches "FLIR1816" - confirm IDs are unambiguous per folder
        filename = next((name for name in filenames if image_id in name), None)

        # If no matching file was found, add to the missing images list
        if filename is None:
            missing_images.append((image_id, image_date, image_type, subfolder_path))
            print(f"Image {image_id} for date {image_date} not found in {subfolder_path}")
            continue

        file_ext = filename.split('.')[-1]
        new_filename = f"{image_prefix}_{image_date}.{file_ext}"

        # Define the target directory for saving renamed images
        target_dir = os.path.join(subfolder_path, "new_name")
        os.makedirs(target_dir, exist_ok=True)
        dst_path = os.path.join(target_dir, new_filename)

        # Never overwrite a copy made by an earlier run
        if os.path.exists(dst_path):
            print(f"Image {new_filename} already exists in {target_dir}, skipping.")
        else:
            # Copy the file with the new name
            src_path = os.path.join(subfolder_path, filename)
            shutil.copy(src_path, dst_path)
            print(f"Original: {filename} -> New: {new_filename}")

# Print all missing images at the end
print("\nSummary of missing images:")
for image_id, image_date, image_type, subfolder_path in missing_images:
    print(f"Missing: Image ID {image_id} of type {image_type} for date {image_date} in folder {subfolder_path}")


Image 816 for date 20230825 not found in /content/drive/Othercomputers/My PC/Thesis/raw_data/20230825/Prediction/Thermal
Image 1_n_1_20230825.JPG already exists in /content/drive/Othercomputers/My PC/Thesis/raw_data/20230825/Prediction/RGB/new_name, skipping.
Image 1_n_1_20230929.jpg already exists in /content/drive/Othercomputers/My PC/Thesis/raw_data/20230929/prediction/raw data/Thermal/new_name, skipping.
Image 1_n_1_20230929.JPG already exists in /content/drive/Othercomputers/My PC/Thesis/raw_data/20230929/prediction/raw data/RGB/new_name, skipping.
Image 1_n_1_20231103.jpg already exists in /content/drive/Othercomputers/My PC/Thesis/raw_data/20231103/prediction/raw data/Thermal/new_name, skipping.
Image 1_n_1_20231103.JPG already exists in /content/drive/Othercomputers/My PC/Thesis/raw_data/20231103/prediction/raw data/RGB/new_name, skipping.
Image 1_n_1_20231216.jpg already exists in /content/drive/Othercomputers/My PC/Thesis/raw_data/20231216/prediction/raw data/Thermal/new_name

MAT files 2023

In [None]:

# Load the Excel file
excel_path = '/content/drive/Othercomputers/My PC/Thesis/raw_data/combined2023.xlsx'
df = pd.read_excel(excel_path)

# Define the base directory where the raw data images are located
base_dir = '/content/drive/Othercomputers/My PC/Thesis/raw_data'

# Define the dates and the specific folder for Thermal.MAT files
dates = ['20230825', '20230929', '20231103', '20231216']
missing_images = []  # List to store missing images

# Cache of directory listings (None = folder does not exist) so every source
# folder is read once instead of once per Excel cell
folder_listings = {}

# The MAT files are looked up through the IDs in the Thermal columns
thermal_columns = [col for col in df.columns if col.startswith('Thermal')]

# Process each image in the Thermal.MAT folders
for index, row in df.iterrows():
    tree_number = row['Tree number']
    direction = row['Direction north =1. South=2'].strip().lower()  # Ensure lower case and remove any whitespace
    mandarin_number = row['Mandarin number']

    # Convert direction value to 'n' or 's' based on the first character
    direction_char = 'n' if direction.startswith('n') else 's'

    # Construct the image prefix based on these columns
    image_prefix = f"{tree_number}_{direction_char}_{mandarin_number}"

    # Process each date column only for `Thermal.MAT`
    for col in thermal_columns:
        image_date = col[-8:]  # Extract the date part from the column name
        image_type = 'Thermal.MAT'
        cell_value = row[col]

        # Test for NaN on the raw cell: after str() a missing value is the
        # text 'nan', which pd.isna() does not recognise as missing
        if pd.isna(cell_value):
            continue

        # A numeric column with blanks is read as float, so 816 arrives as
        # 816.0; turn it back into "816" so it can match a file name
        if isinstance(cell_value, float) and cell_value.is_integer():
            cell_value = int(cell_value)
        image_id = str(cell_value).strip()

        # Skip blank cells and the "X" placeholder (as the RGB/Thermal cell does)
        if image_id == '' or image_id.upper() == 'X':
            continue

        # Set the folder path specifically for `Thermal.MAT` files
        if image_date == '20230825':
            subfolder_path = os.path.join(base_dir, image_date, 'Prediction', image_type)
        else:
            subfolder_path = os.path.join(base_dir, image_date, 'prediction', 'raw data', image_type)

        # List the folder on first use; sorted so the chosen match is deterministic
        if subfolder_path not in folder_listings:
            folder_listings[subfolder_path] = (
                sorted(os.listdir(subfolder_path)) if os.path.isdir(subfolder_path) else None
            )
        filenames = folder_listings[subfolder_path]

        # Check if the subfolder path exists
        if filenames is None:
            print(f"Directory does not exist: {subfolder_path}")
            continue

        # The first file whose name contains the ID is taken as the match
        # NOTE(review): substring matching means "816" also matches "FLIR1816" - confirm IDs are unambiguous per folder
        filename = next((name for name in filenames if image_id in name), None)

        # If no matching file was found, add to the missing images list
        if filename is None:
            missing_images.append((image_id, image_date, image_type, subfolder_path))
            print(f"Image {image_id} for date {image_date} not found in {subfolder_path}")
            continue

        file_ext = filename.split('.')[-1]
        new_filename = f"{image_prefix}_{image_date}.{file_ext}"

        # Define the target directory for saving renamed images
        target_dir = os.path.join(subfolder_path, "new_name")
        os.makedirs(target_dir, exist_ok=True)

        # Define full paths for source and target files
        src_path = os.path.join(subfolder_path, filename)
        dst_path = os.path.join(target_dir, new_filename)

        # Copy the file with the new name (an existing copy is overwritten)
        shutil.copy(src_path, dst_path)

        # Print original and new file names for verification
        print(f"Original: {filename} -> New: {new_filename}")

# Print all missing images at the end
print("\nSummary of missing images:")
for image_id, image_date, image_type, subfolder_path in missing_images:
    print(f"Missing: Image ID {image_id} of type {image_type} for date {image_date} in folder {subfolder_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Image 816 for date 20230825 not found in /content/drive/Othercomputers/My PC/Thesis/raw_data/20230825/Prediction/Thermal.MAT
Original: FLIR1298.MAT -> New: 1_n_1_20230929.MAT
Original: FLIR1651.MAT -> New: 1_n_1_20231103.MAT
Original: FLIR1945.MAT -> New: 1_n_1_20231216.MAT
Image 817 for date 20230825 not found in /content/drive/Othercomputers/My PC/Thesis/raw_data/20230825/Prediction/Thermal.MAT
Original: FLIR1299.MAT -> New: 1_n_2_20230929.MAT
Original: FLIR1652.MAT -> New: 1_n_2_20231103.MAT
Original: FLIR1946.MAT -> New: 1_n_2_20231216.MAT
Image 818 for date 20230825 not found in /content/drive/Othercomputers/My PC/Thesis/raw_data/20230825/Prediction/Thermal.MAT
Original: FLIR1300.MAT -> New: 1_n_3_20230929.MAT
Original: FLIR1653.MAT -> New: 1_n_3_20231103.MAT
Original: FLIR1947.MAT -> New: 1_n_3_20231216.MAT
Original: FLIR0819.MAT -> New: 1_n_4_20230825.

TIFF files 2023

In [None]:
import os
import pandas as pd
import re
from glob import glob

# Folders holding the exported TIFF files, one per 2023 acquisition date
tiff_dirs = [
    '/content/drive/Othercomputers/My PC/Thesis/raw_data/20231216/prediction/raw data/tiff',
    '/content/drive/Othercomputers/My PC/Thesis/raw_data/20231103/prediction/raw data/tiff',
    '/content/drive/Othercomputers/My PC/Thesis/raw_data/20230929/prediction/raw data/tiff',
    '/content/drive/Othercomputers/My PC/Thesis/raw_data/20230825/Prediction/tiff'
]

# Workbook that links each thermal image ID to its tree / direction / mandarin
excel_path = '/content/drive/Othercomputers/My PC/Thesis/raw_data/combined2023.xlsx'
df = pd.read_excel(excel_path)

# Helper that derives the descriptive file name for one Excel row
def build_new_name(row, date, file_extension):
    """Return ``<tree>_<n|s>_<mandarin>_<date>.<file_extension>`` for ``row``.

    ``row`` is indexed by the Excel column labels. The direction cell is
    reduced to 'n' when its text starts with "n" (case-insensitive, ignoring
    surrounding whitespace) and to 's' in every other case.
    """
    tree = str(row['Tree number'])
    side_text = row['Direction north =1. South=2'].strip().lower()
    if side_text.startswith('n'):
        side = 'n'
    else:
        side = 's'
    fruit = str(row['Mandarin number'])
    stem = "_".join((tree, side, fruit, f"{date}"))
    return f"{stem}.{file_extension}"

# Process each TIFF directory
for tiff_dir in tiff_dirs:
    # The acquisition date is the 8-digit path component of the folder
    date_match = re.search(r'/(\d{8})/', tiff_dir)
    if not date_match:
        print(f"Date not found in path: {tiff_dir}")
        continue
    date = date_match.group(1)

    # Do not create output folders for a source folder that is not there
    if not os.path.isdir(tiff_dir):
        print(f"Directory does not exist: {tiff_dir}")
        continue

    # The TIFF IDs are looked up in the thermal column of the same date
    thermal_col = f'Thermal{date}'
    if thermal_col not in df.columns:
        print(f"Column {thermal_col} not found in the Excel file, skipping {tiff_dir}")
        continue

    # Create the "new_name" folder within the TIFF directory
    new_name_dir = os.path.join(tiff_dir, 'new_name')
    os.makedirs(new_name_dir, exist_ok=True)

    # Get all TIFF files in the directory (sorted for a stable processing order)
    tiff_files = sorted(glob(os.path.join(tiff_dir, '*.tiff')))

    for tiff_file in tiff_files:
        # Extract the original ID from the TIFF file name (e.g., "1866" from "FLIR1866.tiff")
        original_id = re.search(r'(\d{4})', os.path.basename(tiff_file))
        if not original_id:
            print(f"No ID found in file name: {tiff_file}")
            continue
        original_id = original_id.group(1)

        # Find the matching row in the Excel file
        matching_row = df[(df[thermal_col] == int(original_id))]

        if matching_row.empty:
            print(f"Could not find matching ID for: {tiff_file}")
            continue

        # Build the new name based on the found row
        new_name = build_new_name(matching_row.iloc[0], date, 'tiff')

        # Path to save the renamed file
        new_path = os.path.join(new_name_dir, new_name)

        # Copy rather than move: the original stays in place, so the raw data
        # is preserved and the cell can be re-run with the same result
        shutil.copy(tiff_file, new_path)
        print(f"Original: {os.path.basename(tiff_file)} -> New: {new_name}")

print("Renaming process for TIFF files completed.")


Original: FLIR1866.tiff -> New: 1_s_1_20231216.tiff
Original: FLIR1867.tiff -> New: 1_s_2_20231216.tiff
Original: FLIR1868.tiff -> New: 1_s_3_20231216.tiff
Original: FLIR1869.tiff -> New: 1_s_4_20231216.tiff
Original: FLIR1870.tiff -> New: 1_s_5_20231216.tiff
Original: FLIR1871.tiff -> New: 1_s_6_20231216.tiff
Original: FLIR1872.tiff -> New: 2_s_1_20231216.tiff
Original: FLIR1873.tiff -> New: 2_s_3_20231216.tiff
Original: FLIR1874.tiff -> New: 2_s_4_20231216.tiff
Original: FLIR1875.tiff -> New: 2_s_5_20231216.tiff
Original: FLIR1876.tiff -> New: 2_s_6_20231216.tiff
Original: FLIR1877.tiff -> New: 2_s_7_20231216.tiff
Original: FLIR1878.tiff -> New: 3_s_1_20231216.tiff
Original: FLIR1879.tiff -> New: 3_s_2_20231216.tiff
Original: FLIR1881.tiff -> New: 3_s_4_20231216.tiff
Original: FLIR1882.tiff -> New: 3_s_5_20231216.tiff
Original: FLIR1883.tiff -> New: 3_s_6_20231216.tiff
Could not find matching ID for: /content/drive/Othercomputers/My PC/Thesis/raw_data/20231216/prediction/raw data/tif

## 2024 data

Thermal and RGB images 2024

In [None]:
# Copy the RGB and Thermal images of one 2024 acquisition date under descriptive names
def process_images_for_first_excel(
    date='20240830',
    excel_name='Beit Habad 30.08.24 prediction.xlsx',
    raw_data_dir='/content/drive/Othercomputers/My PC/Thesis/raw_data',
    output_folder='new_name_4',
):
    """Copy the images listed in one date's Excel sheet under descriptive names.

    Every row of the sheet names a tree, a side ('Direction') and a mandarin,
    plus the camera image IDs in the ``RGB<date>`` and ``Thermal<date>``
    columns. Each referenced image is copied to
    ``<source folder>/<output_folder>/<tree>_<e|w>_<mandarin>_<date>_<type>.<ext>``.

    Args:
        date: Acquisition date (YYYYMMDD); selects the date folder and the
            Excel column names.
        excel_name: File name of the workbook inside the date folder.
        raw_data_dir: Root folder that holds one sub-folder per date.
        output_folder: Sub-folder (inside the RGB and Thermal folders) that
            receives the renamed copies. It is deleted and rebuilt on every call.

    Returns:
        None. Progress and a summary of missing images are printed.
    """
    date_dir = os.path.join(raw_data_dir, date)
    df = pd.read_excel(os.path.join(date_dir, excel_name))

    # image type -> (source folder, file extension used for that type)
    sources = {
        'RGB': (os.path.join(date_dir, 'raw data', 'RGB'), '.JPG'),          # RGB images end with .JPG (uppercase)
        'Thermal': (os.path.join(date_dir, 'raw data', 'Thermal'), '.jpg'),  # Thermal images end with .jpg (lowercase)
    }

    # Clear the folder that is actually written below, then list every source
    # folder once (None = folder missing) instead of once per Excel row
    listings = {}
    for image_type, (subfolder_path, _) in sources.items():
        stale_dir = os.path.join(subfolder_path, output_folder)
        if os.path.isdir(stale_dir):
            shutil.rmtree(stale_dir)
            print(f"Deleted existing '{output_folder}' folder at: {stale_dir}")
        listings[image_type] = (
            sorted(os.listdir(subfolder_path)) if os.path.isdir(subfolder_path) else None
        )

    # Set to track missing images uniquely
    missing_images = set()

    for _, row in df.iterrows():
        # Convert direction to 'e' or 'w'
        direction = str(row['Direction']).strip().lower()
        direction_char = 'e' if direction.startswith('e') else 'w'
        image_prefix = f"{row['Tree number']}_{direction_char}_{row['Mandarin number']}"

        for image_type, (subfolder_path, expected_ext) in sources.items():
            raw_id = row[f"{image_type}{date}"]
            image_id = str(raw_id).strip()

            # Skip blank cells and the 'X' placeholder
            if pd.isna(raw_id) or image_id == '' or image_id.upper() == 'X':
                continue

            filenames = listings[image_type]
            if filenames is None:
                print(f"Directory does not exist: {subfolder_path}")
                continue

            # A column with blanks is read as float, so the ID may arrive as
            # "29.0"; int(float(...)) accepts both "29" and "29.0"
            try:
                search_pattern = f"{int(float(image_id)):04d}"  # zero-padded, e.g. 0029
            except (ValueError, OverflowError):
                print(f"Image ID {image_id!r} in column {image_type}{date} is not a number")
                missing_images.add((image_id, image_type, subfolder_path))
                continue

            # First file with the right extension whose name contains the padded ID
            filename = next(
                (name for name in filenames
                 if search_pattern in name and name.endswith(expected_ext)),
                None,
            )
            if filename is None:
                missing_images.add((image_id, image_type, subfolder_path))
                continue

            file_ext = filename.split('.')[-1]
            new_filename = f"{image_prefix}_{date}_{image_type}.{file_ext}"

            target_dir = os.path.join(subfolder_path, output_folder)
            os.makedirs(target_dir, exist_ok=True)
            dst_path = os.path.join(target_dir, new_filename)

            # Two rows resolving to the same name must not overwrite each other
            if os.path.exists(dst_path):
                print(f"Image {new_filename} already exists in {target_dir}, skipping.")
            else:
                shutil.copy(os.path.join(subfolder_path, filename), dst_path)
                print(f"Original: {filename} -> New: {new_filename}")

    # Print all unique missing images at the end of processing for this Excel
    print(f"\nSummary of missing images for date {date}:")
    for image_id, image_type, subfolder_path in sorted(missing_images):
        print(f"Missing: Image ID {image_id} of type {image_type} in folder {subfolder_path}")

# Run the renaming for the 20240830 acquisition (the function defined in this cell)
process_images_for_first_excel()


In [None]:
# Copy the RGB and Thermal images of one 2024 acquisition date under descriptive names
def process_images_for_first_excel(
    date='20240927',
    excel_name='Beit Habad 27.09.24 prediction.xlsx',
    raw_data_dir='/content/drive/Othercomputers/My PC/Thesis/raw_data',
    output_folder='new_name_4',
):
    """Copy the images listed in one date's Excel sheet under descriptive names.

    Every row of the sheet names a tree, a side ('Direction') and a mandarin,
    plus the camera image IDs in the ``RGB<date>`` and ``Thermal<date>``
    columns. Each referenced image is copied to
    ``<source folder>/<output_folder>/<tree>_<e|w>_<mandarin>_<date>_<type>.<ext>``.

    Args:
        date: Acquisition date (YYYYMMDD); selects the date folder and the
            Excel column names.
        excel_name: File name of the workbook inside the date folder.
        raw_data_dir: Root folder that holds one sub-folder per date.
        output_folder: Sub-folder (inside the RGB and Thermal folders) that
            receives the renamed copies. It is deleted and rebuilt on every call.

    Returns:
        None. Progress and a summary of missing images are printed.
    """
    date_dir = os.path.join(raw_data_dir, date)
    df = pd.read_excel(os.path.join(date_dir, excel_name))

    # image type -> (source folder, file extension used for that type)
    sources = {
        'RGB': (os.path.join(date_dir, 'raw data', 'RGB'), '.JPG'),          # RGB images end with .JPG (uppercase)
        'Thermal': (os.path.join(date_dir, 'raw data', 'Thermal'), '.jpg'),  # Thermal images end with .jpg (lowercase)
    }

    # Clear the folder that is actually written below, then list every source
    # folder once (None = folder missing) instead of once per Excel row
    listings = {}
    for image_type, (subfolder_path, _) in sources.items():
        stale_dir = os.path.join(subfolder_path, output_folder)
        if os.path.isdir(stale_dir):
            shutil.rmtree(stale_dir)
            print(f"Deleted existing '{output_folder}' folder at: {stale_dir}")
        listings[image_type] = (
            sorted(os.listdir(subfolder_path)) if os.path.isdir(subfolder_path) else None
        )

    # Set to track missing images uniquely
    missing_images = set()

    for _, row in df.iterrows():
        # Convert direction to 'e' or 'w'
        direction = str(row['Direction']).strip().lower()
        direction_char = 'e' if direction.startswith('e') else 'w'
        image_prefix = f"{row['Tree number']}_{direction_char}_{row['Mandarin number']}"

        for image_type, (subfolder_path, expected_ext) in sources.items():
            raw_id = row[f"{image_type}{date}"]
            image_id = str(raw_id).strip()

            # Skip blank cells and the 'X' placeholder
            if pd.isna(raw_id) or image_id == '' or image_id.upper() == 'X':
                continue

            filenames = listings[image_type]
            if filenames is None:
                print(f"Directory does not exist: {subfolder_path}")
                continue

            # A column with blanks is read as float, so the ID may arrive as
            # "29.0"; int(float(...)) accepts both "29" and "29.0"
            try:
                search_pattern = f"{int(float(image_id)):04d}"  # zero-padded, e.g. 0029
            except (ValueError, OverflowError):
                print(f"Image ID {image_id!r} in column {image_type}{date} is not a number")
                missing_images.add((image_id, image_type, subfolder_path))
                continue

            # First file with the right extension whose name contains the padded ID
            filename = next(
                (name for name in filenames
                 if search_pattern in name and name.endswith(expected_ext)),
                None,
            )
            if filename is None:
                missing_images.add((image_id, image_type, subfolder_path))
                continue

            file_ext = filename.split('.')[-1]
            new_filename = f"{image_prefix}_{date}_{image_type}.{file_ext}"

            target_dir = os.path.join(subfolder_path, output_folder)
            os.makedirs(target_dir, exist_ok=True)
            dst_path = os.path.join(target_dir, new_filename)

            # Two rows resolving to the same name must not overwrite each other
            if os.path.exists(dst_path):
                print(f"Image {new_filename} already exists in {target_dir}, skipping.")
            else:
                shutil.copy(os.path.join(subfolder_path, filename), dst_path)
                print(f"Original: {filename} -> New: {new_filename}")

    # Print all unique missing images at the end of processing for this Excel
    print(f"\nSummary of missing images for date {date}:")
    for image_id, image_type, subfolder_path in sorted(missing_images):
        print(f"Missing: Image ID {image_id} of type {image_type} in folder {subfolder_path}")

# Run the renaming for the 20240927 acquisition (the function defined in this cell)
process_images_for_first_excel()


Deleted existing 'new_name_4' folder at: /content/drive/Othercomputers/My PC/Thesis/raw_data/20240927/raw data/RGB/new_name_4
Deleted existing 'new_name_4' folder at: /content/drive/Othercomputers/My PC/Thesis/raw_data/20240927/raw data/Thermal/new_name_4
Original: 1D3A2895.JPG -> New: 1_e_1_20240927_RGB.JPG
Original: IR_27-09-2024_0009.jpg -> New: 1_e_1_20240927_Thermal.jpg
Original: 1D3A2896.JPG -> New: 1_e_2_20240927_RGB.JPG
Original: IR_27-09-2024_0010.jpg -> New: 1_e_2_20240927_Thermal.jpg
Original: 1D3A2897.JPG -> New: 1_e_3_20240927_RGB.JPG
Original: IR_27-09-2024_0011.jpg -> New: 1_e_3_20240927_Thermal.jpg
Original: 1D3A2898.JPG -> New: 1_e_4_20240927_RGB.JPG
Original: IR_27-09-2024_0012.jpg -> New: 1_e_4_20240927_Thermal.jpg
Original: 1D3A2899.JPG -> New: 1_e_5_20240927_RGB.JPG
Original: IR_27-09-2024_0013.jpg -> New: 1_e_5_20240927_Thermal.jpg
Original: 1D3A2900.JPG -> New: 1_e_6_20240927_RGB.JPG
Original: IR_27-09-2024_0014.jpg -> New: 1_e_6_20240927_Thermal.jpg
Original: 1D

In [None]:
# Copy the RGB and Thermal images of one 2024 acquisition date under descriptive names
def process_images_for_first_excel(
    date='20241102',
    excel_name='Beit Habad 02.11.24 prediction.xlsx',
    raw_data_dir='/content/drive/Othercomputers/My PC/Thesis/raw_data',
    output_folder='new_name_4',
):
    """Copy the images listed in one date's Excel sheet under descriptive names.

    Every row of the sheet names a tree, a side ('Direction') and a mandarin,
    plus the camera image IDs in the ``RGB<date>`` and ``Thermal<date>``
    columns. Each referenced image is copied to
    ``<source folder>/<output_folder>/<tree>_<e|w>_<mandarin>_<date>_<type>.<ext>``.

    Args:
        date: Acquisition date (YYYYMMDD); selects the date folder and the
            Excel column names.
        excel_name: File name of the workbook inside the date folder.
        raw_data_dir: Root folder that holds one sub-folder per date.
        output_folder: Sub-folder (inside the RGB and Thermal folders) that
            receives the renamed copies. It is deleted and rebuilt on every call.

    Returns:
        None. Progress and a summary of missing images are printed.
    """
    date_dir = os.path.join(raw_data_dir, date)
    df = pd.read_excel(os.path.join(date_dir, excel_name))

    # image type -> (source folder, file extension used for that type)
    sources = {
        'RGB': (os.path.join(date_dir, 'raw data', 'RGB'), '.JPG'),          # RGB images end with .JPG (uppercase)
        'Thermal': (os.path.join(date_dir, 'raw data', 'Thermal'), '.jpg'),  # Thermal images end with .jpg (lowercase)
    }

    # Clear the folder that is actually written below, then list every source
    # folder once (None = folder missing) instead of once per Excel row
    listings = {}
    for image_type, (subfolder_path, _) in sources.items():
        stale_dir = os.path.join(subfolder_path, output_folder)
        if os.path.isdir(stale_dir):
            shutil.rmtree(stale_dir)
            print(f"Deleted existing '{output_folder}' folder at: {stale_dir}")
        listings[image_type] = (
            sorted(os.listdir(subfolder_path)) if os.path.isdir(subfolder_path) else None
        )

    # Set to track missing images uniquely
    missing_images = set()

    for _, row in df.iterrows():
        # Convert direction to 'e' or 'w'
        direction = str(row['Direction']).strip().lower()
        direction_char = 'e' if direction.startswith('e') else 'w'
        image_prefix = f"{row['Tree number']}_{direction_char}_{row['Mandarin number']}"

        for image_type, (subfolder_path, expected_ext) in sources.items():
            raw_id = row[f"{image_type}{date}"]
            image_id = str(raw_id).strip()

            # Skip blank cells and the 'X' placeholder
            if pd.isna(raw_id) or image_id == '' or image_id.upper() == 'X':
                continue

            filenames = listings[image_type]
            if filenames is None:
                print(f"Directory does not exist: {subfolder_path}")
                continue

            # A column with blanks is read as float, so the ID may arrive as
            # "29.0"; int(float(...)) accepts both "29" and "29.0"
            try:
                search_pattern = f"{int(float(image_id)):04d}"  # zero-padded, e.g. 0029
            except (ValueError, OverflowError):
                print(f"Image ID {image_id!r} in column {image_type}{date} is not a number")
                missing_images.add((image_id, image_type, subfolder_path))
                continue

            # First file with the right extension whose name contains the padded ID
            filename = next(
                (name for name in filenames
                 if search_pattern in name and name.endswith(expected_ext)),
                None,
            )
            if filename is None:
                missing_images.add((image_id, image_type, subfolder_path))
                continue

            file_ext = filename.split('.')[-1]
            new_filename = f"{image_prefix}_{date}_{image_type}.{file_ext}"

            target_dir = os.path.join(subfolder_path, output_folder)
            os.makedirs(target_dir, exist_ok=True)
            dst_path = os.path.join(target_dir, new_filename)

            # Two rows resolving to the same name must not overwrite each other
            if os.path.exists(dst_path):
                print(f"Image {new_filename} already exists in {target_dir}, skipping.")
            else:
                shutil.copy(os.path.join(subfolder_path, filename), dst_path)
                print(f"Original: {filename} -> New: {new_filename}")

    # Print all unique missing images at the end of processing for this Excel
    print(f"\nSummary of missing images for date {date}:")
    for image_id, image_type, subfolder_path in sorted(missing_images):
        print(f"Missing: Image ID {image_id} of type {image_type} in folder {subfolder_path}")

# Run the renaming for the 20241102 acquisition (the function defined in this cell)
process_images_for_first_excel()


Original: 1D3A4661.JPG -> New: 1_e_1_20241102_RGB.JPG
Original: IR_02-11-2024_0001.jpg -> New: 1_e_1_20241102_Thermal.jpg
Original: 1D3A4662.JPG -> New: 1_e_4_20241102_RGB.JPG
Original: IR_02-11-2024_0002.jpg -> New: 1_e_4_20241102_Thermal.jpg
Original: 1D3A4663.JPG -> New: 1_e_5_20241102_RGB.JPG
Original: IR_02-11-2024_0003.jpg -> New: 1_e_5_20241102_Thermal.jpg
Original: 1D3A4664.JPG -> New: 1_e_6_20241102_RGB.JPG
Original: IR_02-11-2024_0004.jpg -> New: 1_e_6_20241102_Thermal.jpg
Original: 1D3A4665.JPG -> New: 1_e_7_20241102_RGB.JPG
Original: IR_02-11-2024_0005.jpg -> New: 1_e_7_20241102_Thermal.jpg
Original: 1D3A4666.JPG -> New: 2_e_1_20241102_RGB.JPG
Original: IR_02-11-2024_0006.jpg -> New: 2_e_1_20241102_Thermal.jpg
Original: 1D3A4667.JPG -> New: 2_e_2_20241102_RGB.JPG
Original: IR_02-11-2024_0007.jpg -> New: 2_e_2_20241102_Thermal.jpg
Original: 1D3A4668.JPG -> New: 2_e_3_20241102_RGB.JPG
Original: IR_02-11-2024_0008.jpg -> New: 2_e_3_20241102_Thermal.jpg
Original: 1D3A4669.JPG -

In [None]:
# Copy the RGB and Thermal images of one 2024 acquisition date under descriptive names
def process_images_for_first_excel(
    date='20241129',
    excel_name='Beit Habad 29.11.24 prediction.xlsx',
    raw_data_dir='/content/drive/Othercomputers/My PC/Thesis/raw_data',
    output_folder='new_name_4',
):
    """Copy the images listed in one date's Excel sheet under descriptive names.

    Every row of the sheet names a tree, a side ('Direction') and a mandarin,
    plus the camera image IDs in the ``RGB<date>`` and ``Thermal<date>``
    columns. Each referenced image is copied to
    ``<source folder>/<output_folder>/<tree>_<e|w>_<mandarin>_<date>_<type>.<ext>``.

    Args:
        date: Acquisition date (YYYYMMDD); selects the date folder and the
            Excel column names.
        excel_name: File name of the workbook inside the date folder.
        raw_data_dir: Root folder that holds one sub-folder per date.
        output_folder: Sub-folder (inside the RGB and Thermal folders) that
            receives the renamed copies. It is deleted and rebuilt on every call.

    Returns:
        None. Progress and a summary of missing images are printed.
    """
    date_dir = os.path.join(raw_data_dir, date)
    df = pd.read_excel(os.path.join(date_dir, excel_name))

    # image type -> (source folder, file extension used for that type)
    sources = {
        'RGB': (os.path.join(date_dir, 'raw data', 'RGB'), '.JPG'),          # RGB images end with .JPG (uppercase)
        'Thermal': (os.path.join(date_dir, 'raw data', 'Thermal'), '.jpg'),  # Thermal images end with .jpg (lowercase)
    }

    # Clear the folder that is actually written below, then list every source
    # folder once (None = folder missing) instead of once per Excel row
    listings = {}
    for image_type, (subfolder_path, _) in sources.items():
        stale_dir = os.path.join(subfolder_path, output_folder)
        if os.path.isdir(stale_dir):
            shutil.rmtree(stale_dir)
            print(f"Deleted existing '{output_folder}' folder at: {stale_dir}")
        listings[image_type] = (
            sorted(os.listdir(subfolder_path)) if os.path.isdir(subfolder_path) else None
        )

    # Set to track missing images uniquely
    missing_images = set()

    for _, row in df.iterrows():
        # Convert direction to 'e' or 'w'
        direction = str(row['Direction']).strip().lower()
        direction_char = 'e' if direction.startswith('e') else 'w'
        image_prefix = f"{row['Tree number']}_{direction_char}_{row['Mandarin number']}"

        for image_type, (subfolder_path, expected_ext) in sources.items():
            raw_id = row[f"{image_type}{date}"]
            image_id = str(raw_id).strip()

            # Skip blank cells and the 'X' placeholder
            if pd.isna(raw_id) or image_id == '' or image_id.upper() == 'X':
                continue

            filenames = listings[image_type]
            if filenames is None:
                print(f"Directory does not exist: {subfolder_path}")
                continue

            # A column with blanks is read as float, so the ID may arrive as
            # "29.0"; int(float(...)) accepts both "29" and "29.0"
            try:
                search_pattern = f"{int(float(image_id)):04d}"  # zero-padded, e.g. 0029
            except (ValueError, OverflowError):
                print(f"Image ID {image_id!r} in column {image_type}{date} is not a number")
                missing_images.add((image_id, image_type, subfolder_path))
                continue

            # First file with the right extension whose name contains the padded ID
            filename = next(
                (name for name in filenames
                 if search_pattern in name and name.endswith(expected_ext)),
                None,
            )
            if filename is None:
                missing_images.add((image_id, image_type, subfolder_path))
                continue

            file_ext = filename.split('.')[-1]
            new_filename = f"{image_prefix}_{date}_{image_type}.{file_ext}"

            target_dir = os.path.join(subfolder_path, output_folder)
            os.makedirs(target_dir, exist_ok=True)
            dst_path = os.path.join(target_dir, new_filename)

            # Two rows resolving to the same name must not overwrite each other
            if os.path.exists(dst_path):
                print(f"Image {new_filename} already exists in {target_dir}, skipping.")
            else:
                shutil.copy(os.path.join(subfolder_path, filename), dst_path)
                print(f"Original: {filename} -> New: {new_filename}")

    # Print all unique missing images at the end of processing for this Excel
    print(f"\nSummary of missing images for date {date}:")
    for image_id, image_type, subfolder_path in sorted(missing_images):
        print(f"Missing: Image ID {image_id} of type {image_type} in folder {subfolder_path}")

# Run the renaming for the 20241129 acquisition (the function defined in this cell)
process_images_for_first_excel()


Original: 1D3A4955.JPG -> New: 1_e_4_20241102_RGB.JPG
Original: IR_30-11-2024_0001.jpg -> New: 1_e_4_20241102_Thermal.jpg
Original: 1D3A4956.JPG -> New: 1_e_5_20241102_RGB.JPG
Original: IR_30-11-2024_0002.jpg -> New: 1_e_5_20241102_Thermal.jpg
Original: 1D3A4957.JPG -> New: 1_e_7_20241102_RGB.JPG
Original: IR_30-11-2024_0003.jpg -> New: 1_e_7_20241102_Thermal.jpg
Original: 1D3A4958.JPG -> New: 2_e_1_20241102_RGB.JPG
Original: IR_30-11-2024_0004.jpg -> New: 2_e_1_20241102_Thermal.jpg
Original: 1D3A4959.JPG -> New: 2_e_2_20241102_RGB.JPG
Original: IR_30-11-2024_0005.jpg -> New: 2_e_2_20241102_Thermal.jpg
Original: 1D3A4960.JPG -> New: 2_e_3_20241102_RGB.JPG
Original: IR_30-11-2024_0006.jpg -> New: 2_e_3_20241102_Thermal.jpg
Original: 1D3A4961.JPG -> New: 2_e_4_20241102_RGB.JPG
Original: IR_30-11-2024_0007.jpg -> New: 2_e_4_20241102_Thermal.jpg
Original: 1D3A4962.JPG -> New: 2_e_5_20241102_RGB.JPG
Original: IR_30-11-2024_0008.jpg -> New: 2_e_5_20241102_Thermal.jpg
Original: 1D3A4963.JPG -

MAT files 2024

In [None]:
import os
import shutil
import pandas as pd

# Function to delete the "new_name" folder if needed
def delete_new_name_folder(path):
    """Remove the ``new_name`` entry directly under ``path``, if there is one.

    Args:
        path: Directory that may contain a ``new_name`` folder.

    Returns:
        None. A confirmation line is printed only when something was deleted.
    """
    target = os.path.join(path, "new_name")

    # Guard clause: nothing to clean up when the folder was never created.
    if not os.path.exists(target):
        return

    # rmtree deletes the folder together with everything inside it.
    shutil.rmtree(target)
    print(f"Deleted existing 'new_name' folder at: {target}")

# Helpers + entry point to process MAT files based on date and Excel path
def _parse_image_number(cell):
    """Convert one Excel cell of the Thermal column into an integer image number.

    Args:
        cell: Raw cell value as produced by pandas (int, float, str, NaN, ...).

    Returns:
        The image number as ``int``, or ``None`` when the cell is a placeholder
        that should be skipped (NaN, empty string, or the marker 'X'/'x').

    Raises:
        ValueError: if the cell holds text that is not a whole number.
    """
    text = str(cell).strip()
    if pd.isna(cell) or text == '' or text.upper() == 'X':
        return None
    try:
        return int(text)
    except ValueError:
        # pandas hands back floats (e.g. 29.0 -> '29.0') as soon as the column
        # contains blanks; accept those as long as they are whole numbers.
        number = float(text)
        if not number.is_integer():
            raise ValueError(f"not a whole image number: {text!r}")
        return int(number)


def _index_mat_files(folder):
    """Map trailing sequence number -> filename for the MAT files in ``folder``.

    Only the run of digits at the very end of the file stem is used (``9`` for
    ``IR_27092024_0009.MAT``), so digits belonging to the date part of the name
    can never be mistaken for an image ID. The extension check is
    case-insensitive. If two files share a sequence number, the first one in
    sorted order wins.
    """
    index = {}
    for filename in sorted(os.listdir(folder)):
        stem, ext = os.path.splitext(filename)
        if ext.lower() != '.mat':
            continue
        digits = stem[len(stem.rstrip('0123456789')):]
        if digits:
            index.setdefault(int(digits), filename)
    return index


def process_mat_files_for_date(date, excel_path,
                               base_dir='/content/drive/Othercomputers/My PC/Thesis/raw_data'):
    """Copy one session's Thermal MAT files under tree/direction/mandarin names.

    For every row of the Excel sheet the image number in column
    ``Thermal<date>`` is looked up among the files of
    ``<base_dir>/<date>/raw data/Thermal MAT/original_name`` and the match is
    copied to the sibling ``new_name`` folder as
    ``<tree>_<e|w>_<mandarin>_<date>_Thermal.<ext>``.

    Args:
        date: Session date string used both as folder name and column suffix
            (e.g. ``'20240927'``).
        excel_path: Path to the Excel sheet holding the columns 'Tree number',
            'Direction', 'Mandarin number' and ``Thermal<date>``.
        base_dir: Root folder containing the per-date folders. Defaults to the
            Drive location this notebook has always used.

    Returns:
        None. Progress, skipped duplicates and a summary of image IDs that
        could not be resolved are printed.

    Notes:
        * Rows whose ID cell is NaN, empty or 'X' are skipped.
        * Any direction not starting with 'e' is labelled 'w'.
        * Existing destination files are never overwritten.
    """
    mat_root = os.path.join(base_dir, str(date), 'raw data', 'Thermal MAT')
    mat_path = os.path.join(mat_root, 'original_name')
    target_path = os.path.join(mat_root, 'new_name')

    # Load the Excel file
    if not os.path.exists(excel_path):
        print(f"Excel file not found for date {date}: {excel_path}")
        return

    # Same treatment for a missing source folder: report and stop, don't crash.
    if not os.path.isdir(mat_path):
        print(f"MAT folder not found for date {date}: {mat_path}")
        return

    df = pd.read_excel(excel_path)

    # The column name does not depend on the row, so check it once up front
    # and say so instead of silently skipping every row.
    mat_col = f"Thermal{date}"
    if mat_col not in df.columns:
        print(f"Column '{mat_col}' not found in {excel_path}; nothing to process.")
        return

    # Create target directory for new names if it doesn't exist
    os.makedirs(target_path, exist_ok=True)

    # List the source folder once instead of once per row.
    mat_files = _index_mat_files(mat_path)

    # Set to track missing images uniquely
    missing_images = set()

    for _, row in df.iterrows():
        try:
            image_number = _parse_image_number(row[mat_col])
        except ValueError:
            # Unusable ID text: report it in the summary, keep processing the rest.
            missing_images.add((str(row[mat_col]).strip(), mat_path))
            continue
        if image_number is None:
            continue

        # Convert direction to 'e' or 'w'
        direction = str(row['Direction']).strip().lower()
        direction_char = 'e' if direction.startswith('e') else 'w'
        image_prefix = f"{row['Tree number']}_{direction_char}_{row['Mandarin number']}"

        filename = mat_files.get(image_number)
        if filename is None:
            missing_images.add((str(image_number), mat_path))
            continue

        # Keep the original extension (without the dot) exactly as it is on disk.
        file_ext = os.path.splitext(filename)[1][1:]
        new_filename = f"{image_prefix}_{date}_Thermal.{file_ext}"
        dst_path = os.path.join(target_path, new_filename)

        # Check if the file already exists in the "new_name" folder
        if os.path.exists(dst_path):
            print(f"Image {new_filename} already exists in {target_path}, skipping.")
        else:
            shutil.copy(os.path.join(mat_path, filename), dst_path)
            print(f"Original: {filename} -> New: {new_filename}")

    # Print all unique missing images at the end of processing for this date
    print(f"\nSummary of missing MAT files for date {date}:")
    for image_id, folder in sorted(missing_images):
        print(f"Missing: Image ID {image_id} in folder {folder}")

In [None]:

# Copy/rename the Thermal MAT files of the 30 Aug 2024 session, using that
# session's Excel sheet to map image IDs to tree/direction/mandarin names.
# Note: `date` and `excel_path` are notebook globals and are overwritten by
# each of the following per-session cells.
date = '20240830'
excel_path = '/content/drive/Othercomputers/My PC/Thesis/raw_data/20240830/Beit Habad 30.08.24 prediction.xlsx'
process_mat_files_for_date(date, excel_path)


In [None]:
# Copy/rename the Thermal MAT files of the 27 Sep 2024 session, using that
# session's Excel sheet to map image IDs to tree/direction/mandarin names.
date = '20240927'
excel_path = '/content/drive/Othercomputers/My PC/Thesis/raw_data/20240927/Beit Habad 27.09.24 prediction.xlsx'
process_mat_files_for_date(date, excel_path)


Original: IR_27092024_0009.MAT -> New: 1_e_1_20240927_Thermal.MAT
Original: IR_27092024_0010.MAT -> New: 1_e_2_20240927_Thermal.MAT
Original: IR_27092024_0011.MAT -> New: 1_e_3_20240927_Thermal.MAT
Original: IR_27092024_0012.MAT -> New: 1_e_4_20240927_Thermal.MAT
Original: IR_27092024_0013.MAT -> New: 1_e_5_20240927_Thermal.MAT
Original: IR_27092024_0014.MAT -> New: 1_e_6_20240927_Thermal.MAT
Original: IR_27092024_0015.MAT -> New: 1_e_7_20240927_Thermal.MAT
Original: IR_27092024_0016.MAT -> New: 2_e_1_20240927_Thermal.MAT
Original: IR_27092024_0017.MAT -> New: 2_e_2_20240927_Thermal.MAT
Original: IR_27092024_0018.MAT -> New: 2_e_3_20240927_Thermal.MAT
Original: IR_27092024_0019.MAT -> New: 2_e_4_20240927_Thermal.MAT
Original: IR_27092024_0020.MAT -> New: 2_e_5_20240927_Thermal.MAT
Original: IR_27092024_0021.MAT -> New: 2_e_6_20240927_Thermal.MAT
Original: IR_27092024_0022.MAT -> New: 2_e_7_20240927_Thermal.MAT
Original: IR_27092024_0023.MAT -> New: 3_e_1_20240927_Thermal.MAT
Original: 

In [None]:
# Copy/rename the Thermal MAT files of the 2 Nov 2024 session, using that
# session's Excel sheet to map image IDs to tree/direction/mandarin names.
date = '20241102'
excel_path = '/content/drive/Othercomputers/My PC/Thesis/raw_data/20241102/Beit Habad 02.11.24 prediction.xlsx'
process_mat_files_for_date(date, excel_path)

Original: IR_02112024_0001.MAT -> New: 1_e_1_20241102_Thermal.MAT
Original: IR_02112024_0002.MAT -> New: 1_e_4_20241102_Thermal.MAT
Original: IR_02112024_0003.MAT -> New: 1_e_5_20241102_Thermal.MAT
Original: IR_02112024_0004.MAT -> New: 1_e_6_20241102_Thermal.MAT
Original: IR_02112024_0005.MAT -> New: 1_e_7_20241102_Thermal.MAT
Original: IR_02112024_0006.MAT -> New: 2_e_1_20241102_Thermal.MAT
Original: IR_02112024_0007.MAT -> New: 2_e_2_20241102_Thermal.MAT
Original: IR_02112024_0008.MAT -> New: 2_e_3_20241102_Thermal.MAT
Original: IR_02112024_0009.MAT -> New: 2_e_4_20241102_Thermal.MAT
Original: IR_02112024_0010.MAT -> New: 2_e_5_20241102_Thermal.MAT
Original: IR_02112024_0011.MAT -> New: 2_e_6_20241102_Thermal.MAT
Original: IR_02112024_0012.MAT -> New: 2_e_7_20241102_Thermal.MAT
Original: IR_02112024_0013.MAT -> New: 3_e_1_20241102_Thermal.MAT
Original: IR_02112024_0014.MAT -> New: 3_e_2_20241102_Thermal.MAT
Original: IR_02112024_0015.MAT -> New: 3_e_3_20241102_Thermal.MAT
Original: 

In [None]:
# Copy/rename the Thermal MAT files of the 29 Nov 2024 session, using that
# session's Excel sheet to map image IDs to tree/direction/mandarin names.
# NOTE(review): the recorded output shows the source files are named
# IR_30112024_*, i.e. dated one day after the folder/column date 20241129 —
# confirm that this offset is expected.
date = '20241129'
excel_path = '/content/drive/Othercomputers/My PC/Thesis/raw_data/20241129/Beit Habad 29.11.24 prediction.xlsx'
process_mat_files_for_date(date, excel_path)

Original: IR_30112024_0001.MAT -> New: 1_e_4_20241129_Thermal.MAT
Original: IR_30112024_0002.MAT -> New: 1_e_5_20241129_Thermal.MAT
Original: IR_30112024_0003.MAT -> New: 1_e_7_20241129_Thermal.MAT
Original: IR_30112024_0004.MAT -> New: 2_e_1_20241129_Thermal.MAT
Original: IR_30112024_0005.MAT -> New: 2_e_2_20241129_Thermal.MAT
Original: IR_30112024_0006.MAT -> New: 2_e_3_20241129_Thermal.MAT
Original: IR_30112024_0007.MAT -> New: 2_e_4_20241129_Thermal.MAT
Original: IR_30112024_0008.MAT -> New: 2_e_5_20241129_Thermal.MAT
Original: IR_30112024_0009.MAT -> New: 2_e_6_20241129_Thermal.MAT
Original: IR_30112024_0010.MAT -> New: 2_e_7_20241129_Thermal.MAT
Original: IR_30112024_0011.MAT -> New: 3_e_1_20241129_Thermal.MAT
Original: IR_30112024_0012.MAT -> New: 3_e_2_20241129_Thermal.MAT
Original: IR_30112024_0013.MAT -> New: 3_e_3_20241129_Thermal.MAT
Original: IR_30112024_0014.MAT -> New: 3_e_4_20241129_Thermal.MAT
Original: IR_30112024_0015.MAT -> New: 3_e_5_20241129_Thermal.MAT
Original: 