In [2]:
import os
import pandas as pd
import shutil
import time

def process_csv_files(csv_folder, image_folder, dest_folder):
    """Move the tiles each CSV marks as POSITIVE into a per-CSV sub-folder.

    For every ``*.csv`` file in ``csv_folder``, rows whose ``Type`` column
    equals ``'POSITIVE'`` are selected and the images named in their ``File``
    column are moved from ``<image_folder>/<csv basename>/`` into
    ``<image_folder>/<csv basename>/Positive Tiles/``.

    Problems with a single CSV or a single image are reported with ``print``
    and skipped so that one bad entry does not abort the whole batch.

    Args:
        csv_folder: Directory containing the CSV files.
        image_folder: Directory holding one sub-folder of images per CSV,
            named after the CSV file without its extension.
        dest_folder: Directory that is created if it does not exist. This
            function does not move any image into it; the parameter is kept
            so existing callers keep working.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dest_folder, exist_ok=True)

    for filename in os.listdir(csv_folder):
        if not filename.endswith('.csv'):
            continue

        csv_path = os.path.join(csv_folder, filename)
        basename = os.path.splitext(filename)[0]
        image_subfolder = os.path.join(image_folder, basename)
        positive_tiles_folder = os.path.join(image_subfolder, 'Positive Tiles')
        os.makedirs(positive_tiles_folder, exist_ok=True)

        # Only the read itself is guarded: a malformed or unreadable CSV is
        # reported and skipped.
        try:
            df = pd.read_csv(csv_path)
        except Exception as e:
            print(f"Error reading {filename}: {e}")
            continue

        if 'Type' not in df.columns or 'File' not in df.columns:
            print(f"Warning: 'Type' or 'File' column missing in {filename}")
            continue

        # Blank 'File' cells are read as NaN (a float) and would make
        # os.path.join raise TypeError, so drop them and coerce the rest to
        # str. The set removes duplicates; sorting keeps the order stable.
        positive_names = df.loc[df['Type'] == 'POSITIVE', 'File'].dropna()
        positive_files = sorted(set(positive_names.astype(str)))

        for image_name in positive_files:
            image_path = os.path.join(image_subfolder, image_name)
            if not os.path.exists(image_path):
                print(f"Warning: Image {image_name} not found in {image_subfolder}")
                continue
            try:
                shutil.move(image_path, os.path.join(positive_tiles_folder, image_name))
            except OSError as e:
                # shutil.Error is an OSError subclass; keep going with the
                # remaining images instead of aborting a long-running batch.
                print(f"Error moving {image_name}: {e}")

if __name__ == "__main__":
    # Locations used for this run: result CSVs, tile images, and the
    # destination directory handed to process_csv_files.
    csv_folder = 'C:/Users/kapongo.lumamba/OneDrive - University of KwaZulu-Natal/0 MSc Computer Science/1 Models Testing/Result - svs_v2'
    image_folder = 'C:/Users/kapongo.lumamba/TB_Demo/WSI-SVS-Tiles_v2'
    dest_folder = 'C:/Users/kapongo.lumamba/TB_Demo/WSI-SVS-Tiles_v1/0 PosTiles'

    # One layout shared by the start and end timestamps printed below.
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    # Wall-clock start of the run.
    start_time = time.time()
    started_at = time.strftime(timestamp_format, time.localtime(start_time))
    print(f"Process started at: {started_at}")

    # The actual work: sort the positive tiles.
    process_csv_files(csv_folder, image_folder, dest_folder)

    # Wall-clock end of the run.
    end_time = time.time()
    ended_at = time.strftime(timestamp_format, time.localtime(end_time))
    print(f"Process ended at: {ended_at}")

    # Elapsed time in seconds, shown with two decimals.
    duration = end_time - start_time
    print(f"Duration: {duration:.2f} seconds")


# Basename extraction: `basename = os.path.splitext(filename)[0]` gives the name of each CSV file without the .csv extension.
# Sub-folder handling: the basename selects the corresponding sub-folder inside image_folder.
# Positive Tiles sub-folder: a "Positive Tiles" sub-folder is created within each corresponding image sub-folder.
# Per-file list: positive_files is re-created for every CSV file, so filenames from one CSV never carry over to the next.
# With these changes, the script handles each CSV file, collects its positive image filenames,
# and moves the corresponding images to that CSV's own Positive Tiles sub-folder.

Process started at: 2024-08-22 21:18:33
Process ended at: 2024-08-22 21:57:26
Duration: 2333.56 seconds


In [None]:
### Explanation:
#1. **Libraries**:
   #- `os` is used for interacting with the file system (e.g., listing files, checking paths).
   #- `pandas` is used for reading and processing CSV files.
   #- `shutil` is used for moving files.

#2. **Function `process_csv_files`**:
   #- **csv_folder**: Directory containing the CSV files.
   #- **image_folder**: Directory containing the images.
   #- **dest_folder**: Directory where the positive images will be moved.

#3. **CSV Processing**:
   #- For each CSV file, it checks if the columns `Type` and `File` exist.
   #- Filters rows where `Type` is `POSITIVE` and adds the corresponding `File` names to `positive_files`.

#4. **Image Moving**:
   #- Checks if each image in `positive_files` exists in the `image_folder`.
   #- Moves each existing image to the `dest_folder`.

#5. **Duplicates**:
   #- Removes duplicates from the `positive_files` list to avoid processing the same image multiple times.

### Notes:
#- Replace `'path/to/csv/folder'`, `'path/to/image/folder'`, and `'path/to/Positive Tiles'` with actual paths.
#- Ensure that pandas is installed (`pip install pandas`) and that all necessary directories exist or can be created.

In [None]:

#1. **Read CSV Files**: Loop through each CSV file in a directory and check the column `Type` for the value `POSITIVE`.
#2. **Collect File Names**: For each positive entry in the CSV file, collect the corresponding value from the `File` column into a list.
#3. **Move Images**: Access a directory containing PNG images, and move those images whose names are in the collected list to a new directory called `Positive Tiles`.

import os
import pandas as pd
import shutil

def process_csv_files(csv_folder, image_folder, dest_folder):
    """Move every image marked POSITIVE in any CSV into ``dest_folder``.

    All ``*.csv`` files in ``csv_folder`` are read. Rows whose ``Type``
    column equals ``'POSITIVE'`` contribute the name in their ``File``
    column, and each distinct name that exists directly inside
    ``image_folder`` is moved into ``dest_folder``.

    Problems with a single CSV or a single image are reported with ``print``
    and skipped so that one bad entry does not abort the whole batch.

    Args:
        csv_folder: Directory containing the CSV files.
        image_folder: Directory containing the images.
        dest_folder: Directory the positive images are moved to; created if
            it does not exist.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dest_folder, exist_ok=True)

    # A set collects the names so duplicates across CSV files collapse.
    positive_files = set()

    for filename in os.listdir(csv_folder):
        if not filename.endswith('.csv'):
            continue

        csv_path = os.path.join(csv_folder, filename)
        # Only the read itself is guarded: a malformed or unreadable CSV is
        # reported and skipped.
        try:
            df = pd.read_csv(csv_path)
        except Exception as e:
            print(f"Error reading {filename}: {e}")
            continue

        if 'Type' not in df.columns or 'File' not in df.columns:
            print(f"Warning: 'Type' or 'File' column missing in {filename}")
            continue

        # Blank 'File' cells are read as NaN (a float) and would make
        # os.path.join raise TypeError, so drop them and coerce the rest to
        # str before collecting.
        positive_names = df.loc[df['Type'] == 'POSITIVE', 'File'].dropna()
        positive_files.update(positive_names.astype(str))

    # Sorted so files are processed (and warnings printed) in a stable order.
    for image_name in sorted(positive_files):
        image_path = os.path.join(image_folder, image_name)
        if not os.path.exists(image_path):
            print(f"Warning: Image {image_name} not found in {image_folder}")
            continue
        try:
            shutil.move(image_path, os.path.join(dest_folder, image_name))
        except OSError as e:
            # shutil.Error is an OSError subclass; keep going with the
            # remaining images instead of aborting the batch.
            print(f"Error moving {image_name}: {e}")

if __name__ == "__main__":
    # Locations used for this run: result CSVs, tile images, and the
    # directory that receives the positive tiles.
    csv_folder = 'Z:/Admin/KAPONGO/0 MSc/WSI for Testing/Result - ndpi_v1'
    image_folder = 'Z:/Admin/KAPONGO/1 TB Model - Python/WSI-NDPI-Tiles_v1'
    dest_folder = 'Z:/Admin/KAPONGO/1 TB Model - Python/WSI-NDPI-Tiles_v1/0 PosTiles'

    # Run the sorting step with the paths above.
    process_csv_files(
        csv_folder=csv_folder,
        image_folder=image_folder,
        dest_folder=dest_folder,
    )


