This notebook reads the output CSV of the apollo toilet app. It then copies every image that was labeled 'Incorrect' or 'Unsure' to an output folder. Each copy is renamed so that the taxonomic order BioClip predicted for the image is appended to the original file name (before the extension, which is always written as `.jpg`).

In [17]:
import pandas as pd
import random
import os
import shutil
from tqdm import tqdm

In [22]:
def extract_order(filename):
    """Return the path segment two levels below the first usable ``images`` segment.

    The path is split on "/" and scanned left to right. The first segment
    equal to "images" that still has at least two segments after it decides
    the result: the segment two positions further on is returned.

    Args:
        filename: Slash-separated path string.

    Returns:
        The segment found two positions after "images", or the string
        "Unknown" when no "images" segment has two segments following it.
    """
    segments = filename.split("/")
    # Stopping two short of the end guarantees segments[idx + 2] exists.
    for idx in range(len(segments) - 2):
        if segments[idx] == "images":
            return segments[idx + 2]
    return "Unknown"

def load_existing_results(output_path):
    """Load a previously written results CSV, or start an empty table.

    Args:
        output_path: Path of the CSV file to read.

    Returns:
        The DataFrame parsed from ``output_path`` when that path exists;
        otherwise an empty DataFrame whose only column is 'Filename'.
    """
    # Guard clause: nothing on disk yet, so hand back an empty frame.
    if not os.path.exists(output_path):
        return pd.DataFrame(columns=['Filename'])
    return pd.read_csv(output_path)

def process_csv(file_path, output_path, source_root="/Volumes/Apollo/Diopsis_Cameras/App"):
    """Copy every image labelled 'Incorrect' or 'Unsure' into ``output_path``.

    The CSV at ``file_path`` must provide a 'Filename' and a 'Response'
    column. For each row whose 'Response' is 'Incorrect' or 'Unsure', the
    part of 'Filename' after the last "/images/" is re-rooted under
    ``source_root`` and that file is copied to ``output_path``. The copy is
    named ``<original stem>_<order>.jpg``, where ``<order>`` comes from
    ``extract_order(filename)``.

    Rows whose 'Filename' is not a string (e.g. an empty cell) or does not
    contain "/images/" are reported and skipped.

    NOTE(review): the destination name is built from the base name only and
    always ends in ".jpg", so two source images sharing a base name and order
    end up on the same destination path, and the later copy replaces the
    earlier one. Confirm base names are unique across the source folders.

    Args:
        file_path: Path of the CSV to read.
        output_path: Folder that receives the copies. It is created if it
            does not exist yet.
        source_root: Folder that contains the ``images/`` tree the CSV paths
            are resolved against. Defaults to the original hard-coded root.

    Returns:
        None.

    Raises:
        FileNotFoundError: Propagated from ``shutil.copy`` when a source
            image is missing.
    """
    df = pd.read_csv(file_path)

    # Keep only the rows the reviewer marked as 'Incorrect' or 'Unsure'.
    filtered_df = df[df['Response'].isin(['Incorrect', 'Unsure'])]
    print(f"{len(filtered_df)} images will be copied")

    if filtered_df.empty:
        print("No matching rows containing Incorrect or Usure as response were found.")
        return

    # shutil.copy does not create missing parent folders, so make sure the
    # destination exists before the first copy.
    os.makedirs(output_path, exist_ok=True)

    # Only the 'Filename' column is needed; an explicit total lets tqdm show
    # a real progress bar instead of a bare iteration counter.
    for filename in tqdm(filtered_df['Filename'], total=len(filtered_df)):
        # Empty cells are read as NaN (a float); treat them like any other
        # malformed path instead of failing on the substring test.
        if not isinstance(filename, str) or "/images/" not in filename:
            print("Invalid filename format.")
            continue

        # Re-root everything after the last "/images/" under source_root.
        extracted_path = "images/" + filename.split("/images/")[-1]
        from_path = os.path.join(source_root, extracted_path)

        # Append the predicted order to the original stem.
        predicted_order = extract_order(filename)
        stem = os.path.splitext(os.path.basename(extracted_path))[0]
        save_filename = stem + f"_{predicted_order}.jpg"

        shutil.copy(from_path, os.path.join(output_path, save_filename))

In [23]:
# CSV that process_csv reads the 'Filename' / 'Response' rows from.
file_path = "/Volumes/Apollo/Diopsis_Cameras/App_outputs/user_checked_predictions_10_02_25.csv"
# Folder that receives the copied and renamed images.
output_path = "/Volumes/Apollo/Diopsis_Cameras/RESULTS_2024/Incorrect_and_Unsure"

process_csv(file_path=file_path, output_path=output_path)

12688 images will be copied


12688it [00:59, 214.21it/s]
