This code has not yet been confirmed to run end to end. Minor errors may appear but should be easy to fix.

##### Implemented:
- Classifies the cropped images using the InsectDetect classifier (https://github.com/maxsitt/insect-detect/tree/main?tab=readme-ov-file), ApolloNet - a self-trained neural network, or BioClip (https://imageomics.github.io/bioclip/) (https://github.com/Imageomics/pybioclip)
- Measures the length of the insects in the cropped images using a custom-trained version of the SLEAP pipeline (https://sleap.ai/tutorials/initial-training.html)

In [None]:
from transformers import AutoModelForMaskGeneration, AutoProcessor, pipeline
from ipywidgets import interact, interactive, fixed, interact_manual, Layout
from PIL import Image, ImageDraw, ImageFont, PngImagePlugin
from typing import Any, List, Dict, Optional, Union, Tuple
from scipy.optimize import linear_sum_assignment
from bioclip import TreeOfLifeClassifier, Rank
from IPython.display import clear_output
import matplotlib.patches as patches
import plotly.graph_objects as go
from dataclasses import dataclass
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from collections import deque
import ipywidgets as widgets
import plotly.express as px
import pandas as pd
import numpy as np
import subprocess
import warnings
import requests
import random
import torch
import glob
import json
import math
import time
import csv
import cv2
import os
import re

### Selfmade functions
from FunktionenZumImportieren.helper_funktionen_clean import *
from FunktionenZumImportieren.settings_widgets import *
from AMT_functions.colors import reportcolors
from AMT_functions.amt_tracker import *

# Configure warning handling for this notebook session.
# NOTE(review): warnings.filterwarnings('ignore') is installed last, and new filters are
# inserted at the front of the filter list by default, so it takes precedence over the
# 'always' UserWarning filter set two lines earlier. As written, all warnings appear to be
# suppressed and custom_warning_format would never be used -- confirm this is intended.
warnings.simplefilter(action='always', category=UserWarning)
warnings.formatwarning = custom_warning_format
warnings.filterwarnings('ignore')

# Base path returned by get_local_env_path(); the pipeline cell joins it with the
# "InsectDetectSAM", "ApolloNet" and "sleap" sub-paths to locate the external scripts.
envs_path = get_local_env_path()

#### Define settings variables
##### Variables include:
- pixel_scale: how many pixels correspond to one cm of image on the current camera. Necessary for outputting mm readings of the insect length. Input `10` if you want to output the length in pixels (at 10 pixels per cm, 1 mm corresponds to exactly 1 pixel).
- start_image: The name of an image file (with extension) can be specified here. The code will then skip all the files before it. Leave empty if you want to analyze all the images in the folder. Please leave this empty in the batch analysis pipeline, as it might cause issues. If you want to start at the folder of a specific day, put the folder's index in the first position of the slice brackets ([___:]) where the big arrow is in the main cell (<---). Indexing starts at 0, so if you want to start from the 5th folder, put in a 4.
- save_visualisation: activate if you want a visualisation of the detections and tracks saved in "detection_drawings". Not relevant in this notebook!
- rank: Defines the taxonomic rank to which insects should be classified if BioClip is used. Can be set to None. This will make the algorithm classify up to the taxonomic rank that first satisfies the requirement set by certainty_threshold (see the function BioClip_inference in helper_funktionen). Setting rank to None will increase compute time.
- DIOPSIS_folder: diopsis camera folder containing the raw images to be analyzed. This structure is expected: DIOPSIS_folder/photos/folders_of_days

In [None]:
# Create the interactive settings widgets and keep what create_interactive_widgets() returns.
# NOTE(review): the pipeline cell reads `settings.<option>` (InsectDetect, ApolloNet, BioClip,
# rank, annotation_algorithm), but `get_values` is never called in the visible cells --
# presumably `settings` is derived from it or provided by the star-imported settings module; confirm.
get_values = create_interactive_widgets()

In [None]:
# Detection-related settings.
# NOTE(review): labels, threshold and buffer are not referenced in the visible pipeline cell;
# presumably they are used by the object-detection/cropping helpers -- confirm before removing.
labels = ["insect"]
threshold = 0.25
buffer = 15
# Pixels per cm of the current camera (see the settings description above); it is passed to
# the body-length script and to annotate_classifications in the pipeline cell.
pixel_scale = ask_for_pixel_scale()

In [None]:
# Root folder of the DIOPSIS camera data. The pipeline cell lists the day folders found in
# <DIOPSIS_folder>/photos. Set exactly one of the paths below for your machine.
## On Mac:
DIOPSIS_folder = "/Users/rentaluser/Desktop/DIOPSIS-383"

## On Windows:
#DIOPSIS_folder = r"C:\Users\rolingni\Desktop\input_test4"

Main pipeline cell. All functionality is included here.\
Carefully monitor the printed details, since some errors are not caught but only printed. An error summary will appear at the end, telling you whether an error occurred at any point during the inference.\
Reminder: This notebook only works if you have generated crops of individual insects beforehand.\
Crops need to be located in a folder structure similar to this: DIOPSIS_folder/photos/folders_of_individual_days/crops/

In [None]:
start_time = time.time()
length_error = False

global_error = False
classifier = TreeOfLifeClassifier()
tracker = AMTTracker(config)
object_detector = load_grounding_dino_model()

image_folders = [folder for folder in os.listdir(os.path.join(DIOPSIS_folder, "photos")) if not folder.startswith('.')]
image_folders = sorted(image_folders)

for img_folder in image_folders[:]: #  <------------------------------HERE----------------------------------------HERE-------------------------
    image_folder = os.path.join(DIOPSIS_folder, "photos", img_folder)
    start_time = time.time()
    
    images = os.listdir(image_folder)
    images = sorted(images)
    
                
    crops_available, crops_folder, crops_filepaths = are_crops_available(image_folder)
    
    print("Classifying crops now.")
    
    if settings.InsectDetect and crops_available:
        print("InsectDetect classifier running...")
        os.chdir(os.path.join(envs_path, "InsectDetectSAM", "yolov5-cls"))
        print("Working directory is now:", os.getcwd())
        !python classify/predict.py --project {image_folder} --name results --source {crops_folder} --weights "insect-detect-ml-main/models/efficientnet-b0_imgsz128.onnx" --img 128 --sort-top1 --sort-prob --concat-csv
        print("--------")
    
    if settings.ApolloNet and crops_available:
        print("Performing ApolloNet Classification :)")
        Apollo_script_path = os.path.join(envs_path, "ApolloNet", "Intro-to-CV-for-Ecologists-main", "inference.py")
        Apollo_command = f'conda run -n ApolloNet python "{Apollo_script_path}" "{crops_folder}"'
        Apollo = subprocess.run(Apollo_command, shell=True, capture_output=True, text=True)
        if Apollo.returncode != 0:
            print(Apollo.stderr)
            global_error = True
        else:
            print("ApolloNet ran clean\n--------")
    
    if settings.BioClip and crops_available:
        print("The tree of life is growing: running BioClip algorithm for image classification now...")
        BioClip_predictions = BioClip_inference(classifier, crops_filepaths, settings.rank, certainty_threshold = 0.45)
        if len(BioClip_predictions) > 0:
            clean_predictions = process_BioClip_predictions(BioClip_predictions, image_folder, settings.rank)
        print("--------")
        
    if crops_available:
        print("Done classifying the insects. Measuring body lengths now...")
        length_script_path = os.path.join(envs_path, "sleap", "body_length_inference_folder.py")
        command = f'conda run -n sleap python "{length_script_path}" "{crops_folder}" "{pixel_scale}"'
        ran_clean = subprocess.run(command, shell=True, capture_output=True, text=True)
        if ran_clean.returncode != 0:
            print(f"stdout = {ran_clean.stdout}")
            traceback_index = ran_clean.stderr.find("Traceback")
            print(ran_clean.stderr[traceback_index:])
            global_error = True
        else:
            print("Length measurements ran clean\n--------")
        
        merge_result_csvs(image_folder)
        
    
    print("Done measuring. Annotating all results onto cropped images now...")
    results_csv = get_classification_results_csv(image_folder, settings.annotation_algorithm)
    input_folder = os.path.join(image_folder, "cropped_and_annotated")
    length_csv_file_path = os.path.join(image_folder, "results", "body_length_results.csv")
    
    if results_csv is not None and crops_available:
        annotate_classifications(classification_results_csv = results_csv, body_length_csv = length_csv_file_path,
                                 cropped_images_folder = input_folder, image_folder = image_folder, pixel_scale = pixel_scale,
                                annotation_algorithm = settings.annotation_algorithm)
    
    print("--------")
    
    end_time = time.time()
    elapsed_time = end_time - start_time
    
    print(f"Done with day! No length Errors occured :)\nElapsed time: {elapsed_time/60:.2f} minutes \nTime per Image: {elapsed_time/len(images):.2f} seconds")

print("Merging all results csv into one...")
merge_all_results(DIOPSIS_folder)

global_end_time = time.time()
global_elapsed_time = global_end_time - global_start_time

print(f"Time elapsed in total: {(global_elapsed_time/60)/60:.2f} hours")
print(f"Pipeline took {round((global_elapsed_time/len(image_folders))/60, 2)} minutes per day on average")
if not global_error:
    print("All inferences ran clean :)")
else:
    print("WARNING: At least one inference error occured somewhere :(")