<img align="left" src="https://panoptes-uploads.zooniverse.org/project_avatar/86c23ca7-bbaa-4e84-8d8a-876819551431.png" type="image/png" height=100 width=100>
</img>
<h1 align="right">Run machine learning models on footage</h1>
<h3 align="right"><a href="https://colab.research.google.com/github/ocean-data-factory-sweden/kso/blob/main/notebooks/publish/Publish_observations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></h3>
<h3 align="right">Written by the KSO team</h3>

# Set up KSO requirements

### Install requirements and load KSO modules

Installing the requirements in Google Colab takes ~4 mins and might automatically crash/restart the session. Please re-run this cell until you get the "KSO successfully imported!" message.

In [None]:
# Render matplotlib figures inline, in the output area of the cell that creates them
%matplotlib inline

def initiate_dev_version():
    kso_path = os.path.abspath(os.path.join(os.getcwd(), "../.."))
    if os.path.isdir(os.path.join(kso_path, "kso_utils")):
        sys.path.insert(0, kso_path)
        %load_ext autoreload
        %autoreload 2
        print("Development mode ON - kso-utils added to the system.")
    else:
        raise FileNotFoundError("kso_utils directory not found in the expected path.")

def install_kso_utils():
    !pip install -q kso-utils
    # Temporary workaround to install panoptes from the source (avoid requests incompatibility)
    !pip install git+https://github.com/zooniverse/panoptes-python-client.git
    print("Restarting runtime to apply package changes...")
    os.kill(os.getpid(), 9)

# First attempt: import an already-installed kso_utils package.
try:
    import kso_utils.widgets as kso_widgets
    import kso_utils.project_utils as p_utils
    import kso_utils.yolo_utils as y_utils
    from kso_utils.project import ProjectProcessor, MLProjectProcessor
    print("KSO successfully imported!")
except Exception as e:
    print(f"Error importing kso modules: {e}")
    # Second attempt: fall back to a local development checkout.
    try:
        initiate_dev_version()
        import kso_utils.widgets as kso_widgets
        import kso_utils.project_utils as p_utils
        import kso_utils.yolo_utils as y_utils
        from kso_utils.project import ProjectProcessor, MLProjectProcessor
        print("KSO successfully imported!")
    except Exception as dev_error:
        # Report why development mode failed instead of discarding the
        # reason silently, then install the package as a last resort.
        print(f"Development mode unavailable: {dev_error}")
        # Installing kills the kernel; this cell must be run again afterwards.
        install_kso_utils()

### Choose your project

In [None]:
# Project chooser; its `.value` is read when the project is looked up below
project_name = kso_widgets.choose_project()

### Initiate project's database

In [None]:
# Look up the project matching the name currently selected in the chooser
project = p_utils.find_project(project_name=project_name.value)
# Create the project processor (pp) used by the footage and detection cells
pp = ProjectProcessor(project)

In [None]:
# Create the machine-learning project processor (mlp), built from pp
mlp = MLProjectProcessor(pp)

# Run model on footage

### Choose the model

In [None]:
# Model chooser; `model.value` is passed on when downloading and running the model
model = mlp.choose_model()

### Choose folder to download the model

In [None]:
# Folder chooser; "." is presumably the starting folder (TODO confirm).
# `download_dir.selected` is handed to mlp.get_model in the next cell.
download_dir = kso_widgets.choose_folder(".", "where to download the model")

### Download model

In [None]:
# Get the chosen model using the chosen folder; the result is later passed to
# mlp.detect_yolo as `artifact_dir`
artifact_dir = mlp.get_model(model.value, download_dir.selected)

### Choose the footage to run the models on

In [None]:
# Step 1 of the footage selection: choose where the footage comes from
pp.choose_footage_source()

In [None]:
# Step 2 of the footage selection: choose the footage itself
pp.choose_footage()

In [None]:
# Ensure the selected footage and paths are loaded to the system.
# Run this before the detection cell, which reads pp.selected_movies_paths.
pp.check_selected_movies()

### Choose folder to save the runs

In [None]:
# Folder for the run outputs; `save_dir.selected` is passed to mlp.detect_yolo.
# This should be left as default value in most cases.
save_dir = kso_widgets.choose_folder(".", "runs output")

### Choose a confidence threshold for evaluation

In [None]:
# Confidence threshold selector; `conf_thres.value` is passed to mlp.detect_yolo
conf_thres = kso_widgets.choose_conf()

### Choose a suitable experiment name

In [None]:
# Experiment name input; `exp_name.value` is passed to mlp.detect_yolo as `name`
exp_name = kso_widgets.choose_experiment_name()

### Run model over selected footage

In [None]:
# Run the chosen model over the selected footage, using the folder, threshold,
# experiment name and model picked in the cells above
mlp.detect_yolo(
    save_dir=save_dir.selected,
    conf_thres=conf_thres.value,
    artifact_dir=artifact_dir,
    save_output=True,
    project=mlp.project_name,
    name=exp_name.value,
    model=model.value,
    out_format="yolo",
    # A single path string is used as-is; for any other type only the FIRST
    # entry of the selection is passed on as the source
    source=(
        pp.selected_movies_paths
        if isinstance(pp.selected_movies_paths, str)
        else pp.selected_movies_paths[0]
    ),
)

### View the processed footage

In [None]:
# Open the viewer selector to inspect the processed footage
kso_widgets.select_viewer()

### Process the detections
Add the metadata associated with the species identified and the movies

In [None]:
# Build the processed detections (dets_df) consumed by the plotting and
# download cells below.
# NOTE(review): mlp.eval_dir is only assigned explicitly in the OPTIONAL #4
# cell at the end of this notebook; presumably mlp.detect_yolo sets it as
# well - confirm before running this cell in a fresh session.
dets_df = pp.process_detections(
    project=pp.project,
    db_connection=pp.db_connection,
    csv_paths=pp.csv_paths,
    annotations_csv_path=mlp.eval_dir,
    model_registry=mlp.registry,
    model=model.value,
    team_name=mlp.team_name,
    project_name=mlp.project_name,
)

### Plot the processed detections

In [None]:
# Plot the detections held in dets_df; both settings below are in seconds
pp.plot_processed_detections(
    df=dets_df,
    thres=10,  # number of seconds for thresholding in interval
    int_length=10,  # length in seconds of interval for filtering
)

OPTIONAL #1 - Download the processed detections in a csv file for further analysis (e.g. comparisons between citizen scientists and experts)

In [None]:
# Export the processed detections (dets_df) as a csv file
pp.download_detections_csv(dets_df)

OPTIONAL #2 - Download the processed detections with species as columns (For biodiversity purposes)

In [None]:
# Export the processed detections (dets_df) as a csv with species as columns
pp.download_detections_species_cols_csv(
    df=dets_df,
)

OPTIONAL #3 - Download maxN annotations in GBIF/OBIS format (For biodiversity purposes)

In [None]:
# Export the detections in GBIF/OBIS format.
# NOTE(review): "ml_algorithms" presumably labels where the annotations come
# from - confirm against pp.download_gbif_occurrences.
pp.download_gbif_occurrences("ml_algorithms", dets_df)

OPTIONAL #4 - Upload csv with labels from previous model detections so that you can aggregate them for biological analysis (For debugging purposes)

In [None]:
# Folder chooser for existing run outputs. Its first argument is the runs
# folder picked earlier when `save_dir` exists in this session and has a
# selection; otherwise it falls back to the current directory (".").
mlp.widget_eval_dir = kso_widgets.choose_folder(
    (
        save_dir.selected
        if "save_dir" in vars() and save_dir.selected is not None
        else "."
    ),
    "runs output",
)

OPTIONAL #4 (required if you chose a folder in the previous cell) - Load the path of the csv files

In [None]:
# Store the selected folder on mlp; pp.process_detections receives
# mlp.eval_dir as its `annotations_csv_path`
mlp.eval_dir = mlp.widget_eval_dir.selected

In [None]:
# END