<img align="left" src="https://panoptes-uploads.zooniverse.org/project_avatar/86c23ca7-bbaa-4e84-8d8a-876819551431.png" type="image/png" height=100 width=100>
</img>
<h1 align="right">KSO Notebook #9: Run machine learning models on footage</h1>
<h3 align="right"><a href="https://colab.research.google.com/github/ocean-data-factory-sweden/kso/blob/main/notebooks/09_Run_ML_Models_on_footage.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></h3>
<h3 align="right">Written by the KSO team</h3>

# Set up KSO requirements

### Install all the requirements

Installing the requirements in Google Colab takes ~4 mins and might automatically crash/restart the session. Please re-run this cell until you get the "Successful installation... you're good to go!" message.

In [1]:
import sys
import os

# Check if notebook is running in colab
IN_COLAB = "google.colab" in sys.modules

if IN_COLAB:
    # Clone kso repo and install requirements
    if not os.path.exists("kso"):
        print("Installing all dependencies...")
        !git clone https://github.com/ocean-data-factory-sweden/kso.git
        !pip install -r /content/kso/requirements_colab.txt

    # Enable external widgets and navigate to the kso tutorial folder
    try:
        from google.colab import output

        output.enable_custom_widget_manager()
        os.chdir("kso/notebooks")
    except ImportError:
        pass

# Prepare the dev settings if needed
try:
    if "kso_utils" not in sys.modules:
        sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), "..")))
        import kso_utils

        print("Using development version...")
        # Enables testing changes in utils
        %load_ext autoreload
        %autoreload 2
except ImportError:
    print("Installing latest version from PyPI...")
    %pip install -q kso-utils

if IN_COLAB:

    def restart_runtime():
        os.kill(os.getpid(), 9)

    # Check if there are any issues with previously imported packages
    try:
        from kso_utils.project import ProjectProcessor
    except Exception as e:
        print(f"Error importing package: {e}")
        print("Restarting runtime to apply package changes...")
        restart_runtime()

# Avoid issues with widgets not displaying properly
!jupyter nbextension enable --user --py widgetsnbextension
!jupyter nbextension enable --user --py jupyter_bbox_widget
!jupyter nbextension enable --user --py ipysheet

# Load the clear output function to keep things clean
from IPython.display import clear_output

clear_output()
print("Successful installation... you're good to go!")

Successful installation... you're good to go!


### Import Python packages

In [2]:
# Import required modules
%matplotlib inline
import kso_utils.widgets as kso_widgets
import kso_utils.project_utils as p_utils
import kso_utils.yolo_utils as y_utils
from kso_utils.project import ProjectProcessor, MLProjectProcessor

print("Packages loaded successfully")

Matplotlib created a temporary cache directory at /tmp/matplotlib-tf_b296e because the default path (/data/cache/jhub/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


Packages loaded successfully


### Choose your project

In [3]:
# Show a dropdown of the available projects; the selection is read in the
# next cell via `project_name.value`
project_name = kso_widgets.choose_project()

Dropdown(description='Project:', options=('Template project', 'Koster_Seafloor_Obs', 'Spyfish_Aotearoa', 'SGU'…

### Initiate project's database

In [4]:
# Find the project that matches the name picked in the dropdown above
project = p_utils.find_project(project_name=project_name.value)
# Initialise the project processor (pp) used by the remaining cells
pp = ProjectProcessor(project)

INFO:root:Koster_Seafloor_Obs loaded succesfully
INFO:root:Running locally, no external connection to server needed.
INFO:root:Running locally so no csv files were downloaded from the server.
INFO:root:Updated species table from the temporary database
INFO:root:Updated sites table from the temporary database
INFO:root:Updated photos table from the temporary database
INFO:root:Updated movies table from the temporary database


In [5]:
# Initialise the machine-learning project processor (mlp) from the
# project processor (pp) created above
mlp = MLProjectProcessor(pp)

# Run model on footage

### Choose the model

In [6]:
# Show a dropdown of the available models; the selection is read in later
# cells via `model.value`
model = mlp.choose_model()

Dropdown(description='Select model: ', options=(('KSO_SEG_1', 'runs:/07e23f8a0afb411ca984bbbb8ee9e9a4/weights/…

### Choose folder to download the model

In [7]:
# Show a folder chooser (starting at the current directory) for the model
# download location; the selection is read via `download_dir.selected`
download_dir = kso_widgets.choose_folder(".", "where to download the model")

FileChooser(path='.', filename='', title='HTML(value='Choose location of where to download the model')', show_…

### Download model

In [8]:
# Download the selected model into the chosen folder; the returned value is
# passed to `detect_yolo` as `artifact_dir` further down
artifact_dir = mlp.get_model(model.value, download_dir.selected)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

### Choose the footage to run the model on

In [9]:
# Show radio buttons to choose between existing and new footage
pp.choose_footage_source()

RadioButtons(description='Choose footage source:', options=('Existing Footage', 'New Footage'), value='Existin…

In [11]:
# Show the widget used to pick the footage the model will be run on
pp.choose_footage()

FileChooser(path='/buckets/kso/output', filename='', title='HTML(value='Choose location of new footage')', sho…

In [12]:
# Ensure the selected footage and paths are loaded to the system
# NOTE(review): presumably this populates `pp.selected_movies_paths`, which
# the detection cell below reads — confirm.
pp.check_selected_movies()

### Choose folder to save the runs

In [13]:
# Show a folder chooser for the runs output; the selection is read by the
# detection cell via `save_dir.selected`.
# This should be left as default value in most cases.
save_dir = kso_widgets.choose_folder(".", "runs output")

FileChooser(path='.', filename='', title='HTML(value='Choose location of runs output')', show_hidden='False', …

### Choose a confidence threshold for evaluation

In [14]:
# Show a slider for the confidence threshold (the saved output shows an
# initial value of 0.5); the value is read via `conf_thres.value`
conf_thres = kso_widgets.choose_conf()

FloatSlider(value=0.5, continuous_update=False, description='Confidence threshold:', max=1.0, readout_format='…

### Choose a suitable experiment name

In [15]:
# Show a text box for the experiment name; the value is read via
# `exp_name.value` and passed to `detect_yolo` as `name`
exp_name = kso_widgets.choose_experiment_name()

Text(value='exp_name', description='Experiment name:', placeholder='Choose an experiment name', style=TextStyl…

### Run model over selected footage

In [17]:
# Work out which footage to hand to the model: a single path string is used
# as-is, otherwise only the first entry of the selection is used.
if isinstance(pp.selected_movies_paths, str):
    footage_source = pp.selected_movies_paths
else:
    footage_source = pp.selected_movies_paths[0]

# Run the selected model over that footage and save the output
mlp.detect_yolo(
    save_dir=save_dir.selected,
    conf_thres=conf_thres.value,
    artifact_dir=artifact_dir,
    save_output=True,
    project=mlp.project_name,
    name=exp_name.value,
    model=model.value,
    latest=True,
    source=footage_source,
)


image 1/497 /buckets/kso/input/kristineberg_2024_april_SA/G0010080.JPG: 480x640 30 2s, 1 5, 1 4, 1 8, 137.4ms
image 2/497 /buckets/kso/input/kristineberg_2024_april_SA/G0010081.JPG: 480x640 29 2s, 2 4s, 1 8, 10.7ms
image 3/497 /buckets/kso/input/kristineberg_2024_april_SA/G0010085.JPG: 480x640 12 2s, 2 4s, 1 8, 10.2ms
image 4/497 /buckets/kso/input/kristineberg_2024_april_SA/G0010086.JPG: 480x640 10 2s, 1 8, 9.9ms
image 5/497 /buckets/kso/input/kristineberg_2024_april_SA/G0010087.JPG: 480x640 12 2s, 1 4, 1 8, 10.9ms
image 6/497 /buckets/kso/input/kristineberg_2024_april_SA/G0010088.JPG: 480x640 9 2s, 4 4s, 1 8, 9.8ms
image 7/497 /buckets/kso/input/kristineberg_2024_april_SA/G0010089.JPG: 480x640 14 2s, 2 4s, 1 8, 9.8ms
image 8/497 /buckets/kso/input/kristineberg_2024_april_SA/G0010091.JPG: 480x640 6 2s, 1 4, 1 8, 9.7ms
image 9/497 /buckets/kso/input/kristineberg_2024_april_SA/G0010092.JPG: 480x640 14 2s, 1 4, 1 8, 9.7ms
image 10/497 /buckets/kso/input/kristineberg_2024_april_SA/G00100

ValueError: not enough values to unpack (expected 6, got 2)

### View the processed footage

In [19]:
# Open a viewer for the processed footage.
# NOTE(review): the saved output of this cell is
# "AttributeError: module 'kso_utils.widgets' has no attribute 'select_viewer'",
# so this function is not available in the kso_utils version that was used —
# confirm the current name of the viewer helper before relying on this cell.
kso_widgets.select_viewer()

AttributeError: module 'kso_utils.widgets' has no attribute 'select_viewer'

### Process the detections
Add the metadata associated with the species identified and the movies

In [20]:
# Process the detections found under `mlp.eval_dir` and keep the result in
# `dets_df`, which the plotting and download cells below consume.
# NOTE(review): the saved output shows a FileNotFoundError for
# `annotations.csv`; presumably because the detection cell above did not
# finish — confirm the detection step completes (or set `mlp.eval_dir` via
# OPTIONAL #4 at the end of the notebook) before running this cell.
dets_df = pp.process_detections(
    project=pp.project,
    db_connection=pp.db_connection,
    csv_paths=pp.csv_paths,
    annotations_csv_path=mlp.eval_dir,
    model_registry=mlp.registry,
    model=model.value,
    team_name=mlp.team_name,
    project_name=mlp.project_name,
)

FileNotFoundError: [Errno 2] No such file or directory: '/data/album/kso/Saga_testing/KSO_Run_2/annotations.csv'

### Plot the processed detections

In [None]:
# Plot the processed detections in `dets_df` using the interval settings below
pp.plot_processed_detections(
    df=dets_df,
    thres=10,  # number of seconds for thresholding in interval
    int_length=10,  # length in seconds of interval for filtering
)

OPTIONAL #1 - Download the processed detections in a csv file for further analysis (e.g. comparisons between citizen scientists and experts)

In [None]:
# Export the processed detections (`dets_df`) as a csv file
pp.download_detections_csv(dets_df)

OPTIONAL #2 - Processed classifications with species as columns (For biodiversity purposes)

In [None]:
# Export the processed detections as a csv file with species as columns
pp.download_detections_species_cols_csv(
    df=dets_df,
)

OPTIONAL #3 - Download maxN annotations in GBIF/OBIS format (For biodiversity purposes)

In [None]:
# Export the detections in GBIF/OBIS format.
# NOTE(review): the first argument presumably identifies the source of the
# annotations — confirm the accepted values.
pp.download_gbif_occurrences("ml_algorithms", dets_df)

OPTIONAL #4 - Upload csv with labels from previous model detections so that you can aggregate them for biological analysis (For debugging purposes)

In [None]:
# Start browsing from the runs folder chosen earlier when one is available,
# otherwise fall back to the current directory.
if "save_dir" in vars() and save_dir.selected is not None:
    initial_dir = save_dir.selected
else:
    initial_dir = "."

# Show the folder chooser and keep the widget on mlp so that its selection
# can be read in a later cell
mlp.widget_eval_dir = kso_widgets.choose_folder(initial_dir, "runs output")

OPTIONAL #4 (required if you ran the previous cell) - Load the path of the csv files

In [None]:
# Use the folder picked in the previous cell as `mlp.eval_dir`, which the
# "Process the detections" cell passes as `annotations_csv_path`
mlp.eval_dir = mlp.widget_eval_dir.selected

In [None]:
# END