Latest update: 2022-11-21 15:39:55.996287

<a href="https://colab.research.google.com/github/ocean-data-factory-sweden/kso-data-management/blob/main/tutorials/08_Analyse_Aggregate_Zooniverse_Annotations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<img align="left" src="https://panoptes-uploads.zooniverse.org/project_avatar/86c23ca7-bbaa-4e84-8d8a-876819551431.png" type="image/png" height=100 width=100>


<h1 align="right">Colab KSO Tutorials #8: Analyse / Aggregate Zooniverse classifications</h1>
<h3 align="right">Written by @jannesgg and @vykanton</h3>

# Set up and requirements

### Install and import Python packages

In [None]:
from IPython.display import clear_output

try:
    import google.colab
    import os

    IN_COLAB = True
    print("Running in Colab...")

    # Clone kso-data-management repo
    !git clone --quiet --recurse-submodules -b main https://github.com/ocean-data-factory-sweden/kso-data-management.git
    !pip install -q --upgrade pip
    !pip install -q -r kso-data-management/requirements.txt

    # Fix libmagic issue
    !apt-get -qq update && apt-get -qq install -y libmagic-dev > /dev/null

    # Enable external widgets
    from google.colab import output

    output.enable_custom_widget_manager()

    os.chdir("kso-data-management/tutorials")
    try:
        clear_output()
        print("All packages are installed and ready to go!")
    except:
        clear_output()
        print("There have been some issues installing the packages!")
except:
    IN_COLAB = False
    import sys

    # Install requirements
    !pip install -q --no-warn-script-location --upgrade pip
    !pip install -qr ../requirements.txt

    !jupyter nbextension install --user --py widgetsnbextension
    !jupyter nbextension enable --user --py widgetsnbextension
    !jupyter nbextension install --user --py jupyter_bbox_widget
    !jupyter nbextension enable --user --py jupyter_bbox_widget

    clear_output()
    print("Running locally... you're good to go!")

In [None]:
# Make the helper modules importable and load everything the tutorial uses
import sys, os
from pathlib import Path

# Enables testing changes in utils
%load_ext autoreload
%autoreload 2

# Add the parent directory to the import search path
# (presumably where the kso_utils package lives - confirm against the repo layout)
sys.path.append("..")

# Import required modules
import kso_utils.tutorials_utils as t_utils
import kso_utils.server_utils as s_utils
import kso_utils.project_utils as p_utils
import kso_utils.t3_utils as t3
import kso_utils.t4_utils as t4
import kso_utils.t5_utils as t5
import kso_utils.t8_utils as t8
from kso_utils.yolo_utils import frame_aggregation
from kso_utils.zooniverse_utils import populate_agg_annotations

print("Packages loaded successfully")

### Choose your project

In [None]:
# Pick the project to work on; the selection is read back through `project_name.value`
project_name = t_utils.choose_project()

In [None]:
# Look up the project that matches the selected project name
project = p_utils.find_project(project_name=project_name.value)

### Initiate SQL database and populate sites, movies and species

In [None]:
# Initiate the database for the chosen project
# (the returned dict is indexed with "db_path" when retrieving classifications)
db_info_dict = t_utils.initiate_db(project)

In [None]:
# Connect to the Zooniverse project; the connection is reused when retrieving the Zooniverse information
zoo_project = t_utils.connect_zoo_project(project)

### Retrieve Zooniverse information

In [None]:
# Request the subjects, workflows and classifications of the Zooniverse project.
# The result is indexed with "workflows" and "classifications" in later cells.
zoo_info_dict = t_utils.retrieve__populate_zoo_info(
    project=project,
    db_info_dict=db_info_dict,
    zoo_project=zoo_project,
    zoo_info=["subjects", "workflows", "classifications"],
)

# Step 1: Specify the Zooniverse workflow id and version of interest

*Note: A manual export in Zooniverse is required to get the most up-to-date classifications here.*

Make sure your workflows in Zooniverse have different names to avoid issues while selecting the workflow id

In [None]:
# Display a selectable list of workflow names and a list of versions of the workflow of interest
workflows_df = zoo_info_dict["workflows"]
wm = t8.WidgetMaker(workflows_df)
# Leave `wm` as the last expression of the cell so the notebook displays it
wm

In [None]:
# Retrieve classifications from the workflow of interest
# `wm.checks` holds the selections made in the widget above; the entry
# "Subject type: #0" is presumably the subject type of the first selected
# workflow - confirm against t8.WidgetMaker
class_df = t8.get_classifications(
    wm.checks,
    workflows_df,
    wm.checks["Subject type: #0"],
    zoo_info_dict["classifications"],
    db_info_dict["db_path"],
    project,
)

# Step 2: Aggregate classifications received on the workflow of interest

In [None]:
# Specify the agreement threshold required among citizen scientists
# (the available parameters depend on the selected subject type)
agg_params = t8.choose_agg_parameters(wm.checks["Subject type: #0"])

In [None]:
# Aggregate the retrieved classifications using the chosen parameters.
# Two results come back: the aggregated classifications and the raw ones.
agg_class_df, raw_class_df = t8.aggregrate_classifications(
    class_df, wm.checks["Subject type: #0"], project, agg_params
)

# Step 3: Summarise the number of classifications based on the agreement specified

In [None]:
# Count the aggregated subjects recorded for each label
agg_class_df.groupby("label")["subject_ids"].count()

# Step 4: Display the aggregated classifications in a table

In [None]:
# Display the aggregated classifications dataframe as a table
t8.launch_table(agg_class_df, wm.checks["Subject type: #0"])

In [None]:
# View the annotations of the aggregated classifications
t8.launch_viewer(agg_class_df, wm.checks["Subject type: #0"])

In [None]:
# Launch the classifications_per_subject explorer on the raw (non-aggregated) classifications
t8.explore_classifications_per_subject(raw_class_df, wm.checks["Subject type: #0"])

# Step 5: Frame aggregation (YOLO) - optional

## Prepare the labelled frames

### Select species of interest and path to store the data

In [None]:
# Choose species of interest for model training
# (the selection is read back through `species_i.value`)
species_i = t4.choose_species(db_info_dict)

In [None]:
# Store the selected classes of interest as a plain list for the following cells
cl = list(species_i.value)
print("The selected species are", cl)

In [None]:
# Specify path to store the labelled frames and annotations
# (the chosen folder is read back through `output_folder.selected`)
output_folder = t_utils.choose_folder(".", "output")

In [None]:
# Keep only the aggregated rows whose label is one of the selected species
agg_class_selected = agg_class_df.loc[agg_class_df["label"].isin(cl)]

In [None]:
# Preview the first rows of the aggregated frame information for the selected species
agg_class_selected.head()

In [None]:
# Add the aggregated annotations of the selected species to the db
# ("frame" presumably identifies the subject type being stored - confirm against zooniverse_utils)
populate_agg_annotations(agg_class_selected, "frame", project)

In [None]:
# Determine your training parameters
# (presumably the proportion of data kept for testing; read back through `percentage_test.value`)
percentage_test = t5.choose_test_prop()

In [None]:
# Run the preparation script on the selected aggregated classifications.
# Results are written under the folder chosen in `output_folder`.
frame_aggregation(
    project,
    db_info_dict,
    output_folder.selected,
    percentage_test.value,
    cl,
    # NOTE(review): presumably the output image size in pixels - confirm order (width, height)
    (720, 540),
    remove_nulls=True,
    track_frames=False,
    # NOTE(review): presumably only relevant when track_frames=True - confirm against yolo_utils
    n_tracked_frames=10,
    agg_df=agg_class_selected,
)


# Step 6: Preview and adjust aggregated annotations

In [None]:
# Open the annotations viewer on the output folder for the selected species
t8.get_annotations_viewer(output_folder.selected, species_list=cl)

In [None]:
# END