<img align="left" src="https://panoptes-uploads.zooniverse.org/project_avatar/86c23ca7-bbaa-4e84-8d8a-876819551431.png" type="image/png" height=100 width=100>
</img>


<h1 align="right">Colab KSO Tutorials #8: Analyse Zooniverse classifications</h1>
<h3 align="right">Written by @jannesgg and @vykanton</h3>
<h5 align="right">Last updated: Aug 8th, 2022</h5>

# Set up and requirements

## Install koster_data_management and its requirements

In [1]:
# Clone koster_data_management repo
!git clone --recurse-submodules -b dev https://github.com/ocean-data-factory-sweden/koster_data_management.git
!pip install -r koster_data_management/requirements.txt

# Prevent ipywidgets display issues
!pip uninstall ipywidgets --y
!pip install ipywidgets
!jupyter nbextension enable --py widgetsnbextension

# Restart the session to load the latest packages
exit()

Cloning into 'koster_data_management'...
remote: Enumerating objects: 3001, done.[K
remote: Counting objects: 100% (368/368), done.[K
remote: Compressing objects: 100% (196/196), done.[K
remote: Total 3001 (delta 249), reused 278 (delta 172), pack-reused 2633[K
Receiving objects: 100% (3001/3001), 2.32 MiB | 8.12 MiB/s, done.
Resolving deltas: 100% (2052/2052), done.
Submodule 'kso_utils' (https://github.com/ocean-data-factory-sweden/kso_utils.git) registered for path 'kso_utils'
Cloning into '/content/koster_data_management/kso_utils'...
remote: Enumerating objects: 688, done.        
remote: Counting objects: 100% (64/64), done.        
remote: Compressing objects: 100% (48/48), done.        
remote: Total 688 (delta 34), reused 39 (delta 16), pack-reused 624        
Receiving objects: 100% (688/688), 340.07 KiB | 216.00 KiB/s, done.
Resolving deltas: 100% (465/465), done.
Submodule path 'kso_utils': checked out 'c1a977d8cd3afb35514825c397d7fd2f802ed976'
Looking in indexes: https

Found existing installation: ipywidgets 7.6.5
Uninstalling ipywidgets-7.6.5:
  Successfully uninstalled ipywidgets-7.6.5
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ipywidgets
  Downloading ipywidgets-8.0.1-py3-none-any.whl (133 kB)
[K     |████████████████████████████████| 133 kB 4.4 MB/s 
Collecting widgetsnbextension~=4.0
  Downloading widgetsnbextension-4.0.2-py3-none-any.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 39.1 MB/s 
Installing collected packages: widgetsnbextension, ipywidgets
  Attempting uninstall: widgetsnbextension
    Found existing installation: widgetsnbextension 3.5.2
    Uninstalling widgetsnbextension-3.5.2:
      Successfully uninstalled widgetsnbextension-3.5.2
Successfully installed ipywidgets-8.0.1 widgetsnbextension-4.0.2
Enabling notebook extension jupyter-js-widgets/extension...
Paths used for configuration of notebook: 
    	/root/.jupyter/nbconfig/notebook.json
Paths u

## Import Python packages

In [None]:
# Set the directory of the libraries
import sys, os
from pathlib import Path

# Enables testing changes in utils
%load_ext autoreload
%autoreload 2

# Specify the path of the tutorials
os.chdir("koster_data_management/tutorials")
sys.path.append('..')

# Enable third-party widgets
from google.colab import output
output.enable_custom_widget_manager()

# Import required modules
import kso_utils.tutorials_utils as t_utils
import kso_utils.project_utils as p_utils
import kso_utils.t8_utils as t8

print("Packages loaded successfully")

## Choose your project

In [None]:
# Choose the project to work with; the returned object exposes the selection
# through its `.value` attribute, which the database step reads.
# NOTE(review): presumably rendered as a selection widget - confirm in tutorials_utils.
project_name = t_utils.choose_project()

## Initiate the database for the project

In [None]:
# Store the project selected
# (looked up by the name currently held in the chooser's `.value`)
project = p_utils.find_project(project_name=project_name.value)

# Initiate db
# `db_info_dict` is indexed with "db_path" later in this notebook
db_info_dict = t_utils.initiate_db(project)

## Connect to and retrieve info from Zooniverse

In [None]:
# Open the connection to the Zooniverse project
zoo_project = t_utils.connect_zoo_project(project)

# Fetch the subjects, workflows and classifications info from Zooniverse
zoo_info_dict = t_utils.retrieve__populate_zoo_info(
    project=project,
    db_info_dict=db_info_dict,
    zoo_project=zoo_project,
    zoo_info=["subjects", "workflows", "classifications"],
)

# Step 1: Specify the Zooniverse workflow id and version of interest

*Note: A manual export in Zooniverse is required to get the most up-to-date classifications here.*

Make sure your workflows in Zooniverse have different names to avoid issues while selecting the workflow id

In [None]:
# Display a selectable list of workflow names and a list of versions of the workflow of interest
workflows_df = zoo_info_dict["workflows"]
wm = t8.WidgetMaker(workflows_df)
# Bare last expression so the notebook displays `wm`; the choices made here
# are read back through `wm.checks` in the following cells
wm

In [38]:
# Keep only the classifications that belong to the workflow selected above
class_df = t8.get_classifications(
    wm.checks,
    workflows_df,
    wm.checks['Subject type: #0'],
    zoo_info_dict["classifications"],
    db_info_dict["db_path"],
    project,
)

# Step 2: Aggregate classifications received on the workflow of interest

In [None]:
# Specify the agreement threshold required among citizen scientists.
# The choice depends on the subject type selected in the workflow widget and is
# handed to the aggregation step as `agg_params`.
agg_params = t8.choose_agg_parameters(wm.checks['Subject type: #0'])

In [40]:
# Aggregate the filtered classifications using the parameters chosen above;
# the call is unpacked into an aggregated table and a raw one
agg_class_df, raw_class_df = t8.aggregrate_classifications(
    class_df,
    wm.checks['Subject type: #0'],
    project,
    agg_params,
)

# Step 3: Summarise the number of classifications based on the agreement specified

In [None]:
# Count the non-null subject ids within each aggregated label
agg_class_df.groupby("label")["subject_ids"].count()

# Step 4: Display the aggregated classifications in a table

In [None]:
# Display the aggregated classifications dataframe as a table.
# The subject type selected in the workflow widget is passed along with it.
t8.launch_table(agg_class_df, wm.checks['Subject type: #0'])

# Step 5: Use the subject explorer widget to visualise subjects and their aggregated classifications

In [None]:
# Launch the subject viewer on the aggregated classifications,
# passing the subject type selected in the workflow widget
t8.launch_viewer(agg_class_df, wm.checks['Subject type: #0'])

# Step 6: Use the subject explorer widget to get more information about specific subjects and their "raw" classifications

In [44]:
# Launch the classifications_per_subject explorer.
# It works on the raw (non-aggregated) classifications and is given the subject
# type selected in the workflow widget.
t8.explore_classifications_per_subject(raw_class_df, wm.checks['Subject type: #0'])

Combobox(value='', description='Subject id:', ensure_option=True, options=('75299278', '75298964', '75298736',…

Output()

In [None]:
# END