<img align="left" src="https://panoptes-uploads.zooniverse.org/project_avatar/86c23ca7-bbaa-4e84-8d8a-876819551431.png" alt="Project logo" type="image/png" height=100 width=100>


<h1 align="right">Colab KSO Tutorials #8: Analyse Zooniverse classifications</h1>
<h3 align="right">Written by @jannesgg and @vykanton</h3>
<h5 align="right">Last updated: Aug 8th, 2022</h5>

# Set up and requirements

## Install koster_data_management and its requirements

In [None]:
# Clone koster_data_management repo
!git clone --recurse-submodules -b dev https://github.com/ocean-data-factory-sweden/koster_data_management.git
!pip install -r koster_data_management/requirements.txt

# Restart the session to load the latest packages
exit()

### Import Python packages

In [None]:
# Set the directory of the libraries
import sys, os
from pathlib import Path

# Enables testing changes in utils
%load_ext autoreload
%autoreload 2

# Specify the path of the tutorials
os.chdir("koster_data_management/tutorials")
sys.path.append('..')

# Enable third-party widgets
from google.colab import output
output.enable_custom_widget_manager()

# Import required modules
import kso_utils.tutorials_utils as t_utils
import kso_utils.project_utils as p_utils
import kso_utils.t8_utils as t8

print("Packages loaded successfully")

## Choose your project

In [None]:
# Show the project selector; the chosen entry is read later through `project_name.value`
project_name = t_utils.choose_project()

Dropdown(description='Project:', options=('Template project', 'Koster_Seafloor_Obs', 'Spyfish_Aotearoa', 'SGU'…

## Set up initial information

In [None]:
# Look up the project that matches the name currently selected in the project widget
project = p_utils.find_project(project_name=project_name.value)
# Retrieve the database info, the Zooniverse project and the Zooniverse info used by the steps below
# NOTE(review): the exact contents of these three objects are defined in kso_utils.t8_utils — confirm there
db_info_dict, zoo_project, zoo_info_dict = t8.setup_initial_info(project)

### Step 1: Specify the Zooniverse workflow id and version of interest

**Note:** A manual export in Zooniverse is required to get the most up-to-date classifications here.

Make sure your workflows in Zooniverse have different names to avoid issues when selecting the workflow id.

In [None]:
# Display a selectable list of workflow names and a list of versions of the workflow of interest
# The workflows table comes from the Zooniverse info retrieved during the initial set up
workflows_df = zoo_info_dict["workflows"]
# Build the selection widget; the choices made in it are read later through `wm.checks`
wm = t8.WidgetMaker(workflows_df)
# Leave the widget as the last expression so the notebook renders it
wm

WidgetMaker(children=(IntText(value=0, description='Number of workflows:', style=DescriptionStyle(description_…

Output()

In [None]:
# Pull the classifications that belong to the workflow selected in the widget.
# The subject type is read from the first workflow entry ('#0') of the widget choices.
subject_type = wm.checks['Subject type: #0']

class_df = t8.get_classifications(
    wm.checks,
    workflows_df,
    subject_type,
    zoo_info_dict["classifications"],
    db_info_dict["db_path"],
    project,
)

### Step 2: Aggregate classifications received on the workflow of interest

In [None]:
# Specify the agreement threshold required among citizen scientists
# The sliders displayed by this call hold the aggregation settings; they are
# passed on as `agg_params` when the classifications are aggregated
agg_params = t8.choose_agg_parameters(wm.checks['Subject type: #0'])

FloatSlider(value=0.8, continuous_update=False, description='Aggregation threshold:', max=1.0, readout_format=…

IntSlider(value=3, continuous_update=False, description='Min numbers of users:', max=15, min=1, style=SliderSt…

FloatSlider(value=0.8, continuous_update=False, description='Object threshold:', max=1.0, readout_format='.1f'…

FloatSlider(value=0.5, continuous_update=False, description='IOU Epsilon:', max=1.0, readout_format='.1f', sty…

FloatSlider(value=0.8, continuous_update=False, description='Inter user agreement:', max=1.0, readout_format='…

In [None]:
# Aggregate the retrieved classifications with the parameters chosen in the sliders.
# Two dataframes come back: the aggregated classifications and the raw ones.
# (The "aggregrate" spelling is kept exactly as the function is called in this notebook.)
subject_type = wm.checks['Subject type: #0']

agg_class_df, raw_class_df = t8.aggregrate_classifications(
    class_df,
    subject_type,
    project,
    agg_params,
)

### Step 3: Summarise the number of classifications based on the agreement specified

In [None]:
# Count how many aggregated subjects were assigned to each label
agg_class_df.groupby("label")["subject_ids"].count()

label
Bait              207
Blue cod          280
Other              98
Scarlet wrasse     12
Snapper           234
empty             902
Name: subject_ids, dtype: int64

### Step 4: Display the aggregated classifications in a table

In [None]:
# Display the aggregated classifications as a table
# The subject type selected for the first workflow entry ('#0') is passed along with the dataframe
t8.launch_table(agg_class_df, wm.checks['Subject type: #0'])

Unnamed: 0,subject_ids,label,x,y,w,h,https_location,subject_type,filename
0,59602701,Bait,273.0,176.0,51.5,145.5,https://panoptes-uploads.zooniverse.org/subjec...,frame,CON28_2012
1,59602702,Bait,263.0,177.0,65.5,144.5,https://panoptes-uploads.zooniverse.org/subjec...,frame,CON28_2012
2,59602703,Bait,271.0,177.5,47.0,135.0,https://panoptes-uploads.zooniverse.org/subjec...,frame,CON28_2012
3,59602704,Bait,284.0,193.0,47.0,133.0,https://panoptes-uploads.zooniverse.org/subjec...,frame,CON28_2012
4,59602705,Bait,273.5,161.5,44.0,138.5,https://panoptes-uploads.zooniverse.org/subjec...,frame,CON28_2012
...,...,...,...,...,...,...,...,...,...
1728,77370346,empty,,,,,https://panoptes-uploads.zooniverse.org/subjec...,frame,Green canKAP24_2021
1729,77370612,empty,,,,,https://panoptes-uploads.zooniverse.org/subjec...,frame,TUH_035_22_09_2020
1730,77370089,empty,,,,,https://panoptes-uploads.zooniverse.org/subjec...,frame,WP54
1731,77371255,empty,,,,,https://panoptes-uploads.zooniverse.org/subjec...,frame,Onepoto pointKAP20_2020


### Step 5: Use the subject explorer widget to visualise subjects and their aggregated classifications

In [None]:
# Launch the subject viewer
# Choose a subject id in the widget to visualise that subject with its aggregated classifications
t8.launch_viewer(agg_class_df, wm.checks['Subject type: #0'])

Combobox(value='', description='Subject id:', ensure_option=True, options=('59602701', '59602702', '59602703',…

Output()

### Step 6: Use the subject explorer widget to get more information about specific subjects and their "raw" classifications

In [None]:
# Launch the classifications_per_subject explorer
# Uses the raw (non-aggregated) classifications so individual answers per subject can be inspected
t8.explore_classifications_per_subject(raw_class_df, wm.checks['Subject type: #0'])

Combobox(value='', description='Subject id:', ensure_option=True, options=('59602738', '59602667', '59602716',…

Output()

In [None]:
# END