<a href="https://colab.research.google.com/github/wildlifeai/koster_data_management/blob/main/tutorials/COLAB_9_Download_processed_Zooniverse_classifications.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<img align="left" src="https://panoptes-uploads.zooniverse.org/project_avatar/86c23ca7-bbaa-4e84-8d8a-876819551431.png" type="image/png" height=100 width=100>
</img>


<h1 align="right">KSO Tutorial #9: Download processed Zooniverse classifications</h1>
<h3 align="right">Written by KSO Team</h3>

# Set up and requirements

### Install and import Python packages

In [None]:
from IPython.display import clear_output
import os
import sys

try:
    # Enable external widgets
    from google.colab import output

    output.enable_custom_widget_manager()

    IN_COLAB = True
    print("Running in Colab...")

    # Clone repo
    !git clone --recurse-submodules -b master https://github.com/ocean-data-factory-sweden/kso.git
    %pip install -qr <(sed '/Pillow/d;/ipywidgets/d' kso/yolov5_tracker/requirements.txt) -qr <(sed '/Pillow/d;/ipywidgets/d' kso/yolov5_tracker/yolov5/requirements.txt) -qr <(sed '/Pillow/d;/ipywidgets/d' kso/kso_utils/requirements.txt)

    # Fix libmagic issue
    !apt-get -qq update && apt-get -qq install -y libmagic-dev > /dev/null

    # Navigate to the correct folder
    os.chdir("kso/tutorials")

except:
    IN_COLAB = False


# Ensure widgets are shown properly
!jupyter nbextension enable --user --py widgetsnbextension
!jupyter nbextension enable --user --py jupyter_bbox_widget
!jupyter nbextension enable --user --py ipysheet

clear_output()
if IN_COLAB == True:
    print("Running in Colab: All packages are installed and ready to go!")
else:
    print("Running locally... you're good to go!")

In [None]:
# Set the directory of the libraries
import sys, os
from pathlib import Path
from datetime import date

# Enables testing changes in utils
%load_ext autoreload
%autoreload 2

# Specify the path of the tutorials
sys.path.append("..")

# Enable third-party widgets(ipysheet)
from google.colab import output

output.enable_custom_widget_manager()

# Import required modules
import kso_utils.tutorials_utils as t_utils
import kso_utils.project_utils as p_utils
import kso_utils.t8_utils as t8

print("Packages loaded successfully")

### Choose your project

In [None]:
# Pick the project to work on; the returned object's `.value` is read in the
# following cell to look the project up.
project_name = t_utils.choose_project()

## Initiate SQL database

In [None]:
# Look up the project matching the name currently selected in the chooser
selected_project_name = project_name.value
project = p_utils.find_project(project_name=selected_project_name)

# Set up the database for that project and keep the returned info for later cells
db_info_dict = t_utils.initiate_db(project)

### Connect to and retrieve info from Zooniverse

In [None]:
# Open the connection to the Zooniverse project linked to the selected project
zoo_project = t_utils.connect_zoo_project(project)

# Kinds of Zooniverse information requested; the "workflows" and
# "classifications" entries of the result are used further down the notebook
zoo_info_requested = ["subjects", "workflows", "classifications"]

zoo_info_dict = t_utils.retrieve__populate_zoo_info(
    project=project,
    db_info_dict=db_info_dict,
    zoo_project=zoo_project,
    zoo_info=zoo_info_requested,
)

## Select Zooniverse classifications to process

In [None]:
# Display a selectable list of workflow names and a list of versions of the workflow of interest
workflows_df = zoo_info_dict["workflows"]
# Build the selection widget from the workflows table; its `checks` attribute
# is read later when the classifications are fetched
wm = t8.WidgetMaker(workflows_df)
# Leaving `wm` as the last expression of the cell makes the notebook render it
wm

In [None]:
# To add - a widget to select date range of the classifications of interest

In [None]:
# Select classifications from the workflow of interest
# Arguments are passed positionally, so their order must match the signature of
# t8.get_classifications (defined outside this notebook).
class_df = t8.get_classifications(
    wm.checks,  # selections made in the workflow widget above
    workflows_df,  # workflows table retrieved from Zooniverse
    wm.checks["Subject type: #0"],  # presumably the subject type of the first selected workflow — TODO confirm
    zoo_info_dict["classifications"],  # classifications retrieved from Zooniverse
    db_info_dict["db_path"],  # "db_path" entry returned by t_utils.initiate_db
    project,
)

In [None]:
# To add - Process the classifications (i.e. unnest the json classifications) to have a label/species per row

In [None]:
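# To add - TODO(review): this placeholder is garbled — it starts "Modify the vid…" and then repeats the previous cell's text ("Process the classifications (i.e. unnest the json classifications) to have a label/species per row"); confirm the intended step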

In [None]:
# To add - Provide widget option for user to select the columns they want to download

In [None]:
# Download the processed classifications as a csv file
csv_filename = project.Project_name + str(date.today()) + "classifications.csv"
class_df.to_csv(csv_filename)

In [None]:
# END