<img align="left" src="https://panoptes-uploads.zooniverse.org/project_avatar/86c23ca7-bbaa-4e84-8d8a-876819551431.png" type="image/png" height=100 width=100>
</img>
<h1 align="right">KSO Tutorials #4: Add new clips to a Zooniverse workflow</h1>
<h3 align="right">Written by @jannesgg and @vykanton</h3>
<h5 align="right">Last updated: Sep 10th, 2021</h5>

# Set up and requirements

### Import Python packages

In [None]:
# Set the directory of the libraries
import sys
sys.path.append('..')

# Import required modules
import utils.t4_utils as t4
from utils.zooniverse_utils import retrieve_zoo_info, populate_subjects
import getpass

print("Packages loaded successfully")

### Initiate SQL database and populate sites, movies and species

In [None]:
# Specify the path of the movies (relative to this notebook)
movies_path = "../movies"

# Specify the path of the sql database; later cells pass db_path to the
# helper functions that read from or write to it
db_path = "koster_lab.db"

# Initiate the SQL database: %run -i executes the starter script inside this
# notebook's namespace, and IPython substitutes $movies_path and $db_path
# with the values defined above before the script is run
%run -i "../db_starter/starter.py" --movies_path $movies_path --db_path $db_path

### Retrieve Zooniverse information

In [None]:
# Ask, in order, for the Zooniverse user name, password and project number.
# getpass does not echo what is typed; every answer is stored as a string
# and the three names are read by the cells that follow.
zoo_user, zoo_pass, project_n = map(
    getpass.getpass,
    (
        'Enter your Zooniverse user',
        'Enter your Zooniverse password',
        'Enter the number of the Zooniverse project',
    ),
)

In [None]:
# Specify the Zooniverse information required throughout the tutorial;
# these names are also the keys used to index zoo_info_dict further down
# (e.g. zoo_info_dict["subjects"])
zoo_info = ["subjects", "workflows"]

# Retrieve and store the Zooniverse information required throughout the tutorial in a dictionary
# (uses the credentials and project number entered in the previous cell)
zoo_info_dict = retrieve_zoo_info(zoo_user, zoo_pass, project_n, zoo_info)

In [None]:
# Populate the sql database at db_path with the subjects already uploaded to
# Zooniverse, using the "subjects" entry retrieved above
populate_subjects(zoo_info_dict["subjects"], project_n, db_path)

######## Everything after this is WIP ######

### Step 1: Specify the settings to upload underwater videos to Zooniverse
We recommend uploading clips from one movie at a time to avoid duplicates in the project.

In [None]:
# Select the movie to upload the clips from and length of clips.
# Both returned objects expose the current choice through `.value`,
# which is what the following cell reads.
movie_selection, clip_length = t4.choose_movies(db_path)

In [None]:
# Select how many clips you want to upload, based on the movie and clip
# length chosen in the previous cell
n_clips = t4.choose_clips(movie_selection.value, clip_length.value, db_path)

In [None]:
# Select the method used to pick the subject set; its `.value` is passed to
# t4.choose_subjectset in the next cell
# NOTE(review): the available methods are defined in utils/t4_utils - confirm there
subjectset_method = t4.choose_subjectset_method()

In [None]:
# Select the subjectset to upload the clips to, using the subjects retrieved
# from Zooniverse and the method chosen in the previous cell
subjectset_upl = t4.choose_subjectset(zoo_info_dict["subjects"],subjectset_method.value)

In [None]:
# Select any pre-processing required on the clips

Make sure each of your Zooniverse workflows has a different name, to avoid issues when selecting the workflow ID.

### Step 1: Reduce the size of the video and optionally blur sensitive portions
#### The first argument is the directory where movies are stored
*Note: The original files are kept, with the suffix `_orig`, in case of any problems during processing.*

In [None]:
### Define important locations

In [None]:
# Find the folder with the appropriate movies: open a file chooser rooted at
# the project storage directory and show it in the notebook
# NOTE(review): FileChooser and display are not imported in the cells visible
# in this notebook - confirm where they come from before running
fc = FileChooser('/cephyr/NOBACKUP/groups/snic2021-6-9')
display(fc)

In [None]:
# Folder picked in the file chooser (fc) shown by the previous cell
movies_location = fc.selected
# Google Drive sharing link to the movie metadata
movie_metadata_location = "https://drive.google.com/file/d/1IBBm4GqZGUZvnVJ3DbIHwJ_JQQW6sCEl/view?usp=sharing"
# Optional species metadata location if new species appear in movies
species_metadata_location = "https://drive.google.com/file/d/18_5h4fzX7zau-JltIRPoFrltJm_cbxyb/view?usp=sharing"
# Extension passed to process_movies.sh in the next cell
movie_extension = "mov" # if movies are not all of the same extension this will fail
blur_movies = 0 # (optional) if the videos contain sensitive information, use 1 here to blur the clips

In [None]:
# Run the movie processing shell script; IPython substitutes the three
# $variables with the values set in the previous cell before calling bash
!bash /usr/src/app/koster_data_management/ingestion_scripts/process_movies.sh \
        $movies_location $movie_extension $blur_movies

### Step 2: Specify Zooniverse and database credentials

In [None]:
# Prompt for the user name and then the password of a valid Zooniverse
# account; getpass keeps the typed text off the screen.
# These two names feed the upload commands further down.
user_zoo, pass_zoo = [
    getpass.getpass(prompt)
    for prompt in (
        'Enter your Zooniverse user',
        'Enter your Zooniverse password',
    )
]

In [None]:
# Specify your database file location.
# NOTE: this rebinds db_path, replacing the "koster_lab.db" value set near
# the top of the notebook for every cell that runs after this one.
db_path = "/usr/src/app/data_dir/koster_lab-nm-9.db"
# Alternative database location (kept for reference; uncomment to use it instead)
#db_path = "/cephyr/NOBACKUP/groups/snic2021-6-9/db_files/tutorial_demo.db"

### Step 3: Add the new movies and new species (if applicable) to the Koster database

In [None]:
# New movies: register them in the database from the movie metadata and the
# chosen movies folder
# NOTE(review): `add` is not imported in the cells visible in this notebook -
# confirm which module it refers to before running
add.add_new_movies(movie_metadata_location, 
                   db_path,
                   movies_location)
# New species (only needed if new species appear in the movies)

add.add_species(species_metadata_location,
                db_path)

### Step 4: Identify movies of interest

#### We first need to find the ids of the movies we are interested in splitting into clips. We do this by looking at the movies table within the database. 

In [None]:
# Look over the available movies to choose the ones to extract clips from:
# open a connection to the database and load the whole movies table into a
# DataFrame
# NOTE(review): db_utils and pd are not imported in the cells visible in this
# notebook - confirm the imports before running
conn = db_utils.create_connection(db_path)
movies_df = pd.read_sql_query("SELECT * FROM movies", conn)

### Step 5: Upload clips to Zooniverse

The script to upload clips to Zooniverse requires a specification of the total number of clips, the length of each clip, the list of video ids to extract these from and (optionally) the number of clips to be extracted from each video in the list. 

You may receive an error message related to file size if the clips exceed the recommended limit for Zooniverse uploads. In this case, we recommend shortening the clip length until the files are small enough.

In [None]:
# Find movies of interest: display the movies table loaded above as a grid
# NOTE(review): qgrid is not imported in the cells visible in this notebook
qgrid.show_grid(movies_df)

In [None]:
# Specify clip-related parameters (all of them are passed to upload_clips.py
# by the upload command below)
clips_output_path = "/cephyr/NOBACKUP/groups/snic2021-6-9/clips_challenge_1"  # where clips will be stored
clip_length = 10  # in seconds
video_list = [61, 62, 63, 64]  # ids of the movies to extract clips from
# Number of clips to extract from each movie in video_list
# (presumably matched to video_list by position - confirm in upload_clips.py)
n_clips_each = [60, 60, 60, 800]
# Total number of clips. Derived from the per-movie counts instead of being
# typed by hand so the two values cannot drift apart (980 with the values above).
n_clips = sum(n_clips_each)

In [None]:
# Run the script

In [None]:
run = f"export LC_ALL='C.UTF-8' python upload_clips.py -u {user_zoo} -p {pass_zoo} -db {db_path} -fp {clips_output_path} \
     -n {n_clips} -lg {clip_length} -vlist {video_list} \
     -neach {n_clips_each}"
!{run}

In [None]:
#### EVERYTHING BELOW THIS LINE IS STILL IN PROGRESS

## Adding new frames after clips have been classified

In [None]:
# Look over the available species to choose the ones to extract frames from:
# open a connection to the database (rebinding conn) and load the whole
# species table into a DataFrame
# NOTE(review): db_utils and pd are not imported in the cells visible in this
# notebook - confirm the imports before running
conn = db_utils.create_connection(db_path)
species_df = pd.read_sql_query("SELECT * FROM species", conn)

In [None]:
# Preview the first rows of the species table loaded in the previous cell
species_df.head()

In [None]:
!export LC_ALL="C.UTF-8" python upload_frames.py --user $user_zoo --password $pass_zoo \
        --species "Fish (any species)" --db_path $db_path \
        -fp /cephyr/NOBACKUP/groups/snic2021-6-9/processed_frames/fish_any_species