<img align="left" src="https://panoptes-uploads.zooniverse.org/project_avatar/86c23ca7-bbaa-4e84-8d8a-876819551431.png" type="image/png" height=100 width=100>
<h1 align="right">KSO Tutorials #6: Train machine learning models</h1>
<h3 align="right">Written by @jannesgg and @vykanton</h3>
<h5 align="right">Last updated: Nov 3rd, 2021</h5>

# Set up and requirements

### Import Python packages

In [None]:
# Add the parent directory to the module search path. This has to run before
# the `tutorial_utils` imports below (presumably that package lives one
# level above this notebook -- verify against the repository layout).
import sys
from pathlib import Path
sys.path.append('..')

# Set to display dataframes as interactive tables
from itables import init_notebook_mode
init_notebook_mode(all_interactive=True)
# File/folder picker widget used in Step 0 and Step 2
from ipyfilechooser import FileChooser

# Import required modules
# t6: helpers used below for class selection, training parameters and model transfer
import tutorial_utils.t6_utils as t6
# t12: helpers used below for project/workflow selection and classification aggregation
import tutorial_utils.t12_utils as t12
from tutorial_utils.zooniverse_utils import retrieve_zoo_info, populate_subjects, populate_agg_annotations
# getpass prompts for credentials without echoing what is typed
import getpass

print("Packages loaded successfully")

### Choose a project

In [None]:
# Choose the project to work with; later cells read the selection
# through `project.value`.
project = t12.choose_project()

### Initiate SQL database and populate sites, movies and species

In [None]:
# Specify the path of the movies
movies_path = "/cephyr/NOBACKUP/groups/snic2021-6-9/movies/"

# Specify the path of the sql database
db_path = "koster_lab.db"

# Initiate the SQL database.
# `%run -i` executes the starter script inside this notebook's namespace, and
# IPython expands `$movies_path` / `$db_path` to the values set above.
%run -i "../db_starter/starter.py" --movies_path $movies_path --db_path $db_path

# Default weights path (passed to the training script as `--weights` in Step 2)
weights = "/usr/src/app/data_dir/weights/yolov5m.pt"

### Retrieve Zooniverse information

In [None]:
# Prompt for your Zooniverse user name and password.
# getpass does not echo the input, so neither value shows up in the notebook.
# (The project itself was already chosen in the "Choose a project" cell.)
zoo_user = getpass.getpass('Enter your Zooniverse user')
zoo_pass = getpass.getpass('Enter your Zooniverse password')

In [None]:
# Specify the Zooniverse information required throughout the tutorial
zoo_info = ["subjects", "workflows", "classifications"]

# Retrieve the requested Zooniverse information for the chosen project.
# NOTE(review): later cells index the result as `zoo_info_dict[1][<name>]`, so
# the returned object appears to be a sequence whose second element holds the
# per-name data -- confirm against `retrieve_zoo_info`.
zoo_info_dict = retrieve_zoo_info(zoo_user, zoo_pass, project.value, zoo_info)

In [None]:
# Populate the sql database (`db_path`) with the subjects already uploaded
# to Zooniverse for the chosen project
populate_subjects(zoo_info_dict[1]["subjects"], project.value, db_path)

### Specify Zooniverse parameters for frame aggregation

In [None]:
# Display a selectable list of workflow names and a list of versions of the workflow of interest.
# The selections are read in the following cells through `workflow_name.value`,
# `subj_type.value` and `workflow_version.value`.
workflows_df = zoo_info_dict[1]["workflows"]
workflow_name, subj_type = t12.choose_workflows(workflows_df)
workflow_version = t12.choose_w_version(workflows_df, workflow_name.value)

In [None]:
# Look up the id of the workflow whose display name was selected above
# (the first unique match is used)
workflow_id = (
    workflows_df[workflows_df.display_name == workflow_name.value]
    .workflow_id
    .unique()[0]
)

# Retrieve the frame classifications of that workflow and version
class_df = t12.get_classifications(
    workflow_id,
    workflow_version.value,
    'frame',
    zoo_info_dict[1]["classifications"],
    db_path,
)

In [None]:
# Specify the agreement threshold required among citizen scientists
# (the available parameters depend on the subject type selected above)
agg_params = t12.choose_agg_parameters(subj_type.value)

In [None]:
# Aggregate the frame classifications with the chosen parameters; the call
# returns both the aggregated and the raw classifications
agg_class_df, raw_class_df = t12.aggregrate_classifications(
    class_df,
    'frame',
    project.value,
    agg_params,
)

In [None]:
# Add the aggregated frame annotations to the sql database (`db_path`)
populate_agg_annotations(agg_class_df, 'frame', db_path)

### Step 0: Specify important paths and training parameters

In [None]:
# Specify output path where processed data will be stored.
# The chooser starts in the current directory; make your selection in the
# widget before running the next cell, which reads it.
fc = FileChooser('.')
display(fc)

In [None]:
# Store selected output path.
# `fc.selected` is None until a selection has been confirmed in the file
# chooser above. Stop here in that case: otherwise the literal text "None"
# would be passed to the preparation script and used to build `data_path`.
output_folder = fc.selected
if output_folder is None:
    raise ValueError(
        "No output folder selected: choose one in the file chooser above "
        "and re-run this cell."
    )

In [None]:
# Choose species of interest for model training.
# The selection is read in the next cell through `class_list.value`.
class_list = t6.choose_classes(db_path)

In [None]:
# Store selected classes of interest as a plain list
# (passed to the preparation script as `--class_list` in Step 1)
cl = list(class_list.value)

In [None]:
# Determine your training parameters.
# Each returned object exposes the chosen setting through `.value`, read later as:
#   percentage_test.value -> `-pt` of the preparation script (Step 1)
#   batch_size.value      -> `--batch` of the training script (Step 2)
#   epochs.value          -> `--epochs` of the training script (Step 2)
#   conf_thres.value      -> `--conf-thres` of the evaluation script (Step 3)
percentage_test, batch_size, epochs, conf_thres = t6.choose_test_prop()

### Step 1: Prepare the aggregated data

In [None]:
# Run the preparation script inside this notebook's namespace (`%run -i`).
# IPython expands the `$name` / `$name.attr` references to the values chosen
# in Step 0; the trailing backslash continues the command on the next line.
# Data is written to `output_folder`, which Step 2 expects to contain "koster.yaml".
%run -i "../src/prepare_zooniverse.py" -o $output_folder -db $db_path -m $movies_path -pt $percentage_test.value \
     --class_list $cl --img_size 720 576

### Step 2: Train the model with selected parameters

In [None]:
# `os` is needed for the WANDB_DIR setting below and is not imported by the
# set-up cell, so import it here to keep this cell self-contained.
import os

# Fix important paths
data_path = str(Path(output_folder, "koster.yaml"))
hyps_path = str(Path("/usr/src/app/data", "hyps", "hyp.scratch.yaml"))

# Choose folder that will contain the different model runs
project_path = FileChooser('/cephyr/NOBACKUP/groups/snic2021-6-9/models/koster-ml')
exp_name = "test_model"
display(project_path)

# Convert the chosen batch size to an int for the training command.
# On the first run `batch_size` is the object returned by
# `t6.choose_test_prop()` and the setting is read from `.value`; once this
# cell has run it is already an int, so fall back to the value itself
# instead of failing with AttributeError when the cell is re-run.
batch_size = int(getattr(batch_size, "value", batch_size))

# Choose a directory to store WANDB logs locally
main_directory = "/cephyr/NOBACKUP/groups/snic2021-6-9/"
os.environ['WANDB_DIR'] = main_directory

In [None]:
# Train YOLO model.
# Runs the training script in this notebook's namespace with the data config,
# hyperparameters, starting weights, batch size and number of epochs chosen
# above. The run is stored under `<project_path.selected>/<exp_name>`, so
# select the run folder in the file chooser above before running this cell.
# NOTE(review): `--single-cls` presumably makes the script treat all labels as
# one class even when several classes were selected in Step 0 -- confirm this
# is intended.
%run -i "/usr/src/app/train.py" --entity koster --data $data_path --hyp $hyps_path --weights $weights  \
                          --project $project_path.selected --name $exp_name --batch $batch_size --epochs $epochs.value \
                          --single-cls --workers 4

### Step 3: Evaluate model performance on test set

In [None]:
# Find trained model weights.
# The YOLOv5 training script saves its checkpoints in a "weights" subfolder
# of the run directory (<project>/<name>/weights/best.pt), so that subfolder
# has to be part of the path.
# NOTE(review): if a run called `exp_name` already existed, the training
# script may have written to an incremented folder name -- verify the run
# directory when evaluation cannot find the file.
tuned_weights = str(Path(project_path.selected, exp_name, "weights", "best.pt"))

In [None]:
# Evaluate the trained YOLO model on unseen test data for the mAP metric,
# using the fine-tuned weights found above and the confidence threshold
# chosen in Step 0
%run -i "/usr/src/app/test.py" --data $data_path --weights $tuned_weights --conf-thres $conf_thres.value

### Transfer model to web app server (for API use)

In [None]:
# Prompt for the credentials of the web app server; getpass does not echo
# the input, so neither value shows up in the notebook
server_user = getpass.getpass('Enter your server user')
server_pass = getpass.getpass('Enter your server password')

In [None]:
# Transfer the trained model to the web app server.
# Use `exp_name` (set in Step 2, "test_model" by default) rather than a
# second hard-coded copy of the run name, so that renaming the experiment
# cannot leave this call pointing at a different model.
t6.transfer_model(exp_name, "koster/koster-ml", server_user, server_pass)

In [None]:
#END