<img align="left" src="https://panoptes-uploads.zooniverse.org/project_avatar/86c23ca7-bbaa-4e84-8d8a-876819551431.png" type="image/png" height=100 width=100>
</img>
<h1 align="right">Colab KSO Tutorials #5: Train machine learning models</h1>
<h3 align="right">Written by @jannesgg and @vykanton</h3>
<h5 align="right">Last updated: March 22, 2022</h5>

# Set up and requirements

### Install and import Python packages

In [None]:
!git clone --recurse-submodules https://github.com/ocean-data-factory-sweden/koster_yolov4.git
!pip install -r koster_yolov4/requirements.txt

In [None]:
# Test if panoptes can be loaded
try:
  !pip install git+https://github.com/zooniverse/panoptes-python-client.git
  import panoptes_client
except:
  print('Restarting runtime...')
  exit()

In [None]:
%load_ext autoreload
%autoreload 2

import sys, os
from pathlib import Path

os.chdir("koster_yolov4/notebooks")
sys.path.append('..')

In [None]:
# Import required modules
# (kso_utils and src are project-local packages; presumably they are resolved
# through the '..' entry appended to sys.path in the previous cell -- TODO confirm)
import kso_utils.tutorials_utils as t_utils
import kso_utils.server_utils as s_utils
import kso_utils.project_utils as p_utils
import kso_utils.t4_utils as t4
import kso_utils.t5_utils as t5
import kso_utils.t8_utils as t8
from src.prepare_zooniverse import frame_aggregation
from kso_utils.zooniverse_utils import populate_agg_annotations

# Model-specific imports
# NOTE(review): these imports are disabled, but later cells call `train.run(...)`
# and `test.run(...)`. Re-enable them (or import the correct modules) before
# running the training/evaluation cells -- confirm the right module names.
# import yolo_train as train
# import yolo_test as test
# import yolo_detect as detect

print("Packages loaded successfully")

Packages loaded successfully


### Choose your project

In [None]:
# Pick the project from a dropdown widget; the selection is read via `.value` in the next cell
project_name = t_utils.choose_project()

Dropdown(description='Project:', options=('Koster_Seafloor_Obs', 'Spyfish_Aotearoa', 'SGU', 'Medins'), value='…

In [None]:
# Look up the project that matches the name currently selected in the dropdown
project = p_utils.find_project(project_name=project_name.value)

### Initiate SQL database and populate sites, movies and species

In [None]:
# Initiate db
# (prompts for the AWS credentials; the returned dict is reused below, where
# its "db_path" entry is read)
db_info_dict = t_utils.initiate_db(project)

Enter the key id for the aws server··········
Enter the secret access key for the aws server··········


None/sites_buv_doc.csv: 100%|██████████| 145k/145k [00:01<00:00, 134kB/s]
None/movies_buv_doc.csv: 100%|██████████| 14.2k/14.2k [00:00<00:00, 32.4kB/s]
None/species_buv_doc.csv: 100%|██████████| 7.53k/7.53k [00:00<00:00, 17.0kB/s]
None/surveys_buv_doc.csv: 100%|██████████| 1.78k/1.78k [00:00<00:00, 3.95kB/s]
None/choices_buv.csv: 100%|██████████| 3.54k/3.54k [00:00<00:00, 7.58kB/s]


Updated sites
Updated movies
Updated species


In [None]:
# Connect to Zooniverse project
# (prompts for your Zooniverse user name and password)
zoo_project = t_utils.connect_zoo_project(project)

Enter your Zooniverse user··········
Enter your Zooniverse password··········


### Retrieve Zooniverse information

In [None]:
# Retrieve the listed Zooniverse tables; the returned dict is indexed below by
# "workflows" and "classifications"
zoo_info_dict = t_utils.retrieve__populate_zoo_info(
  project=project,
  db_info_dict=db_info_dict,
  zoo_project=zoo_project,
  zoo_info=["subjects", "workflows", "classifications"],
)

Retrieving subjects from Zooniverse
subjects were retrieved successfully
Retrieving workflows from Zooniverse
workflows were retrieved successfully
Retrieving classifications from Zooniverse
classifications were retrieved successfully
Updated subjects
The database has a total of 978 frame subjects and 4993 clip subjects have been updated


# Prepare the labelled frames

### Select species of interest and path to store the data

In [None]:
# Choose species of interest for model training
# (multi-select widget; the selection is read from `species_i.value` in the next cell)
species_i = t4.choose_species(db_info_dict["db_path"])

SelectMultiple(description='Species', index=(0,), options=('Bait', 'Banded weedfish', 'Banded wrasse', 'Barrac…

In [None]:
# Keep the classes picked in the species widget as a plain list
cl = [*species_i.value]
print("The select species are", cl)

The select species are ['Bait', 'Blue cod', 'Other', 'Scarlet wrasse', 'Snapper']


In [None]:
# Specify path to store the labelled frames and annotations
# (file-chooser widget starting in the current directory; the choice is read
# from `fc.selected` in the next cell)
fc = t_utils.choose_folder(".", "output")

FileChooser(path='.', filename='', title='HTML(value='Choose location of output')', show_hidden='False', use_d…

In [None]:
# Store selected output path
output_folder = fc.selected

# Fail early with a clear message when nothing was picked in the widget above,
# instead of passing an empty value on to the cells that build paths from
# `output_folder`.
if not output_folder:
  raise ValueError("No output folder selected: choose one in the widget above and re-run this cell")

### Aggregate classifications from Zooniverse

In [None]:
# Display a selectable list of workflow names and a list of versions of the workflow of interest
workflows_df = zoo_info_dict["workflows"]
wm = t8.WidgetMaker(workflows_df)
# Bare expression so the widget is rendered as the cell output; the selections
# are read from `wm.checks` in the next cell
wm

WidgetMaker(children=(IntText(value=0, description='Number of workflows:', style=DescriptionStyle(description_…

Output()

In [None]:
# Retrieve the 'frame' classifications for the workflow selection held in `wm.checks`
class_df = t8.get_classifications(
  wm.checks,
  workflows_df,
  'frame',
  zoo_info_dict["classifications"],
  db_info_dict["db_path"],
  project,
)

There are 2 classifications out of 8717 missing subject info. Maybe the subjects have been removed from Zooniverse?
Zooniverse classifications have been retrieved


In [None]:
# Specify the agreement threshold required among citizen scientists
# (renders the sliders shown below; the result is passed on as `agg_params`
# to the aggregation step)
agg_params = t8.choose_agg_parameters("frame")

FloatSlider(value=0.8, continuous_update=False, description='Aggregation threshold:', max=1.0, readout_format=…

IntSlider(value=3, continuous_update=False, description='Min numbers of users:', max=15, min=1, style=SliderSt…

FloatSlider(value=0.8, continuous_update=False, description='Object threshold:', max=1.0, readout_format='.1f'…

FloatSlider(value=0.5, continuous_update=False, description='IOU Epsilon:', max=1.0, readout_format='.1f', sty…

FloatSlider(value=0.8, continuous_update=False, description='Inter user agreement:', max=1.0, readout_format='…

In [None]:
# Aggregate the 'frame' classifications with the parameters chosen above; the
# two returned values are unpacked into `agg_class_df` and `raw_class_df`
agg_class_df, raw_class_df = t8.aggregrate_classifications(
  class_df,
  'frame',
  project,
  agg_params,
)

Aggregrating the classifications
115 classifications aggregated out of 969 unique subjects available


In [None]:
# Add annotations to db
# (passes the aggregated 'frame' classifications on for storage; the output
# below names the table that was updated)
populate_agg_annotations(agg_class_df, 'frame', project)

Updated agg_annotations_frame


### Download frames and aggregated annotations

In [None]:
# Determine your training parameters
# (slider for the test proportion; its `.value` is passed to the preparation script below)
percentage_test = t5.choose_test_prop()

FloatSlider(value=0.2, continuous_update=False, description='Test proportion:', max=1.0, readout_format='.1f',…

In [None]:
# Run the preparation script with the selections made above
# NOTE(review): (720, 540) is presumably the frame size -- confirm against
# `frame_aggregation`'s signature
frame_aggregation(
  project,
  db_info_dict,
  output_folder,
  percentage_test.value,
  cl,
  (720, 540),
  remove_nulls=True,
  track_frames=True,
  n_tracked_frames=10,
)

 (repeated 2 more times)


movie key is tapuae-buv-2011/2011 Oakura Control/EE16_2011.mpg
movie key is tapuae-buv-2011/2011 Oakura Control/DD6_2011.mpg
movie key is tapuae-buv-2011/2011 Oakura Control/DD2_2011.mpg
movie key is tapuae-buv-2011/2011 Oakura Control/EE16_2011.mpg
movie key is tapuae-buv-2011/2011 Oakura Control/DD6_2011.mpg
movie key is kapiti-buv-2020/KPT_023/Rangatira pointKAP23_2021.MP4
movie key is tapuae-buv-2011/2011 Oakura Control/DD2_2011.mpg
movie key is tapuae-buv-2011/2011 Oakura Control/DD12_2011.mpg
movie key is tapuae-buv-2011/2011 Oakura Control/DD5_2011.mpg
movie key is tapuae-buv-2011/PMR22_2012.mpg
movie key is tapuae-buv-2011/2011 Oakura Control/DD5_2011.mpg
movie key is tapuae-buv-2011/2011 Oakura Control/DD5_2011.mpg
movie key is tapuae-buv-2011/2011 Oakura Control/DD6_2011.mpg
movie key is tapuae-buv-2011/2011 Oakura Control/EE13_2011.mpg
movie key is tapuae-buv-2011/2011 Oakura Control/DD2_2011.mpg
movie key is tapuae-buv-2011/2011 Oakura Control/DD2_2011.mpg
movie key is tapu



Reading https://marine-buv.s3.amazonaws.com/tapuae-buv-2011/2011%20Oakura%20Control/DD6_2011.mpg?AWSAccessKeyId=<REDACTED>&Signature=<REDACTED>&Expires=1649378274




Reading https://marine-buv.s3.amazonaws.com/tapuae-buv-2011/2011%20Oakura%20Control/DD2_2011.mpg?AWSAccessKeyId=<REDACTED>&Signature=<REDACTED>&Expires=1649378274




Reading https://marine-buv.s3.amazonaws.com/kapiti-buv-2020/KPT_023/Rangatira%20pointKAP23_2021.MP4?AWSAccessKeyId=<REDACTED>&Signature=<REDACTED>&Expires=1649378274




Reading https://marine-buv.s3.amazonaws.com/tapuae-buv-2011/2011%20Oakura%20Control/DD12_2011.mpg?AWSAccessKeyId=<REDACTED>&Signature=<REDACTED>&Expires=1649378274




Reading https://marine-buv.s3.amazonaws.com/tapuae-buv-2011/2011%20Oakura%20Control/DD5_2011.mpg?AWSAccessKeyId=<REDACTED>&Signature=<REDACTED>&Expires=1649378274




Reading https://marine-buv.s3.amazonaws.com/tapuae-buv-2011/PMR22_2012.mpg?AWSAccessKeyId=<REDACTED>&Signature=<REDACTED>&Expires=1649378274
Reading https://marine-buv.s3.amazonaws.com/tapuae-buv-2011/2011%20Oakura%20Control/EE13_2011.mpg?AWSAccessKeyId=<REDACTED>&Signature=<REDACTED>&Expires=1649378274
Reading https://marine-buv.s3.amazonaws.com/tetapuwaeorongokako-buv-2021/WP3/WP3.mp4?AWSAccessKeyId=<REDACTED>&Signature=<REDACTED>&Expires=1649378274
Reading https://marine-buv.s3.amazonaws.com/kapiti-buv-2020/KPT_017/KPT_017_2021_02_26.MP4?AWSAccessKeyId=<REDACTED>&Signature=<REDACTED>&Expires=1649378274




Reading https://marine-buv.s3.amazonaws.com/kapiti-buv-2020/KPT_024/Green%20canKAP24_2021.MP4?AWSAccessKeyId=<REDACTED>&Signature=<REDACTED>&Expires=1649378274




Reading https://marine-buv.s3.amazonaws.com/tapuae-buv-2011/PMR23_2012.mpg?AWSAccessKeyId=<REDACTED>&Signature=<REDACTED>&Expires=1649378274
Reading https://marine-buv.s3.amazonaws.com/tetapuwaeorongokako-buv-2021/WP54/WP54.mp4?AWSAccessKeyId=<REDACTED>&Signature=<REDACTED>&Expires=1649378274
Reading https://marine-buv.s3.amazonaws.com/tapuae-buv-2011/CON28_2012.mpg?AWSAccessKeyId=<REDACTED>&Signature=<REDACTED>&Expires=1649378274
Reading https://marine-buv.s3.amazonaws.com/tetapuwaeorongokako-buv-2021/WP40/WP40.mp4?AWSAccessKeyId=<REDACTED>&Signature=<REDACTED>&Expires=1649378274


  self._stream.seek(timestamp + self._first_pts)
  0%|          | 0/40 [00:02<?, ?it/s]


AttributeError: ignored

# Train and evaluate the ML model

In [None]:
# Fix important paths
# The dataset config is the .yaml file in the output folder that is not the
# hyperparameter file. The candidates are sorted because os.listdir returns
# entries in arbitrary order, and a missing file is reported explicitly
# instead of surfacing as an IndexError.
data_yamls = sorted(
  fname for fname in os.listdir(output_folder)
  if fname.endswith(".yaml") and "hyp" not in fname
)
if not data_yamls:
  raise FileNotFoundError(f"No dataset .yaml file found in {output_folder}")
data_path = str(Path(output_folder, data_yamls[-1]))
hyps_path = str(Path(output_folder, "hyp.yaml"))
weights = "yolov5m.pt"

# Choose folder that will contain the different model runs
# `FileChooser` is not imported anywhere in this notebook, so the same helper
# as for the output folder is used. There it returned a file-chooser widget
# and rendered it without a separate display() call.
# NOTE(review): confirm the helper displays the widget here as well.
project_path = t_utils.choose_folder(".", "model runs")

# Project-specific information
entity = "koster"
exp_name = "test"

### Train model with given configuration

In [None]:
# Train the model with the paths and settings defined in the previous cell.
# NOTE(review): `train`, `batch_size` and `epochs` are not defined in any
# visible cell of this notebook (the model-specific imports are commented out
# and no widget creates the two parameters) -- define them before running.
train.run(
  entity=entity,
  data=data_path,
  hyp=hyps_path,
  weights=weights,
  project=project_path.selected,
  name=exp_name,
  img_size=[720, 540],
  batch=int(batch_size.value),
  epochs=epochs.value,
  workers=4,
  single_cls=True,
  cache_images=True,
)

### Evaluate model performance on test set

In [None]:
# Choose model
# `FileChooser` is not imported anywhere in this notebook, so the same helper
# as for the output folder is used. There it returned a file-chooser widget
# and rendered it without a separate display() call.
# NOTE(review): confirm the helper displays the widget here as well.
eval_model = t_utils.choose_folder(project_path.selected, "trained model")

In [None]:
# Find trained model weights: <runs folder>/<selected model>/weights/best.pt
# (pathlib drops the leading segments when `eval_model.selected` is itself an
# absolute path)
tuned_weights = str(
  Path(project_path.selected, eval_model.selected, 'weights', 'best.pt')
)

In [None]:
# Evaluate the YOLO model on the unseen test data (mAP metric)

In [None]:
# Evaluate the tuned weights on the dataset described by `data_path`.
# NOTE(review): `test` and `conf_thres` are not defined in any visible cell of
# this notebook -- import/define them before running.
test.run(
  data=data_path,
  weights=tuned_weights,
  conf_thres=conf_thres.value,
  imgsz=640,
)

### Transfer model to web app server (for API use)

In [None]:
import getpass

In [None]:
# Ask for the server credentials without echoing them in the notebook output
server_user = getpass.getpass(prompt='Enter your server user')
server_pass = getpass.getpass(prompt='Enter your server password')

In [None]:
# Send the selected model to the server using the credentials entered above.
# NOTE(review): `t6` is not imported in any visible cell of this notebook, and
# the model name / target folder are hard-coded -- confirm both before running.
t6.transfer_model(
  "bm_nmi18_enhanced",
  eval_model.selected,
  "koster/sgu",
  server_user,
  server_pass,
)

# (Experimental): Enhance annotations using the trained model

In [None]:
import detect

# Run the tuned model on the images in the output folder and save the
# detections as text files (save_txt=True). The source path is built with Path
# so that a trailing separator in `output_folder` does not produce a doubled
# "//" in the path.
detect.run(
  weights=tuned_weights,
  source=str(Path(output_folder, "images")),
  imgsz=640,
  conf_thres=0.02,
  save_txt=True,
)

In [None]:
# Choose runs
# `FileChooser` is not imported anywhere in this notebook, so the same helper
# as for the output folder is used. There it returned a file-chooser widget
# and rendered it without a separate display() call.
# NOTE(review): confirm the helper displays the widget here as well.
runs = t_utils.choose_folder(".", "detection run")

In [None]:
import shutil

# Swap the original labels for the ones found in the selected run:
#   <output_folder>/labels  ->  <output_folder>/labels_org   (backup)
#   <runs.selected>/labels  ->  <output_folder>/labels
# Done in Python rather than with `!mv` so that paths containing spaces are
# handled correctly.
labels_dir = Path(output_folder, "labels")
labels_backup_dir = Path(output_folder, "labels_org")
new_labels_dir = Path(runs.selected, "labels")

# Refuse to run twice: moving onto an existing backup folder would nest the
# directories instead of replacing them.
if labels_backup_dir.exists():
  raise FileExistsError(f"{labels_backup_dir} already exists; the labels seem to have been swapped already")
if not new_labels_dir.is_dir():
  raise FileNotFoundError(f"No labels folder found in {runs.selected}")

# str() keeps shutil.move working on Python versions older than 3.9
shutil.move(str(labels_dir), str(labels_backup_dir))
shutil.move(str(new_labels_dir), str(labels_dir))

In [None]:
#END