<img align="left" src="https://panoptes-uploads.zooniverse.org/project_avatar/86c23ca7-bbaa-4e84-8d8a-876819551431.png" type="image/png" height=100 width=100>
</img>
<h1 align="right">Colab KSO Tutorials #5: Train machine learning models</h1>
<h3 align="right">Written by @jannesgg and @vykanton</h3>
<h5 align="right">Last updated: April 12, 2022</h5>

# Set up and requirements

### Install and import Python packages

In [1]:
# Clone the koster_yolov4 repository together with its submodules (kso_utils),
# then install the Python requirements listed in the cloned repository.
# NOTE(review): re-running this cell after a successful clone will make git complain
# that the destination folder already exists.
!git clone --recurse-submodules https://github.com/ocean-data-factory-sweden/koster_yolov4.git
!pip install -r koster_yolov4/requirements.txt

Cloning into 'koster_yolov4'...
remote: Enumerating objects: 362, done.[K
remote: Counting objects: 100% (362/362), done.[K
remote: Compressing objects: 100% (279/279), done.[K
remote: Total 362 (delta 179), reused 239 (delta 81), pack-reused 0[K
Receiving objects: 100% (362/362), 505.20 KiB | 4.95 MiB/s, done.
Resolving deltas: 100% (179/179), done.
Submodule 'kso_utils' (https://github.com/ocean-data-factory-sweden/kso_utils.git) registered for path 'kso_utils'
Cloning into '/content/koster_yolov4/kso_utils'...
remote: Enumerating objects: 347, done.        
remote: Counting objects: 100% (220/220), done.        
remote: Compressing objects: 100% (153/153), done.        
remote: Total 347 (delta 138), reused 144 (delta 65), pack-reused 127        
Receiving objects: 100% (347/347), 195.01 KiB | 3.42 MiB/s, done.
Resolving deltas: 100% (220/220), done.
Submodule path 'kso_utils': checked out '3a2fb178ba99e9912ee96999f379efcbc050e24d'
Obtaining panoptes_client from git+https://gith

In [21]:
# Workaround to avoid opencv and pims library issues, based on https://stackoverflow.com/questions/71204741/how-to-fix-error-module-cv2-has-no-attribute-legacy-on-python-3-7-9-and-w
# Both opencv distributions are removed first so that only opencv-contrib-python
# is installed afterwards.
!pip uninstall opencv-python -y
!pip uninstall opencv-contrib-python -y
!pip install opencv-contrib-python

Found existing installation: opencv-contrib-python 4.5.5.62
Uninstalling opencv-contrib-python-4.5.5.62:
  Successfully uninstalled opencv-contrib-python-4.5.5.62
Collecting opencv-contrib-python
  Downloading opencv_contrib_python-4.5.5.64-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (66.7 MB)
[K     |████████████████████████████████| 66.7 MB 23 kB/s 
Installing collected packages: opencv-contrib-python
Successfully installed opencv-contrib-python-4.5.5.64


In [None]:
# Test if panoptes can be loaded
try:
  !pip install git+https://github.com/zooniverse/panoptes-python-client.git
  import panoptes_client
except:
  print('Restarting runtime...')
  exit()

In [2]:
# Set the directory of the libraries
import sys, os
from pathlib import Path
# Move into the notebooks folder of the cloned repository. The path is relative,
# so it only resolves the first time the cell runs; the guard keeps the cell
# re-runnable instead of failing with FileNotFoundError on a second execution.
notebooks_dir = os.path.join("koster_yolov4", "notebooks")
if os.path.isdir(notebooks_dir):
    os.chdir(notebooks_dir)
elif not os.path.isdir(os.path.join("..", "kso_utils")):
    # Neither the repository is below us nor are we already inside its notebooks folder
    raise FileNotFoundError(
        f"Cannot find '{notebooks_dir}'. Run the installation cell before this one."
    )

# Make the repository root importable, without stacking duplicate entries on re-runs
if ".." not in sys.path:
    sys.path.append("..")

# Import required modules
import kso_utils.tutorials_utils as t_utils
import kso_utils.server_utils as s_utils
import kso_utils.project_utils as p_utils
import kso_utils.t4_utils as t4
import kso_utils.t5_utils as t5
import kso_utils.t8_utils as t8
from src.prepare_zooniverse import frame_aggregation
from kso_utils.zooniverse_utils import populate_agg_annotations

# Model-specific imports
# import yolo_train as train
# import yolo_test as test
# import yolo_detect as detect

print("Packages loaded successfully")



Packages loaded successfully


### Choose your project

In [3]:
# Show a dropdown with the available projects; the chosen entry is read
# from `project_name.value` in the following cell.
project_name = t_utils.choose_project()

Dropdown(description='Project:', options=('Koster_Seafloor_Obs', 'Spyfish_Aotearoa', 'SGU', 'Medins'), value='…

In [4]:
# Look up the project that matches the name currently selected in the dropdown
project = p_utils.find_project(project_name=project_name.value)

### Initiate SQL database and populate sites, movies and species

In [5]:
# Initiate the database for the chosen project (in this run it prompts for the
# AWS server credentials). Later cells read the "db_path" entry of the result.
db_info_dict = t_utils.initiate_db(project)

Enter the key id for the aws server··········
Enter the secret access key for the aws server··········


None/sites_buv_doc.csv: 100%|██████████| 145k/145k [00:01<00:00, 134kB/s]
None/movies_buv_doc.csv: 100%|██████████| 14.2k/14.2k [00:00<00:00, 33.2kB/s]
None/species_buv_doc.csv: 100%|██████████| 7.53k/7.53k [00:00<00:00, 18.0kB/s]
None/surveys_buv_doc.csv: 100%|██████████| 1.78k/1.78k [00:00<00:00, 4.05kB/s]
None/choices_buv.csv: 100%|██████████| 3.54k/3.54k [00:00<00:00, 8.38kB/s]


Updated sites
Updated movies
Updated species


In [6]:
# Connect to Zooniverse project (prompts for the Zooniverse user and password)
zoo_project = t_utils.connect_zoo_project(project)

Enter your Zooniverse user··········
Enter your Zooniverse password··········


### Retrieve Zooniverse information

In [7]:
# Retrieve the subjects, workflows and classifications from Zooniverse.
# Later cells read the "workflows" and "classifications" entries of the result.
zoo_info_dict = t_utils.retrieve__populate_zoo_info(
    project=project,
    db_info_dict=db_info_dict,
    zoo_project=zoo_project,
    zoo_info=["subjects", "workflows", "classifications"],
)

Retrieving subjects from Zooniverse
subjects were retrieved successfully
Retrieving workflows from Zooniverse
workflows were retrieved successfully
Retrieving classifications from Zooniverse
classifications were retrieved successfully
Updated subjects
The database has a total of 978 frame subjects and 4993 clip subjects have been updated


# Prepare the labelled frames

### Select species of interest and path to store the data

In [8]:
# Choose species of interest for model training
# (multi-select widget; the selection is read from `species_i.value` in the next cell)
species_i = t4.choose_species(db_info_dict["db_path"])

SelectMultiple(description='Species', index=(0,), options=('Bait', 'Banded weedfish', 'Banded wrasse', 'Barrac…

In [9]:
# Store selected classes of interest as a plain list; `cl` is passed to
# frame_aggregation further down.
cl = list(species_i.value)
print("The select species are", cl)

The select species are ['Blue cod']


In [10]:
# Specify path to store the labelled frames and annotations
# (file-chooser widget starting at the current directory; read via `fc.selected` below)
fc = t_utils.choose_folder(".", "output")

FileChooser(path='.', filename='', title='HTML(value='Choose location of output')', show_hidden='False', use_d…

In [12]:
# Store selected output path; run this only after a folder has been picked
# in the file chooser above, because the value is read once at this point.
output_folder = fc.selected

### Aggregate classifications from Zooniverse

In [13]:
# Display a selectable list of workflow names and a list of versions of the workflow of interest
workflows_df = zoo_info_dict["workflows"]
wm = t8.WidgetMaker(workflows_df)
# The bare expression renders the widget; its selections are read later via `wm.checks`
wm

WidgetMaker(children=(IntText(value=0, description='Number of workflows:', style=DescriptionStyle(description_…

Output()

In [15]:
# Retrieve classifications from the workflow of interest.
# Inputs: the workflow widget selections, the workflows table, the subject type
# ("frame"), the raw Zooniverse classifications, the database path and the project.
class_df = t8.get_classifications(
    wm.checks,
    workflows_df,
    "frame",
    zoo_info_dict["classifications"],
    db_info_dict["db_path"],
    project,
)

There are 2 classifications out of 8717 missing subject info. Maybe the subjects have been removed from Zooniverse?
Zooniverse classifications have been retrieved


In [16]:
# Specify the agreement threshold required among citizen scientists
# (slider widgets; the result is passed to the aggregation step below)
agg_params = t8.choose_agg_parameters("frame")

FloatSlider(value=0.8, continuous_update=False, description='Aggregation threshold:', max=1.0, readout_format=…

IntSlider(value=3, continuous_update=False, description='Min numbers of users:', max=15, min=1, style=SliderSt…

FloatSlider(value=0.8, continuous_update=False, description='Object threshold:', max=1.0, readout_format='.1f'…

FloatSlider(value=0.5, continuous_update=False, description='IOU Epsilon:', max=1.0, readout_format='.1f', sty…

FloatSlider(value=0.8, continuous_update=False, description='Inter user agreement:', max=1.0, readout_format='…

In [17]:
# Aggregate the retrieved "frame" classifications using the parameters chosen
# with the sliders; returns both the aggregated and the raw classifications.
agg_class_df, raw_class_df = t8.aggregrate_classifications(
    class_df,
    "frame",
    project,
    agg_params,
)

Aggregrating the classifications
28 classifications aggregated out of 969 unique subjects available


In [18]:
# Add the aggregated frame annotations to the db
populate_agg_annotations(agg_class_df, 'frame', project)

Updated agg_annotations_frame


### Download frames and aggregated annotations

In [19]:
# Determine your training parameters: choose the test proportion with a slider
# (read as `percentage_test.value` by the preparation script below)
percentage_test = t5.choose_test_prop()

FloatSlider(value=0.2, continuous_update=False, description='Test proportion:', max=1.0, readout_format='.1f',…

In [20]:
# Run the preparation script for the selected species, writing into the chosen
# output folder. (720, 540) matches the img_size later given to the training call.
frame_aggregation(
    project,
    db_info_dict,
    output_folder,
    percentage_test.value,
    cl,
    (720, 540),
    remove_nulls=True,
    track_frames=True,
    n_tracked_frames=10,
)

  self._stream.seek(timestamp + self._first_pts)
  0%|          | 0/3 [10:13<?, ?it/s]


KeyboardInterrupt: ignored

# Train and evaluate the ML model

In [None]:
# Fix important paths
# Dataset configuration: a ".yaml" file in the output folder whose name does not
# contain "hyp". os.listdir returns names in arbitrary order, so the candidates are
# sorted to make the pick reproducible (the alphabetically last one is used).
data_yamls = sorted(
    file_name for file_name in os.listdir(output_folder)
    if file_name.endswith(".yaml") and "hyp" not in file_name
)
if not data_yamls:
    # Fail with an actionable message instead of a bare IndexError
    raise FileNotFoundError(
        f"No dataset .yaml file found in '{output_folder}'. "
        "Make sure the frame preparation step ran to completion."
    )
data_path = str(Path(output_folder, data_yamls[-1]))
hyps_path = str(Path(output_folder, "hyp.yaml"))
weights = "yolov5m.pt"

# Project-specific information
entity = "koster"
exp_name = "test"

# Choose folder that will contain the different model runs.
# Uses the same helper as the output-folder cell earlier in this notebook, which
# rendered its file chooser without needing FileChooser/display in this namespace.
project_path = t_utils.choose_folder(".", "model runs")

### Train model with given configuration

In [None]:
# Train the model with the dataset/hyperparameter files and the run folder chosen above.
# NOTE(review): `train`, `batch_size` and `epochs` are not defined in any cell visible
# here (the `import yolo_train as train` line near the top is commented out) —
# confirm where they come from before running on a fresh kernel.
train.run(entity=entity, data=data_path, hyp=hyps_path, weights=weights, 
          project=project_path.selected, name=exp_name,
          img_size=[720, 540], batch=int(batch_size.value),
          epochs=epochs.value, workers=4, single_cls=True, cache_images=True)

### Evaluate model performance on test set

In [None]:
# Choose model
# Start browsing in the folder that holds the model runs. Uses the same helper as
# the output-folder cell earlier in this notebook, which rendered its file chooser
# without needing FileChooser/display in this namespace.
eval_model = t_utils.choose_folder(project_path.selected, "trained model")

In [None]:
# Find trained model weights: <runs folder>/<selected model>/weights/best.pt,
# kept as a plain string
tuned_weights = str(
    Path(project_path.selected, eval_model.selected, "weights", "best.pt")
)

In [None]:
# Evaluate YOLO Model on Unseen Test data for mAP metric

In [None]:
# Evaluate the tuned weights on the dataset described by `data_path`.
# NOTE(review): `test` and `conf_thres` are not defined in any cell visible here
# (the `import yolo_test as test` line near the top is commented out) — confirm.
test.run(data=data_path, weights=tuned_weights, conf_thres=conf_thres.value, imgsz=640)

### Transfer model to web app server (for API use)

In [None]:
import getpass

In [None]:
# Ask for the server credentials interactively so they are never stored in the notebook
# (getpass hides what is typed, for the user name as well as the password)
server_user = getpass.getpass('Enter your server user')
server_pass = getpass.getpass('Enter your server password')

In [None]:
# Transfer the selected model to the server using the credentials entered above.
# NOTE(review): `t6` is not imported in any cell visible here (only t4, t5 and t8 are) —
# confirm the import. The model name and "koster/sgu" argument are hard-coded.
t6.transfer_model("bm_nmi18_enhanced", eval_model.selected, "koster/sgu", server_user, server_pass)

# (Experimental): Enhance annotations using the trained model

In [None]:
import detect

# Run the tuned weights over the images in the output folder with a confidence
# threshold of 0.02, asking for the results to be saved as text (save_txt=True)
detect.run(
    weights=tuned_weights,
    source=output_folder + "/images",
    imgsz=640,
    conf_thres=0.02,
    save_txt=True,
)

In [None]:
# Choose runs
# Uses the same helper as the output-folder cell earlier in this notebook, which
# rendered its file chooser without needing FileChooser/display in this namespace.
runs = t_utils.choose_folder(".", "detection run")

In [None]:
import shutil

# Keep the original labels as "labels_org" and put the labels of the selected run
# in their place. Done in Python rather than with `!mv` so that paths containing
# spaces work and a failed move raises instead of being silently ignored.
labels_dir = os.path.join(output_folder, "labels")
shutil.move(labels_dir, os.path.join(output_folder, "labels_org"))
shutil.move(os.path.join(runs.selected, "labels"), labels_dir)

In [None]:
#END