# Koster lab database

The following cells guide you through creating a SQL database for the Koster project on Zooniverse.

### Essential parameters

In [3]:
# Google Drive sharing links to the csv files with information about the
# species choices, sites, movie filenames and duplicated subjects
# NOTE(review): the recorded run of db_setup/static.py parsed a Google HTML
# page instead of csv data from these links - confirm the scripts can download
# "view?usp=sharing" URLs, or replace them with direct-download links.
sp_file = "https://drive.google.com/file/d/1SXRU8u5qHfRdjx4A6b_f0gJdmkCCFvhX/view?usp=sharing"
si_file = "https://drive.google.com/file/d/1zAhgivZaOBYcIHokBInAQBChfVQee0EU/view?usp=sharing"
mv_file = "https://drive.google.com/file/d/1c3-ZvojsT1VEaEg-uCbrg26U2AhHse10/view?usp=sharing"
dp_file = "https://drive.google.com/file/d/1AGuSMeS-lDBSkXkUkJU3MBpFRkBJPQW9/view?usp=sharing"

# Google Drive id of the duplicates file, i.e. the path segment that follows
# "/d/" in dp_file. The db_setup scripts receive it via --duplicates_file_id.
dp_file_id = dp_file.split("/d/")[1].split("/")[0]

# Specify the path to the movie files
mov_path = "/uploads"

# Path to the database file, passed to the scripts as --db_path
# NOTE(review): assumes db_setup/init.py creates "koster_lab.db" in the working
# directory (the name the reset and summary cells use) - confirm.
db_path = "koster_lab.db"

In [6]:
# Sanity check: list the movie files found under mov_path.
# Uncomment the %cd line to move one directory up first, e.g. when the kernel
# was started in a subfolder and the relative "db_setup/..." paths do not resolve.
#%cd "../"
%ls $mov_path

/usr/src/app/koster_data_management
'000114 TMBL-ROV 2000 Säckenrevet EJ numrerade band.mov'
'000114 TMBL-ROV 2000 Säckenrevet Tape 55.mov'
'000203 TMBL-ROV 2000 Säcken revet EJ numrerade band.mov'
'000203 TMBL-ROV 2000 Säcken EJ numrerade band.mov'
'000203 TMBL-ROV 2000 Säckenrevet Tape 56.mov'
'000203 TMBL-ROV 2000 säcken Tape 56.mov'
'010424 Säckenrevet alfa Tape 74.mov'
'010424 Säckenrevet beta Tape 74.mov'
 01448002.mov
 01448005.mov
 01448006.mov
 01450003.mov
 01450004.mov
 01451001.mov
 01451004.mov
 01451005.mov
 01451006.mov
 01451007.mov
 01451008_1.mov
 01451009.mov
 01451010.mov
 01451011.mov
 01451011_1.mov
 01453002.mov
 01453003.mov
 01453004.mov
 01453005.mov
 01454001.mov
 01454002.mov
 01454003.mov
 01454004.mov
 01454005.mov
 01454006_1.mov
 01455001.mov
 01455002.mov
 01455003.mov
'01459004 2.mov'
'01459004 2_1.mov'
 01484001_1.mov
 01484002_1.mov
 01484003_1.mov
 01484004_1.mov
 01484005_1.mov
 01484006_1.mov
 01484007_1.mov
 01485002_1.mov
 01485005.mov
 0148600

### Create and populate the database

In [7]:
# Delete previous database if exists
#import os
if os.path.exists("koster_lab.db"):
  os.remove("koster_lab.db")
else:
  print("The file does not exist")

# Initiate the db
%run -i "db_setup/init.py"

# Populate the db with info from the csv files
%run -i "db_setup/static.py" --species_csv $sp_file --sites_csv $si_file --movies_csv $mv_file --movies_path $mov_path

Empty DataFrame
Columns: [<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"/><title>Sorry...</title><style> body { font-family: verdana,  arial,  sans-serif; background-color: #fff; color: #000; }</style></head><body><div><table><tr><td><b><font face=sans-serif size=10><font color=#4285f4>G</font><font color=#ea4335>o</font><font color=#fbbc05>o</font><font color=#4285f4>g</font><font color=#34a853>l</font><font color=#ea4335>e</font></font></b></td><td style="text-align: left; vertical-align: bottom; padding-bottom: 15px; width: 50%"><div style="border-bottom: 1px solid #dfdfdf;">Sorry...</div></td></tr></table></div><div style="margin-left: 4em;"><h1>We're sorry...</h1><p>... but your computer or network may be sending automated queries. To protect our users,  we can't process your request right now.</p></div><div style="margin-left: 4em;">See <a href="https://support.google.com/websearch/answer/86640">Google Help</a> for more information.<br/><br/></div>

KeyError: "None of [Index(['koster_site_id', 'siteName', 'decimalLatitude', 'decimalLongitude',\n       'geodeticDatum', 'countryCode'],\n      dtype='object')] are in the [columns]"

In [None]:
# Initiate the db
%run -i "db_setup/init.py"

# Populate the db with info from the csv files
%run -i "db_setup/static.py" --species_csv $sp_file --sites_csv $si_file --movies_csv $mv_file --movies_path $mov_path

# Populate the db with info of subjects uploaded to Zooniverse
%run -i "db_setup/subjects_uploaded.py" --user $user_zoo --password $pass_zoo --db_path $db_path --duplicates_file_id $dp_file_id

# Process the clips that have been classified in Zooniverse
%run -i "db_setup/process_clips.py" --user $user_zoo --password $pass_zoo --db_path $db_path --duplicates_file_id $dp_file_id

# Process the frames that have been classified in Zooniverse
%run -i "db_setup/process_frames.py" --user $user_zoo --password $pass_zoo --db_path $db_path --duplicates_file_id $dp_file_id

### Optional parameters

In [None]:
# Zooniverse workflows of interest: (workflow id, workflow version)
workflow_clip, workflow_clip_version = 11767, 227
workflow_frame, workflow_frame_version = 12852, 21.85

# Agreement threshold required among citizen scientists (clips, frames)
agg_user_clip, agg_user_frames = 0.8, 0.8

# Minimum number of different Zooniverse users required per subject (clips, frames)
min_users_clip, min_users_frames = 3, 5

## Summarise relevant db information

In [None]:
# Run the project's clips_summary helper on the "koster_lab.db" database file.
# NOTE(review): what the helper reports is defined in utils/summary_utils.py,
# which is not visible from this notebook.
from utils.summary_utils import clips_summary
clips_summary("koster_lab.db")

## Upload new frames

### Essential parameters

In [None]:
# Specify the name of the species of interest and path to store the frames
# NOTE: put a "\" before every space in the species name (e.g. r"Deep\ sea\ king\ crab")
# so the name reaches the upload script as a single command-line argument.
# The raw string (r"...") keeps the backslash literally; without it "\ " is an
# invalid escape sequence that Python warns about.
species_i = r"Sugar\ starfish"
folder_frames = "./frames"

### Optional parameters

In [None]:
# Specify the number of frames per clip you would like to upload
# NOTE(review): n_frames is not passed on the upload_frames.py command line in
# this notebook; presumably the script reads it from the namespace shared by
# "%run -i" - confirm.
n_frames = 2

### Upload frames

In [None]:
# Upload frames of the species of interest to Zooniverse.
# user_zoo, pass_zoo and db_path must already be defined in the kernel:
# IPython leaves "$name" unexpanded when the variable does not exist.
%run -i "upload_subjects/upload_frames.py" --user $user_zoo --password $pass_zoo --db_path $db_path --species $species_i --frames_folder $folder_frames 

## Upload new clips

### Essential parameters

In [None]:
# Number of clips to upload, and the folder in which the clips are stored
clips_n, folder_clips = 10, "./clips"

### Optional parameters

In [None]:
# Restrict the upload to one movie file and set the clip length
# (per the original note: 10 second clips from a specific movie)
# NOTE(review): neither value is passed on the upload_clips.py command line in
# this notebook - confirm how the script picks them up.
video_interest, clip_length = 1, 10

### Upload clips

In [None]:
# Extract and upload the clips to Zooniverse.
# user_zoo, pass_zoo and db_path must already be defined in the kernel:
# IPython leaves "$name" unexpanded when the variable does not exist.
%run -i "upload_subjects/upload_clips.py" --user $user_zoo --password $pass_zoo --db_path $db_path --clips_folder $folder_clips --n_clips $clips_n  
