<img align="left" src="https://panoptes-uploads.zooniverse.org/project_avatar/86c23ca7-bbaa-4e84-8d8a-876819551431.png" type="image/png" height=100 width=100>
</img>


<h1 align="right">Colab KSO Tutorial #1: Check and update csv files</h1>
<h3 align="right">Written by @jannesgg and @vykanton</h3>
<h5 align="right">Last updated: Aug 28th, 2022</h5>

# Set up and requirements

## Install koster_data_management and its requirements

In [None]:
# Clone koster_data_management repo
!git clone --recurse-submodules -b dev https://github.com/ocean-data-factory-sweden/koster_data_management.git
!pip install -r koster_data_management/requirements.txt

# Install ipysheet version compatible in colab
!pip install ipysheet==0.4.4

# Restart the session to load the latest packages
exit()

### Import Python packages

In [None]:
# Set the directory of the libraries
import sys, os
from pathlib import Path

# Enables testing changes in utils
%load_ext autoreload
%autoreload 2

# Specify the path of the tutorials
os.chdir("koster_data_management/tutorials")
sys.path.append('..')

# Enable third-party widgets(ipysheet)
from google.colab import output
output.enable_custom_widget_manager()

# Import required modules
import kso_utils.tutorials_utils as t_utils
import kso_utils.project_utils as p_utils
import kso_utils.t1_utils as t1

print("Packages loaded successfully")

### Choose your project

In [2]:
# Show the project dropdown; the selection is read later through project_name.value
project_name = t_utils.choose_project()

Dropdown(description='Project:', options=('Template project', 'Koster_Seafloor_Obs', 'Spyfish_Aotearoa', 'SGU'…

## Initiate database

In [3]:
# Look up the project that was picked in the dropdown
project = p_utils.find_project(project_name=project_name.value)
# Initiate the database for that project; db_info_dict is passed to most cells below
db_info_dict = t_utils.initiate_db(project)

Downloading...
From: https://drive.google.com/uc?id=1PZGRoSY_UpyLfMhRphMUMwDXw4yx1_Fn
To: /content/koster_data_management/tutorials/db_csv_info.zip
100%|██████████| 2.25k/2.25k [00:00<00:00, 3.39MB/s]
ERROR:root:The table movies has invalid entries, please ensure that all columns are non-zero
ERROR:root:The invalid entries are    movie_id     filename  created_on  fps  duration  sampling_start  \
0         1  movie_1.mp4  13/08/2021  NaN       NaN             NaN   
1         2  movie_2.mp4  13/08/2021  NaN       NaN             NaN   
2         3  movie_3.mp4  13/08/2021  NaN       NaN             NaN   
3         4  movie_4.mp4  18/08/2021  NaN       NaN             NaN   
4         5  movie_5.mp4  18/08/2021  NaN       NaN             NaN   

   sampling_end         Author  Site_id  \
0           NaN  Author_name_1        1   
1           NaN  Author_name_2        1   
2           NaN  Author_name_3        1   
3           NaN  Author_name_4        1   
4           NaN  Author_name_

# Review Sites

## Map sites and metadata

In [None]:
# Build the map of the sites and their metadata
kso_map = t1.map_site(db_info_dict, project)
# Leave the map as the last expression so the notebook renders it
kso_map

## Update sites metadata

Select the range of sites to display

In [9]:
# Pick which rows of the local sites csv to display (rendered as a range slider)
sites_df, sites_range = t1.select_sheet_range(
    db_initial_info=db_info_dict,
    orig_csv="local_sites_csv",
)

SelectionRangeSlider(description='Rows to display', index=(0, 5), layout=Layout(padding='35px', width='90%'), …

You can update the contents of the cells in the spreadsheet below

In [10]:
# Open the selected rows of the sites table in an editable sheet
sites_df_filtered, sites_sheet = t1.open_csv(
    df=sites_df,
    df_range=sites_range,
)
# Render the sheet as the cell output
sites_sheet

Sheet(cells=(Cell(column_end=0, column_start=0, numeric_format='0[.]0', row_end=4, row_start=0, squeeze_row=Fa…

### Review and confirm the changes

In [11]:
# Compare the edited sheet against the rows that were originally displayed
highlight_changes, sites_sheet_df = t1.display_changes(
    db_info_dict,
    isheet=sites_sheet,
    df_filtered=sites_df_filtered,
)
# Render the Origin/Update comparison table
highlight_changes

Unnamed: 0_level_0,siteName,siteName,decimalLatitude,decimalLatitude,decimalLongitude,decimalLongitude,geodeticDatum,geodeticDatum,countryCode,countryCode
Unnamed: 0_level_1,Origin,Update,Origin,Update,Origin,Update,Origin,Update,Origin,Update
site_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1,Site_1,Site_1,-39.052281,-39.052281,174.023941,174.023941,WGS84,WGS84,NZ,NZ
2,Site_2,Site_2,-39.057633,-39.057633,174.018739,174.018739,WGS84,WGS84,NZ,NZ
3,Site_3,Site_3,-39.04892,-39.04892,174.0296,174.0296,WGS84,WGS84,NZ,NZ
4,Site_4,Site_4,-39.046859,-39.046859,174.014944,174.014944,WGS84,WGS84,NZ,NZ
5,Site_5,Site_5,-39.053019,-39.053019,174.015039,175.015039,WGS84,WGS84,NZ,NZ


In [12]:
# Confirm or deny the site changes shown above
t1.update_csv(
    db_info_dict,
    project,
    sheet_df=sites_sheet_df,
    df=sites_df,
    local_csv="local_sites_csv",
    serv_csv="server_sites_csv",
)


Are the changes above correct?


HBox(children=(Button(button_style='danger', description='Yes, details are correct', layout=Layout(width='25%'…

# Review Movies (WIP)

## Choose the review method

In [None]:
# Show the radio buttons used to choose how the movies will be reviewed
review_method = t1.choose_movie_review()

RadioButtons(description='What method you want to use to review the movies:', layout=Layout(width='95%'), opti…

## Check the movies information

In [None]:
# Check the movies information using the review method chosen above
movies_sheet_df = t1.check_movies_csv(db_info_dict, project, review_method)

In [None]:
# Confirm or deny the changes
# NOTE(review): the sites and species calls to update_csv also pass `df=...`;
# this call does not. Confirm that update_csv accepts a missing `df`, otherwise
# this cell will fail. (Section is marked WIP.)
t1.update_csv(db_info_dict, 
              project,
              sheet_df = movies_sheet_df,
              local_csv = "local_movies_csv",
              serv_csv = "server_movies_csv"
              )

## Preview movies

### Retrieve info of movies available on the server

In [None]:
# Retrieve the information of the movies that are available on the server
available_movies_df = s_utils.retrieve_movie_info_from_server(
    project=project, db_info_dict=db_info_dict
)

### Select movies to preview

In [None]:
# Choose which of the available movies to preview; read later through movie_selected.value
movie_selected = t_utils.select_movie(available_movies_df)

### Display the movie

In [None]:
# Preview the movie that was picked in the selector
movie_display, movie_path = t_utils.preview_movie(
    project=project,
    db_info_dict=db_info_dict,
    available_movies_df=available_movies_df,
    movie_i=movie_selected.value,
)
# Render the preview as the cell output
movie_display

# Review species

## Check the species dataframe

In [14]:
# Check the species dataframe
# NOTE(review): species_sheet_df is reassigned by the display_changes cell further down
species_sheet_df = t1.check_species_csv(db_initial_info= db_info_dict, project = project)

## Update species dataframe

Select the range of species to display

In [15]:
# Pick which rows of the local species csv to display (rendered as a range slider)
species_df, species_range = t1.select_sheet_range(
    db_initial_info=db_info_dict,
    orig_csv="local_species_csv",
)

SelectionRangeSlider(description='Rows to display', index=(0, 66), layout=Layout(padding='35px', width='90%'),…

You can update the contents of the cells in the spreadsheet below

In [16]:
# Open the selected rows of the species table in an editable sheet
species_df_filtered, species_sheet = t1.open_csv(
    df=species_df,
    df_range=species_range,
)
# Render the sheet as the cell output
species_sheet

Sheet(cells=(Cell(column_end=0, column_start=0, numeric_format='0[.]0', row_end=6, row_start=0, squeeze_row=Fa…

### Review and confirm the changes

In [17]:
# Display the changes
highlight_changes, species_sheet_df = t1.display_changes(db_info_dict,
                   isheet = species_sheet, 
                   df_filtered = species_df_filtered
                   )
highlight_changes

Unnamed: 0_level_0,commonName,commonName,scientificName,scientificName,taxonRank,taxonRank,kingdom,kingdom
Unnamed: 0_level_1,Origin,Update,Origin,Update,Origin,Update,Origin,Update
species_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1,Fan-shaped sponge,Fan-shaped sponge,Phakellia ventilabrum,Phakellia ventilabrum,species,species,Animalia,Animalia
2,Football sponge,Football sponge,Geodia barretti,Geodia barretti,species,species,Animalia,Animalia
3,Sponge (any species),Sponge (any species),Porifera,Porifera std,phylum,phylum,Animalia,Animalia
4,Dead man's fingers,Dead man's fingers,Alcyonium digitatum,Alcyonium digitatum,species,species,Animalia,Animalia
5,Deeplet sea anemone,Deeplet sea anemone,Bolocera tuediae,Bolocera tuediae,species,species,Animalia,Animalia
6,Common sea pen,Common sea pen,Pennatula phosphorea,Pennatula phosphorea,species,species,Animalia,Animalia
7,Tall sea pen,Tall sea pen,Funiculina quadrangularis,Funiculina quadrangularis,species,species,Animalia,Animalia


In [18]:
# Confirm or deny the changes
t1.update_csv(db_info_dict, 
              project,
              sheet_df = species_sheet_df,
              df = species_df,
              local_csv = "local_species_csv",
              serv_csv = "server_species_csv"
              )


Are the changes above correct?


HBox(children=(Button(button_style='danger', description='Yes, details are correct', layout=Layout(width='25%'…

In [None]:
#END