<a href="https://colab.research.google.com/github/ocean-data-factory-sweden/kso-data-management/blob/main/tutorials/01_Check_and_update_csv_files.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<img align="left" src="https://panoptes-uploads.zooniverse.org/project_avatar/86c23ca7-bbaa-4e84-8d8a-876819551431.png" type="image/png" height=100 width=100>
</img>


<h1 align="right">KSO Tutorial #1: Check and update CSV files</h1>
<h3 align="right">Written by KSO Team</h3>

# Setup and requirements

In [None]:
# @title <font size="5">↓ ឵឵<i>Install kso_data_management and its requirements</font> { vertical-output: true }

from IPython.display import clear_output

try:
    import google.colab
    import os

    IN_COLAB = True
    print("Running in Colab...")

    # Clone kso-data-management repo
    !git clone --quiet --recurse-submodules -b main https://github.com/ocean-data-factory-sweden/kso-data-management.git
    !pip install -q --upgrade pip
    !pip install -q -r kso-data-management/requirements.txt

    # Fix libmagic issue
    !apt-get -qq update && apt-get -qq install -y libmagic-dev > /dev/null

    # Enable external widgets
    from google.colab import output

    output.enable_custom_widget_manager()

    os.chdir("kso-data-management/tutorials")
    try:
        clear_output()
        print("All packages are installed and ready to go!")
    except:
        clear_output()
        print("There have been some issues installing the packages!")
except:
    IN_COLAB = False
    import sys

    # Install requirements
    !pip install -q --no-warn-script-location --upgrade pip
    !pip install -qr ../requirements.txt

    !jupyter nbextension install --user --py widgetsnbextension
    !jupyter nbextension enable --user --py widgetsnbextension
    !jupyter nbextension install --user --py jupyter_bbox_widget
    !jupyter nbextension enable --user --py jupyter_bbox_widget
    !jupyter nbextension enable --user --py ipysheet

    clear_output()
    print("Running locally... you're good to go!")

#######Import Python packages########

# Set the directory of the libraries
import sys, os
from pathlib import Path

# Enables testing changes in utils
%load_ext autoreload
%autoreload 2

# Specify the path of the tutorials
sys.path.append("..")

# Import required modules
import kso_utils.tutorials_utils as t_utils
import kso_utils.project_utils as p_utils
import kso_utils.server_utils as s_utils
import kso_utils.t1_utils as t1

print("Packages loaded successfully")

In [None]:
# @title <font size="5">↓ ឵឵<i>Choose your project</font> { vertical-output: true }

# Keep the object returned by choose_project(); its `.value` is read in the
# next cell to look up the project (presumably a selection widget — confirm
# in kso_utils.tutorials_utils).
project_name = t_utils.choose_project()

In [None]:
# @title <font size="5">↓ ឵឵<i>Initiate project's database</font> { vertical-output: true }
# Look up the project matching the name currently held by the selector
project = p_utils.find_project(project_name=project_name.value)

# Initiate the db for that project; the returned `db_info_dict` is passed to
# most of the following cells
db_info_dict = t_utils.initiate_db(project)

# Review Sites

In [None]:
# @title <font size="5">↓ ឵឵<i>Map sites and metadata</font> { vertical-output: true }

# Build the sites map from the database info and the selected project
kso_map = t1.map_site(db_info_dict, project)
# Bare last expression so the notebook renders the returned object
kso_map

## Manually update sites metadata

In [None]:
# @title <font size="5">↓ ឵឵<i>Select the range of sites to display</font> { vertical-output: true }

# Returns the sites dataframe plus the row/column range selections that the
# spreadsheet cell below consumes
(
    sites_df,
    sites_range_rows,
    sites_range_columns,
) = t1.select_sheet_range(orig_csv="local_sites_csv", db_info_dict=db_info_dict)

In [None]:
# @title <font size="5">↓ ឵឵<i>Update the contents of the cells in the spreadsheet below as needed</font> { vertical-output: true }

# Open the selected range of the sites dataframe as an editable sheet
sites_df_filtered, sites_sheet = t1.open_csv(
    df_range_columns=sites_range_columns,
    df_range_rows=sites_range_rows,
    df=sites_df,
)
# Render the sheet so its cells can be edited
sites_sheet

In [None]:
# @title <font size="5">↓ ឵឵<i>Review the changes</font> { vertical-output: true }

# Compare the edited sheet against the filtered dataframe it was opened from
highlight_changes, sites_sheet_df = t1.display_changes(
    db_info_dict,
    df_filtered=sites_df_filtered,
    isheet=sites_sheet,
)
# Render the comparison
highlight_changes

In [None]:
# @title <font size="5">↓ ឵឵<i>Confirm or deny the changes</font> { vertical-output: true }

# Ask for confirmation before the edited sites sheet is written back
# (local and server csv are identified by the two key names below)
t1.update_csv(
    db_info_dict,
    project,
    serv_csv="server_sites_csv",
    local_csv="local_sites_csv",
    df=sites_df,
    sheet_df=sites_sheet_df,
)

# Review Movies

In [None]:
# @title <font size="5">↓ ឵឵<i>Retrieve info of movies available on the server</font> { vertical-output: true }

# Dataframe describing the movies found on the server; reused by the
# preview and check cells below
available_movies_df = s_utils.retrieve_movie_info_from_server(
    db_info_dict=db_info_dict,
    project=project,
)

## Preview movies

In [None]:
# @title <font size="5">↓ ឵឵<i>Select movies to preview</font> { vertical-output: true }
# Keep the returned selector; its `.value` is passed to preview_movie() in
# the next cell
movie_selected = t_utils.select_movie(available_movies_df)

In [None]:
# @title <font size="5">↓ ឵឵<i>Display the movie</font> { vertical-output: true }
# Preview the movie currently chosen in the selector above
movie_display, movie_path = t_utils.preview_movie(
    movie_i=movie_selected.value,
    available_movies_df=available_movies_df,
    db_info_dict=db_info_dict,
    project=project,
)
# Render the preview
movie_display

## Automatic check of movies metadata

In [None]:
# @title <font size="5">↓ ឵឵<i>Choose the review method</font> { vertical-output: true }
# The returned object is handed as-is to check_movies_csv() below
review_method = t1.choose_movie_review()

In [None]:
# @title <font size="5">↓ ឵឵<i>Specify the GPU availability</font> { vertical-output: true }
# Note: unlike the other selectors (read via `.value`), this one is read via
# `.result` when passed to check_movies_csv() below
gpu_available = t_utils.gpu_select()

In [None]:
# @title <font size="5">↓ ឵឵<i>Check and update the movies information</font> { vertical-output: true }
# Run the movies check using the review method and GPU choice made above
t1.check_movies_csv(
    gpu_available=gpu_available.result,
    review_method=review_method,
    project=project,
    available_movies_df=available_movies_df,
    db_info_dict=db_info_dict,
)

## Manually update movies metadata

In [None]:
# @title <font size="5">↓ ឵឵<i>Select the range of movies to display</font> { vertical-output: true }
# Returns the movies dataframe plus the row/column range selections that the
# spreadsheet cell below consumes
(
    movies_df,
    movies_range_rows,
    movies_range_columns,
) = t1.select_sheet_range(orig_csv="local_movies_csv", db_info_dict=db_info_dict)

In [None]:
# @title <font size="5">↓ ឵឵<i>Update the contents of the cells in the spreadsheet below as needed</font> { vertical-output: true }
# Open the selected range of the movies dataframe as an editable sheet
movies_df_filtered, movies_sheet = t1.open_csv(
    df_range_columns=movies_range_columns,
    df_range_rows=movies_range_rows,
    df=movies_df,
)
# Render the sheet so its cells can be edited
movies_sheet

In [None]:
# @title <font size="5">↓ ឵឵<i>Review the changes</font> { vertical-output: true }

# Compare the edited sheet against the filtered dataframe it was opened from
highlight_changes, movies_sheet_df = t1.display_changes(
    db_info_dict,
    df_filtered=movies_df_filtered,
    isheet=movies_sheet,
)
# Render the comparison
highlight_changes

In [None]:
# @title <font size="5">↓ ឵឵<i>Confirm or deny the changes</font> { vertical-output: true }
# Ask for confirmation before the edited movies sheet is written back
# (local and server csv are identified by the two key names below)
t1.update_csv(
    db_info_dict,
    project,
    serv_csv="server_movies_csv",
    local_csv="local_movies_csv",
    df=movies_df,
    sheet_df=movies_sheet_df,
)

# Review Species

In [None]:
# @title <font size="5">↓ ឵឵<i>Automatically check the species dataframe</font> { vertical-output: true }
# Automatic species check; note that `species_sheet_df` is reassigned by the
# "Review the changes" cell further down
species_sheet_df = t1.check_species_csv(
    project=project,
    db_info_dict=db_info_dict,
)

## Manually update species dataframe

In [None]:
# @title <font size="5">↓ ឵឵<i>Select the range of species to display</font> { vertical-output: true }
# Returns the species dataframe plus the row/column range selections that the
# spreadsheet cell below consumes
(
    species_df,
    species_range_rows,
    species_range_columns,
) = t1.select_sheet_range(orig_csv="local_species_csv", db_info_dict=db_info_dict)

In [None]:
# @title <font size="5">↓ ឵឵<i>Update the contents of the cells in the spreadsheet below as needed</font> { vertical-output: true }

# Open the selected range of the species dataframe as an editable sheet
species_df_filtered, species_sheet = t1.open_csv(
    df_range_columns=species_range_columns,
    df_range_rows=species_range_rows,
    df=species_df,
)
# Render the sheet so its cells can be edited
species_sheet

In [None]:
# @title <font size="5">↓ ឵឵<i>Review the changes</font> { vertical-output: true }

# Compare the edited sheet against the filtered dataframe it was opened from
highlight_changes, species_sheet_df = t1.display_changes(
    db_info_dict,
    df_filtered=species_df_filtered,
    isheet=species_sheet,
)
# Render the comparison
highlight_changes

In [None]:
# @title <font size="5">↓ ឵឵<i>Confirm or deny the changes</font> { vertical-output: true }

# Ask for confirmation before the edited species sheet is written back
# (local and server csv are identified by the two key names below)
t1.update_csv(
    db_info_dict,
    project,
    serv_csv="server_species_csv",
    local_csv="local_species_csv",
    df=species_df,
    sheet_df=species_sheet_df,
)

In [None]:
# END