# Human-object interaction (HOI) problem domain Activity Detection 🚶‍♂️🚶‍♀️

Activity detection is a relatively new problem compared to more classical ones such as object detection. This project, carried out as part of the SIT-NVIDIA collaboration, hopes to contribute to the advancement of this area.


This notebook will allow us to easily switch between training/testing ML pipelines, models and other configuration settings using an interactive UI. 💻💻

In [None]:
# Confirm that the kernel runs the Python version this notebook was written for.

from platform import python_version

# The project targets Python 3.8.13 exactly; on any other version the user is
# asked to switch the Jupyter kernel.
if python_version() != '3.8.13':
    print(f"Oops! This is the wrong version! You are currently in {python_version()} but we need 3.8.13. Please change your kernel to Python 3.8.13 and try again.")
else:
    print("Hello, welcome to the HOI Activity Detection Project! You are running 3.8.13 which is the correct version for this project!")

# Imports and Setup
Let's start with the base imports and installing the dependencies.

In [None]:
# Run this cell to install all the required dependencies needed for this notebook

%pip install torch==1.9.0+cu102 torchvision==0.10.0+cu102 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html
# NOTE: the wheels above are the CUDA 10.2 (`+cu102`) builds of PyTorch, so this notebook requires a CUDA-capable GPU.
%pip install timm==0.4.12 scikit-learn==1.1.1 numpy==1.23.0 tqdm==4.38.0 ipython==8.5.0 opencv-python==4.6.0.66  wandb==0.13.3  omegaconf==2.0.6 av==8.0.2 ipywidgets==8.0.2 pandas==1.5.0


In [None]:
# Run to import all dependencies
import importlib 
import sys, inspect
import datetime
import os
import shutil
import requests
import pandas as pd
import subprocess
import wandb
from os import listdir
from os.path import isfile, join
from pathlib import Path
from logging import raiseExceptions

# For display.
import ipywidgets as widgets
from ipywidgets import interact, interactive
from IPython.display import display, clear_output, YouTubeVideo
from IPython.utils import io

# For progress bar.
from tqdm.notebook import tqdm_notebook
from tqdm import tqdm
# Run this to import all dependencies required for this notebook
layout = widgets.Layout(width='auto') 
layout_hidden  = widgets.Layout(visibility = 'hidden')
layout_visible = widgets.Layout(visibility = 'visible')

cwd = Path.cwd()
if "tsu" in str(cwd) or "step" in str(cwd) or "MS-TCT" in str(cwd):
    %cd ..
    cwd = Path.cwd()
    
tsu_wd = cwd /  "tsu"
step_wd = cwd / "step"
mstct_wd = cwd / "MS-TCT"

def widget_slider(description, value,minimum, maximum):
    """Create, display and return an integer slider.

    Args:
        description: Label shown next to the slider.
        value: Initial slider value.
        minimum: Lowest selectable value.
        maximum: Highest selectable value.

    Returns:
        The displayed ``widgets.IntSlider`` (step size 1).
    """
    int_slider = widgets.IntSlider(
        value=value,
        min=minimum,
        max=maximum,
        step=1,
        description=description,
        layout=layout,
        style={'description_width': 'initial'},
    )
    display(int_slider)
    return int_slider

def widget_textbox(description,value):
    """Create, display and return a single-line text box.

    Args:
        description: Label of the text box; also used in the placeholder text.
        value: Initial content of the text box.

    Returns:
        The displayed ``widgets.Text``.
    """
    text_field = widgets.Text(
        placeholder=f"Enter the {description}",
        description=description,
        value=value,
        layout=layout,
        style={'description_width': 'initial'},
    )
    display(text_field)
    return text_field

def widget_button(description):
    """Create, display and return a button.

    Args:
        description: Caption shown on the button.

    Returns:
        The displayed ``widgets.Button``; callers attach handlers via ``on_click``.
    """
    push_button = widgets.Button(
        description=description,
        layout=layout,
        style={'description_width': 'initial'},
    )
    display(push_button)
    return push_button

def widget_dropdown(description, items):
    """Create, display and return a drop-down list.

    Args:
        description: Label shown next to the drop-down.
        items: Options offered to the user.

    Returns:
        The displayed ``widgets.Dropdown``.
    """
    selector = widgets.Dropdown(
        description=description,
        options=items,
        disabled=False,
    )
    display(selector)
    return selector

def widget_multiSelect(description,options,value=()):
    """Create, display and return a multiple-selection list.

    Args:
        description: Label shown next to the list.
        options: Entries the user can choose from.
        value: Entries selected initially. Defaults to an empty, immutable
            tuple so no mutable object is shared between calls.

    Returns:
        The displayed ``widgets.SelectMultiple``.
    """
    multi_select = widgets.SelectMultiple(
        options=options,
        value=value,
        description=description,
        disabled=False,
        layout=layout,
        style={'description_width': 'initial'},
    )
    display(multi_select)
    return multi_select

def widget_output():
    """Create and return a bordered, auto-sized output area.

    Unlike the other helpers, the widget is not displayed here; the caller
    decides where to show it.
    """
    output_layout = {
        'border': '1px solid black',
        'height': 'auto',
        'width': 'auto',
        'overflow': 'auto',
    }
    return widgets.Output(layout=output_layout)

def widget_video(file_path,autoplay):
    """Load a local video file into a player widget, display it and return it.

    Args:
        file_path: Path of the video file to load.
        autoplay: Whether playback should start automatically.

    Returns:
        The displayed ``widgets.Video`` (950 x 600, with controls).
    """
    player = widgets.Video.from_file(
        file_path,
        controls=True,
        autoplay=autoplay,
        width="950",
        height="600",
    )
    display(player)
    return player

def widget_radio(description, items):
    """Create, display and return a group of radio buttons.

    Args:
        description: Label shown next to the group.
        items: Options offered to the user; the first one is pre-selected.

    Returns:
        The displayed ``widgets.RadioButtons``.
    """
    radio_group = widgets.RadioButtons(
        options=items,
        value=items[0],
        description=description,
        disabled=False,
    )
    display(radio_group)
    return radio_group

def widget_youtube_video(videoid,autoplay):
    """Embed a YouTube video in the notebook, display it and return it.

    Args:
        videoid: YouTube id of the video to embed.
        autoplay: Forwarded to ``YouTubeVideo`` as its ``autoplay`` option.

    Returns:
        The displayed ``YouTubeVideo`` object (950 x 600).
    """
    embedded = YouTubeVideo(
        videoid,
        autoplay=autoplay,
        width="950",
        height="600",
    )
    display(embedded)
    return embedded


# Pipeline Selection 👷
There are three algorithms available in this notebook.

1. [Toyota Smart Home](https://project.inria.fr/toyotasmarthome). Toyota Smarthome Untrimmed (TSU) targets the activity detection task in long untrimmed videos. Therefore, TSU keeps the entire recording in which the person is visible. The dataset contains 536 videos with an average duration of 21 mins. The dataset is annotated with 51 activities. 

2. [Spatio-Temporal Progressive Learning for Video Action Detection](https://github.com/NVlabs/STEP). STEP is a progressive learning framework for spatio-temporal action detection in videos. To learn more, the poster can be found at [Google Drive](https://drive.google.com/file/d/1GWWLH5HQM8FoEIutIOzvtURBI6y09NBr/view). For a more in-depth discussion, the paper can be read at [arxiv](https://arxiv.org/abs/1904.09288). It uses the [AVA Actions v2.1 dataset](https://research.google.com/ava/download.html). The dataset is annotated with 80 activities.

3. [CVPR 2022 MS-TCT](https://github.com/dairui01/MS-TCT). "Multi-Scale Temporal ConvTransformer for Action Detection" on the Charades dataset (Localization setting, i.e., Charades_v1_localize). To learn more, the paper can be found at [openaccess](https://openaccess.thecvf.com/content/CVPR2022/papers/Dai_MS-TCT_Multi-Scale_Temporal_ConvTransformer_for_Action_Detection_CVPR_2022_paper.pdf)

# Before You Begin

## TSU - [Toyota Smart Home](https://project.inria.fr/toyotasmarthome)
Please request for the untrimmed dataset available in the [Toyota Smart Home](https://project.inria.fr/toyotasmarthome) website.
1. RGB - Videos in MP4 is required for feature extraction.
    - Place it in `data/TSU/TSU_Videos_mp4/`

## STEP - [Spatio-Temporal Progressive Learning for Video Action Detection](https://github.com/NVlabs/STEP)
This algorithm is more involved: it requires more effort to acquire the dataset yourself, as the videos are hosted on YouTube.

1. Follow the [STEP README](https://github.com/NVlabs/STEP#installation) for installation instructions.
    - Install APEX.
    - Do not clone STEP as it is included in this repository.
    - In a terminal, cd into STEP, and install external packages with `python setup.py build develop`
2. Install ffmpeg - [Tutorial to add ffmpeg to path in Windows](https://www.youtube.com/watch?v=qjtmgCb8NcE&ab_channel=LinuxLeech)
3. Download the dataset from youtube. In the `step/custom_utils` directory, there are scripts to download the videos.
    1. Install yt-dlp : `pip install yt-dlp`
    2. Run `python get_valid_youtube.py` to get the list of valid videos.
    3. Run `python download_vids.py` to download the videos. (this will take quite a long while as the dataset is large)
    4. There may be some copyright issues when downloading videos. 
        - In `get_valid_youtube.py` , comment and uncomment a specified block of code to remove videos that are not available from the train/val annotations.
        - Run `python get_valid_youtube.py` to generate a new ava_train_v2.1_filter.csv and ava_val_v2.1_filter.csv.
    5. Move videos into `step/datasets/ava/videos`
    6. Generate labels using [Dataset Preparation](https://github.com/NVlabs/STEP#dataset-preparation)
        - `python scripts/generate_label.py datasets/ava_val_v2.1_filter.csv`
        - `python scripts/generate_label.py datasets/ava_train_v2.1_filter.csv`
        - Move generated labels `val.pkl` and `train.pkl` into `datasets/ava/label`

## MS-TCT [Multi-Scale Temporal ConvTransformer for Action Detection](https://github.com/dairui01/MS-TCT)
MS-TCT is built on top of the pre-trained I3D features. Thus, feature extraction is needed before training the network.
1. Please download the Charades dataset (24 fps version) from this [link](https://prior.allenai.org/projects/charades).
2. Follow this [repository](https://github.com/piergiaj/pytorch-i3d) to extract the snippet-level I3D feature.



After preparing the dataset and selecting a pipeline, in JupyterLab choose __Kernel > Run Selected Cell and All Below.__


In [None]:
pipelines = ["TSU","STEP","MSTCT"]
pipeline_selected = "TSU"

display_pipeline_output = widget_output()

@display_pipeline_output.capture(clear_output=True,wait=True)
def pipeline_onclick(args):
    global pipeline_selected 
    pipeline_selected= pipeline_radio.value
    if pipeline_selected == "TSU": 
        %cd {tsu_wd}
        print("TSU selected. In Jupyter Lab, Kernel > Run Selected Cell and All Below")
    elif pipeline_selected == "STEP":
        %cd {step_wd}
        print("STEP selected. In Jupyter Lab, Kernel > Run Selected Cell and All Below")
    elif pipeline_selected == "MSTCT":
        %cd {mstct_wd}
        print("MS-TCT selected. In Jupyter Lab, Kernel > Run Selected Cell and All Below")
    else:
        print("Please select a pipeline")
    print("This action sets the pipeline_selected variable to the selected pipeline and will be used as a reference for the rest of the ntoebook.")

pipeline_radio = widget_radio("Pipeline", pipelines)
pipeline_button = widget_button("Select Pipeline")
pipeline_button.on_click(pipeline_onclick)

display(display_pipeline_output)


# Before continuing, take note that some sections do not cover some pipelines.

## TSU.
### All sections available
## STEP
### Most sections available except
  - Feature Extraction
  - Create Train/Test Split 
## MS-TCT
 - **Only Training and Evaluation available**

# Data Exploration
Let's explore some of the datasets available in this project.
The following cells allow you to play back the selected video from the selected (TSU/STEP) project. 

In [None]:
if pipeline_selected == "TSU": 
    # TSU Data exploration
    def get_data_dir():
        """Return the names of the sub-directories found directly under ``data_path``."""
        # Plain files are skipped; only directories count as datasets.
        return [
            entry
            for entry in os.listdir(data_path)
            if os.path.isdir(os.path.join(data_path, entry))
        ]

    # Root folder that holds one sub-directory per dataset.
    data_path = f"{cwd}/data/"
    # Video folder of the dataset currently picked; '' while none has been found.
    video_path = ''
    display_dataset_output = widget_output()

    @interact
    def display_dataset(Dataset=get_data_dir()):
        """Offer the videos of the chosen dataset for playback.

        The video folder is the first sub-directory of the dataset whose name
        contains both "video" and "mp4" (case-insensitive). Its path is stored
        in the global ``video_path``.

        Args:
            Dataset: Name of the dataset directory picked in the drop-down.
        """
        global video_path
        global display_show_video
        display_show_video = False
        display_dataset_output.clear_output()

        dataset_path = data_path + Dataset
        video_path = ''
        for entry in os.listdir(dataset_path):
            if not os.path.isdir(os.path.join(dataset_path, entry)):
                continue
            lowered = entry.lower()
            if 'video' in lowered and 'mp4' in lowered:
                video_path = f"{dataset_path}/{entry}"
                break

        if video_path == '':
            print('Oops, no video dataset found! Please try another dataset. To specify video directory, please have the keywords video and mp4 in the name of the directory.')
            return

        choose_video = widgets.Dropdown(
            options=os.listdir(video_path),
            description='Video Name:',
        )

        @display_dataset_output.capture(clear_output=True,wait=True)
        def handle_video_submit(sender):
            """Show the video currently selected in ``choose_video``."""
            with display_dataset_output:
                print(f"Video Name: {choose_video.value}")
                widget_video(f"{video_path}/{choose_video.value}",False)

        choose_video_ui = interactive(handle_video_submit,sender = choose_video)
        display(display_dataset_output)
        display(choose_video_ui)
        
        

if pipeline_selected == "STEP":
    def load_ava():
        """Return the YouTube ids listed in the filtered AVA val and train annotations.

        Only the first column (the video id) of each headerless CSV is read.

        Returns:
            A list of unique video ids, validation ids first, then training ids.
        """
        df_val = pd.read_csv(f"{step_wd}/datasets/ava_val_v2.1_filter.csv", header=None, usecols=[0] , names=['videoid'])
        # The training ids come from the *train* annotation file; reading the
        # val file twice would only repeat the validation videos.
        df_train = pd.read_csv(f"{step_wd}/datasets/ava_train_v2.1_filter.csv", header=None, usecols=[0] , names=['videoid'])

        videoids = df_val["videoid"].unique().tolist() + df_train["videoid"].unique().tolist()
        # Drop any id present in both files while keeping the original order.
        return list(dict.fromkeys(videoids))
    # Output area that shows the embedded YouTube player for the STEP dataset.
    display_step_dataset_output = widget_output()

    
    @display_step_dataset_output.capture(clear_output=True,wait=True)
    def handle_step_submit(sender):
        """Embed the YouTube video whose id is selected in the drop-down.

        Args:
            sender: Value supplied by ``interactive`` for the ``sender`` widget;
                it is only echoed, the id itself is read from
                ``choose_step_video_dropdown``.
        """
        print(sender)
        with display_step_dataset_output:
            print(f"Video Id: {choose_step_video_dropdown.value}")
            widget_youtube_video(choose_step_video_dropdown.value,True)


    # The drop-down is filled with the video ids returned by load_ava().
    choose_step_video_dropdown = widgets.Dropdown(description="Video Id",options=load_ava())
    choose_step_video_ui = interactive(handle_step_submit,sender = choose_step_video_dropdown)
    display(display_step_dataset_output)
    display(choose_step_video_ui)

# Feature Extraction ✂️
## Overview
This section covers feature extraction for the videos of your choice after data exploration. The feature extraction uses the v-iashin video feature extraction method to obtain the I3D RGB and Flow feature dataset. The feature set is later used to train the model, after it has been divided into a train/test split.

## Instructions
1. Run the cell
2. Select the dataset(file path with raw video data needs to be in root directory to be visible)
3. Input the directory where you want the output feature data to be saved at
4. Select the videos that you want to extract
5. Review your settings and run the extraction button


In [None]:
# Feature extraction
if pipeline_selected == "TSU": 
    # --- Settings forwarded to the video_features extractor (main.py) ---
    # What to extract.
    feature_type = "i3d"  # fixed for the TSU pipeline
    streams = "rgb"
    flow_type = "raft"  # the original author advises against "pwc" here
    # How the video is sampled.
    stack_size = 16
    step_size = 16
    # Where it runs and how results are stored.
    device = "cuda:0"  # for multi-GPU usage, refer to the v-iashin GitHub repo
    on_extraction = "save_numpy"  # alternatives: save_numpy (.npy), save_pickle (.pkl)
    # Filled in later from the UI.
    extract_output_path = ""
    extract_video_paths = ""  # can be one video path or a list of video paths, e.g. ["file1.mp4", "file2.mp4"]

    # Utility to move files
    def make_dir_and_move(target_dir,extension):
        """Collect the ``<video>_<extension>.*`` files of ``target_dir`` in a sub-folder.

        A sub-directory named ``extension`` is created inside ``target_dir``.
        Every regular file whose stem ends in ``_<extension>`` is moved there
        and renamed to ``<video>.npy``. Only the trailing ``_<extension>`` is
        stripped, so video names that contain underscores stay intact.

        Args:
            target_dir: Directory holding the extracted feature files; a
                relative path is resolved against the current working directory.
            extension: Stream key in the file names (e.g. "rgb"); also the name
                of the sub-directory to create.
        """
        source_dir = Path().absolute() / target_dir
        save_dir = source_dir / extension
        save_dir.mkdir(parents=True, exist_ok=True)

        suffix = f"_{extension}"
        for entry in os.listdir(source_dir):
            entry_path = source_dir / entry
            stem = Path(entry).stem
            # Skip sub-directories and files that belong to another stream.
            if entry_path.is_dir() or not stem.endswith(suffix):
                continue
            new_file_name = stem[: -len(suffix)] + ".npy"
            shutil.move(str(entry_path), str(save_dir / new_file_name))
        print(f"Features saved in: {save_dir.as_posix()}")

    # Run this to select the preferred dataset and videos you like to extract.
    def get_data_dir():
        """Return the names of the sub-directories found directly under ``data_path``."""
        # Plain files are skipped; only directories count as datasets.
        return [
            entry
            for entry in os.listdir(data_path)
            if os.path.isdir(os.path.join(data_path, entry))
        ]

    # Root folder that holds one sub-directory per dataset.
    data_path = f"{cwd}/data/"

    # Video folder of the dataset currently picked; '' while none has been found.
    extract_video_path = ''

    # Output area that collects the messages of the extraction run.
    extraction_output = widget_output()

    @interact
    def display_dataset(Dataset=get_data_dir()):
        """Build the feature-extraction UI for the chosen dataset.

        The video folder is the first sub-directory of the dataset whose name
        contains both "video" and "mp4" (case-insensitive). When one is found,
        an output-folder text box, a video multi-select and an
        "Extract features" button are shown.

        Args:
            Dataset: Name of the dataset directory picked in the drop-down.
        """
        global extract_output_path
        global extract_video_path
        dataset_path = data_path + Dataset
        extract_video_path = ''
        for d in os.listdir(dataset_path):
            if os.path.isdir(os.path.join(data_path + Dataset, d)):
                if 'video' in d.lower() and 'mp4' in d.lower():
                    extract_video_path = data_path + Dataset + '/' + d
                    break
                    
        if extract_video_path == '':
            print('Oops, no video dataset found! Please try another dataset. To specify video directory, please have the keywords video and mp4 in the name of the directory.')
            
        else:
            global videos_in_dir
            videos_in_dir = os.listdir(extract_video_path)
            feature_extraction_output_textbox = widget_textbox("Feature output folder","data/TSU/TSU_Video_features")
            print("Ctrl A to select all videos. Use Shift or Ctrl to select multiple")
            feature_extraction_input_videos = widget_multiSelect("Select videos to extract",videos_in_dir)
            @extraction_output.capture(clear_output=True)
            
            def feature_extraction_onclick(args):
                """Run the extractor on the selected videos, then sort the results.

                Args:
                    args: Argument supplied by the button's ``on_click``
                        callback; unused.
                """
                selected_videos = list(feature_extraction_input_videos.value)
                extract_output_root_path = feature_extraction_output_textbox.value
                # NOTE: this assignment creates a local of the handler; the
                # ``global`` statement of the enclosing function does not apply
                # to this nested function.
                extract_output_path =  f"{cwd.as_posix()}/{extract_output_root_path}"
                if len(selected_videos):
                    # Turn the selection into the bracketed list literal passed
                    # as video_paths=...: full paths, quotes removed, whole
                    # list wrapped in double quotes.
                    selected_videos = [extract_video_path + "/"+ video  for video in selected_videos]
                    selected_videos = str(selected_videos).replace("'","")
                    input_videos = f"\"{selected_videos}\""
                    
                    with extraction_output:
                        print("Starting feature extraction")
                        
                        # main.py is launched from inside the video_features directory.
                        if(Path().absolute().name != "video_features"): 
                            %cd {tsu_wd}/video_features
                        !python main.py feature_type={feature_type} streams={streams} flow_type={flow_type} device={device} video_paths={input_videos} on_extraction={on_extraction} output_path={extract_output_path} stack_size={stack_size} step_size={step_size}
                        # Back to the project root: the output folder from the
                        # text box is used as a relative path below.
                        %cd {cwd}
                    # time to move files!
                    print(extract_output_root_path)
                    make_dir_and_move(f"{extract_output_root_path}/i3d", "rgb")
                    # make_dir_and_move(f"{extract_output_root_path}/i3d", "flow")
                    # Finish inside the TSU working directory.
                    %cd {tsu_wd}
                    
                else:
                    print("No videos selected.. please select at least one video before extracting features.")

            feature_extraction_button = widget_button("Extract features")
            feature_extraction_button.on_click(feature_extraction_onclick)
            display(extraction_output)
            
else:
    print("Only required for TSU. Move on!")

# Create Train/Test Split - TSU only 💪
With the features extracted in the previous section, let's create a split file (JSON).
The feature directory can be found in the previous output, or the feature output folder you specified.

1) Fill in the feature directory.
2) Choose CS/CV split.
    - CS: 34.5% testing, 65.5% training. 
    - CV: 31% testing, 69% training.
    - Based on 536 video features available. Results will vary.


In [None]:
split_generate_output = widget_output()
if pipeline_selected == "TSU": 
    @split_generate_output.capture(clear_output=True)
    def split_generate_onclick(args):
        feature_dir = feature_directory_textbox.value
        output_path = split_output_json_textbox.value
        selected_split = split_setting_dropdown.value
        split_setting = f"{cwd.as_posix()}/data/TSU/{'smarthome_CS_51.json' if selected_split == 'CS' else 'smarthome_CV_51.json'}"
        %run -m validate_train_test -feature_dir {feature_dir} -output_path {output_path} -split_setting {split_setting}

    feature_directory_textbox = widget_textbox("Feature directory",f"{cwd.as_posix()}/data/TSU/TSU_Video_features/i3d/rgb")
    split_output_json_textbox = widget_textbox("Split Output",f"{cwd.as_posix()}/data/TSU/my_new_split.json")
    split_setting = ["CS","CV"] 
    split_setting_dropdown = widget_dropdown("Split_setting", split_setting)
    split_generate_button = widget_button("Generate split")
    split_generate_button.on_click(split_generate_onclick)

    display(split_generate_output)
else:
    print("Only required for TSU. Move on!")

# Inference 🔍
Result will return you with a video with captions in each frame indicating the current action.
## Overview
By running the inference cell below, it will return a video with captions in each frame indicating what the machine learning model thinks the subject is doing.

### For STEP,
Only TSU video files or videosets that are available locally can be used. Place it inside ./data/DATASET/DATASET_Videos_mp4/123456.mp4

## Instructions

1) Select a dataset folder from the dropdown list

2) Select a video from the dropdown list

3) ONLY FOR TSU. Indicate whether you want to watch the video playback in real-time using the True/False radio buttons
- If you indicated True, a video popup will appear
- If you indicated False, you can only view the inference video manually in the directory after the inference process has completed

4) Press the "Start Inference" button


In [None]:
# Select the model, dataset and video for inference
# Output areas: one for the video preview, one for the inference result.
display_inference_output = widget_output()
display_inference_result_output = widget_output()

if pipeline_selected == "TSU": 

    # Folder that is scanned for trained models.
    model_path = "./model/trained" 

    # Root folder that holds one sub-directory per dataset.
    data_path = f"{cwd.as_posix()}/data/"

    # --- Values passed to inference.py ---
    # Dataset name; replaced by the UI selection.
    inference_dataset = "TSU"
    # Model currently only has PDAN.
    model = "PDAN"
    # AP type does not need to change. No UI required.
    APtype =  "map"
    # Batch size. No UI required.
    batch_size = "1"
    # Model to load; replaced by the UI selection.
    load_model = './PDAN_TSU_RGB'
    # The rgb/flow features.
    root = f'{cwd.as_posix()}/data/TSU/TSU_RGB_i3d_feat/RGB_i3d_16frames_64000_SSD' 

    # Selectable models: the pretrained model first (it has to live inside
    # src), followed by every file below model_path that is not Python source
    # or bytecode, with forward slashes as path separators.
    model_list = ["./PDAN_TSU_RGB"] + [
        os.path.join(dir_path, file_name).replace("\\", "/")
        for dir_path, _, file_names in os.walk(model_path)
        for file_name in file_names
        if os.path.splitext(file_name)[1] not in ('.pyc', '.py')
    ]

    def get_data_dir():
        """Return the names of the sub-directories found directly under ``data_path``."""
        # Plain files are skipped; only directories count as datasets.
        return [
            entry
            for entry in os.listdir(data_path)
            if os.path.isdir(os.path.join(data_path, entry))
        ]


    # Global video path variables, filled in by display_dataset below:
    # the video folder of the chosen dataset and the video names found in it.
    inference_video_path = ''
    inference_dataset_videos = []



    @interact
    def display_models(Model=model_list):
        """Store the model picked in the drop-down in the global ``load_model``.

        Args:
            Model: Entry of ``model_list`` chosen by the user.
        """
        global load_model
        load_model = Model


    @interact
    def display_dataset(Dataset=get_data_dir()):
        """Build the TSU inference UI for the chosen dataset.

        The video folder is the first sub-directory of the dataset whose name
        contains both "video" and "mp4" (case-insensitive). When one is found,
        a video drop-down with preview, a playback radio group and a
        "Start Inference" button are shown.

        Args:
            Dataset: Name of the dataset directory picked in the drop-down.
        """
        dataset_path = data_path + Dataset
        global inference_dataset
        inference_dataset = Dataset
        global choose_inference_video
        global inference_video_path
        global inference_dataset_videos
        global display_inference_output
        global display_inference_result_output
        inference_video_path = ''
        for d in os.listdir(dataset_path):
            if os.path.isdir(os.path.join(data_path + Dataset, d)):
                if 'video' in d.lower() and 'mp4' in d.lower():
                    inference_video_path = data_path + Dataset + '/' + d
                    break
        # clear when selecting new dataset
        display_inference_output.clear_output()
        if inference_video_path == '':
            inference_dataset_videos = []
            print('Oops, no video dataset found! Please try another dataset. To specify video directory, please have the keywords video and mp4 in the name of the directory.')
        else:
            # Video names are listed without their file extension; ".mp4" is
            # appended again when a file is opened.
            inference_dataset_videos = [Path(video).with_suffix('').name for video in os.listdir(inference_video_path)]

            choose_inference_video = widgets.Dropdown(
                options= inference_dataset_videos,
                description='Video Name:',
            )

            @display_inference_output.capture(clear_output=True)
            def handle_inference_video_submit(sender):
                """Preview the video currently selected in the drop-down."""
                with display_inference_output:
                    print(f"Video Name: {choose_inference_video.value}")
                    widget_video(f"{inference_video_path}/{choose_inference_video.value}.mp4", False)


            display(display_inference_output)
            choose_video_ui = interactive(handle_inference_video_submit,sender = choose_inference_video)
            display(choose_video_ui)

            # Value is passed to create_video as its -pop_up argument.
            popup_video_radio = widgets.RadioButtons(
                options=['True', 'False'],
                description='Playback',
                disabled=False
            )
            display(popup_video_radio)
            @display_inference_result_output.capture(clear_output=True)
            def start_inference_onclick(args):
                """Run inference.py on the selected video, then render and show the result.

                Args:
                    args: Argument supplied by the button's ``on_click``
                        callback; unused.
                """
                print(choose_inference_video.value)
                with display_inference_result_output:
                    %run inference.py -dataset {inference_dataset} -model {model} -APtype {APtype} -batch_size {batch_size} -load_model {load_model} -root {root} -video_name {choose_inference_video.value} 
                    print(f"Creating inference video {choose_inference_video.value} with pop up: {popup_video_radio.value}")
                    %run -m create_video -selected_video {choose_inference_video.value} -pop_up {popup_video_radio.value}
                    # The rendered video is loaded from ./inference_video as
                    # inference_<name>.mp4.
                    video_path = "./inference_video"
                    widget_video(f"{video_path}/inference_{choose_inference_video.value}.mp4", True)
            start_inference_button = widget_button("Start Inference")
            start_inference_button.on_click(start_inference_onclick)
            display(display_inference_result_output)
            
if pipeline_selected == "STEP": 
    def get_data_dir():
        """Return the names of the sub-directories found directly under ``data_path``."""
        # Plain files are skipped; only directories count as datasets.
        return [
            entry
            for entry in os.listdir(data_path)
            if os.path.isdir(os.path.join(data_path, entry))
        ]
    
    
    def set_frame_save_location(video_id):
        """Create the frame folder for ``video_id`` and return its path.

        Args:
            video_id: Name of the video; used as the folder name.

        Returns:
            The relative path ``./datasets/demo/frames/<video_id>`` as a string.
            The folder and any missing parents are created if needed.
        """
        frames_dir = "./datasets/demo/frames/{}".format(video_id)
        frames_dir_path = Path(frames_dir)
        frames_dir_path.mkdir(parents=True, exist_ok=True)
        return frames_dir

    def extract_frame(video_path):
        """Dump the frames of ``video_path`` as numbered JPEG files using ffmpeg.

        The frames are written as ``000000.jpg``, ``000001.jpg``, ... into the
        folder returned by ``set_frame_save_location`` for the video's file
        name without extension.

        Args:
            video_path: Path of the video file to split into frames.
        """
        video_id = os.path.basename(video_path)
        clip_dir = set_frame_save_location(Path(video_id).stem)
        print("Working on", video_path)
        # Pass the arguments as a list (no shell) so that paths containing
        # spaces or shell metacharacters reach ffmpeg unchanged.
        ffmpeg_command = [
            "ffmpeg",
            "-i", str(video_path),
            "-start_number", "0",
            "-qscale:v", "4",
            os.path.join(clip_dir, "%06d.jpg"),
        ]
        try:
            return_code = subprocess.call(ffmpeg_command)
        except FileNotFoundError:
            # Without a shell, a missing executable raises instead of
            # returning an error code; report it and carry on.
            print("ffmpeg was not found. Please install ffmpeg and add it to your PATH.")
            return
        if return_code != 0:
            print(f"ffmpeg exited with code {return_code} while processing {video_path}")
        
        
    # Selected video path.
    data_path = f"{cwd}/data/"
    @interact
    def display_dataset(Dataset=get_data_dir()):
        dataset_path = data_path + Dataset
        global choose_step_inference_video
        global step_inference_video_path
        global step_inference_dataset_videos
        for d in os.listdir(dataset_path):
            if os.path.isdir(os.path.join(dataset_path, d)):
                if 'video' in d.lower() and 'mp4' in d.lower():
                    step_inference_video_path = data_path + Dataset + '/' + d
                    break
        print(step_inference_video_path)
        if step_inference_video_path == '':
            step_inference_dataset_videos = []
            print('Oops, no video dataset found! Please try another dataset. To specify video directory, please have the keywords video and mp4 in the name of the directory.')
        else:
            step_inference_dataset_videos = [Path(video).with_suffix('').name for video in os.listdir(step_inference_video_path)]
            
            choose_inference_video = widgets.Dropdown(
                options= step_inference_dataset_videos,
                description='Video Name:',
            )

            #to display the output
            @display_inference_output.capture(clear_output=True)
            def handle_inference_video_submit(sender):
                with display_inference_output:
                    print(f"Video Name: {choose_inference_video.value}")
                    widget_video(f"{step_inference_video_path}/{choose_inference_video.value}.mp4", False)
            display(display_inference_output)
            choose_video_ui = interactive(handle_inference_video_submit,sender = choose_inference_video)
            display(choose_video_ui)
            @display_inference_result_output.capture(clear_output=True)
            def start_inference_onclick(args):
                chosen_video_path = f"{step_inference_video_path}/{choose_inference_video.value}.mp4"
                demo_frame_path = f"{step_wd.as_posix()}/datasets/demo/frames/"
                with display_inference_result_output:
                    print(f"Extracting frames from {choose_inference_video.value}")
                    extract_frame(chosen_video_path)
                    print(f"Inferencing frames from {demo_frame_path}...")
                    %run demo.py -data_root {demo_frame_path}
                    print(f"Creating video from inference results {choose_inference_video.value}...")
                    %run -m create_video -selected_video {choose_inference_video.value} -path {step_wd.as_posix()}
                    video_path = "./inference_video"
                    widget_video(f"{video_path}/inference_{choose_inference_video.value}.mp4", True)
            start_inference_button = widget_button("Start Inference")
            start_inference_button.on_click(start_inference_onclick)
            display(display_inference_result_output)
 



# Training Section 🚞

## Overview

Training is a process where a machine learning model learns from the associated training data.
Models are found in model folder. Select the Architecture and the model config from the two drop down.

## TSU Pipeline

### Instructions

1) Set batch_size using the slider
- batch_size refers to the number of training samples to work through before the update of internal model parameters
- By decreasing the batch_size, the process gets faster at the expense of the performance

2) Set Epoch using the slider
- Epoch refers to the number of times the algorithm will work through the entire training
- The number of epochs is the number of complete passes in the training dataset

3) Enter desired name for the trained model through the input field

4) Press the "Train Model!" button

## STEP Pipeline

1) Set batch_size using the slider
- batch_size refers to the number of training samples to work through before the update of internal model parameters
- By decreasing the batch_size, the process gets faster at the expense of the performance

2) Set Epoch using the slider
- Epoch refers to the number of times the algorithm will work through the entire training
- The number of epochs is the number of complete passes in the training dataset

3) Enter desired name for the trained model through the input field

4) Press the "Train Model!" button

Note: STEP has other parameters that can be explored in the code itself. For simplicity, only the selected few are provided

## MS-TCT Pipeline

1) Set batch_size using the slider
- batch_size refers to the number of training samples to work through before the update of internal model parameters
- By decreasing the batch_size, the process gets faster at the expense of the performance

2) Set Epoch using the slider
- Epoch refers to the number of times the algorithm will work through the entire training
- The number of epochs is the number of complete passes in the training dataset

Note: MS-TCT has other parameters that can be explored in the code itself. For simplicity, only the selected few are provided


In [None]:
# Training

training_output = widget_output()
if pipeline_selected == "TSU": 
    @training_output.capture(clear_output=True, wait=True)
    def on_train(arg):
        split_setting= split_json_textbox.value
        batch_size = train_batch_size_slider.value
        epoch = train_epoch_slider.value
        trained_model_name = trained_model_name_textbox.value
        trained_root = trained_root_textbox.value
        lr = learning_rate_textbox.value
        kernel_size = kernel_size_textbox.value
        num_channel = num_channel_textbox.value
        ap_type = ap_type_radio.value
        with training_output:
            print("Training in progress...")
            %run train.py -dataset {dataset} -split_setting {split_setting} -model {model} -num_channel {num_channel} -lr {lr} -kernelsize {kernel_size} -APtype {ap_type} -epoch {epoch} -batch_size {batch_size} -comp_info {comp_info} -load_model {load_model} -root {root} -trained_model_name {trained_model_name}

    # variables
    dataset = "TSU"
    model = "PDAN"
    num_channel = 512
    lr = 0.0002
    kernel_size = 3
    ap_type = "map"
    epoch = 10
    batch_size = 1
    comp_info = "TSU_CS_RGB_PDAN"
    load_model = 'False'
    root = f'{cwd.as_posix()}/data/TSU/TSU_RGB_i3d_feat/RGB_i3d_16frames_64000_SSD'
    trained_model_name = "best_model_ever" 
    



    print("How many samples per batch to load (default: 2)")
    train_batch_size_slider = widget_slider("Batch size", 2,1,4)
    print("Total number of iterations of all the training data in one cycle for training")
    train_epoch_slider = widget_slider("Epoch", 1000, 1, 1000 )
    print("Where is the split json data at?")
    split_json_textbox = widget_textbox("Split json",f"{cwd.as_posix()}/data/TSU/smarthome_CS_51.json")
    print("Where is the trained root at?")
    trained_root_textbox = widget_textbox('Feature directory',f'{cwd.as_posix()}/data/TSU/TSU_RGB_i3d_feat/RGB_i3d_16frames_64000_SSD')
    print("Select the model to train with")
    trained_model_name_textbox = widget_textbox('New trained model name', "the_best_model_ever")
    print("Tuning parameter in an optimization algorithm that determines the step size at each iteration")
    learning_rate_textbox = widget_textbox("Learning rate", "0.0002")
    print("What is the kernel size?")
    kernel_size_textbox = widget_textbox("Kernel Size", "3")
    print("How many channels is there?")
    num_channel_textbox = widget_textbox("Number of Channels", "512")
    print("Select your AP Type, either map or wap")
    ap_type_radio = widget_radio("AP Type", ["map","wap"])
    train_button = widgets.Button(description = 'Train Model')   
    train_button.on_click(on_train)
    


    display(train_button)
    display(training_output)
    
    

if pipeline_selected == "MSTCT": 
    @training_output.capture(clear_output=True, wait=True)
    def on_train(arg):
        # Click handler for the "Train Model" button: reads the two sliders
        # and runs train.py with them plus the fixed settings assigned below.
        # NOTE(review): relies on IPython expanding the {name} placeholders
        # from this function's locals before the notebook namespace, so that
        # batch_size and epoch here take precedence over the defaults below.
        batch_size = train_batch_size_slider.value
        epoch = train_epoch_slider.value
        with training_output:
            print("Training in progress...")
            %run train.py -dataset {dataset} -mode {mode} -model {model} -train {train} -num_clips {num_clips} -skip {skip} -lr {lr} -comp_info {comp_info} -epoch {epoch} -unisize {unisize} -alpha_l {alpha_l} -beta_l {beta_l} -batch_size {batch_size}

    # variables
    # Fixed command-line settings for the MS-TCT run; none of these are
    # exposed in the UI. epoch and batch_size are only fallbacks because the
    # handler assigns both from the sliders.
    dataset = "charades"
    mode = "rgb"
    model = "MS_TCT"
    train = "True"
    num_clips = "256"
    skip = "0"
    lr = 0.0001
    comp_info = "False"
    epoch = 50
    unisize = "True"
    alpha_l = "1"
    beta_l = "0.05"
    batch_size = 32

    # User-adjustable controls.
    # NOTE(review): widget_slider is defined elsewhere; the arguments look
    # like (label, initial value, minimum, maximum) — confirm.
    train_batch_size_slider = widget_slider("Batch size", 2,1,4)
    train_epoch_slider = widget_slider("Epoch", 1000, 1, 1000 )
    train_button = widgets.Button(description = 'Train Model')   
    train_button.on_click(on_train)


    # Show the trigger button followed by the area that receives the training
    # output.
    display(train_button)
    display(training_output)
    
if pipeline_selected == "STEP": 
    @training_output.capture(clear_output=True, wait=True)
    def on_train(arg):
        batch_size = train_batch_size_slider.value
        max_epochs = train_epoch_slider.value
        save_root = trained_root_textbox.value
        with training_output:
            print("Training in progress...")
            # run the script
            %run -m train -- --data_root {data_root} --save_root {save_root} \
            --name {name} --pretrain_path {pretrain_path} --resume_path {resume_path} \
            --base_net {base_net} --det_net {det_net} --max_iter {max_iter} --T {T} \
            --iterative_mode {iterative_mode} --anchor_mode {anchor_mode} --anchor_mode {anchor_mode} --temporal_mode {temporal_mode} \
            --pool_mode {pool_mode} --pool_size {pool_size} --save_step {save_step} --topk {topk} --evaluate_topk {evaluate_topk} \
            --num_workers {num_workers} --max_epochs {max_epochs} --batch_size {batch_size} --print_step {print_step} \
            --optimizer {optimizer} --base_lr {base_lr} --det_lr {det_lr} --det_lr0 {det_lr0} --milestones {milestones} \
            --scale_norm {scale_norm} --do_flip {do_flip} --do_crop {do_crop} --do_photometric {do_photometric} --do_erase {do_erase} \
            --fc_dim {fc_dim} --dropout {dropout} --NUM_SAMPLE {NUM_SAMPLE} --scheduler {scheduler} --warmup_iters {warmup_iters} \
            --cls_thresh {cls_thresh} --reg_thresh {reg_thresh} --max_pos_num {max_pos_num} --neg_ratio {neg_ratio} \
            --freeze_affine {freeze_affine} --freeze_stats {freeze_stats} --lambda_reg {lambda_reg} --lambda_neighbor {lambda_neighbor} 
         
    data_root = f"{step_wd.as_posix()}/datasets/ava/"
    save_root = f"{step_wd.as_posix()}/datasets/ava/cache/"
    pretrain_path = "pretrained/ava_cls.pth"

    name = "STEP"
    base_net = "i3d"
    det_net = "two_branch"
    resume_path = "Auto"

    T = 3
    max_iter = 3    # index starts from 1
    iterative_mode = "temporal"
    anchor_mode = "1"
    temporal_mode = "predict"
    pool_mode = "align"
    pool_size = 7

    # training schedule
    num_workers = 16
    max_epochs = 14
    batch_size = 8
    optimizer = "adam"
    base_lr = 7.5e-5
    det_lr0 = 1.5e-4
    det_lr = 7.5e-4
    save_step = 11465
    print_step = 500
    scheduler = "cosine"
    milestones = "-1"
    warmup_iters = 1000

    # losses
    dropout = 0.3
    fc_dim = 256
    lambda_reg = 5
    lambda_neighbor = 1
    cls_thresh = "0.2,0.35,0.5"
    reg_thresh = "0.2,0.35,0.5"
    max_pos_num = 5
    neg_ratio = 2
    NUM_SAMPLE = -1
    topk = 300
    evaluate_topk = 300

    # data augmentation / normalization
    scale_norm=2    # for i3d
    do_flip="True"
    do_crop="True"
    do_photometric="True"
    do_erase="True"
    freeze_affine="True"
    freeze_stats="True"

    train_batch_size_slider = widget_slider("batch_size", 8,1,32)
    train_epoch_slider = widget_slider("Epoch", 14, 1, 1000 )
    trained_root_textbox = widget_textbox('Enter your save root',f'{step_wd.as_posix()}/datasets/ava/cache/')
    train_button = widgets.Button(description = 'Train Model')   
    train_button.on_click(on_train)


    display(train_button)
    display(training_output)

In [None]:
# Log in to Weights & Biases (wandb) so this notebook session is linked to your account
wandb.login()
# Display your project workspace inline through the %wandb magic
# NOTE(review): "-h 2048" presumably sets the height of the embedded panel — confirm against the wandb documentation.
%wandb ict3104-team14-2022/nvda-ml-activity-detection -h 2048


# Test 📝
## Overview
The evaluation process is used to analyze a model's performance by testing it against the ground truth.
## TSU pipeline
### Instructions

1) Set batch_size using the slider
- batch_size refers to the number of samples that are loaded and passed through the model together in each evaluation step (no model parameters are updated during testing)
- By decreasing the batch_size, the process gets faster at the expense of the performance

2) Select a model from the drop-down

3) Press the "Test TSU Model" button

## How to read the result of the evaluation?

After the evaluation process completes, it will return the following details about each video sample

1) Mean Average Precision (mAP)
- This value is used to analyze the accuracy of the activity detection model.
- The higher the mAP value, the more accurate the model.

2) Average Precision Per Activity Class
- This is also used to measure the accuracy of the model, but it is reported separately for each individual class.
- There are 51 classes in total and an example is the make_coffee class.
- For classes with the value 0, it suggests that the class is not present in the video sample.

3) Shape
- The shape variable is an array consisting of 2 values.
- The first value, "51", is the number of classes, where each class corresponds to an action that the subject may be performing.
- The second value refers to the length of the video sample divided by 16.

## STEP pipeline
### Instructions

The STEP test script works in a slightly different manner.
It first loads a pretrained model as well as the configuration from your pretrained model. To test, rename your pretrained model to 'ava_step.pth' and place it into the directory as 'step/pretrained/ava_step.pth'.

## MS-TCT pipeline
### Instructions
Takes in a pretrained model. Set the Pickle file Path to be tested.


In [None]:
# To evaluate the trained model chosen based on preferred configuration
# Shared output area for this cell: each pipeline's evaluate_onclick handler
# below is wrapped with evaluation_output.capture(...) and the widget is
# displayed at the end of the selected pipeline's branch.
# NOTE(review): widget_output is defined elsewhere in the notebook; presumably
# it returns an ipywidgets Output widget — confirm.

evaluation_output = widget_output()

# --------------------------- TSU Pipeline ----------------------------------

if pipeline_selected == "TSU": 
    @evaluation_output.capture(clear_output=True, wait=True)
    def evaluate_onclick(args):
        split_setting= split_json_textbox.value
        batch_size = eval_batch_size_slider.value
        load_model = load_model_textbox.value
        root = evaluate_root_textbox.value
        ap_type = ap_type_radio.value

        with evaluation_output:
            print("Starting evaluation")
            %run evaluation.py -dataset {dataset} -split_setting {split_setting} -model {model} -APtype {AP_type} -batch_size {batch_size} -load_model {load_model} -root {root}
            print("Done!")


    dataset = "TSU"
    model = "PDAN"
    AP_type =  "map"
    load_model = './PDAN_TSU_RGB'

    model_path = "./model/trained" 
    # Add models to list
    model_list = [os.path.join(dp, f) for dp, dn, filenames in os.walk(model_path) for f in filenames if os.path.splitext(f)[1] != '.pyc' and os.path.splitext(f)[1] != '.py']
    model_list = [each.replace("\\", "/") for each in model_list]
    model_list.insert(0,"./PDAN_TSU_RGB")


    # feature file
    time_now  = datetime.datetime.now().strftime('%m_%d_%Y_%H_%M_%S') 
    result_name = f"./results/{load_model}_{time_now}.txt"
    load_model_textbox = widget_dropdown("load_model", model_list)
    eval_batch_size_slider = widget_slider("batch_size", 2,1,4)
    split_json_textbox = widget_textbox("Split json",f"{cwd.as_posix()}/data/TSU/smarthome_CS_51.json")
    evaluate_root_textbox = widget_textbox('Feature directory',f'{cwd.as_posix()}/data/TSU/TSU_RGB_i3d_feat/RGB_i3d_16frames_64000_SSD')
    ap_type_radio = widget_radio("AP Type", ["map","wap"])
    evaluate_button = widget_button("Test TSU Model")

    evaluate_button.on_click(evaluate_onclick)

    display(evaluation_output)
    
# --------------------------- MSTCT Pipeline ----------------------------------

if pipeline_selected == "MSTCT":
    
    @evaluation_output.capture(clear_output=True, wait=True)
    def evaluate_onclick(args):
        # Click handler for the "Test MS_TCT Model" button: runs the
        # Evaluation module on the pickle file named in the textbox.
        # The {pkl_path} placeholder on the %run line is filled in by IPython
        # from the local variable assigned here.
        pkl_path =  pickle_path_textbox.value
        with evaluation_output:
            print("Starting evaluation")
            %run -m Evaluation -pkl_path {pkl_path}
            print("Done!")
    
    # The textbox is pre-filled with an example pickle path under mstct_wd.
    # NOTE(review): widget_textbox and widget_button are defined elsewhere in
    # the notebook and presumably display the widget they create — confirm.
    pickle_path_textbox = widget_textbox("Pickle Path",f"{mstct_wd.as_posix()}/save_logit/example_epoch.pkl")
    evaluate_button = widget_button("Test MS_TCT Model") 
    evaluate_button.on_click(evaluate_onclick)
    display(evaluation_output)

# --------------------------- STEP Pipeline ----------------------------------


if pipeline_selected == "STEP": 
    @evaluation_output.capture(clear_output=True, wait=True)
    def evaluate_onclick(args):
        # Click handler for the "Test STEP Model" button: runs the eval module
        # with no command-line arguments, so nothing from the UI is passed in.
        with evaluation_output:
            print("Starting evaluation")
            %run -m eval
            print("Done!")
    # NOTE(review): widget_button is defined elsewhere in the notebook and
    # presumably displays the button it creates — confirm.
    evaluate_button = widget_button("Test STEP Model")

    evaluate_button.on_click(evaluate_onclick)

    display(evaluation_output)