# <u>T16 HOI HUB</u>

## Import Libraries

In [None]:
from ipywidgets import widgets
from IPython.display import display, HTML
import os
import io
import base64
import json
from tqdm.notebook import trange, tqdm ###

## Data Exploration

In [None]:
# Every entry found in the video directory (hidden files included).
video_folder = os.listdir('data/video')

# Map each visible file name to itself; used as the dropdown options below.
list_of_video = {}
for video in video_folder:
    # Hidden dot-files (common on macOS) are skipped.
    if video.startswith('.'):
        continue
    list_of_video[video] = video

## Create Caption files

In [None]:
# read action and frame object from file
def readCaptionFile(filename,videoName):
    """Return the ``"actions"`` entry stored for ``videoName`` in a JSON file.

    Args:
        filename: Path of the JSON file to read.
        videoName: Top-level key of the JSON object to look up.

    Returns:
        The value found at ``data[videoName]["actions"]``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
        KeyError: If ``videoName`` or its ``"actions"`` entry is missing.
    """
    # The context manager closes the file even when parsing fails.
    with open(filename) as f:
        data = json.load(f)
    return data[videoName]["actions"]

# convert frame to time
def convertFrameToTime(frame, fps=25):
    """Convert a frame index to a WebVTT timestamp string.

    Args:
        frame: Frame number counted from the start of the video.
        fps: Frame rate used for the conversion. Defaults to 25, the value
            that was previously hard-coded.

    Returns:
        ``"mm:ss.ttt"`` for times under one hour and ``"hh:mm:ss.ttt"``
        otherwise. Negative frames are clamped to ``"00:00.000"``.

    Raises:
        ValueError: If ``fps`` is not positive.
    """
    if fps <= 0:
        raise ValueError("fps must be positive")
    # Work in whole milliseconds so the fractional second is kept instead of
    # always being written as ".000".
    total_ms = max(0, int(frame * 1000 / fps))
    total_seconds, millis = divmod(total_ms, 1000)
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    stamp = f"{minutes:02d}:{seconds:02d}.{millis:03d}"
    # WebVTT limits the minutes field to 00-59; longer times need an hours field.
    if hours:
        stamp = f"{hours:02d}:{stamp}"
    return stamp

# read reference text from txt file
def readReferenceFile(refFile):
    """Read a whitespace-separated "<id> <label>" reference file into a dict.

    Args:
        refFile: Path of the text file to read.

    Returns:
        A dict mapping the first token of each line (as a string) to the
        second token of that line. Lines with fewer than two tokens are
        ignored.

    Raises:
        OSError: If the file cannot be opened.
    """
    referenceDict = {}
    with open(refFile) as f:
        for line in f:
            fields = line.split()
            # Blank or single-token lines carry no id/label pair; skipping
            # them avoids an IndexError on e.g. a trailing empty line.
            if len(fields) < 2:
                continue
            referenceDict[fields[0]] = fields[1]
    return referenceDict

# create caption file
def formatCaptionFile(captionList, reference, captionPath):
    """Write a WebVTT caption file, one cue per entry of ``captionList``.

    Args:
        captionList: Iterable of indexable entries where element 0 is the
            label id, element 1 the start frame and element 2 the end frame.
        reference: Mapping from ``str(label id)`` to the caption text.
        captionPath: Path of the caption file to create or overwrite.

    Raises:
        KeyError: If an entry's label id is missing from ``reference``.
        OSError: If the caption file cannot be written.
    """
    # Build every cue before touching the output file, so a failed lookup
    # does not leave a truncated caption file behind.
    captions = []
    for entry in captionList:
        text = reference[str(entry[0])]
        timing = convertFrameToTime(entry[1]) + " --> " + convertFrameToTime(entry[2])
        captions.append(timing + "\n" + text + "\n\n")
    # The context manager closes the file even if a write fails.
    with open(captionPath, "w") as f:
        f.write("WEBVTT\n\n")
        f.writelines(captions)

In [None]:
# Dropdown offering every video collected in list_of_video.
video_dropdown = widgets.Dropdown(description='Videos', options=list_of_video)

# Path of the currently selected video; updated by video_on_change.
video_src = 'data/video/' + video_dropdown.value

def play_video(video_src,caption_src):
    """Build an HTML5 video player with the video embedded as base64 data.

    Args:
        video_src: Path of the video file; its bytes are read and inlined
            into the page as a ``data:`` URI.
        caption_src: URL or path placed in the ``src`` attribute of the
            caption ``<track>`` element.

    Returns:
        An ``IPython.display.HTML`` object containing the ``<video>`` markup.

    Raises:
        OSError: If the video file cannot be read.
    """
    # Read-only binary mode is enough (no write permission needed), and the
    # context manager makes sure the handle is closed.
    with open(video_src, 'rb') as video_file:
        encoded = base64.b64encode(video_file.read())
    # The track src is quoted so caption paths containing spaces stay intact.
    return(HTML(data='''<video width="650" height="360" controls>
        <source src="data:video/mp4;base64,{0}" type="video/mp4" />
        <track kind="captions" src="{1}" srclang="en" label="English" default>
        </video>'''.format(encoded.decode('ascii'),caption_src)))


# video dropdown onchange function
def video_on_change(change):
    """Keep the global ``video_src`` in sync with the video dropdown.

    Args:
        change: Change notification passed by ``observe``. Only
            notifications with type ``'change'`` and name ``'value'`` are
            acted on; anything else is ignored.
    """
    global video_src
    if change['type'] == 'change' and change['name'] == 'value':
        # The notification already carries the newly selected value.
        video_src = 'data/video/' + change['new']
        
          
# display video dropdown
# The handler is registered without a `names` filter; video_on_change itself
# checks change['name'], so only value changes update video_src.
video_dropdown.observe(video_on_change)
# Render the widget in the cell output.
display(video_dropdown) 


In [None]:
# --- caption generation ---
# Pieces of the selected file name split on "."; element 0 is the part
# before the first dot.
videoName = video_dropdown.value.split(".")
# The label reference file is read from the working directory.
ref = readReferenceFile('all_labels.txt')
# The caption file is written next to the videos; the path may need to
# become configurable.
captionPath = f"data/video/{videoName[0]}.vtt"
# The model result file is also read from the working directory for now.
captionList = readCaptionFile('smarthome_CS_51.json', videoName[0])
formatCaptionFile(captionList, ref, captionPath)

# --- playback ---
# Show only the last path component in the status line.
video = video_src.split('/')[-1]
print(f"Currently playing : {video}")
# Last expression of the cell: the returned player is the cell output.
play_video(video_src, captionPath)

## Extract i3D features from video

In [None]:
import models

In [None]:
from pathlib import Path
from omegaconf import OmegaConf

In [None]:
# after the desired videos are selected
# video_paths: list[str] = ["../data/RGB_Video_MP4/P02T01C06.mp4"]
# One video name per line is read from the list file. The context manager
# closes the file, and blank lines are skipped so they do not turn into a
# bogus "../data/RGB_Video_MP4/.mp4" entry.
with open("HOI/I3D/video_list.txt") as video_list_file:
    video_paths = [
        "../data/RGB_Video_MP4/{0}.mp4".format(name.strip())
        for name in video_list_file
        if name.strip()
    ]
# TODO: choose a project-relative directory
output_path = "/media/starlight/2c72c05a-ec96-4c96-ba3c-50ae4bc6730b/home/starlight/TSU/data/RGB_i3d_test"

In [None]:
# Load the extractor's default configuration, then merge the
# notebook-specific settings into it.
i3d_defaults = OmegaConf.load(Path("feature_extractor/configs/i3d.yml"))
i3d_config = OmegaConf.merge(
    i3d_defaults,
    OmegaConf.create(dict(
        feature_type="i3d",
        streams="rgb",
        output_path=output_path,
        video_paths=video_paths,
        on_extraction="save_numpy",
        stack_size=16,
        step_size=16,
    )),
)
# Build the extractor from the merged configuration.
extractor = models.ExtractI3D(i3d_config)

In [None]:
# Run the extractor on every selected video path, with a tqdm progress bar.
# NOTE(review): `_extract` is underscore-prefixed (private by convention) —
# confirm there is no public entry point that should be used instead.
for video in tqdm(video_paths, desc="videos extracted"):
    extractor._extract(video)

## Inference

In [None]:
import torch
# Example: to be modified
# Assume the TSU dataset has been selected.
from TSU_PDAN import HOI_PDAN
from HOI.smarthome_i3d_per_video import TSU as Dataset
from HOI.smarthome_i3d_per_video import TSU_collate_fn as collate_fn

In [None]:
# TODO: v-iashin
# Assume the TSU smarthome split is selected (in practice, a custom json is
# generated depending on which specific videos are selected).
val_dataset = Dataset(
    "../data/RGB_i3d_test/smarthome_CS_51.json",
    'testing',
    "../data/RGB_i3d_test",
    1,
    51,
)
# Single-sample batches, loaded by four worker processes.
val_dataloader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=1,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
    collate_fn=collate_fn,
)
# val_dataloader.root = args.rgb_root

In [None]:
# Build the PDAN runner and restore previously saved weights.
modelrunner = HOI_PDAN()
# NOTE(review): presumably this call creates `modelrunner.model`, since that
# attribute is used on the next line — confirm in TSU_PDAN.
modelrunner.PDAN_training_parameters()
# Load the state dict stored in "weight_epoch_1" into the runner's model.
modelrunner.model.load_state_dict(torch.load("data/pretrained_model/PDAN/weight_epoch_1"))

In [None]:
# Evaluate the loaded model on the validation loader.
# `result` is pre-bound so the name exists even if evaluation raises.
result = None
# note: this doesn't appear to show up properly in vscode
# The loader is wrapped in tqdm so progress is counted in batches.
with tqdm(val_dataloader, unit='batch') as progressive_loader:
    result = modelrunner.evaluate(progressive_loader)
# Bare expression: displayed as the cell output.
result

## Training HOI ML Model

In [None]:
# Same annotation file and feature directory as the validation split, but
# selecting the 'training' subset.
train_dataset = Dataset(
    "../data/RGB_i3d_test/smarthome_CS_51.json",
    'training',
    "../data/RGB_i3d_test",
    1,
    51,
)
# Single-sample batches, loaded by four worker processes.
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=1,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
    collate_fn=collate_fn,
)

In [None]:
# Train over the epoch range 0..4 with a tqdm progress bar.
# NOTE(review): `modelrunner.train` appears to yield the model after each
# epoch — confirm in TSU_PDAN.
with trange(0, 5, unit='epoch', desc='epochs') as epoch_range:
    for model in modelrunner.train(
        train_dataloader=train_dataloader,
        val_dataloader=val_dataloader,
        epoch_range=epoch_range,
    ):
        # Snapshot for this epoch: first the weights only, then the whole
        # model object. `!s` keeps the str() conversion of the epoch value.
        torch.save(
            model.state_dict(),
            f'./data/pretrained_model/PDAN/weight_epoch_{modelrunner.epoch!s}',
        )
        torch.save(
            model,
            f'./data/pretrained_model/PDAN/model_epoch_{modelrunner.epoch!s}',
        )

## Evaluate

In [None]:
# Evaluate Model
# Every entry found in the pretrained model directory (hidden files included).
pretrained_model_folder = os.listdir('data/pretrained_model')

# Map each visible entry name to itself; used as the dropdown options below.
list_of_ptModels = {}
for model in pretrained_model_folder:
    # Hidden dot-files (common on macOS) are skipped.
    if model.startswith('.'):
        continue
    list_of_ptModels[model] = model

# Dropdown offering every entry collected in list_of_ptModels.
evaluation_dropdown = widgets.Dropdown(description='evaluation', options=list_of_ptModels)

# Path of the currently selected pretrained model.
model_src = 'data/pretrained_model/' + evaluation_dropdown.value

# evaluation dropdown onchange function
def evaluation_on_change(change):
    """Keep the global ``model_src`` in sync with the evaluation dropdown.

    Args:
        change: Change notification passed by ``observe``. Only
            notifications with type ``'change'`` and name ``'value'`` are
            acted on; anything else is ignored.
    """
    global model_src
    if change['type'] == 'change' and change['name'] == 'value':
        # Use the pretrained-model directory, matching the initial value of
        # model_src (the previous 'data/video/' prefix pointed at the videos).
        model_src = 'data/pretrained_model/' + change['new']
        
          
# display evaluation dropdown
# The handler is registered without a `names` filter; evaluation_on_change
# itself checks change['name'] before acting.
evaluation_dropdown.observe(evaluation_on_change)
# Render the widget in the cell output.
display(evaluation_dropdown) 

In [None]:
run = f"python test.py -dataset TSU -mode rgb -split_setting CS -model PDAN -train False -num_channel 512 -lr 0.0002 -kernelsize 3 -APtype map -batch_size 1 -comp_info TSU_CS_RGB_PDAN -load_model {model_src} -video {video_dropdown.value}"
!{run}