# Experiment Data

In [163]:
from pathlib import Path
import pandas as pd
import numpy as np
import re
import pprint as pp

In [6]:
# Load the project's config module from its file path; the returned spec is discarded.
# NOTE(review): load_module is defined in a later cell, so on a fresh kernel this
# cell fails unless that cell has been run first.
config, _ = load_module(Path("../system/config/config.py"))

In [4]:
def load_module(path: Path, package=None):
    """Import a Python source file by path and return the module and its spec.

    Args:
        path: Location of the source file to execute as a module.
        package: Optional package name; when given, the module is named
            ``"<package>.<path.stem>"``, otherwise just ``path.stem``.

    Returns:
        A ``(module, spec)`` tuple for the freshly executed module.

    Raises:
        ImportError: If no import spec/loader can be created for ``path``
            (for example, the file suffix is not a recognised module type).
    """
    # Imported here because the notebook's import cell does not import importlib.
    import importlib.util

    name = package + "." + path.stem if package else path.stem
    spec = importlib.util.spec_from_file_location(name, path)
    # spec_from_file_location returns None when it cannot pick a loader.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot create a module spec for {path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module, spec

In [115]:
def split_filename(exp_dir):
    """Split a ``<name>_<date>[-_]<time>`` path stem into its name and timestamp.

    Args:
        exp_dir: A ``pathlib.Path`` (directory or file) whose ``stem`` ends in a
            date and a time separated by ``-`` or ``_``; the two parts are parsed
            with the format ``%Y%m%d %H%M%S``.

    Returns:
        A ``(name, dt)`` tuple: the text before the date and the parsed timestamp.

    Raises:
        ValueError: If the stem does not contain the expected pattern, or the
            date/time digits cannot be parsed.
    """
    match = re.search(r'(.*)_([0-9]*)[-_]([0-9]*)', exp_dir.stem)
    if match is None:
        # Without this check the failure surfaces as an AttributeError on None.
        raise ValueError(
            f"'{exp_dir.stem}' does not match the '<name>_<date>-<time>' pattern"
        )
    dt = pd.to_datetime(match.group(2) + " " + match.group(3), format="%Y%m%d %H%M%S")
    name = match.group(1)
    return name, dt

In [118]:
def list_experiments():
    """Return a table of experiment directories, ordered by timestamp.

    Every sub-directory of ``config.experiment_data_root`` is parsed with
    ``split_filename``. The result is a DataFrame indexed by the parsed
    timestamp with two columns: ``name`` and ``dir`` (the directory path).
    """
    exp_dirs = [p for p in config.experiment_data_root.glob("*") if p.is_dir()]

    # Parse each directory once, then unzip into parallel lists.
    parsed = [split_filename(exp_dir) for exp_dir in exp_dirs]
    names = [name for name, _ in parsed]
    dts = [dt for _, dt in parsed]

    df = pd.DataFrame(columns=["name", "dir"], index=dts)
    df["name"] = names
    df["dir"] = exp_dirs
    return df.sort_index()


In [96]:
def experiment_stats(exp_dir):
    """Count the video, image and CSV files directly inside ``exp_dir``.

    Args:
        exp_dir: Path (or path string) of the directory to scan; not recursive.

    Returns:
        A dict with ``video_count`` (``*.mp4`` + ``*.avi``), ``image_count``
        (``*.png`` + ``*.jpg``) and ``csv_count`` (``*.csv``).
    """
    root = Path(exp_dir)

    def count(*patterns):
        # Total number of entries matching any of the glob patterns.
        return sum(len(list(root.glob(pattern))) for pattern in patterns)

    return {
        "video_count": count("*.mp4", "*.avi"),
        "image_count": count("*.png", "*.jpg"),
        "csv_count": count("*.csv"),
    }

def all_experiment_stats(exps):
    """Collect ``experiment_stats`` for every directory in ``exps.dir``.

    Args:
        exps: Object exposing a ``dir`` iterable of experiment directories
            (the DataFrame returned by ``list_experiments`` in this notebook).

    Returns:
        A DataFrame with columns ``video_count``, ``image_count`` and
        ``csv_count``, one row per directory in iteration order, on a fresh
        default index (not the index of ``exps``).
    """
    columns = ["video_count", "image_count", "csv_count"]
    per_experiment = [experiment_stats(exp_dir) for exp_dir in exps.dir]

    df = pd.DataFrame(columns=columns)
    for column in columns:
        df[column] = [stats[column] for stats in per_experiment]

    return df

In [119]:
# Build the experiment table: one row per experiment directory, indexed by its timestamp.
exps = list_experiments()

In [120]:
pd.set_option('display.max_rows', 500)

# Attach per-experiment file counts to the experiment table.
# all_experiment_stats returns rows on a default index, so the frames are
# concatenated positionally and the timestamp index is restored afterwards.
index = exps.index
exp_stats = all_experiment_stats(exps)
# Drop count columns left by an earlier run of this cell so that re-running
# it does not append a duplicate set of columns.
exps_base = exps.drop(columns=exp_stats.columns, errors="ignore")
exps = pd.concat([exps_base.reset_index(drop=True), exp_stats], axis=1)
exps.index = index

In [123]:
# Preview the first rows of the experiment table with the file-count columns.
exps.head()

Unnamed: 0,name,dir,video_count,image_count,csv_count
2021-03-21 20:14:05,yolo_test,/data/reptilearn/experiments/yolo_test_2021032...,0,0,0
2021-03-21 20:15:54,yolo_test,/data/reptilearn/experiments/yolo_test_2021032...,0,0,0
2021-03-21 20:16:36,yolo_test,/data/reptilearn/experiments/yolo_test_2021032...,0,0,0
2021-03-21 20:32:14,test_exp,/data/reptilearn/experiments/test_exp_20210321...,0,0,0
2021-03-21 20:52:32,test_exp,/data/reptilearn/experiments/test_exp_20210321...,0,0,0


In [201]:
def experiment_info(exp_dir):
    """Describe the videos, CSV files and images found in an experiment directory.

    Each ``*.mp4``/``*.avi`` file is paired with a CSV of the same stem, if one
    exists. The frame count is the number of rows in that CSV, and the duration
    is the difference between the last and first value of its first column,
    scaled by 1e9 and cast to ``timedelta64[ns]`` (presumably the column holds
    timestamps in seconds -- TODO confirm).

    Args:
        exp_dir: Path (or path string) of the experiment directory.

    Returns:
        A dict with:
          * ``"videos"``: maps ``(name, datetime)`` (from ``split_filename``) to a
            dict with ``path``, ``timestamps``, ``frame_count`` and ``duration``.
            ``timestamps``, ``frame_count`` and ``duration`` are ``None`` when the
            video has no timestamps CSV; ``duration`` is also ``None`` when that
            CSV has no rows.
          * ``"csvs"``: CSV files that are not a video's timestamps file.
          * ``"images"``: ``*.jpg`` and ``*.png`` files.
    """
    exp_dir = Path(exp_dir)
    info = {}

    ts_paths = set()
    videos = list(exp_dir.glob("*.mp4")) + list(exp_dir.glob("*.avi"))
    info["videos"] = {}
    for vid_path in videos:
        name, dt = split_filename(vid_path)
        ts_path = exp_dir / (vid_path.stem + ".csv")

        # Reset per video so a missing CSV never reuses the previous video's values.
        frame_count = None
        duration = None
        if ts_path.exists():
            ts_paths.add(ts_path)

            tdf = pd.read_csv(ts_path, parse_dates=True)
            frame_count = tdf.shape[0]
            if frame_count > 0:
                # A header-only CSV has no first/last row to subtract.
                duration = ((tdf.iloc[-1, 0] - tdf.iloc[0, 0]) * 1e09).astype('timedelta64[ns]')
        else:
            ts_path = None

        info["videos"][(name, dt)] = {
            "path": vid_path,
            "timestamps": ts_path,
            "frame_count": frame_count,
            "duration": duration,
        }

    info["csvs"] = [p for p in exp_dir.glob("*.csv") if p not in ts_paths]
    info["images"] = list(exp_dir.glob("*.jpg")) + list(exp_dir.glob("*.png"))

    return info
    

In [202]:
# Inspect the 10th-from-last experiment that recorded exactly four videos.
# .iloc makes the positional lookup explicit; a bare integer key on this
# datetime-indexed Series relies on a deprecated positional fallback.
exp = exps[exps.video_count == 4].dir.iloc[-10]
info = experiment_info(exp)
pp.pprint(info)

{'csvs': [PosixPath('/data/reptilearn/experiments/learn_exp_t11fixmqtt_20210511-141240/learn_data.csv'),
          PosixPath('/data/reptilearn/experiments/learn_exp_t11fixmqtt_20210511-141240/events.csv')],
 'images': [],
 'videos': {('back', Timestamp('2021-05-11 14:12:41')): {'duration': numpy.timedelta64(6703808027982,'ns'),
                                                         'frame_count': 348792,
                                                         'path': PosixPath('/data/reptilearn/experiments/learn_exp_t11fixmqtt_20210511-141240/back_20210511-141241.mp4'),
                                                         'timestamps': PosixPath('/data/reptilearn/experiments/learn_exp_t11fixmqtt_20210511-141240/back_20210511-141241.csv')},
            ('left', Timestamp('2021-05-11 14:12:41')): {'duration': numpy.timedelta64(6703805832624,'ns'),
                                                         'frame_count': 349233,
                                                       

In [144]:
# Load the second non-timestamp CSV listed for the experiment.
# NOTE(review): the position depends on the order in which glob returned the
# files -- confirm which file index 1 refers to.
pd.read_csv(info["csvs"][1])

Unnamed: 0,time,x1,y1,x2,y2,confidence


In [203]:
# Experiments from 2021-05-11 onwards (label slice on the timestamp index).
trials0511 = exps.loc["2021-5-11":]
# First "nightly" experiment in that range; .iloc selects by position instead of
# relying on the deprecated integer-key fallback of a datetime-indexed Series.
info = experiment_info(trials0511[trials0511.name == "nightly"].dir.iloc[0])
pp.pprint(info)

{'csvs': [PosixPath('/data/reptilearn/experiments/nightly_20210511-204555/learn_data.csv'),
          PosixPath('/data/reptilearn/experiments/nightly_20210511-204555/head_bbox.csv'),
          PosixPath('/data/reptilearn/experiments/nightly_20210511-204555/events.csv')],
 'images': [],
 'videos': {('back', Timestamp('2021-05-11 20:45:55')): {'duration': numpy.timedelta64(1223818696022,'ns'),
                                                         'frame_count': 62930,
                                                         'path': PosixPath('/data/reptilearn/experiments/nightly_20210511-204555/back_20210511-204555.mp4'),
                                                         'timestamps': PosixPath('/data/reptilearn/experiments/nightly_20210511-204555/back_20210511-204555.csv')},
            ('back', Timestamp('2021-05-12 04:21:57')): {'duration': numpy.timedelta64(21683130214691,'ns'),
                                                         'frame_count': 1166906,
               

In [167]:
# NOTE(review): experiment_info as defined above keys info["videos"] by
# (name, datetime) tuples, so the "top" lookup presumably dates from an earlier
# version of that function -- confirm before re-running on a fresh kernel.
tdf = pd.read_csv(info["videos"]["top"]["timestamps"], parse_dates=True)
# Last minus first value of the first column, multiplied by a one-second timedelta64.
delta = (tdf.iloc[-1, 0] - tdf.iloc[0, 0]) * np.timedelta64(1, 's')

In [173]:
# Express delta in hours.
delta / np.timedelta64(1, 'h')

0.3397222222222222

In [185]:
# NOTE(review): this attempt raised ValueError (see the recorded output below);
# remove it or catch the error if the notebook must run top to bottom.
np.timedelta64(tdf.iloc[0,0], "s")

ValueError: Could not convert object to NumPy timedelta

In [194]:
# First-to-last difference of the first column, scaled by 1e9 and cast to
# timedelta64[ns] (presumably the column is in seconds -- TODO confirm).
d = ((tdf.iloc[-1, 0] - tdf.iloc[0,0]) * 1e09).astype('timedelta64[ns]')

In [197]:
# Re-express d at minute resolution.
d.astype('timedelta64[m]')

numpy.timedelta64(20,'m')