# Download, annotate, and clip CIS-PD Videos

Project status:
- COMPLETE: Download cycle videos for list of subjects
- COMPLETE: Annotate videos
- COMPLETE: Clip videos into activities
Using 'mp4 metadata extract.ipynb' in temp-notebooks repo
- COMPLETE: Get start times for videos using Python
- COMPLETE: Use start/stop frame number and convert to UTC
- COMPLETE: Modify GUI function to use my start/stop times to label data for relevant subjects and cycles below

Videos to download, annotate and clip:
- 1003 - all cycles
- 1005 - all cycles except cycle 5
- 1007 - cycle 1-4
- 1009 - 1, 4, 6
- 1019 - 4
- 1023 - 2-6
- 1024 - 3
- 1039 - 1-6
- 1043 - 2-6
- 1048 - 1-5
- 1050 - 1-6

## File structure
- The annotations for each subject's cycle videos are compiled into the 'raw_annotation.csv' file in that subject's folder.
- Each cycle video is clipped into one video per activity, saved in the corresponding cycle folder (e.g., cycle1), and named by the activity's short name (e.g., Wlkg for Walking).
- Pertinent notes for each subject are noted below under the corresponding subject heading.

Activity List:

Shaking (omitted)

Standing

Walking

Walking while counting

Finger to nose--right hand

Finger to nose--left hand

Alternating right hand movements

Alternating left hand movements

Sit to stand

Drawing on a paper

Typing on a computer keyboard

Assembling nuts and bolts

Taking a glass of water

Organizing sheets in a folder

Folding towels

Sitting

In [2]:
# Importing the Libraries
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import re
import datetime as dt
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

## Run Video Annotator in Terminal

If pythonvideoannotator is installed,

for windows:

    activate videoannotator

    python -m pythonvideoannotator
    
for mac:
    
    source activate videoannotator
    
    python -m pythonvideoannotator

# Check RTO Network Drive and change path(s) as needed
- X:, Y: or Z: ?

# Create dictionaries to decode id for videos

In [3]:
# Table pairing each numeric SubjectCode with its FoxInsightID
path = r'X:\CIS-PD MUSC\decoded_forms'
filename = os.path.join(path, 'videoID.csv')
# SubjectCode is cast to int right after loading (it is not int as read)
subjid_df = pd.read_csv(filename).astype({'SubjectCode': 'int'})

In [4]:
# Lookup tables between SubjectCode and FoxInsightID (both directions)
id_dict = subjid_df.set_index('SubjectCode')['FoxInsightID'].to_dict()  # SubjectCode -> FoxInsightID
reverse_id_dict = subjid_df.set_index('FoxInsightID')['SubjectCode'].to_dict()  # FoxInsightID -> SubjectCode

In [5]:
# Check id for video downloads: translate a FoxInsightID back to its SubjectCode
fox_id = 'ciscij10'
id = reverse_id_dict.get(fox_id, 'Unknown')
print(fox_id, ' is ', id)

ciscij10  is  1043


In [4]:
def dataclean(file, path, id):
    """Load a pythonVideoAnnotator export and return it as a cleaned dataframe.

    Args:
        file: name of the annotation file inside `path` (ie. 'cycle1.csv').
        path: folder that holds the annotation file.
        id: value written to every row of the 'subject code' column.

    Returns:
        DataFrame with the columns 'subject code', 'start frame', 'stop frame',
        'activity' and 'cycle'. Rows with any missing value are dropped,
        'cycle' is shifted up by one, and the frame/cycle columns are downcast
        to the smallest integer type where possible.
    """
    all_columns = ['subject code', '1', 'start frame', 'stop frame', 'activity', '2', 'cycle']
    # '1' and '2' only name the file columns that are read past and not kept
    kept_columns = [name for name in all_columns if name not in ('1', '2')]

    annotations = pd.read_csv(
        os.path.join(path, file), names=all_columns, usecols=kept_columns
    ).dropna()

    # shift the cycle number before downcasting, so the dtype fits the final values
    annotations['cycle'] = annotations['cycle'] + 1
    for numeric_col in ('start frame', 'stop frame', 'cycle'):
        annotations[numeric_col] = pd.to_numeric(annotations[numeric_col], downcast='integer')

    # overwrite whatever the file had in this column with the caller's id
    annotations['subject code'] = id

    return annotations

In [5]:
def frame_to_sec(frames,fps=30):
    """Convert a frame count at `fps` frames per second to total whole seconds.

    Args:
        frames: frame number counted from the start of the video.
        fps: frames per second of the video (default 30).

    Returns:
        int: elapsed seconds, rounded down to a whole second.
    """
    # Floor-divide directly instead of going frames -> minutes -> (mod 60) -> seconds:
    # the old round trip wrapped back to 0 after one hour, and its float
    # multiply/divide is not guaranteed to land exactly on a whole second
    # before int() truncates it.
    return int(frames // fps)

In [19]:
def convert_frame_to_sec(df, subj_path, cycle_num=6):
    """Add activity short names and start/stop times in seconds, then save to csv.

    Args:
        df: annotation dataframe with 'start frame' and 'stop frame' columns and
            exactly one row per activity per cycle, in the fixed activity order
            listed below. Modified in place.
        subj_path: subject folder; 'sec_annotation.csv' is written there.
        cycle_num: number of complete activity sets (cycles) contained in df.

    Returns:
        The same dataframe with 'shortname', 'start time sec' and
        'stop time sec' columns added.

    Raises:
        ValueError: if df does not have 15 * cycle_num rows.
    """
    # abbreviated activity names, in annotation order
    # Note: omitted 'Shaking' activity
    activity = ('Stndg', 'Wlkg', 'WlkgCnt', 'FtnR', 'FtnL', 'RamR', 'RamL', 'SitStand',
                'Drwg', 'Typg', 'NtsBts', 'Drnkg', 'Sheets', 'Fldg', 'Sitng')
    shortnames = activity * cycle_num

    # fail with an explicit message rather than pandas' generic length error
    if len(shortnames) != len(df):
        raise ValueError(
            'Expected %d annotation rows (%d activities x %d cycles) but got %d'
            % (len(shortnames), len(activity), cycle_num, len(df))
        )
    df['shortname'] = np.array(shortnames)

    # convert frames to sec
    df['start time sec'] = df['start frame'].apply(frame_to_sec)
    df['stop time sec'] = df['stop frame'].apply(frame_to_sec)

    # save updated file; to_csv opens and closes the file itself, so no
    # separate open() handle is needed
    filename = os.path.join(subj_path, 'sec_annotation.csv')
    df.to_csv(filename, sep=',')

    return df

In [23]:
def convert_and_clip(id, id_dict=id_dict, cycle_num=6):
    """Run the whole pipeline for one subject: clean, convert to seconds, clip.

    Args:
        id: 4 digit subject code; looked up in `id_dict` to get the subject id
            used for the folder and video names ('Unknown' if not found).
        id_dict: mapping of subject code to subject id.
        cycle_num: number of annotated cycles, passed to convert_frame_to_sec.

    Returns:
        The annotation dataframe after conversion and clipping.

    Note: the per-subject cells also assign the module-level `id` before
    calling this function; keep doing so, since the clipping step may read it.
    """
    subject = id_dict.get(id, 'Unknown')
    subj_path = os.path.join(r'X:\CIS-PD Videos', subject)

    # load the raw annotations with a fresh 0..n-1 index
    annotations = dataclean('raw_annotation.csv', subj_path, subject).reset_index(drop=True)

    # add short names and start/stop seconds (also writes sec_annotation.csv)
    annotations = convert_frame_to_sec(annotations, subj_path, cycle_num)

    # cut each cycle video into one clip per activity
    clip_fullvideo(annotations, subj_path)

    return annotations

In [25]:
def clip_fullvideo(df, subj_path, subject_id=None):
    """Clip each full cycle video into one video per activity.

    The source video for a row is '<subject id>_cycle<cycle>.mp4' inside
    `subj_path`; the clip is written to '<subj_path>/cycle<cycle>/<shortname>.mp4'.

    Args:
        df: dataframe with 'cycle', 'shortname', 'start time sec' and
            'stop time sec' columns (and 'subject code' unless `subject_id`
            is given). The 'cycle' column is converted to str in place.
        subj_path: subject folder holding the cycle videos and cycle folders.
        subject_id: subject id used in the video file names (ie. cisuabd4).
            Defaults to each row's 'subject code', so the function no longer
            depends on a module-level `id` being set beforehand.
    """
    # convert column from int to str type to use for video names
    df['cycle'] = df['cycle'].apply(str)

    if subject_id is None:
        subject_ids = df['subject code']
    else:
        subject_ids = [subject_id] * len(df)

    # iterate the columns in lockstep so the loop does not rely on the index labels
    rows = zip(subject_ids, df['cycle'], df['shortname'],
               df['start time sec'], df['stop time sec'])
    for subject, cycleindex, activity, t1, t2 in rows:
        video_name = str(subject) + '_cycle' + cycleindex + '.mp4'
        video_full = os.path.join(subj_path, video_name)

        # build paths with os.path.join rather than hand-written backslashes
        target_dir = os.path.join(subj_path, 'cycle' + cycleindex)
        os.makedirs(target_dir, exist_ok=True)  # make sure the cycle folder exists
        targetname = os.path.join(target_dir, activity + '.mp4')
        print(activity, t1, t2, targetname)

        # clip activity video
        ffmpeg_extract_subclip(video_full, t1, t2, targetname=targetname)

# 1003 - all cycles

In [13]:
# get subject id from dictionary: FoxInsightID for subject code 1003
# ('Unknown' if the code is missing), stored in the module-level `id`
id = id_dict.get(1003, 'Unknown')
print(id)

cisuabd4


In [None]:
# clean the raw annotations, convert frames to sec, and clip the videos
# for subject 1003 (default cycle_num=6)
df = convert_and_clip(1003)

# 1005 - all cycles except cycle 5
- Note: No cycle 6 video

In [15]:
# get subject id from dictionary: FoxInsightID for subject code 1005
# ('Unknown' if the code is missing), stored in the module-level `id`
id = id_dict.get(1005, 'Unknown')
print(id)

cisuabe5


In [None]:
# clean the raw annotations, convert frames to sec, and clip the videos
# for subject 1005; cycle_num = 4 activity sets in the annotation file
df = convert_and_clip(1005, cycle_num = 4)

# 1007 - cycle 1-4

In [26]:
# get subject id from dictionary: FoxInsightID for subject code 1007
# ('Unknown' if the code is missing), stored in the module-level `id`
id = id_dict.get(1007, 'Unknown')
print(id)

cisuabf6


In [None]:
# clean the raw annotations, convert frames to sec, and clip the videos
# for subject 1007; cycle_num = 4 activity sets in the annotation file
df = convert_and_clip(1007, cycle_num = 4)

# 1009 - cycle 1, 4, 6

In [29]:
# get subject id from dictionary: FoxInsightID for subject code 1009
# ('Unknown' if the code is missing), stored in the module-level `id`
id = id_dict.get(1009, 'Unknown')
print(id)

cisuabg7


In [None]:
# clean the raw annotations, convert frames to sec, and clip the videos
# for subject 1009; cycle_num = 3 activity sets in the annotation file
df = convert_and_clip(1009, cycle_num = 3)

# 1019 - cycle 4

In [32]:
# get subject id from dictionary: FoxInsightID for subject code 1019
# ('Unknown' if the code is missing), stored in the module-level `id`
id = id_dict.get(1019, 'Unknown')
print(id)

cisnwe5


In [None]:
# clean the raw annotations, convert frames to sec, and clip the videos
# for subject 1019; cycle_num = 1 activity set in the annotation file
df = convert_and_clip(1019, cycle_num = 1)

# 1023 - cycle 2-6

In [38]:
# get subject id from dictionary: FoxInsightID for subject code 1023
# ('Unknown' if the code is missing), stored in the module-level `id`
id = id_dict.get(1023, 'Unknown')
print(id)

ciscid4


In [None]:
# clean the raw annotations, convert frames to sec, and clip the videos
# for subject 1023; cycle_num = 5 activity sets in the annotation file
df = convert_and_clip(1023, cycle_num = 5)

# 1024 - cycle 3

In [40]:
# get subject id from dictionary: FoxInsightID for subject code 1024
# ('Unknown' if the code is missing), stored in the module-level `id`
id = id_dict.get(1024, 'Unknown')
print(id)

cisnwf6


In [None]:
# clean the raw annotations, convert frames to sec, and clip the videos
# for subject 1024; cycle_num = 1 activity set in the annotation file
df = convert_and_clip(1024, cycle_num = 1)

# 1039 - cycle 1-6

In [42]:
# get subject id from dictionary: FoxInsightID for subject code 1039
# ('Unknown' if the code is missing), stored in the module-level `id`
id = id_dict.get(1039, 'Unknown')
print(id)

ciscih8


In [None]:
# clean the raw annotations, convert frames to sec, and clip the videos
# for subject 1039; cycle_num = 6 activity sets in the annotation file
df = convert_and_clip(1039, cycle_num = 6)

# 1043 - cycle 2-6

In [52]:
# get subject id from dictionary: FoxInsightID for subject code 1043
# ('Unknown' if the code is missing), stored in the module-level `id`
id = id_dict.get(1043, 'Unknown')
print(id)

ciscij10


In [None]:
# clean the raw annotations, convert frames to sec, and clip the videos
# for subject 1043; cycle_num = 5 activity sets in the annotation file
df = convert_and_clip(1043, cycle_num = 5)

# 1048 - cycle 1-5
- raw Video name: added 'cycle' + corresponding number + '_' as prefix to name
- cycle1: video names are cycle1_MVI_0032 (towel folding), cycle1_MVI_0033 (towel folding), cycle1_MVI_0034 (sit)
- cycle3: only video is watch shaking

In [50]:
# get subject id from dictionary: FoxInsightID for subject code 1048
# ('Unknown' if the code is missing), stored in the module-level `id`
id = id_dict.get(1048, 'Unknown')
print(id)

cisrol12


## Did NOT annotate video as it was already clipped

### cycle1 video names:
- cycle1_MVI_0032   Fldg_trial1
- cycle1_MVI_0033   Fldg_trial2
- cycle1_MVI_0034   Sitng

### cycle2 video names:
- cycle2_MVI_0035   shaking watch stand
- cycle2_MVI_0036   shaking watch sit
- cycle2_MVI_0037   Stndg
- cycle2_MVI_0038   Wlkg
- cycle2_MVI_0039   WlkgCnt
- cycle2_MVI_0042   FtnR
- cycle2_MVI_0043   FtnL
- cycle2_MVI_0044   RamR
- cycle2_MVI_0045   RamL
- cycle2_MVI_0046   SitStand
- cycle2_MVI_0047   Drwg
- cycle2_MVI_0048   Typg
- cycle2_MVI_0049   NtsBts
- cycle2_MVI_0050   ***no activity***
- cycle2_MVI_0051   Drnkg
- cycle2_MVI_0052   Sheets_trial1
- cycle2_MVI_0053   Sheets_trial2
- cycle2_MVI_0054   Fldg
- cycle2_MVI_0055   Sitng

### cycle4 video names:
- missing: Wlkg, WlkgCnt, Sheets, Fldg
- cycle4_MVI_0075   shaking watch stand
- cycle4_MVI_0076   Stndg
- cycle4_MVI_0078   FtnR
- cycle4_MVI_0079   FtnL
- cycle4_MVI_0080   RamR
- cycle4_MVI_0081   RamL
- cycle4_MVI_0082   SitStand
- cycle4_MVI_0083   Drwg
- cycle4_MVI_0084   Typg
- cycle4_MVI_0085   NtsBts
- cycle4_MVI_0086   Drnkg
- cycle4_MVI_0089   Sitng

### cycle5 video names:
- missing: Stndg, Wlkg, WlkgCnt, FtnR, Drnkg, Sheets, Fldg
- cycle5_MVI_0095   FtnL
- cycle5_MVI_0096   RamR
- cycle5_MVI_0097   RamL
- cycle5_MVI_0098   SitStand
- cycle5_MVI_0100   Drwg
- cycle5_MVI_0101   Typg
- cycle5_MVI_0102   NtsBts
- cycle5_MVI_0106   Sitng

# 1050 - cycle 1-6
- note: There is NO cycle 1 video
- Cycle6 is split into 2 parts, so used modified functions to annotate and clip videos
- Part1 activities: standing - Nuts and Bolts (part 1)
- Part2 activities: Nuts and Bolts (part 2) - sitting
- cycle 6 part 1 can be considered as cycle 6 in annotated file
- cycle 6 part 2 can be considered as cycle 7 in annotated file
- The Nuts and Bolts activity has two separate clips in cycle 6: one from part 1 and one from part 2

In [54]:
# get subject id from dictionary: FoxInsightID for subject code 1050
# ('Unknown' if the code is missing), stored in the module-level `id`
id = id_dict.get(1050, 'Unknown')
print(id)

cisuabn14


In [63]:
def clip_fullvideo_1050(df, subj_path, subject_id=None):
    """Clip the cycle videos of subject 1050 into one video per activity.

    Works like clip_fullvideo, except that cycle 6 is stored as two videos:
    rows with cycle '6' use '<subject id>_cycle6_part1.mp4' and the
    'cycle6_part1' folder, rows with cycle '7' use '<subject id>_cycle6_part2.mp4'
    and the 'cycle6_part2' folder.

    Args:
        df: dataframe with 'cycle', 'shortname', 'start time sec' and
            'stop time sec' columns (and 'subject code' unless `subject_id`
            is given). The 'cycle' column is converted to str in place.
        subj_path: subject folder holding the cycle videos and cycle folders.
        subject_id: subject id used in the video file names. Defaults to each
            row's 'subject code', so the function no longer depends on a
            module-level `id` being set beforehand.
    """
    # convert column from int to str type to use for video names
    df['cycle'] = df['cycle'].apply(str)

    # annotated cycle number -> name used for the split cycle 6 video and folder
    split_cycle_names = {'6': '6_part1', '7': '6_part2'}

    if subject_id is None:
        subject_ids = df['subject code']
    else:
        subject_ids = [subject_id] * len(df)

    # iterate the columns in lockstep so the loop does not rely on the index labels
    rows = zip(subject_ids, df['cycle'], df['shortname'],
               df['start time sec'], df['stop time sec'])
    for subject, cycleindex, activity, t1, t2 in rows:
        cycle_label = split_cycle_names.get(cycleindex, cycleindex)

        video_name = str(subject) + '_cycle' + cycle_label + '.mp4'
        video_full = os.path.join(subj_path, video_name)

        # build paths with os.path.join rather than hand-written backslashes
        target_dir = os.path.join(subj_path, 'cycle' + cycle_label)
        os.makedirs(target_dir, exist_ok=True)  # make sure the cycle folder exists
        targetname = os.path.join(target_dir, activity + '.mp4')

        # clip activity video
        ffmpeg_extract_subclip(video_full, t1, t2, targetname=targetname)

In [59]:
def convert_frame_to_sec_1050(df, subj_path, cycle_num=6):
    """Add activity short names and start/stop seconds for subject 1050, then save.

    The expected row layout is four complete activity sets followed by the
    split cycle 6: part 1 (Stndg through NtsBts) and part 2 (NtsBts through
    Sitng), i.e. 4 * 15 + 11 + 5 = 76 rows.

    Args:
        df: annotation dataframe with 'start frame' and 'stop frame' columns,
            one row per activity in the layout above. Modified in place.
        subj_path: subject folder; 'sec_annotation.csv' is written there.
        cycle_num: accepted for parity with convert_frame_to_sec; not used,
            because the layout for this subject is fixed.

    Returns:
        The same dataframe with 'shortname', 'start time sec' and
        'stop time sec' columns added.

    Raises:
        ValueError: if df does not have the expected number of rows.
    """
    # abbreviated activity names, in annotation order
    # Note: omitted 'Shaking' activity
    activity = ('Stndg', 'Wlkg', 'WlkgCnt', 'FtnR', 'FtnL', 'RamR', 'RamL', 'SitStand',
                'Drwg', 'Typg', 'NtsBts', 'Drnkg', 'Sheets', 'Fldg', 'Sitng')
    activity6part1 = ('Stndg', 'Wlkg', 'WlkgCnt', 'FtnR', 'FtnL', 'RamR', 'RamL', 'SitStand',
                      'Drwg', 'Typg', 'NtsBts')
    activity6part2 = ('NtsBts', 'Drnkg', 'Sheets', 'Fldg', 'Sitng')
    shortnames = activity*4 + activity6part1 + activity6part2

    # fail with an explicit message rather than pandas' generic length error
    if len(shortnames) != len(df):
        raise ValueError(
            'Expected %d annotation rows for subject 1050 but got %d'
            % (len(shortnames), len(df))
        )
    df['shortname'] = np.array(shortnames)

    # convert frames to sec
    df['start time sec'] = df['start frame'].apply(frame_to_sec)
    df['stop time sec'] = df['stop frame'].apply(frame_to_sec)

    # save updated file; to_csv opens and closes the file itself, so no
    # separate open() handle is needed
    filename = os.path.join(subj_path, 'sec_annotation.csv')
    df.to_csv(filename, sep=',')

    return df

In [65]:
def convert_and_clip_1050(id, id_dict=id_dict):
    """Run the whole pipeline for subject 1050: clean, convert to seconds, clip.

    Uses the 1050-specific helpers that handle the split cycle 6 video.

    Args:
        id: 4 digit subject code; looked up in `id_dict` to get the subject id
            used for the folder and video names ('Unknown' if not found).
        id_dict: mapping of subject code to subject id.

    Returns:
        The annotation dataframe after conversion and clipping.

    Note: the per-subject cells also assign the module-level `id` before
    calling this function; keep doing so, since the clipping step may read it.
    """
    subject = id_dict.get(id, 'Unknown')
    subj_path = os.path.join(r'X:\CIS-PD Videos', subject)

    # load the raw annotations with a fresh 0..n-1 index
    annotations = dataclean('raw_annotation.csv', subj_path, subject).reset_index(drop=True)

    # add short names and start/stop seconds (also writes sec_annotation.csv)
    annotations = convert_frame_to_sec_1050(annotations, subj_path)

    # cut each cycle video into one clip per activity
    clip_fullvideo_1050(annotations, subj_path)

    return annotations

In [None]:
# clean the raw annotations, convert frames to sec, and clip the videos
# for subject 1050, using the variants that handle the split cycle 6 video
df = convert_and_clip_1050(1050)

# Create sec_annotation.csv for cisnwh8 from existing file
- videos were already clipped into each cycle
- annotation file in different format, needs to be converted to match rest of subjects

In [7]:
# get subject id from dictionary: FoxInsightID for subject code 1030
# ('Unknown' if the code is missing), stored in the module-level `id`
id = id_dict.get(1030, 'Unknown')
print(id)

cisnwh8


In [33]:
# read in the existing annotation file for cisnwh8
path = r'X:\CIS-PD Videos\cisnwh8'
filename = os.path.join(path, '1030_time_in_sec.csv') # annotation file in the older format
temp = pd.read_csv(filename)


In [34]:
# make changes to create sec_annotation.csv file consistent with other subjects

# remove columns: 'Unnamed: 0' (presumably an index saved by an earlier to_csv -- confirm)
# and the old 'subject code', which is re-added below from `id`
temp = temp.drop(columns=['Unnamed: 0', 'subject code'])
# change column names to the ones used for the other subjects;
# index=str also converts the row labels to strings
# NOTE(review): 'start time min'/'stop time min' are relabeled as seconds with no unit
# conversion -- presumably the values are already in seconds (file is '1030_time_in_sec.csv'); confirm
temp = temp.rename(index=str, columns={'start time min':'start time sec', 'stop time min':'stop time sec',
                                      'start time':'start frame', 'stop time':'stop frame'})
# add 'subject code' column with value set to id (the module-level `id` set in the lookup cell)
temp['subject code']=id
# change column order to match the other subjects' files
temp = temp[['subject code','start frame','stop frame','activity','cycle','shortname','start time sec','stop time sec']]

In [48]:
# save updated file as sec_annotation.csv in the subject's folder
path = r'X:\CIS-PD Videos'
subj_path = os.path.join(path,id)
fname = 'sec_annotation.csv'
filename = os.path.join(subj_path, fname)
# to_csv opens and closes the file itself; the extra open(filename, 'wb')
# handle was never written to, so it is dropped
temp.to_csv(filename, sep=',')