# Download, annotate, and clean CIS-PD Videos

Project status:
- COMPLETE: Download cycle videos for list of subjects
- COMPLETE: Annotate videos
- IN PROGRESS: Clip videos into activities
    - fix dataclean()
- Get start times for videos using Python
- Use start/stop frame number and convert to UTC
- Modify GUI function to use my start/stop times to label data for relevant subjects and cycles below

Videos to download, annotate and clip:
- 1003 - all cycles
- 1005 - skip cycle 5, all cycles
- 1007 - cycle 1-4
- 1009 - 1, 4, 6
- 1019 - 4
- 1023 - 2-6
- 1024 - 3
- 1039 - 1-6
- 1043 - 2-6
- 1048 - 1-5
- 1050 - 1-6

## File structure
- Each subject's cycle videos are compiled into the 'raw_annotations.csv' file in each subject's folder.
- Each cycle video is clipped, dumped into corresponding cycle folder (ie. cycle1), and named by the short name (ie. Wlkg for Walking).
- Pertinent notes for each subject are noted below under the corresponding subject heading.

In [1]:
# Importing the Libraries
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import re
import datetime as dt
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

## Run Video Annotator in Terminal

If pythonvideoannotator is installed,

for windows:

    activate videoannotator

    python -m pythonvideoannotator
    
for mac:
    
    source activate videoannotator
    
    python -m pythonvideoannotator

# Check RTO Network Drive and change path(s) as needed
- Y: or Z: ?

# Create dictionaries to decode id for videos

In [2]:
# Dataframe of subject ids to match
path = r'Z:\CIS-PD MUSC\decoded_forms'
filename = os.path.join(path, 'videoID.csv') # ie. file = 'videoID.csv'
subjid_df = pd.read_csv(filename)
# Change SubjectCode column from type float to int
subjid_df.SubjectCode = subjid_df.SubjectCode.astype('int')

print(subjid_df.head(3))

   Unnamed: 0  SubjectCode FoxInsightID  Subj ID Athena User Name     Site
0           0         1000     cisuaba1        142557.0  cisuaba1  alabama
1           1         1001     cisuabb2        142558.0  cisuabb2  alabama
2           2         1002     cisuabc3        142559.0  cisuabc3  alabama


In [3]:
# Dictionary of SubjecctCode: FoxInsightID
id_dict = subjid_df.set_index('SubjectCode').to_dict()['FoxInsightID']
reverse_id_dict = subjid_df.set_index('FoxInsightID').to_dict()['SubjectCode']

In [4]:
# Check id for video downloads
# get subject id from dictionary
id = reverse_id_dict.get('ciscij10','Unknown')
print('ciscij10', ' is ', id)

ciscij10  is  1043


## Ignore since I'm not doing this. 
Make sure dataclean() starts index at 0 since it's messing up the next func for +1 on stop time loop.

In [7]:
def dataclean(file, path, id): 
    """Function takes pythonVideoAnnotator file and cleans up into dataframe."""
    
    colnames = ['subject code', '1', 'start frame', 'stop frame', 'activity', '2', 'cycle']
    selectedcol= ['subject code','start frame', 'stop frame', 'activity', 'cycle']
    #df = pd.read_csv(os.path.join(path, 'cycle1.csv'), names=colnames, usecols=selectedcol)
    filename = os.path.join(path, file) # ie. file = 'cycle1.csv'
    df = pd.read_csv(filename, names=colnames, usecols=selectedcol)
    df = df.dropna()
    df['cycle'] += 1
    df['start frame'] = pd.to_numeric(df['start frame'], downcast='integer')
    df['stop frame'] = pd.to_numeric(df['stop frame'], downcast='integer')
    df['cycle'] = pd.to_numeric(df['cycle'], downcast='integer')
    df['subject code'] = id
    print(df.head(3))
    
    return df

In [None]:
# sample code to concatenate dataframes if needed
# Majority of raw_annotations should have all relevant data in one dataframe
df1 = dataclean('cycle1.csv')
df2 = dataclean('cycle2.csv')
df3 = dataclean('cycle3incomplete.csv')
df4 = dataclean('cycle36.csv')
frames = [df1, df2, df3, df4]
df = pd.concat(frames, ignore_index=True)

In [None]:
# save annotated file
subj = '1030' # id
file = subj + 'annotated.csv'
path = r'Y:\CIS-PD Videos\Cisnwh8' # change to correct folder
filename = os.path.join(path, file)
with open(filename,'wb') as f:
    df.to_csv(filename, sep=',')

In [9]:
def frame_to_sec(frames,fps=30):
    """Convert frames with fps sampling rate to time in total sec."""
    seconds = int((frames / (60 * fps) % 60)*60)
    return seconds

In [10]:
def convert_frame_to_sec(df):
    """Function takes dataframe converts frames column to seconds to be used later to clip video."""
    # Example call: convert_frame_to_sec(os.path.join(path, '1030annotated.csv'))

    # add column of activity abbreviated name
    # Note: omitted 'Shaking' activity
    activity = ('Stndg', 'Wlkg', 'WlkgCnt', 'FtnR', 'FtnL', 'RamR', 'RamL', 'SitStand', 
                  'Drwg', 'Typg', 'NtsBts', 'Drnkg', 'Sheets', 'Fldg', 'Sitng')
    df['shortname'] = np.array(activity*6)
    
    # convert frames to sec
    df['start time sec'] = df['start frame'].apply(frame_to_sec)
    df['stop time sec'] = df['stop frame'].apply(frame_to_sec)

    ###########################
    # optional: add 1 sec to stop time except sitting
    #for i in range(len(df)-1):
    #    df['stop time sec'][i] +=1
    
    # save updated file
    with open(filename,'wb') as f:
        df.to_csv(filename, sep=',')

In [11]:
def clip_fullvideo(df, video_name):
    """Takes video with complete activity set and clips them into separate activities.
    Args: df = dataframe that has the timestamps for start and stop in sec
          video_name = full video name with complete activity set
    """
    # convert to column from int to str type
    df['cycle'] = df['cycle'].apply(str)
    
    # Loop through and clip each activity to corresponding cycle video
    for i in range(len(df)):
        cycleindex = df['cycle'][i]
        video_name = 'Cisnwh8_cycle' + cycleindex + '.mp4' # omit since there's an arg?
        video_full = os.path.join(path, video_name)
        t1 = df['start time sec'][i]
        t2 = df['stop time sec'][i]
        activity = df['shortname'][i]
        # path_for_target = os.path.join(path,'\cycle' + cycleindex)
        path_for_target = path + '\cycle' + cycleindex + '\\'
        targetname = path_for_target + activity + '.mp4'
        print(activity, t1, t2, targetname)

        # clip activity video
        ffmpeg_extract_subclip(video_full, t1, t2, targetname=targetname)

Activity List:

Shaking (omitted)

Standing

Walking

Walking while counting

Finger to nose--right hand

Finger to nose--left hand

Alternating right hand movements

Alternating left hand movements

Sit to stand

Drawing on a paper

Typing on a computer keyboard

Assembling nuts and bolts

Taking a glass of water

Organizing sheets in a folder

Folding towels

Sitting

In [22]:
def dataprocess(id, id_dict=id_dict):#df, video_name):
    """Input id, then it will execute all functions to clips videos."""
    # get subject id
    id = id_dict.get(1003, 'Unknown')
    file = 'raw_annotation.csv'
    path = r'Z:\CIS-PD Videos'
    subj_path = os.path.join(path,id)
    df = dataclean(file, subj_path, id)
    #df['start time sec'] = df['start frame'].apply(frame_to_sec)
    #df['stop time sec'] = df['stop frame'].apply(frame_to_sec)
    convert_frame_to_sec(df)
    
    
    #clip_fullvideo(df, video_name)
    
    
    print(subj_path)

In [23]:
# check output
dataprocess(1003)

Z:\CIS-PD Videos\cisuabd4


# 1003 - all cycles

In [8]:
# get subject id from dictionary
id = id_dict.get(1003, 'Unknown')
print(id)

cisuabd4


In [12]:
file = 'raw_annotation.csv'
path = r'Z:\CIS-PD Videos\cisuabd4'
id = 1003
df = dataclean(file, path, id)

   subject code  start frame  stop frame                activity  cycle
1          1003         2379        3146                Standing      1
2          1003         4108        5396                 Walking      1
3          1003         6509        7732  Walking while counting      1


In [13]:
df['start time sec'] = df['start frame'].apply(frame_to_sec)
df['stop time sec'] = df['stop frame'].apply(frame_to_sec)
print(df.head(3))

   subject code  start frame  stop frame                activity  cycle  \
1          1003         2379        3146                Standing      1   
2          1003         4108        5396                 Walking      1   
3          1003         6509        7732  Walking while counting      1   

   start time sec  stop time sec  
1              79            104  
2             136            179  
3             216            257  


In [14]:
# check contents
df['stop time sec'][1]

104

In [None]:
df = convert_frame_to_sec(df)

# 1005 - skip cycle 5, all cycles
- Note: No cycle 6 video

In [None]:
# get subject id from dictionary
id = id_dict.get(1005, 'Unknown')
print(id)

# 1007 - cycle 1-4

In [None]:
# get subject id from dictionary
id = id_dict.get(1007, 'Unknown')
print(id)

# 1009 - cycle 1, 4, 6

In [None]:
# get subject id from dictionary
id = id_dict.get(1009, 'Unknown')
print(id## annotated video)

# 1019 - cycle 4

In [None]:
# get sub## annotated videoject id from dictionary
id = id_dict.get(1019, 'Unknown')
print(id)

# 1023 - cycle 2-6
- Note: these videos cut to each activity, so NO need to add 1sec to end of each activity
- Video seems edited given the opening title of each video and the way they're spliced

In [None]:
# g## annotated videoet subject id from dictionary
id = id_dict.get(1023, 'Unknown')
print(id)

# 1024 - cycle 3
- Note: these videos cut to each activity, so NO need to add 1sec to end of each activity
- Video seems edited given the opening title of each video and the way they're spliced

In [None]:
# get subject id from dictionary
id ## annotated video= id_dict.get(1024, 'Unknown')
print(id)

# 1039 - cycle 1-6
- Note: these videos cut to each activity, so NO need to add 1sec to end of each activity
- Video seems edited given the opening title of each video and the way they're spliced

In [None]:
# get subject id from dictionary
id = id_dict.get(1039, 'Unknown')- Note: these videos cut to each activity, so NO need to add 1sec to end of each activity
- Video seems edited given the opening title of each video and the way they're spliced
print(id)

# 1043 - cycle 2-6
- Note: these videos cut to each activity, so NO need to add 1sec to end of each activity
- Video seems edited given the opening title of each video and the way they're spliced

In [None]:
# get subject id from dictionary
id = id_dict.get(1043, 'Unknown')
print(id)

# 1048 - cycle 1-5
- raw Video name: added 'cycle' + corresponding number + '_' as prefix to name
- cycle1: video names are cycle1_MVI_0032 (towel folding), cycle1_MVI_0033 (towel folding), cycle1_MVI_0034 (sit)
- cycle3: only video is watch shaking

## Did NOT annotate video as it was already clipped

### cycle2 video names:
- cycle2_MVI_0035   shaking watch stand
- cycle2_MVI_0036   shaking watch sit
- cycle2_MVI_0037   Stndg
- cycle2_MVI_0038   Wlkg
- cycle2_MVI_0039   WlkgCnt
- cycle2_MVI_0042   FtnR
- cycle2_MVI_0043   FtnL
- cycle2_MVI_0044   RamR
- cycle2_MVI_0045   RamL
- cycle2_MVI_0046   SitStand
- cycle2_MVI_0047   Drwg
- cycle2_MVI_0048   Typg
- cycle2_MVI_0049   NtsBts
- cycle2_MVI_0050   ***no activity***
- cycle2_MVI_0051   Drnkg
- cycle2_MVI_0052   Sheets_trial1
- cycle2_MVI_0053   Sheets_trial2
- cycle2_MVI_0054   Fldg
- cycle2_MVI_0055   Sitng

### cycle4 video names:
- missing: Wlkg, WlkgCnt, Sheets, Fldg
- cycle4_MVI_0075   shaking watch stand
- cycle4_MVI_0076   Stndg
- cycle4_MVI_0078   FtnR
- cycle4_MVI_0079   FtnL
- cycle4_MVI_0080   RamR
- cycle4_MVI_0081   RamL
- cycle4_MVI_0082   SitStand
- cycle4_MVI_0083   Drwg
- cycle4_MVI_0084   Typg
- cycle4_MVI_0085   NtsBts
- cycle4_MVI_0086   Drnkg
- cycle4_MVI_0089   Sitng

### cycle5 video names:
- missing: Stndg, Wlkg, WlkgCnt, FtnR, Drnkg, Sheets, Fldg
- cycle5_MVI_0095   FtnL
- cycle5_MVI_0096   RamR
- cycle5_MVI_0097   RamL
- cycle5_MVI_0098   SitStand
- cycle5_MVI_0100   Drwg
- cycle5_MVI_0101   Typg
- cycle5_MVI_0102   NtsBts
- cycle5_MVI_0106   Sitng

In [None]:
# get subject id from dictionary
id = id_dict.get(1048, 'Unknown')
print(id)

# 1050 - cycle 1-6
- note: There is NO cycle 1 video
- Cycle6 is split into 2 parts, so will need to modify function that clips videos
- Part1 activities: standing - Nuts and Bolts (part 1)
- Part2 activities: Nuts and Bolds (part 2) - sitting
- Nuts and Bolts parts 1 and 2 need to be concatenated

In [None]:
# get subject id from dictionary
id = id_dict.get(1050, 'Unknown')
print(id)