# Data Acquisition

In [1]:
from pytube import YouTube 
import os
import shutil #to move files around
import glob 
import cv2
import imageio  #to read gif files

### Directory to store youtube data

In [2]:
# Root folder for all downloaded data; relative to the working directory of the notebook
savedir = 'collected_data'

In [3]:
!ls

get_data.ipynb	label_frames.ipynb


### Download Videos from YouTube

In [4]:
def downloadYouTube(videourl, path):
    """Download one YouTube video as mp4 into ``path``.

    Of all progressive mp4 streams offered for the video, the first one after
    sorting by resolution in descending order is downloaded.

    videourl: URL of the YouTube video
    path: target directory; created (with a console message) if it is missing

    Raises ValueError if the video offers no progressive mp4 stream.
    """
    video = YouTube(videourl)
    stream = (video.streams
              .filter(progressive=True, file_extension='mp4')
              .order_by('resolution')
              .desc()
              .first())
    # first() yields None for an empty query; fail with a readable message
    # instead of an AttributeError on None further down.
    if stream is None:
        raise ValueError('No progressive mp4 stream available for ' + str(videourl))
    if not os.path.exists(path):
        os.makedirs(path)
        print('Built directory ', path)
    stream.download(path)



# Videos to download, one URL per entry.
linklist = [
    'https://www.youtube.com/watch?v=d2dx7fy6XnY',
    'https://www.youtube.com/watch?v=VdIX8auOH_M',
    'https://www.youtube.com/watch?v=4Ejz7IgODlU',
    'https://www.youtube.com/watch?v=EkqEjfGEqH4',
    'https://www.youtube.com/watch?v=-Lj5i6bSCgA',
]

# All videos are stored in one shared folder below `savedir`.
youtube_dir = os.path.join(savedir, 'youtube_files')
for link in linklist:
    downloadYouTube(link, youtube_dir)
    print('Download of', link)

Built directory  collected_data/youtube_files
Download of https://www.youtube.com/watch?v=d2dx7fy6XnY
Download of https://www.youtube.com/watch?v=VdIX8auOH_M
Download of https://www.youtube.com/watch?v=4Ejz7IgODlU
Download of https://www.youtube.com/watch?v=EkqEjfGEqH4
Download of https://www.youtube.com/watch?v=-Lj5i6bSCgA


In [5]:
!ls

collected_data	get_data.ipynb	label_frames.ipynb


### Get Frames from GIF Files

In [7]:
def get_frames_from_gif(gif_path):
    """Read all frames of a gif file and return them as a list in BGR order.

    gif_path: path of the gif file, e.g. 'pics/gif_originals/w2.gif'

    The number of frames is printed. Each frame is converted from RGB to BGR
    with cv2.cvtColor before it is returned.
    """
    rgb_frames = imageio.mimread(gif_path)
    print("Total {} frames in the gif!".format(len(rgb_frames)))
    bgr_frames = []
    for rgb_frame in rgb_frames:
        bgr_frames.append(cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR))
    return bgr_frames

### Build a new directory and move the listed files into it to simplify labeling

In [8]:
#automated labeling
def split_files_to_folder(filelist, dirname):
    """Move every file in ``filelist`` into the directory ``dirname``.

    filelist: iterable of file paths (plain names, relative or absolute paths)
    dirname: target directory; created (with a console message) if missing

    Each file keeps its own name inside ``dirname``. Only the last path
    component is used for the destination, so a path such as 'sub/a.jpg' or an
    absolute path ends up directly in ``dirname`` instead of pointing at a
    non-existing subfolder or back at the source file.
    """
    if not os.path.exists(dirname):
        os.makedirs(dirname)
        print('Built new dir', dirname)
    for filename in filelist:
        destination = os.path.join(dirname, os.path.basename(filename))
        shutil.move(filename, destination)

### Change to directory where videos are saved

In [9]:
# The following cells use relative paths ('*.mp4', '.') and therefore depend on this
# working directory. The path is relative, so running this cell a second time
# resolves it against the already changed working directory.
os.chdir(os.path.join(savedir, 'youtube_files'))

In [10]:
!ls 

'Jessamyn Stanleys 8-Minute Yoga for Self-Love  Health.mp4'
'Warrior 2 Yoga Pose - Yoga With Adriene.mp4'
'Warrior II Pose Virabhadrasana II.mp4'
'YogaVibescom - Kids Yoga Warrior II (Virabhadrasana II) & Warrior III (Virabhadrasana III).mp4'
'Yoga Warrior Two Sequence.mp4'


In [11]:
import re

# Give every downloaded video a folder of its own and move the video into it.
video_list = glob.glob('*.mp4')
for file in video_list:
    # Folder name: the file name without its .mp4 extension, reduced to word
    # characters (letters, digits, underscore); blanks and punctuation are dropped.
    dir_name = os.path.splitext(file)[0]
    dirname = re.sub(r'\W+', '', dir_name)
    if os.path.exists(dirname) is False:
        os.makedirs(dirname)
    shutil.move(file, dirname)
  
  

### Split youtube videos into single frames

In [12]:
def video_to_frames(frame_interval=20):
    """Split a video of the current directory into single jpg frames.

    The first '*.mp4' match returned by glob in the current working directory
    is opened and every ``frame_interval``-th frame is written to the current
    directory as 'frame<6-digit frame index>.jpg' (frame000000.jpg,
    frame000020.jpg, ... for the default interval).

    frame_interval: step between two saved frames, counted in video frames

    If the directory contains no mp4 file, a message is printed and nothing
    else happens.
    """
    paths = glob.glob('*.mp4')
    if not paths:
        print('No mp4 file found, nothing to split')
        return
    print(paths[0])
    vidcap = cv2.VideoCapture(paths[0])
    try:
        count = 0
        while True:
            success, image = vidcap.read()
            # A failed read delivers no image; stop before anything is written.
            if not success:
                break
            # count is the index of the frame just read, so the number in the
            # file name is the real position of the frame in the video.
            if count % frame_interval == 0:
                cv2.imwrite('frame%06d.jpg' % count, image)
            count += 1
    finally:
        # Free the video file handle even if reading or writing fails.
        vidcap.release()

### Organize files
1. Go to directory of one video
2. Split video into frames 
3. Sort files in ascending order by file names (e.g. 00000 to 00009)

In [13]:
# Visit every direct subfolder of the current directory and split its video into frames.
for subfolder in next(os.walk('.'))[1]:
    # walk to subdir
    os.chdir(subfolder)
    try:
        # split video into frames
        video_to_frames()
    finally:
        # walk up again, also when splitting failed, so that the working
        # directory of the notebook is not left inside the subfolder
        os.chdir('..')

YogaVibescom - Kids Yoga Warrior II (Virabhadrasana II) & Warrior III (Virabhadrasana III).mp4
Warrior 2 Yoga Pose - Yoga With Adriene.mp4
Yoga Warrior Two Sequence.mp4
Jessamyn Stanleys 8-Minute Yoga for Self-Love  Health.mp4
Warrior II Pose Virabhadrasana II.mp4


In [14]:
!ls JessamynStanleys8MinuteYogaforSelfLoveHealth/

 frame000000.jpg   frame007320.jpg
 frame000020.jpg   frame007340.jpg
 frame000040.jpg   frame007360.jpg
 frame000060.jpg   frame007380.jpg
 frame000080.jpg   frame007400.jpg
 frame000100.jpg   frame007420.jpg
 frame000120.jpg   frame007440.jpg
 frame000140.jpg   frame007460.jpg
 frame000160.jpg   frame007480.jpg
 frame000180.jpg   frame007500.jpg
 frame000200.jpg   frame007520.jpg
 frame000220.jpg   frame007540.jpg
 frame000240.jpg   frame007560.jpg
 frame000260.jpg   frame007580.jpg
 frame000280.jpg   frame007600.jpg
 frame000300.jpg   frame007620.jpg
 frame000320.jpg   frame007640.jpg
 frame000340.jpg   frame007660.jpg
 frame000360.jpg   frame007680.jpg
 frame000380.jpg   frame007700.jpg
 frame000400.jpg   frame007720.jpg
 frame000420.jpg   frame007740.jpg
 frame000440.jpg   frame007760.jpg
 frame000460.jpg   frame007780.jpg
 frame000480.jpg   frame007800.jpg
 frame000500.jpg   frame007820.jpg
 frame000520.jpg   frame007840.jpg
 frame000540.jpg   frame0078

###  Labeling frames
1. Look at the pictures and identify when pose starts & ends (frame number start to end)
2. Use these numbers to label the images faster

#  <font color='red'> TODO add numbers for the other video here!</font>

In [15]:
# warrior II left foot in front
# Hand-picked frame numbers (start / end) of the labeled intervals; they are passed
# to semi_automated_labeling as start_frame_number / end_frame_number.
# Pose: warrior II, left foot in front
adriene_start_good_pose = 6100
adriene_end_good_pose = 6240
# no transition-in interval here because the camera view changes (only the feet are visible)

jessy_start_good_pose = 9120
jessy_end_good_pose = 9520


# presumably the transition into the pose (these frames are labeled 'transition') - TODO confirm
jessy_start_trans_in = 8500
jessy_end_trans_in = 9100

#iyengar_start_good_pose = 1920
#iyengar_end_good_pose =  2780

#start =  2820
#end = 3820

### Organize Frames into subfolders

In [30]:
def semi_automated_labeling(start_frame_number, end_frame_number, unsorted_dir, sort_dirname):
    ''' Move the frames of one labeled interval into a subfolder.

        Frames are selected by the number at the end of their file name
        (frame000120.jpg -> 120), not by their position in the directory
        listing. The selection therefore stays correct when earlier calls
        have already moved files out of unsorted_dir.

        start_frame_number: frame number where the position starts (inclusive)
        end_frame_number: frame number where the position ends (exclusive)
        unsorted_dir: directory that holds the extracted jpg frames
        sort_dirname: subfolder of unsorted_dir that receives the frames;
                      created if missing, reused if it already exists

        jpg files whose name does not end in a number are left where they are.
        The working directory is not changed.
    '''
    target_dir = os.path.join(unsorted_dir, sort_dirname)
    # exist_ok: several intervals may be collected in the same label folder
    os.makedirs(target_dir, exist_ok=True)
    pattern = os.path.join(glob.escape(unsorted_dir), '*.jpg')
    for frame_path in sorted(glob.glob(pattern)):
        frame_name = os.path.basename(frame_path)
        stem = os.path.splitext(frame_name)[0]
        # trailing run of digits = frame number written by the frame extraction
        digits = stem[len(stem.rstrip('0123456789')):]
        if not digits:
            continue
        if start_frame_number <= int(digits) < end_frame_number:
            shutil.move(frame_path, os.path.join(target_dir, frame_name))

### Move all files of a pose / no-pose interval to a folder

In [None]:
#for subfolder in next(os.walk('.'))[1]:
#    #walk to subdir
#    os.chdir(subfolder)
#    #split video into frames
#    video_to_frames()
#    #walk up again
#    os.chdir('..')

In [17]:
!ls

iyengar
JessamynStanleys8MinuteYogaforSelfLoveHealth
Warrior2YogaPoseYogaWithAdriene
WarriorIIPoseVirabhadrasanaII_rechte_Seite
YogaVibescomKidsYogaWarriorIIVirabhadrasanaIIWarriorIIIVirabhadrasanaIII
YogaWarriorTwoSequence


In [21]:
# Collect the good-pose interval of the Adriene video in the subfolder 'w2_left'
semi_automated_labeling(adriene_start_good_pose, adriene_end_good_pose, 'Warrior2YogaPoseYogaWithAdriene', 'w2_left')

In [29]:
#
!ls

iyengar
JessamynStanleys8MinuteYogaforSelfLoveHealth
Warrior2YogaPoseYogaWithAdriene
WarriorIIPoseVirabhadrasanaII_rechte_Seite
YogaVibescomKidsYogaWarriorIIVirabhadrasanaIIWarriorIIIVirabhadrasanaIII
YogaWarriorTwoSequence


In [31]:
# Collect the good-pose interval of the Jessamyn Stanley video in the subfolder 'w2_left'
semi_automated_labeling(jessy_start_good_pose, jessy_end_good_pose, 'JessamynStanleys8MinuteYogaforSelfLoveHealth', 'w2_left')

In [32]:
# Collect the transition-in interval of the Jessamyn Stanley video in the subfolder 'transition'
semi_automated_labeling(jessy_start_trans_in, jessy_end_trans_in, 'JessamynStanleys8MinuteYogaforSelfLoveHealth', 'transition')