In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import cv2

import os
import sys
import glob

In [3]:
# Dataset locations, resolved against the directory the notebook was started in.
# videos_dir holds the camera-0 videos, labels_dir the sentence/translation CSV files.
current_dir = os.getcwd()
videos_dir, labels_dir = (
    os.path.join(current_dir, relative_path)
    for relative_path in ('rwth-boston-104/videoBank/camera0/', 'rwth-boston-104/corpus/')
)

In [31]:
# obtain the names of the different videos and the corresponding translations
# then sort them according to video number (use this later for dividing into train/dev/test)
# and save the names/translations to a csv file
#
# Files are addressed by absolute path instead of os.chdir(), so this cell
# neither depends on nor changes the notebook's working directory.
train = pd.read_csv(
    os.path.join(labels_dir, 'train.sentences.pronunciations.multi.translations.csv'),
    sep = ';')
test = pd.read_csv(
    os.path.join(labels_dir, 'test.sentences.multi.translations.csv'),
    sep = ';')

# Series.append() was removed in pandas 2.0; pd.concat() gives the same result
# (train rows followed by test rows, fresh 0..n-1 index) on old and new pandas.
names = pd.concat([train['video'], test['video']], ignore_index = True)
labels = pd.concat([train['translation'], test['translation']], ignore_index = True)

# keep only the integer in front of the first '_' of each video name
# (presumably names look like '001_0' - verify against the csv files)
names = [int(i.split('_')[0]) for i in names]

# sort the (video number, translation) pairs by video number
sorted_zip = sorted(zip(names, labels), key = lambda x: x[0])

names, labels = [list(x) for x in zip(*sorted_zip)]

df = pd.DataFrame(sorted_zip, columns = ['Video', 'Translations'])
print(df.head())

# need to save the csv file in the Data folder, i.e. two levels above labels_dir
# (normpath drops the trailing separator so that dirname really moves up one level)
data_dir = os.path.dirname(os.path.dirname(os.path.normpath(labels_dir)))
path = os.path.join(data_dir, 'labels.csv')

df.to_csv(path, index = False)

   Video                     Translations
0      1        John writes his homework.
1      2        John writes his homework.
2      3  I visited John there yesterday.
3      4  I visited John there yesterday.
4      5                 Mary loves John.


In [20]:
# quick sanity check: the text in front of '_' converts cleanly to an int
x = ['001_', '004_', '021_']
y = list(map(lambda video_name: int(video_name.partition('_')[0]), x))
print(y)

[1, 4, 21]


In [4]:
# extract frames from a given video
def FrameCapture(name, path, video_dir):
    '''
    Save every frame of one video as a numbered JPEG image.

    The frames are written into the folder <video_dir>/<name>/ as
    <name>_<NNN>.jpg, where NNN is the 1-based frame number zero-padded to
    three digits (with name '001' the first frame is saved as 001_001.jpg).
    If that folder already exists the video is skipped, so videos that were
    already processed are not extracted again on a re-run.

    The working directory of the process is left untouched: frames are
    written through their full path.

    name: the name of the video being processed (also the name of the frame folder)
    path: absolute path of the video being processed
    video_dir: directory of the video; the frame folder is created inside it

    returns: the number of frames written; 0 when the video is skipped,
             cannot be opened, or the frame folder cannot be created
    '''
    frames_dir = os.path.join(video_dir, name) # get the path to the directory to store the frames

    # the frames of this video were already extracted (or are being handled elsewhere)
    if os.path.exists(frames_dir):
        return 0

    vid = cv2.VideoCapture(path) # get the video
    frames_written = 0

    try:
        # check this BEFORE creating the folder: an empty folder left behind by an
        # unreadable video would make every later run skip that video silently
        if not vid.isOpened():
            print('Error - cannot open video ' + str(path))
            return 0

        try:
            os.mkdir(frames_dir)
        except OSError:
            print('Error - cannot create new directory')
            return 0

        current_frame = 0
        while True:
            # read in the next frame; success is False once the video is exhausted
            success, frame = vid.read()
            if not success:
                break

            current_frame += 1
            file_name = name + '_' + str(current_frame).zfill(3) + '.jpg'

            # cv2.imwrite reports failure through its return value
            if cv2.imwrite(os.path.join(frames_dir, file_name), frame):
                frames_written += 1
            else:
                print('Error - cannot write frame ' + file_name)

    except Exception as err:
        # stay best-effort so one bad video does not abort a whole batch, but say
        # what actually went wrong (KeyboardInterrupt is no longer swallowed)
        print('Error - failed while extracting frames from ' + str(path) + ': ' + str(err))

    finally:
        vid.release()

    return frames_written

In [5]:
# Run the frame extraction for the videos numbered 1 to 201
# (files 001_0.mpg ... 201_0.mpg inside videos_dir).
for i in range(1, 202):
    filled_num = '{:03d}'.format(i)  # zero-padded to three digits, e.g. '007'

    # NOTE(review): this needs a list called `names` to exist already and adds
    # the padded number (a str) to it - confirm which list this is meant to fill.
    names.append(filled_num)

    path = os.path.join(videos_dir, filled_num + '_0.mpg')

    # the frames of each video end up in a folder named after its number
    FrameCapture(filled_num, path, videos_dir)

In [99]:
# trial of seeing what numbers to use for the cropping
# (the frame is addressed by its full path, so the working directory is not changed)
sample_path = os.path.join(videos_dir, '135', '135_054.jpg')
print(os.path.dirname(sample_path))

img = cv2.imread(sample_path)
if img is None:
    # cv2.imread returns None for a missing/unreadable file instead of raising
    raise FileNotFoundError('cannot read image: ' + sample_path)

# keep rows 9..239 and columns 3..329 of the frame
img_cropped = img[9:240, 3:330]

# OpenCV loads colour images as BGR while matplotlib expects RGB; without the
# conversion the preview is shown with red and blue swapped
plt.imshow(cv2.cvtColor(img_cropped, cv2.COLOR_BGR2RGB))

/Users/sunnyshah/Desktop/Spring 2020/CS 230/Project/Data/rwth-boston-104/videoBank/camera0/135


In [114]:
# Crop every extracted frame found in the sub-folders of videos_dir and save the
# result next to the original as <frame>_cropped.jpg.
#
# One os.walk pass with full paths replaces the chdir + nested walk: every file
# is read from the folder it really lives in, and no folder is visited twice.
CROPPED_SUFFIX = '_cropped.jpg'
frames_root = os.path.normpath(videos_dir)

if not os.path.isdir(frames_root):
    raise FileNotFoundError('video directory not found: ' + frames_root)

for folder, _, files in os.walk(frames_root):
    # only images inside sub-folders are processed, not files lying directly in videos_dir
    if folder == frames_root:
        continue

    for f in files:
        # skip non-images, and skip earlier outputs so that a re-run does not
        # produce *_cropped_cropped.jpg files
        if not f.endswith('.jpg') or f.endswith(CROPPED_SUFFIX):
            continue

        source = os.path.join(folder, f)
        img = cv2.imread(source)
        if img is None:
            # cv2.imread returns None for an unreadable file instead of raising
            print('Warning - cannot read image ' + source)
            continue

        # keep rows 9..239 and columns 3..329 of the frame
        img_cropped = img[9:240, 3:330]

        name = os.path.splitext(f)[0] + CROPPED_SUFFIX

        if not cv2.imwrite(os.path.join(folder, name), img_cropped):
            print('Warning - cannot write image ' + os.path.join(folder, name))

In [124]:
# removes all the uncropped images from the folder 'Processed Data'
#
# One os.walk pass with full paths replaces the chdir + nested walk, so each file
# is deleted from the folder it really lives in (a bare file name is only valid
# relative to the folder currently being walked, not to the working directory).
processed_dir = os.path.normpath(os.path.join(videos_dir, 'Processed Data'))

if not os.path.isdir(processed_dir):
    raise FileNotFoundError('folder not found: ' + processed_dir)

for folder, _, files in os.walk(processed_dir):
    # only the sub-folders are cleaned; files directly in 'Processed Data' are left alone
    if folder == processed_dir:
        continue

    for f in files:
        if f.endswith('.jpg') and not f.endswith('_cropped.jpg'):
            os.remove(os.path.join(folder, f))

In [132]:
# renames all the files '*_cropped.jpg' as '*.jpg'
#
# Only files that still carry the '_cropped.jpg' suffix are renamed. Matching on
# plain '.jpg' made a second run turn '001_054.jpg' into '001_054.jpg.jpg', and a
# .jpg without any '_' raised an IndexError.
# One os.walk pass with full paths replaces the chdir + nested walk, so each file
# is renamed inside the folder it really lives in.
processed_dir = os.path.normpath(os.path.join(videos_dir, 'Processed Data'))
cropped_suffix = '_cropped.jpg'

if not os.path.isdir(processed_dir):
    raise FileNotFoundError('folder not found: ' + processed_dir)

for folder, _, files in os.walk(processed_dir):
    # only the sub-folders are handled; files directly in 'Processed Data' are left alone
    if folder == processed_dir:
        continue

    for f in files:
        if not f.endswith(cropped_suffix):
            continue

        # '001_054_cropped.jpg' -> '001_054.jpg'
        new_name = f[:-len(cropped_suffix)] + '.jpg'

        source = os.path.join(folder, f)
        destination = os.path.join(folder, new_name)
        os.rename(source, destination)