# Step 1 : frame extraction

In [None]:
import cv2
import os
import time
import uuid
import pandas as pd

In [None]:
# Folder that holds the sign-language videos.
video_folder = './video/'

# Folder that receives the frames extracted from each video.
frame_folder = './frame/'

file_list = os.listdir(video_folder)

for file_name in file_list:
    # Each video gets its own output folder, named after the file without its
    # extension (splitext also copes with extensions that are not 3 letters).
    new_folder = os.path.splitext(file_name)[0]
    new_path = os.path.join(frame_folder, new_folder)

    # Create the folder (and ./frame/ itself) only when it is missing.
    os.makedirs(new_path, exist_ok=True)

    # Extract the frames of this video.
    file_path = os.path.join(video_folder, file_name)

    cap = cv2.VideoCapture(file_path)
    # NOTE(review): the purpose of this pause is not evident from the code;
    # it is kept so the cell's timing stays as before.
    time.sleep(5)
    count = 0

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was returned (end of the video or a read failure):
                # stop instead of handing an empty frame to imwrite.
                break

            # Frames are named <last 4 characters of the video name>_<index>.png.
            cv2.imwrite("{}/{}_{}.png".format(new_path, new_folder[-4:], count), frame)
            print('Saved %d frame' % count)

            # Esc (key code 27) skips the rest of the current video.
            if cv2.waitKey(10) == 27:
                break
            count += 1
    finally:
        # Always free the capture, including on Esc or an unexpected error.
        cap.release()

In [None]:
# Save frames captured from the webcam.
IMAGES_PATH = '../etc/'

# Sign-language words (given as label ids) that are recorded by hand.
labels = ['12', '15', '21', '29', '37', '45']
number_imgs = 15

# Create one directory per label and store every frame under a unique
# uuid-based file name.
for label in labels:
    # The directory must be the one the images are written to below;
    # exist_ok lets the cell be re-run without failing.
    label_dir = os.path.join(IMAGES_PATH, label)
    os.makedirs(label_dir, exist_ok=True)

    # Start video capture (initialise the webcam).
    cap = cv2.VideoCapture(0)
    print('Collecting Images for {}'.format(label))
    time.sleep(5)

    try:
        # Capture the frames.
        for imgnum in range(number_imgs):
            ret, frame = cap.read()
            if not ret:
                # Fail loudly rather than passing an empty frame to imwrite.
                raise RuntimeError('Failed to read a frame from the webcam')

            # uuid.uuid1() gives every frame a unique file name.
            imagename = os.path.join(label_dir, '{}-{}.jpg'.format(label, uuid.uuid1()))
            # Save the image into the label directory.
            cv2.imwrite(imagename, frame)
            time.sleep(2)

            # 'q' stops collecting frames for the current label only.
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
    finally:
        # Stop video capture, even when a read failed.
        cap.release()

# Step 2 :  data augmentation

In [None]:
import numpy as np
import cv2
import os
import shutil

In [None]:
# Collect the first four characters of every .png / .txt file name found in
# the sub-folders of ./frame/.
frame_folder = []
wanted_extensions = ('png', 'txt')

for sub_dir in os.listdir('./frame/'):
    entries = os.listdir('./frame/{}'.format(sub_dir))
    frame_folder.extend(
        entry[:4]
        for entry in entries
        if entry.rsplit('.', 1)[-1] in wanted_extensions
    )

In [None]:
# Sign-language video ids grouped by the word they show.
hungry_list = '0227 1056 1812 2741 3569 4415 5253 6093 6929 7762'.split()
hospital_list = '0232 1061 1817 2746 3574 4420 5259 6098 6934 7767'.split()
teacher_list = '0256 1088 1847 2770 3598 4444 5283 6122 6958 7794'.split()
woman_list = '0282 1115 1876 2796 3623 4470 5309 6148 6984 7821'.split()
elevator_list = '0298 1132 1895 2812 3640 4486 5325 6164 7000 7838'.split()
taxi_list = '9088 9219 9249 9384 9410 9572 9725 9897 9925 9992'.split()

In [None]:
# Data augmentation: darken a random 30% of each word's video frames.
import random  # used by random.sample below; not imported elsewhere in the notebook

# The original cell was a template with unfilled '{}' placeholders
# ('./frame/{}/' and '{}_list'), which is not valid Python. It is now run once
# per word.
# NOTE(review): assumes the frames of a word live in ./frame/<word>/, where
# <word> is the prefix of the matching *_list variable — confirm.
word_video_ids = {
    'hungry': hungry_list,
    'hospital': hospital_list,
    'teacher': teacher_list,
    'woman': woman_list,
    'elevator': elevator_list,
    'taxi': taxi_list,
}


def darken_frames(img_path, names, amount):
    """Write a darkened copy of each named frame next to the original.

    For every name in *names*, reads ``img_path + name + '.png'``, subtracts
    *amount* from it with cv2.subtract and saves the result under the same
    name with a trailing 'd'. The matching .txt file is copied alongside
    under the new name.
    """
    for name in names:
        src = cv2.imread(img_path + name + '.png')
        darker = cv2.subtract(src, np.array([amount]))
        new_filename = img_path + name + 'd'
        cv2.imwrite(new_filename + '.png', darker)
        shutil.copy(img_path + name + '.txt', new_filename + '.txt')


for word, video_ids in word_video_ids.items():
    img_path = './frame/{}/'.format(word)

    # Names (without extension) of the .png frames whose first four
    # characters match one of this word's video ids.
    data_list = [
        file.split('.')[0]
        for file in os.listdir(img_path)
        if file.split('.')[-1] == 'png' and file[:4] in video_ids
    ]

    # Pick 30% of the frames at random (rounded down); the first half of the
    # pick is darkened by 50, the second half by 25.
    dark_list = random.sample(data_list, int(len(data_list) * 0.30))
    half = len(dark_list) // 2
    dark_list_50 = dark_list[:half]
    dark_list_25 = dark_list[half:]

    darken_frames(img_path, dark_list_50, 50.0)
    darken_frames(img_path, dark_list_25, 25.0)

In [None]:
# Ids that tell the webcam-captured frames of each word apart.
(hungry_list, hospital_list, teacher_list,
 woman_list, elevator_list, taxi_list) = (
    '9993', '9994', '9995',
    '9996', '9997', '9998',
)

In [None]:
# Data augmentation: darken every webcam-captured frame of each word.
import random  # used by random.sample below; not imported elsewhere in the notebook

# The original cell was a template with unfilled '{}' placeholders
# ('./frame/{}/' and '{}_list'), which is not valid Python. It is now run once
# per word.
# NOTE(review): assumes the frames of a word live in ./frame/<word>/, where
# <word> is the prefix of the matching *_list variable — confirm.
word_webcam_ids = {
    'hungry': hungry_list,
    'hospital': hospital_list,
    'teacher': teacher_list,
    'woman': woman_list,
    'elevator': elevator_list,
    'taxi': taxi_list,
}


def darken_frames(img_path, names, amount):
    """Write a darkened copy of each named frame next to the original.

    For every name in *names*, reads ``img_path + name + '.png'``, subtracts
    *amount* from it with cv2.subtract and saves the result under the same
    name with a trailing 'd'. The matching .txt file is copied alongside
    under the new name.
    """
    for name in names:
        src = cv2.imread(img_path + name + '.png')
        darker = cv2.subtract(src, np.array([amount]))
        new_filename = img_path + name + 'd'
        cv2.imwrite(new_filename + '.png', darker)
        shutil.copy(img_path + name + '.txt', new_filename + '.txt')


for word, webcam_id in word_webcam_ids.items():
    img_path = './frame/{}/'.format(word)

    # Names (without extension) of the .png frames whose first four
    # characters equal this word's webcam id.
    data_list = [
        file.split('.')[0]
        for file in os.listdir(img_path)
        if file.split('.')[-1] == 'png' and file[:4] == webcam_id
    ]

    # Sampling the full length keeps every frame, in random order; the first
    # half is darkened by 50, the second half by 25.
    dark_list = random.sample(data_list, len(data_list))
    half = len(dark_list) // 2
    dark_list_50 = dark_list[:half]
    dark_list_25 = dark_list[half:]

    darken_frames(img_path, dark_list_50, 50.0)
    darken_frames(img_path, dark_list_25, 25.0)

# Step 3 :  data train-test split

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

In [None]:
# Split the files of every word folder under ./obj/ into ./train/ and ./test/
# (70% / 30% per word, fixed random seed).
input_data = {}
path = './obj/'

# Only directories count as word folders; a stray file in ./obj/ would make
# the os.listdir call below fail.
word_list = [
    entry for entry in os.listdir(path)
    if os.path.isdir(os.path.join(path, entry))
]

# Per word: the .png file names without their extension. splitext keeps names
# that contain extra dots intact.
# NOTE(review): an upper-case extension passes this filter, but the moves
# below look for '<name>.png' — confirm all files use a lower-case extension.
for word in word_list:
    input_data[word] = [
        os.path.splitext(file_name)[0]
        for file_name in os.listdir(os.path.join(path, word))
        if os.path.splitext(file_name)[1].lower() == '.png'
    ]

# One label (the word itself) per sample, in the same order as word_list.
y = [[word] * len(input_data[word]) for word in word_list]

X_train_list = []
X_test_list = []

# The split is done per word so each word is divided 70/30.
for word, word_labels in zip(word_list, y):
    X_train, X_test, y_train, y_test = train_test_split(
        input_data[word], word_labels, test_size=0.3, random_state=42)
    X_train_list.append(X_train)
    X_test_list.append(X_test)

path = './'

# shutil.move does not create missing destination folders.
for split_name in ('train', 'test'):
    os.makedirs(os.path.join(path, split_name), exist_ok=True)

# Move all .png files first, then all .txt files; within each pass a word's
# train files are moved before its test files.
for extension in ('png', 'txt'):
    for word, train_names, test_names in zip(word_list, X_train_list, X_test_list):
        for split_name, names in (('train', train_names), ('test', test_names)):
            for name in names:
                source_file = path + 'obj/{}/{}.{}'.format(word, name, extension)
                target_file = path + '{}/{}.{}'.format(split_name, name, extension)
                shutil.move(source_file, target_file)

# Step 4 :  create train, test txt file

In [None]:
def _write_image_list(image_dir, list_path):
    """Write one 'data/obj/<file name>' line per .png file in *image_dir*.

    The list file is opened once and overwritten: the original appended, so
    re-running the cell duplicated every entry. Names are sorted so repeated
    runs produce the same file.
    """
    lines = [
        'data/obj/' + png + '\n'
        for png in sorted(os.listdir(image_dir))
        if png.split('.')[-1] == 'png'
    ]
    with open(list_path, 'w') as f:
        f.writelines(lines)


_write_image_list('./train/', './yolo/custom_data/level_4_final/train.txt')
_write_image_list('./test/', './yolo/custom_data/level_4_final/test.txt')

# Step 5 :  create names, data file

In [None]:
# Copy classes.txt to classes.names line by line and count the lines in c.
# NOTE(review): full_path_to_images is not defined in the visible cells; it
# has to be set before this cell runs.
c = 0  # line counter; the original incremented it without initialising it

with open(full_path_to_images + '/' + 'classes.names', 'w') as names, \
     open(full_path_to_images + '/' + 'classes.txt', 'r') as txt:
    for line in txt:
        names.write(line)
        c += 1

In [None]:
# Write labelled_data.data: the class count followed by the paths of the
# train list, the test list (as 'valid') and the class names file, plus the
# backup entry.
with open(full_path_to_images + '/' + 'labelled_data.data', 'w') as data:
    settings = (
        'classes = {}\n'.format(c),
        'train = data/train.txt\n',
        'valid = data/test.txt\n',
        'names = data/classes.names\n',
        'backup = backup',  # last entry, written without a trailing newline
    )
    data.writelines(settings)