In [1]:
import os
import json
import cv2
import random
import csv
from tqdm import tqdm

# Seed Python's `random` module: video2frame picks its frame indices with
# random.randint, so the draws follow from this seed and the order of calls.
random.seed(42)

In [2]:
# Root of the face-video dataset; the trailing slash is part of the value.
dataset_root = '/home/kvu/erc/meld/MELD-face-videos/'

# Metadata file and one video folder per split, all located under the root.
dataset_json_path = os.path.join(dataset_root, 'datasets.json')
train = os.path.join(dataset_root, 'train')
dev = os.path.join(dataset_root, 'dev')
test = os.path.join(dataset_root, 'test')

# Name of the single-frame dataset output.
output = 'single-frame-dataset'

In [3]:
# Load the dataset metadata. The utterance text contains non-ASCII punctuation,
# so pin the encoding to UTF-8 instead of relying on the locale default.
with open(dataset_json_path, encoding='utf-8') as f:
    dataset_json = json.load(f)

In [4]:
# Emotion name -> integer class id written to the 'label' column of the CSVs.
labels_mapping = dict(
    anger=6,
    disgust=5,
    fear=2,
    joy=4,
    neutral=0,
    sadness=3,
    surprise=1,
)

In [5]:
# Peek at one metadata record of the train split to see which fields are available.
dataset_json['train']['dia0_utt0']

{'SrNo': '1',
 'Utterance': 'also I was the point person on my company’s transition from the KL-5 to GR-6 system.',
 'Speaker': 'Chandler',
 'Emotion': 'neutral',
 'Sentiment': 'neutral',
 'Dialogue_ID': '0',
 'Utterance_ID': '0',
 'Season': '8',
 'Episode': '21',
 'StartTime': '00:16:16,059',
 'EndTime': '00:16:21,731'}

In [6]:
# Count the entries in the train video folder.
len(os.listdir(train))

7523

In [7]:
# Number of random frames video2frame extracts per video, by emotion.
# Applied to every split except 'test', which always gets one frame per video.
oversampling = {
    'neutral': 1,
    'surprise': 3,
    'fear': 16,
    'sadness': 7,
    'joy': 3,
    'disgust': 16,
    'anger': 4,
}

In [17]:
def video2frame(set_name, source_video_folder, output_folder, output_csv_path):
    """Extract random frames from every video of one split and index them in a CSV.

    For each file in ``source_video_folder`` the clip's emotion is looked up in
    ``dataset_json[set_name]`` under the file name without its extension. One
    frame is extracted per video for the ``'test'`` split; for any other split
    ``oversampling[emotion]`` frames are extracted. Each frame is saved as
    ``<video name>_<i>.jpg`` in ``output_folder`` and recorded in the CSV
    together with ``labels_mapping[emotion]``.

    Args:
        set_name: Split key into ``dataset_json`` (the notebook uses 'train',
            'dev' and 'test').
        source_video_folder: Folder holding the split's video files.
        output_folder: Folder the JPEG frames are written to; created if missing.
        output_csv_path: Path of the CSV to write, with header
            ``image_id,label``. Its parent folder is created if missing.

    Raises:
        KeyError: If a file has no entry in ``dataset_json[set_name]``, or its
            emotion is missing from ``oversampling`` / ``labels_mapping``.

    Videos that report no frames, frames that cannot be decoded and images that
    cannot be written are reported through ``tqdm.write`` and left out of the
    CSV, so the CSV only lists images that were actually saved.
    """
    # cv2.imwrite reports failure only through its return value, so make sure
    # the destination exists before any frame is written.
    os.makedirs(output_folder, exist_ok=True)
    csv_folder = os.path.dirname(output_csv_path)
    if csv_folder:
        os.makedirs(csv_folder, exist_ok=True)

    row_list = [['image_id', 'label']]

    # os.listdir returns entries in arbitrary order; sorting keeps the seeded
    # random draws tied to the same videos from run to run.
    list_dir = sorted(os.listdir(source_video_folder))
    for vid in tqdm(list_dir):
        # Resolve the metadata first so a lookup failure cannot leak an open capture.
        vid_name = os.path.splitext(vid)[0]
        emotion = dataset_json[set_name][vid_name]['Emotion']
        label = labels_mapping[emotion]
        # NOTE(review): every non-test split (including 'dev') is oversampled -
        # confirm that is intended for validation data.
        ratio = 1 if set_name == 'test' else oversampling[emotion]

        vid_capture = cv2.VideoCapture(os.path.join(source_video_folder, vid))
        try:
            # Index of the last frame according to the container metadata.
            vid_length = int(vid_capture.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
            if not vid_capture.isOpened() or vid_length < 0:
                # random.randint(0, -1) would raise ValueError on an empty video.
                tqdm.write(f"Skipping {vid}: could not open video or it has no frames")
                continue

            for i in range(ratio):
                # Pick a random frame
                frame_index = random.randint(0, vid_length)
                vid_capture.set(cv2.CAP_PROP_POS_FRAMES, frame_index)

                ret, frame = vid_capture.read()
                if not ret:
                    # The reported frame count can exceed what is decodable.
                    tqdm.write(f"Skipping {vid} sample {i}: could not read frame {frame_index}")
                    continue

                output_name = vid_name + f"_{i}" + '.jpg'
                output_path = os.path.join(output_folder, output_name)
                if not cv2.imwrite(output_path, frame):
                    tqdm.write(f"Skipping {vid} sample {i}: could not write {output_path}")
                    continue

                row_list.append([output_name, label])
        finally:
            # Free the decoder / file handle even when a video is skipped or fails.
            vid_capture.release()

    with open(output_csv_path, 'w', newline='') as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerows(row_list)

In [15]:
# Build the train split: frames go to data/train, the image/label index to train.csv.
video2frame(set_name='train', 
            source_video_folder=train, 
            output_folder='/home/kvu/erc/data/train', 
            output_csv_path='/home/kvu/erc/Datasets/MELD-single-frame/train.csv')

100%|██████████| 7523/7523 [02:51<00:00, 43.87it/s] 


In [16]:
# Build the dev split: frames go to data/dev, the image/label index to dev.csv.
video2frame(set_name='dev', 
            source_video_folder=dev, 
            output_folder='/home/kvu/erc/data/dev', 
            output_csv_path='/home/kvu/erc/Datasets/MELD-single-frame/dev.csv')

100%|██████████| 860/860 [00:19<00:00, 43.40it/s]


In [18]:
# Build the test split (one frame per video, no oversampling): frames go to
# data/test, the image/label index to test.csv.
video2frame(set_name='test', 
            source_video_folder=test, 
            output_folder='/home/kvu/erc/data/test', 
            output_csv_path='/home/kvu/erc/Datasets/MELD-single-frame/test.csv')

100%|██████████| 1983/1983 [00:38<00:00, 51.32it/s]
