In [4]:
import os
import json
import numpy as np

In [2]:
# Directory layout for the training split: one input folder per modality
# plus the folder that receives the fused embeddings.
base_dir = '/project/msoleyma_1026/ecp/data'
video_dir, text_dir, audio_dir, output_dir = (
    os.path.join(base_dir, subdir)
    for subdir in (
        'video/train',
        'text/train-emotion',
        'audio/train-emotion',
        'concatenated/train-emotion',
    )
)

In [3]:
# Ensure the output directory exists. exist_ok=True makes this a no-op when
# the directory is already present and avoids the check-then-create race of a
# separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

# Load train data. The file is opened as UTF-8 explicitly so parsing does not
# depend on the locale's default encoding.
with open(os.path.join(base_dir, 'train.json'), 'r', encoding='utf-8') as file:
    train_data = json.load(file)

In [4]:
# Build one fused (video, text, audio) embedding per training utterance.
# Utterances with missing or malformed inputs are reported and skipped so a
# single bad file does not abort the whole run.
for conversation in train_data:
    for utterance in conversation['conversation']:
        video_name = utterance['video_name']
        # Strip only a trailing '.mp4'; str.replace would also remove the
        # substring if it appeared elsewhere in the name.
        base_name = video_name[:-len('.mp4')] if video_name.endswith('.mp4') else video_name

        # Paths to the embeddings
        video_path = os.path.join(video_dir, f'{base_name}.npy')
        text_path = os.path.join(text_dir, f'{base_name}.npy')
        audio_path = os.path.join(audio_dir, f'{base_name}.npy')

        # All three modality files must be present
        if not all(os.path.exists(path) for path in (video_path, text_path, audio_path)):
            print(f'Missing files for {base_name}')
            continue

        # Load embeddings
        video_emb = np.load(video_path)
        text_emb = np.load(text_path)
        audio_emb = np.load(audio_path)

        # Concatenate along axis 1. np.concatenate raises ValueError (AxisError
        # is a subclass) when the arrays are not all 2-D with matching first
        # dimensions; treat that like any other shape mismatch.
        try:
            concatenated_emb = np.concatenate((video_emb, text_emb, audio_emb), axis=1)
        except ValueError:
            print(f'Error: Concatenated shape mismatch at {base_name}')
            continue

        # Only embeddings of the expected shape (1, 2560) are written out
        if concatenated_emb.shape != (1, 2560):
            print(f'Error: Concatenated shape mismatch at {base_name}')
            continue

        # Save the concatenated embeddings
        save_path = os.path.join(output_dir, f'{base_name}_concatenated.npy')
        np.save(save_path, concatenated_emb)


In [5]:
# Directory layout for the test split: one input folder per modality
# plus the folder that receives the fused embeddings.
base_dir = '/project/msoleyma_1026/ecp/data'
video_dir, text_dir, audio_dir, output_dir = (
    os.path.join(base_dir, subdir)
    for subdir in (
        'video/test',
        'text/test-emotion',
        'audio/test-emotion',
        'concatenated/test-emotion',
    )
)

In [6]:
# Ensure the output directory exists. exist_ok=True makes this a no-op when
# the directory is already present and avoids the check-then-create race of a
# separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

# Load test data. The file is opened as UTF-8 explicitly so parsing does not
# depend on the locale's default encoding.
with open(os.path.join(base_dir, 'test.json'), 'r', encoding='utf-8') as file:
    test_data = json.load(file)

In [7]:
# Iterate through each conversation
for conversation in test_data:
    for utterance in conversation['conversation']:
        video_name = utterance['video_name']
        base_name = video_name.replace('.mp4', '')

        # Paths to the embeddings
        video_path = os.path.join(video_dir, f'{base_name}.npy')
        text_path = os.path.join(text_dir, f'{base_name}.npy')
        audio_path = os.path.join(audio_dir, f'{base_name}.npy')

        # Check if all files exist
        if os.path.exists(video_path) and os.path.exists(text_path) and os.path.exists(audio_path):
            # Load embeddings
            video_emb = np.load(video_path)
            text_emb = np.load(text_path)
            audio_emb = np.load(audio_path)

            # Concatenate embeddings
            concatenated_emb = np.concatenate((video_emb, text_emb, audio_emb), axis=1)

            # Ensure the concatenated embeddings are of the correct shape (1, 2560)
            if concatenated_emb.shape == (1, 2560):
                # Save the concatenated embeddings
                save_path = os.path.join(output_dir, f'{base_name}_concatenated.npy')
                np.save(save_path, concatenated_emb)
            else:
                print(f'Error: Concatenated shape mismatch at {base_name}')
        else:
            print(f'Missing files for {base_name}')


In [9]:
import numpy as np

# Spot-check one fused embedding from each split by printing its shape.
train_sample_path = "/project/msoleyma_1026/ecp/data/concatenated/train-emotion/dia1utt1_concatenated.npy"
x = np.load(train_sample_path)
print(x.shape)

test_sample_path = "/project/msoleyma_1026/ecp/data/concatenated/test-emotion/dia16utt1_concatenated.npy"
y = np.load(test_sample_path)
print(y.shape)

(1, 2560)
(1, 2560)


In [5]:
# Load the utterance-pair annotations for both splits. The files are opened
# as UTF-8 explicitly so parsing does not depend on the locale's default
# encoding.
with open('/project/msoleyma_1026/ecp/data/utterance_pairs_emotion_flags_train.json', 'r', encoding='utf-8') as f:
    utterance_pairs_emotion_flags_train = json.load(f)
with open('/project/msoleyma_1026/ecp/data/utterance_pairs_emotion_flags_test.json', 'r', encoding='utf-8') as f:
    utterance_pairs_emotion_flags_test = json.load(f)

In [6]:
def process_pairs(input_folder_path, utterance_pairs_emotion_flags, output_folder_path):
    """Build and save one concatenated embedding per utterance pair.

    For every pair listed under a conversation, the two utterances' saved
    embeddings are loaded from ``input_folder_path``, joined along axis 1
    (first utterance of the pair, then the second) and written to
    ``output_folder_path`` as
    ``conv_<conv_id>_utterance_pair_<utt1_id>_<utt2_id>.npy``.

    Args:
        input_folder_path: Directory holding the per-utterance files named
            ``dia<conv_id>utt<utt_id>_concatenated.npy``.
        utterance_pairs_emotion_flags: Iterable of dicts mapping a
            conversation id to a list of pair records. Each record must have
            an ``'utterance_pair'`` entry holding the two utterance ids; any
            other entries are not used here.
        output_folder_path: Directory that receives the pair embeddings. It is
            created if it does not exist.

    Returns:
        None. Results are written to disk.

    Notes:
        * An output file that already exists is never overwritten, and its
          inputs are not read.
        * A pair whose input embeddings are missing is reported on stdout and
          skipped instead of aborting the run.
        * Every key of a conversation dict is processed, so an empty dict is
          simply skipped.
    """
    # np.save does not create parent directories.
    os.makedirs(output_folder_path, exist_ok=True)

    for conversation in utterance_pairs_emotion_flags:
        for conv_id, pairs in conversation.items():
            for pair in pairs:
                utt1_id, utt2_id = pair['utterance_pair']

                output_filename = f'conv_{conv_id}_utterance_pair_{utt1_id}_{utt2_id}.npy'
                output_path = os.path.join(output_folder_path, output_filename)
                # Check first so finished pairs cost no disk reads on a re-run.
                if os.path.exists(output_path):
                    continue

                utt1_embedding_path = os.path.join(
                    input_folder_path, f'dia{conv_id}utt{utt1_id}_concatenated.npy')
                utt2_embedding_path = os.path.join(
                    input_folder_path, f'dia{conv_id}utt{utt2_id}_concatenated.npy')

                if not (os.path.exists(utt1_embedding_path) and os.path.exists(utt2_embedding_path)):
                    print(f'Missing files for conv {conv_id} utterance pair ({utt1_id}, {utt2_id})')
                    continue

                utt1_embedding = np.load(utt1_embedding_path)
                utt2_embedding = np.load(utt2_embedding_path)

                combined_embedding = np.concatenate([utt1_embedding, utt2_embedding], axis=1)
                np.save(output_path, combined_embedding)

# Per-utterance input folders and per-pair output folders for each split,
# all rooted at the same data directory.
_ecp_data_root = '/project/msoleyma_1026/ecp/data'
concat_folder_path_train = f'{_ecp_data_root}/concatenated/train-emotion'
concat_folder_path_test = f'{_ecp_data_root}/concatenated/test-emotion'
concat_pair_embeddings_folder_path_train = f'{_ecp_data_root}/pair_embeddings/concatenated/train-emotion'
concat_pair_embeddings_folder_path_test = f'{_ecp_data_root}/pair_embeddings/concatenated/test-emotion'

In [7]:
# Build the pair embeddings for the train split.
process_pairs(
    input_folder_path=concat_folder_path_train,
    utterance_pairs_emotion_flags=utterance_pairs_emotion_flags_train,
    output_folder_path=concat_pair_embeddings_folder_path_train,
)

In [8]:
# Build the pair embeddings for the test split.
process_pairs(
    input_folder_path=concat_folder_path_test,
    utterance_pairs_emotion_flags=utterance_pairs_emotion_flags_test,
    output_folder_path=concat_pair_embeddings_folder_path_test,
)

In [9]:
import numpy as np

# Spot-check one pair embedding from each split by printing its shape.
train_pair_path = "/project/msoleyma_1026/ecp/data/pair_embeddings/concatenated/train-emotion/conv_1_utterance_pair_3_1.npy"
x = np.load(train_pair_path)
print(x.shape)

test_pair_path = "/project/msoleyma_1026/ecp/data/pair_embeddings/concatenated/test-emotion/conv_16_utterance_pair_1_1.npy"
y = np.load(test_pair_path)
print(y.shape)

(1, 5120)
(1, 5120)
