In [None]:
# Mount Google Drive at /content/drive (Colab-only helper); the data paths
# used later in this notebook live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import json

# Define your data directories (all located under the mounted Google Drive).
# NOTE(review): the recorded run of this cell reports every file under
# ".../Datasetr/..." as missing, and the final write into that folder fails —
# "Datasetr" may be a typo for the real folder name; confirm the root path.
video_segments_dir = "/content/drive/MyDrive/Datasetr/Dataset/Videos/"  # not referenced in the visible code
processed_video_dir = "/content/drive/MyDrive/Datasetr/processed_face_postures/"  # Interview_<id>_face_postures.json files
processed_audio_dir = "/content/drive/MyDrive/Datasetr/processed_audio/"  # Interview_<id>.json files
processed_transcripts_dir = "/content/drive/MyDrive/Datasetr/processed_transcripts/"  # Interview_<id>.json files

# Function to read preprocessed video frame features
def read_video_features(video_id, base_dir=None):
    """
    Reads video features from the processed video JSON file.

    Args:
        video_id (int): The interview ID (e.g., 10 for Interview_10).
        base_dir (str, optional): Directory that holds the
            ``Interview_<id>_face_postures.json`` files. Defaults to the
            module-level ``processed_video_dir``.

    Returns:
        The decoded JSON content of the file. Its exact structure is whatever
        the preprocessing step wrote; this function does not validate it.

    Raises:
        FileNotFoundError: If the expected JSON file does not exist.
    """
    # Resolve the default lazily so the module-level setting is read at call time.
    if base_dir is None:
        base_dir = processed_video_dir

    video_file_path = os.path.join(base_dir, f"Interview_{video_id}_face_postures.json")
    if not os.path.exists(video_file_path):
        raise FileNotFoundError(f"Video analysis file not found: {video_file_path}")

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(video_file_path, "r", encoding="utf-8") as f:
        return json.load(f)

# Function to read preprocessed audio features
def read_audio_features(interview_id, base_dir=None):
    """
    Reads audio features from the processed audio JSON file.

    Args:
        interview_id (int): The interview ID (e.g., 10 for Interview_10).
        base_dir (str, optional): Directory that holds the
            ``Interview_<id>.json`` audio files. Defaults to the module-level
            ``processed_audio_dir``.

    Returns:
        The decoded JSON content of the file, returned as-is without
        validation.

    Raises:
        FileNotFoundError: If the expected JSON file does not exist.
    """
    # Resolve the default lazily so the module-level setting is read at call time.
    if base_dir is None:
        base_dir = processed_audio_dir

    audio_file_path = os.path.join(base_dir, f"Interview_{interview_id}.json")
    if not os.path.exists(audio_file_path):
        raise FileNotFoundError(f"Audio analysis file not found: {audio_file_path}")

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(audio_file_path, "r", encoding="utf-8") as f:
        return json.load(f)

# Function to read preprocessed transcript features
def read_transcript_features(interview_id, base_dir=None):
    """
    Reads transcript features from the processed transcript JSON file.

    Args:
        interview_id (int): The interview ID (e.g., 10 for Interview_10).
        base_dir (str, optional): Directory that holds the
            ``Interview_<id>.json`` transcript files. Defaults to the
            module-level ``processed_transcripts_dir``.

    Returns:
        The decoded JSON content of the file, returned as-is without
        validation.

    Raises:
        FileNotFoundError: If the expected JSON file does not exist.
    """
    # Resolve the default lazily so the module-level setting is read at call time.
    if base_dir is None:
        base_dir = processed_transcripts_dir

    transcript_file_path = os.path.join(base_dir, f"Interview_{interview_id}.json")
    if not os.path.exists(transcript_file_path):
        raise FileNotFoundError(f"Transcript analysis file not found: {transcript_file_path}")

    # Transcripts are the most likely place for non-ASCII text, so decode
    # explicitly as UTF-8 instead of using the platform default.
    with open(transcript_file_path, "r", encoding="utf-8") as f:
        return json.load(f)

# Function to create a unified data structure
def create_unified_structure(interview_ids=None):
    """
    Combines the video, audio and transcript features of each interview.

    Args:
        interview_ids (iterable of int, optional): Interview IDs to process.
            Defaults to ``range(1, 12)`` (interviews 1 to 11).

    Returns:
        list: One dict per successfully loaded interview with the keys
        ``"interview_id"``, ``"video_features"``, ``"audio_features"`` and
        ``"transcript_features"``. Interviews for which any of the three
        files is missing are reported on stdout and left out.
    """
    if interview_ids is None:
        interview_ids = range(1, 12)  # Process all interviews (1 to 11)

    unified_data = []

    for interview_id in interview_ids:
        try:
            # Get preprocessed features; any missing file skips the interview.
            video_features = read_video_features(interview_id)
            audio_features = read_audio_features(interview_id)
            transcript_features = read_transcript_features(interview_id)
        except FileNotFoundError as e:
            print(e)
            print(f"Skipping interview: Interview_{interview_id}")
            continue

        # Create the unified record for the current interview.
        unified_data.append({
            "interview_id": f"Interview_{interview_id}",
            "video_features": video_features,
            "audio_features": audio_features,
            "transcript_features": transcript_features,
        })

    return unified_data

# Generate unified data
unified_dataset = create_unified_structure()

# Save unified data to a JSON file (you can also save it as CSV if you prefer)
output_path = "/content/drive/MyDrive/Datasetr/unified_dataset.json"

if not unified_dataset:
    # Every interview was skipped, which almost always means the data
    # directories are wrong or Drive is not mounted. Do not write an empty
    # file (or create a stray folder) and do not report success.
    print(
        "No interviews could be loaded; nothing was saved. "
        "Check that the data directories exist and that Drive is mounted."
    )
else:
    # open(..., "w") does not create missing parent folders, so make sure
    # the target directory exists before writing.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(unified_dataset, f, indent=4)

    print(f"Unified dataset created and saved successfully at {output_path}.")

Video analysis file not found: /content/drive/MyDrive/Datasetr/processed_face_postures/Interview_1_face_postures.json
Skipping interview: Interview_1
Video analysis file not found: /content/drive/MyDrive/Datasetr/processed_face_postures/Interview_2_face_postures.json
Skipping interview: Interview_2
Video analysis file not found: /content/drive/MyDrive/Datasetr/processed_face_postures/Interview_3_face_postures.json
Skipping interview: Interview_3
Video analysis file not found: /content/drive/MyDrive/Datasetr/processed_face_postures/Interview_4_face_postures.json
Skipping interview: Interview_4
Video analysis file not found: /content/drive/MyDrive/Datasetr/processed_face_postures/Interview_5_face_postures.json
Skipping interview: Interview_5
Video analysis file not found: /content/drive/MyDrive/Datasetr/processed_face_postures/Interview_6_face_postures.json
Skipping interview: Interview_6
Video analysis file not found: /content/drive/MyDrive/Datasetr/processed_face_postures/Interview_7_f

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/Datasetr/unified_dataset.json'