In [2]:
!pip install mutagen

Collecting mutagen
  Downloading mutagen-1.47.0-py3-none-any.whl.metadata (1.7 kB)
Downloading mutagen-1.47.0-py3-none-any.whl (194 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 194.4/194.4 kB 3.9 MB/s eta 0:00:00
Installing collected packages: mutagen
Successfully installed mutagen-1.47.0


In [3]:
import os
import json
import mimetypes
from mutagen.mp3 import MP3
from mutagen.wave import WAVE
import cv2

In [4]:
# Mount Google Drive into the Colab filesystem. The dataset directory and the
# Metadata.json output path used later in this notebook both live under
# this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [5]:
def get_audio_metadata(file_path):
    """Extract basic stream metadata from an MP3 or WAV file.

    The reader is selected from the file extension, compared
    case-insensitively so that ``clip.MP3`` is handled like ``clip.mp3``.

    Args:
        file_path: Path to the audio file.

    Returns:
        A dict with the keys ``format`` (upper-cased extension),
        ``duration`` (``audio.info.length``), ``sampling_rate``
        (``audio.info.sample_rate``) and ``bitrate`` (``"N/A"`` when the
        stream info has no ``bitrate`` attribute).
        ``None`` when the extension is neither ``.mp3`` nor ``.wav``, or when
        reading the file fails (the error is printed in that case).
    """
    try:
        # Normalise case once; the extension decides which reader is used.
        lowered_path = os.fspath(file_path).lower()
        if lowered_path.endswith(".mp3"):
            audio = MP3(file_path)
        elif lowered_path.endswith(".wav"):
            audio = WAVE(file_path)
        else:
            return None

        info = audio.info
        return {
            "format": lowered_path.rsplit(".", 1)[-1].upper(),
            "duration": info.length,
            "sampling_rate": info.sample_rate,
            "bitrate": getattr(info, "bitrate", "N/A")
        }
    except Exception as e:
        # Deliberately best-effort: one unreadable file is reported and
        # skipped instead of aborting the caller's whole scan.
        print(f"Error reading audio file {file_path}: {e}")
        return None

In [6]:
def get_video_metadata(file_path):
    """Extract basic metadata from a video file using OpenCV.

    Args:
        file_path: Path to the video file.

    Returns:
        A dict with the keys ``format`` (upper-cased text after the last
        ``.`` in ``file_path``), ``resolution`` (``"<width>x<height>"``),
        ``frame_rate`` (the reported FPS) and ``duration`` (frame count
        divided by FPS, or the string ``"Unknown"`` when FPS is not
        positive).
        ``None`` when the file cannot be opened or reading it fails (the
        error is printed in the latter case).
    """
    try:
        cap = cv2.VideoCapture(file_path)
        try:
            if not cap.isOpened():
                return None

            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        finally:
            # Always free the capture handle, including on the early return
            # above and when a property read raises.
            cap.release()

        # Guard against division by zero when no frame rate is reported.
        duration = frame_count / fps if fps > 0 else "Unknown"

        return {
            "format": file_path.split(".")[-1].upper(),
            "resolution": f"{width}x{height}",
            "frame_rate": fps,
            "duration": duration
        }
    except Exception as e:
        # Deliberately best-effort: one unreadable file is reported and
        # skipped instead of aborting the caller's whole scan.
        print(f"Error reading video file {file_path}: {e}")
        return None

In [10]:
def extract_metadata(
    dataset_path,
    output_path="/content/gdrive/My Drive/Colab Notebooks/output/Metadata.json",
):
    """Scan a dataset directory and write its audio/video metadata as JSON.

    Every sub-directory of ``dataset_path`` is treated as one user. Each file
    directly inside a user directory is classified by its guessed MIME type
    and handed to ``get_audio_metadata`` or ``get_video_metadata``; files with
    no recognised audio/video type, and files for which the helper returns
    nothing, are skipped. Directory entries are visited in sorted order so
    the generated file is identical between runs.

    Args:
        dataset_path: Directory containing one sub-directory per user.
        output_path: Destination of the JSON file. Its parent directory is
            created when missing. Defaults to the notebook's original
            location on the mounted Drive.

    Returns:
        None. The result is written to ``output_path`` and a confirmation
        message is printed.
    """
    metadata = {
        "dataset_name": "Biovid",
        "version": "1.0",
        "description": "A biometric dataset containing audiovisual recordings for research.",
        "users": {}
    }

    for user in sorted(os.listdir(dataset_path)):
        user_path = os.path.join(dataset_path, user)
        if not os.path.isdir(user_path):
            # Loose files at the dataset root do not belong to any user.
            continue

        user_data = {"audio": [], "video": []}

        for file in sorted(os.listdir(user_path)):
            file_path = os.path.join(user_path, file)
            mime_type, _ = mimetypes.guess_type(file_path)
            if not mime_type:
                continue

            if mime_type.startswith("audio"):
                audio_meta = get_audio_metadata(file_path)
                if audio_meta:
                    user_data["audio"].append(audio_meta)
            elif mime_type.startswith("video"):
                video_meta = get_video_metadata(file_path)
                if video_meta:
                    user_data["video"].append(video_meta)

        metadata["users"][user] = user_data

    # Make sure the destination folder exists; a bare file name has no parent
    # directory component to create.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=4)
    print("Metadata.json file created successfully!")

In [11]:
# Root folder of the dataset on the mounted Drive; this is the directory
# passed to extract_metadata() below.
dataset_dir = "/content/gdrive/My Drive/Colab Notebooks/cropped_lips"
# Echo the configured path so it is recorded in the cell output
print(f"Dataset Directory: {dataset_dir}")

Dataset Directory: /content/gdrive/My Drive/Colab Notebooks/cropped_lips


In [12]:
# Run the metadata extraction: walks the sub-folders of dataset_dir (one per
# user) and writes the collected audio/video metadata to Metadata.json.
extract_metadata(dataset_dir)

Metadata.json file created successfully!
