In [None]:
import os
import json
from dotenv import load_dotenv
import numpy as np
from tqdm import tqdm
import cv2

load_dotenv()

True

In [2]:
dataset_path = os.getenv("DATASET_PATH")

metadata_file_path = os.path.join(dataset_path, "dataset_metadata.json")

with open(metadata_file_path, "r") as file:
    dataset_metadata = json.load(file)

instruments = os.getenv("INSTRUMENTS").split(",")

### Overall dataset analysis

In [3]:
layouts = []
moving_cameras = []
for artist, songs in dataset_metadata.items():
    for song, metadata in songs.items():
        layouts.append(metadata["layout"])
        moving_cameras.append(metadata["moving_camera"])

# Count different layouts and number of false moving cameras
layout_counts = {}
for layout in layouts:
    if layout is None:
        continue
    layout_tuple = tuple(layout)
    if layout_tuple not in layout_counts:
        layout_counts[layout_tuple] = 0
    layout_counts[layout_tuple] += 1

moving_camera_ratio = sum(1 for camera in moving_cameras if not camera) / len(
    moving_cameras
)

print("Layout Counts:")
for layout, count in layout_counts.items():
    print(f"{layout}: {count}")
print(f"Moving Camera Ratio (False): {moving_camera_ratio:.2%}")

Layout Counts:
('mridangam', 'vocal', 'violin'): 143
('mridangam', 'vocal', 'tanpura', 'violin'): 18
('violin', 'tanpura', 'vocal', 'mridangam'): 3
('violin', 'vocal', 'mridangam'): 18
('mridangam', 'tanpura', 'vocal', 'violin'): 9
('violin', 'vocal', 'ghatam', 'mridangam'): 7
('mridangam', 'ghatam', 'vocal', 'tanpura', 'violin'): 12
('mridangam', None, 'morsing', 'vocal', 'tanpura', 'violin'): 7
Moving Camera Ratio (False): 95.15%


In [None]:
fps_values = {}

for artist, songs in dataset_metadata.items():
    for song, metadata in songs.items():
        video_file_path = os.path.join(
            dataset_path, artist, song, f"{song}.mov"
        )
        try:
            cap = cv2.VideoCapture(video_file_path)
            if not cap.isOpened():
                print(f"Could not open video: {video_file_path}")
                continue
            fps = cap.get(cv2.CAP_PROP_FPS)
            if fps not in fps_values:
                fps_values[fps] = []
            fps_values[fps].append((artist, song))
            cap.release()
        except Exception as e:
            print(f"Error processing {video_file_path}: {e}")

print("\nFPS Values:")
for fps, songs in fps_values.items():
    print(f"{fps} FPS: {len(songs)} songs")

[mov,mp4,m4a,3gp,3g2,mj2 @ 0x10367aaa0] moov atom not found
OpenCV: Couldn't read video stream from file "/Volumes/Files/saraga_audiovisual/Ameya Karthikeyan/iniyagilum ninai/iniyagilum ninai.mov"


Could not open video: /Volumes/Files/saraga_audiovisual/Ameya Karthikeyan/iniyagilum ninai/iniyagilum ninai.mov


OpenCV: Couldn't read video stream from file "/Volumes/Files/saraga_audiovisual/Ashok Subramaniaym/Haath Ke Murali/Haath Ke Murali.mov"
OpenCV: Couldn't read video stream from file "/Volumes/Files/saraga_audiovisual/Ashok Subramaniaym/Peiyaval Kaan/Peiyaval Kaan.mov"


Could not open video: /Volumes/Files/saraga_audiovisual/Ashok Subramaniaym/Haath Ke Murali/Haath Ke Murali.mov
Could not open video: /Volumes/Files/saraga_audiovisual/Ashok Subramaniaym/Peiyaval Kaan/Peiyaval Kaan.mov


[mov,mp4,m4a,3gp,3g2,mj2 @ 0x1033bd120] moov atom not found
OpenCV: Couldn't read video stream from file "/Volumes/Files/saraga_audiovisual/Brinda Manickavasagan/Bhavanuta/Bhavanuta.mov"


Could not open video: /Volumes/Files/saraga_audiovisual/Brinda Manickavasagan/Bhavanuta/Bhavanuta.mov


OpenCV: Couldn't read video stream from file "/Volumes/Files/saraga_audiovisual/Kruthi Bhat/Thillana/Thillana.mov"


Could not open video: /Volumes/Files/saraga_audiovisual/Kruthi Bhat/Thillana/Thillana.mov


[mov,mp4,m4a,3gp,3g2,mj2 @ 0x11dd480c0] moov atom not found
OpenCV: Couldn't read video stream from file "/Volumes/Files/saraga_audiovisual/Raghav Krishna/Gananayakam/Gananayakam.mov"


Could not open video: /Volumes/Files/saraga_audiovisual/Raghav Krishna/Gananayakam/Gananayakam.mov


[mov,mp4,m4a,3gp,3g2,mj2 @ 0x10367aaa0] moov atom not found
OpenCV: Couldn't read video stream from file "/Volumes/Files/saraga_audiovisual/Ramya Kiranmayi/Neevera Kula/Neevera Kula.mov"
[mov,mp4,m4a,3gp,3g2,mj2 @ 0x10367aaa0] moov atom not found
OpenCV: Couldn't read video stream from file "/Volumes/Files/saraga_audiovisual/Ramya Kiranmayi/Shanmukhapriva/Shanmukhapriva.mov"


Could not open video: /Volumes/Files/saraga_audiovisual/Ramya Kiranmayi/Neevera Kula/Neevera Kula.mov
Could not open video: /Volumes/Files/saraga_audiovisual/Ramya Kiranmayi/Shanmukhapriva/Shanmukhapriva.mov


OpenCV: Couldn't read video stream from file "/Volumes/Files/saraga_audiovisual/Shruthi Bhat/Moogu Sannadhu/Moogu Sannadhu.mov"
[mov,mp4,m4a,3gp,3g2,mj2 @ 0x10367aaa0] moov atom not found
OpenCV: Couldn't read video stream from file "/Volumes/Files/saraga_audiovisual/Shruthi Bhat/Raksha Bettare/Raksha Bettare.mov"


Could not open video: /Volumes/Files/saraga_audiovisual/Shruthi Bhat/Moogu Sannadhu/Moogu Sannadhu.mov
Could not open video: /Volumes/Files/saraga_audiovisual/Shruthi Bhat/Raksha Bettare/Raksha Bettare.mov


[mov,mp4,m4a,3gp,3g2,mj2 @ 0x11dd47d40] moov atom not found
OpenCV: Couldn't read video stream from file "/Volumes/Files/saraga_audiovisual/Vaibhav Ramu/Akshaya Linga Vibho/Akshaya Linga Vibho.mov"


Could not open video: /Volumes/Files/saraga_audiovisual/Vaibhav Ramu/Akshaya Linga Vibho/Akshaya Linga Vibho.mov


OpenCV: Couldn't read video stream from file "/Volumes/Files/saraga_audiovisual/Vidya Kalyanaraman/Om Namashivaya/Om Namashivaya.mov"


Could not open video: /Volumes/Files/saraga_audiovisual/Vidya Kalyanaraman/Om Namashivaya/Om Namashivaya.mov

FPS Values:
30.0 FPS: 195 songs
29.991790942673415 FPS: 1 songs
29.998473204743245 FPS: 1 songs
29.995953874165487 FPS: 1 songs
29.999255573890494 FPS: 1 songs
29.99210422424003 FPS: 1 songs
29.99463341085073 FPS: 1 songs
24.0 FPS: 11 songs
29.99870275879962 FPS: 1 songs
29.998348199537496 FPS: 1 songs


In [None]:
for artist, songs in list(dataset_metadata.items()):
    for song in list(songs.keys()):
        if dataset_metadata[artist][song]["layout"] != instruments:
            del dataset_metadata[artist][song]

songs_to_avoid = {
    "Ashok Subramaniaym": [
        "Arunachala Ramana",
        "Emouma VInavi",
        "Peiyaval Kaan",
        "Sharavana Bhava"
        ],
    "Hamzini": [
        "Chandra Chooda",
        "Paradevata",
        "Thillana"
    ],
    "Raghav Krishna": [
        "Nava Loka Nayike"
    ]
}

for artist, songs in songs_to_avoid.items():
    if artist in dataset_metadata:
        for song in songs:
            if song in dataset_metadata[artist]:
                del dataset_metadata[artist][song]
        
        if len(dataset_metadata[artist]) == 0:
            del dataset_metadata[artist]

In [None]:
fps_values = {}

for artist, songs in dataset_metadata.items():
    for song, metadata in songs.items():
        video_file_path = os.path.join(
            dataset_path, artist, song, f"{song}.mov"
        )
        try:
            cap = cv2.VideoCapture(video_file_path)
            if not cap.isOpened():
                print(f"Could not open video: {video_file_path}")
                continue
            fps = cap.get(cv2.CAP_PROP_FPS)
            if fps not in fps_values:
                fps_values[fps] = []
            fps_values[fps].append((artist, song))
            cap.release()
        except Exception as e:
            print(f"Error processing {video_file_path}: {e}")

print("\nFPS Values:")
for fps, songs in fps_values.items():
    print(f"{fps} FPS: {len(songs)} songs")

Could not open video: /home/gabriel/theo/masters-thesis/dataset/Ashok Subramaniaym/Haath Ke Murali/Haath Ke Murali.mov

FPS Values:
30.0 FPS: 117 songs
29.991790942673415 FPS: 1 songs
29.998473204743245 FPS: 1 songs
29.995953874165487 FPS: 1 songs
29.999255573890494 FPS: 1 songs
29.99210422424003 FPS: 1 songs
29.99463341085073 FPS: 1 songs
24.0 FPS: 11 songs


In [None]:
# Define the target layouts
target_layouts = [['mridangam', 'vocal', 'violin'], ['violin', 'vocal', 'mridangam']]

# Initialize counters
matching_songs_count = 0
total_duration = 0

# Iterate through the metadata to find matching songs
for artist, songs in dataset_metadata.items():
    for song, metadata in songs.items():
        if "layout" not in metadata or "body_detected" not in metadata or "moving_camera" not in metadata:
            continue
        if metadata["layout"] in target_layouts and metadata["body_detected"] and not metadata["moving_camera"]:
            matching_songs_count += 1
            total_duration += metadata.get("duration", 0)

total_duration = total_duration / 60 / 60  # Convert total duration to hours
print(f"Number of matching songs: {matching_songs_count}")
print(f"Total duration of matching songs: {total_duration:.2f} hours")

Number of matching songs: 141
Total duration of matching songs: 28.12 seconds


### Keypoints Sanity Checks

In [33]:
sanity_check_results = {}

for artist, songs in dataset_metadata.items():
    artist_path = os.path.join(dataset_path, artist)
    if not os.path.isdir(artist_path):
        continue

    sanity_check_results[artist] = {}
    for song in songs:
        song_path = os.path.join(artist_path, song)
        if not os.path.isdir(song_path):
            continue

        sanity_check_results[artist][song] = {}
        
        instruments_path = [f for f in os.listdir(song_path) if os.path.isdir(os.path.join(song_path, f))]
        
        for instrument in instruments_path:
            sanity_check_results[artist][song][instrument] = {}
            instrument_path = os.path.join(song_path, instrument)

            if instrument == "vocal":
                face_keypoints_path = os.path.join(
                    instrument_path, "face_keypoints.npy"
                )

                sanity_check_results[artist][song][instrument][
                    "face_keypoints_exists"
                ] = os.path.exists(face_keypoints_path)

            keypoints_path = os.path.join(instrument_path, "keypoints.npy")


            sanity_check_results[artist][song][instrument][
                "keypoints_exists"
            ] = os.path.exists(keypoints_path)

# Print the sanity check results
has_missing_files = False
for artist, songs in sanity_check_results.items():
    for song, instruments in songs.items():
        for instrument, files in instruments.items():
            missing_files = [
                file for file, exists in files.items() if not exists
            ]
            if missing_files:
                has_missing_files = True
                print(f"Artist: {artist}, Song: {song}, Instrument: {instrument}, Missing files: {missing_files}")
                break
        if has_missing_files:
            has_missing_files = False
            break

Artist: Aditi Prahalad, Song: Parama Pavana Rama, Instrument: vocal, Missing files: ['face_keypoints_exists']
Artist: Ashok Subramaniaym, Song: Eru Malai, Instrument: vocal, Missing files: ['face_keypoints_exists']
Artist: Hamzini, Song: Tamasinchu, Instrument: vocal, Missing files: ['face_keypoints_exists']
Artist: Ramya Kiranmayi, Song: Parama Purusha, Instrument: vocal, Missing files: ['face_keypoints_exists']
Artist: Sandhya Ramaswamy, Song: Gati Neeye, Instrument: vocal, Missing files: ['face_keypoints_exists']
Artist: Shruthi Bhat, Song: Moogu Sannadhu, Instrument: vocal, Missing files: ['face_keypoints_exists']


### Motion features Sanity Checks

In [34]:
sanity_check_results = {}

for artist, songs in dataset_metadata.items():
    artist_path = os.path.join(dataset_path, artist)
    if not os.path.isdir(artist_path):
        continue

    sanity_check_results[artist] = {}
    for song in songs:
        song_path = os.path.join(artist_path, song)
        if not os.path.isdir(song_path):
            continue

        sanity_check_results[artist][song] = {}
        
        instruments_path = [f for f in os.listdir(song_path) if os.path.isdir(os.path.join(song_path, f))]
        
        for instrument in instruments_path:
            if instrument not in instruments:
                continue
            sanity_check_results[artist][song][instrument] = {}
            instrument_path = os.path.join(song_path, instrument)

            audio_features_path = os.path.join(instrument_path, "audio_features.json")
            motion_features_path = os.path.join(instrument_path, "motion_features.json")
            motion_features_occluded_path = os.path.join(
                instrument_path, "motion_features_occluded.json"
            )
            correlation_path = os.path.join(
                instrument_path, "05_correlation_05s_windows.json"
            )

            sanity_check_results[artist][song][instrument][
                "audio_features_exists"
            ] = os.path.exists(audio_features_path)
            sanity_check_results[artist][song][instrument][
                "motion_features_exists"
            ] = os.path.exists(motion_features_path)
            sanity_check_results[artist][song][instrument][
                "motion_features_occluded_exists"
            ] = os.path.exists(motion_features_occluded_path)
            sanity_check_results[artist][song][instrument][
                "correlation_exists"
            ] = os.path.exists(correlation_path)

# Print the sanity check results
has_missing_files = False
for artist, songs in sanity_check_results.items():
    for song, instruments in songs.items():
        for instrument, files in instruments.items():
            missing_files = [
                file for file, exists in files.items() if not exists
            ]
            if missing_files:
                has_missing_files = True
                print(f"Artist: {artist}, Song: {song}, Instrument: {instrument}, Missing files: {missing_files}")
                break
        if has_missing_files:
            has_missing_files = False
            break

In [35]:
keypoint_scores = {}

for artist, songs in tqdm(dataset_metadata.items(),desc="Artists"):
    artist_dir = os.path.join(dataset_path, artist)
    if not os.path.isdir(artist_dir) or artist.startswith("."):
        continue

    keypoint_scores.setdefault(artist, {})

    for song in tqdm(songs, desc="Songs", leave=False):
        song_dir = os.path.join(artist_dir, song)
        if not os.path.isdir(song_dir) or song.startswith("."):
            continue

        keypoint_scores[artist].setdefault(song, {})

        for instr in instruments:
            inst_dir = os.path.join(song_dir, instr)
            if not os.path.isdir(inst_dir):
                continue

            try:
                keypoint_scores[artist][song][instr] = np.load(os.path.join(inst_dir, "keypoint_scores.npy"))
            except FileNotFoundError:
                print(
                    f"Missing feature files for {artist}/{song}/{instr}, skipping"
                )

Artists: 100%|██████████| 36/36 [00:00<00:00, 50.80it/s]


In [36]:
nan_frames_counts = {}
at_least_one_nan_counts = {}

for artist, songs in keypoint_scores.items():
    nan_frames_counts[artist] = {}
    at_least_one_nan_counts[artist] = {}
    for song, instruments in songs.items():
        nan_frames_counts[artist][song] = {}
        at_least_one_nan_counts[artist][song] = {}
        for instrument, keypoints in instruments.items():
            nan_frames_counts[artist][song][instrument] = 0
            at_least_one_nan_counts[artist][song][instrument] = 0
            
            if keypoints.size == 0:
                num_frames = 0
            else:
                num_frames = keypoints.shape[0]

            for i in range(num_frames):
                frame_keypoints = keypoints[i]
                if np.all(np.isnan(frame_keypoints)):
                    nan_frames_counts[artist][song][instrument] += 1
                if np.any(np.isnan(frame_keypoints)):
                    at_least_one_nan_counts[artist][song][instrument] += 1

In [37]:
total_frames = 0
total_nan_frames = 0
total_at_least_one_nan = 0

for artist, songs in nan_frames_counts.items():
    for song, instruments in songs.items():
        for instrument, nan_count in instruments.items():
            keypoints = keypoint_scores.get(artist, {}).get(song, {}).get(instrument)
            if keypoints is not None and keypoints.size > 0:
                num_frames = keypoints.shape[0]
                total_frames += num_frames
                total_nan_frames += nan_count
                total_at_least_one_nan += at_least_one_nan_counts[artist][song][instrument]

nan_ratio = (total_nan_frames / total_frames) * 100 if total_frames > 0 else 0
at_least_one_nan_ratio = (total_at_least_one_nan / total_frames) * 100 if total_frames > 0 else 0

print(f"Global Ratio of frames with all keypoints as NaN: {nan_ratio:.2f}%")
print(f"Global Ratio of frames with at least one keypoint as NaN: {at_least_one_nan_ratio:.2f}%")

Global Ratio of frames with all keypoints as NaN: 0.76%
Global Ratio of frames with at least one keypoint as NaN: 0.76%


In [38]:
print("Confidence score mean per artist:")
for artist, songs in keypoint_scores.items():
    scores = []
    for song, instrs in songs.items():
        for instr, score in instrs.items():
            scores.append(np.nanmean(score))

    if scores:
        print(f"{artist}: {np.mean(scores):.4f}")
    else:
        print(f"{artist}: No scores found")

print("Confidence score per instrument:")
instruments_scores = {instr: [] for instr in instruments}
for artist, songs in keypoint_scores.items():
    for song, instrs in songs.items():
        for instr, score in instrs.items():
            if instr in instruments_scores:
                instruments_scores[instr].append(np.nanmean(score))

for instr, scores in instruments_scores.items():
    if scores:
        print(f"{instr}: {np.mean(scores):.4f}")
    else:
        print(f"{instr}: No scores found")

Confidence score mean per artist:
Abhiram Bode: 7.2457
Abhishek Ravi Shankar: No scores found
Aditi Prahalad: 7.6279
Aditya Narayanan: No scores found
Ameya Karthikeyan: 7.9084
Anjanathirumalai: 7.4587
Archana Murali: No scores found
Ashok Subramaniaym: 7.5417
Ashwin Srikanth: 7.4778
Bhargavi Chandrasekar: No scores found
Brinda Manickavasagan: No scores found
Chandan Bala: No scores found
Hamzini: 7.6768
Hari Kishan: 7.5712
Kruthi Bhat: No scores found
Manickam Yogeswaran: 7.4168
Mukund Bharadwaj: 7.6985
Niranjan Dindodi: No scores found
Prithivi Harish: 7.5297
Raghav Krishna: 7.6020
Ramya Kiranmayi: 7.3644
Sahana: 7.5564
Sampadgodu Vighnaraja: 7.4200
Sandhya Ramaswamy: 7.4504
Sanjay Swaminathan: No scores found
Shradda Ganesh: 7.6221
Shruthi Bhat: 7.7408
Sivani: 7.8511
Srividya Vadnamali: No scores found
Surya Prakash: No scores found
Swarathmika: 7.6977
Vaibhav Ramu: 7.6751
Vidya Kalyanaraman: No scores found
Vishruthi Girish: No scores found
Vivek Mozhikulam: 7.4115
Yoga Keerthana:

In [39]:
print("Ratio of confidence scores below 3 per artist:")
for artist, songs in keypoint_scores.items():
    below_3_count = 0
    total_count = 0
    for song, instrs in songs.items():
        for instr, score in instrs.items():
            valid_scores = score[~np.isnan(score)]
            below_3_count += np.sum(valid_scores < 3)
            total_count += valid_scores.size

    if total_count > 0:
        ratio = below_3_count / total_count
        print(f"{artist}: {ratio:.4f}")
    else:
        print(f"{artist}: No scores found")

print("Ratio of confidence scores below 3 per instrument:")
instruments_ratios = {instr: [] for instr in instruments}
for artist, songs in keypoint_scores.items():
    for song, instrs in songs.items():
        for instr, score in instrs.items():
            valid_scores = score[~np.isnan(score)]
            below_3_count = np.sum(valid_scores < 3)
            total_count = valid_scores.size
            if total_count > 0:
                instruments_ratios[instr].append(below_3_count / total_count)
            else:
                instruments_ratios[instr].append(0)

for instr, ratios in instruments_ratios.items():
    if ratios:
        print(f"{instr}: {np.mean(ratios):.4f}")
    else:
        print(f"{instr}: No scores found")

Ratio of confidence scores below 3 per artist:
Abhiram Bode: 0.0574
Abhishek Ravi Shankar: No scores found
Aditi Prahalad: 0.0334
Aditya Narayanan: No scores found
Ameya Karthikeyan: 0.0379
Anjanathirumalai: 0.0402
Archana Murali: No scores found
Ashok Subramaniaym: 0.0400
Ashwin Srikanth: 0.0440
Bhargavi Chandrasekar: No scores found
Brinda Manickavasagan: No scores found
Chandan Bala: No scores found
Hamzini: 0.0367
Hari Kishan: 0.0374
Kruthi Bhat: No scores found
Manickam Yogeswaran: 0.0679
Mukund Bharadwaj: 0.0575
Niranjan Dindodi: No scores found
Prithivi Harish: 0.0329
Raghav Krishna: 0.0271
Ramya Kiranmayi: 0.0511
Sahana: 0.0444
Sampadgodu Vighnaraja: 0.0379
Sandhya Ramaswamy: 0.0335
Sanjay Swaminathan: No scores found
Shradda Ganesh: 0.0360
Shruthi Bhat: 0.0383
Sivani: 0.0134
Srividya Vadnamali: No scores found
Surya Prakash: No scores found
Swarathmika: 0.0399
Vaibhav Ramu: 0.0489
Vidya Kalyanaraman: No scores found
Vishruthi Girish: No scores found
Vivek Mozhikulam: 0.0373
Yo