### Evaluation

##### LSE-C and LSE-D (SyncNet)

In [1]:
import os
# Not the last expression of the cell, so its value is not displayed; only
# the `%cd` magic below produces the path shown in the cell output.
os.getcwd()
# Later cells launch `calculate_scores_LRS.py` by relative path, so the kernel
# has to be running from inside the syncnet_python checkout.
# NOTE(review): the path is relative, so re-running this cell after it has
# already succeeded will likely fail — restart the kernel before re-running.
%cd syncnet_python/

/home/ldap-users-2/lathifgalih-k/clone-video/syncnet_python


In [11]:
import warnings
import os

# Suppress warnings raised inside this kernel process.
warnings.filterwarnings('ignore')
# Also exported through the process environment; presumably so that the
# interpreter started by `!python` below suppresses its warnings as well —
# TODO confirm the shell escape inherits this environment.
os.environ['PYTHONWARNINGS'] = 'ignore'

# For LRS datasets, use the LRS evaluation script
# The script prints "Average Confidence" and "Average Minimum Distance" for
# the whole --data_root folder (see the cell output).
# NOTE(review): absolute, user-specific data path — adjust before running elsewhere.
!python calculate_scores_LRS.py --data_root /home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/muse-exp/MuseTalk/test_lsec --tmp_dir tmp_dir/

Avg Confidence: 4.129, Avg Minimum Dist: 10.272: 100%|█| 1/1 [00:01<00:00,  1.53
Average Confidence: 4.129159927368164
Average Minimum Distance: 10.271806716918945


#### LSE for All Data

In [12]:
import warnings
import os
import pandas as pd
import numpy as np
import subprocess
import glob
import shutil
from collections import defaultdict

# Suppress warnings in this kernel process and publish the same preference
# through the environment (calculate_lse_batch copies os.environ for the
# scoring subprocess).
warnings.filterwarnings('ignore')
os.environ['PYTHONWARNINGS'] = 'ignore'

def extract_info_from_filename(filename):
    """Return ``(speaker_id, tts_method)`` parsed from a video file name.

    Only the base name is inspected. The TTS engine is recognised by a tag
    embedded in the name (``_CV_``, ``_XT_`` or ``_YT_``); when none is
    present the method is reported as ``"Unknown"``. The speaker ID is the
    text before the first underscore.
    """
    name = os.path.basename(filename)

    # Tags are tried in this order and the first one present wins.
    tag_to_method = (
        ("_CV_", "CosyVoice"),
        ("_XT_", "XTTSv2"),
        ("_YT_", "YourTTS"),
    )
    tts_method = next(
        (method for tag, method in tag_to_method if tag in name),
        "Unknown",
    )

    # Leading field of the name, e.g. A002, A006, S001.
    speaker_id = name.partition('_')[0]

    return speaker_id, tts_method

def calculate_lse_batch(video_folder, dataset_name):
    """Run SyncNet scoring over a folder and return its folder-level LSE scores.

    Launches ``calculate_scores_LRS.py`` (resolved relative to the current
    working directory) as a subprocess and parses the two summary lines it
    prints on stdout.

    Args:
        video_folder: Directory passed to the script as ``--data_root``.
        dataset_name: Label used in log messages and to name the scratch
            directory ``tmp_dir_<dataset_name>``.

    Returns:
        ``(avg_confidence, avg_min_distance)`` — the LSE-C and LSE-D values as
        floats. An element is ``None`` when its summary line is missing or
        not numeric; ``(None, None)`` is returned when running the script
        raises.
    """
    print(f"Calculating LSE scores for {dataset_name} videos in: {video_folder}")

    # Scratch directory handed to the scoring script; removed in ``finally``.
    tmp_dir = f"tmp_dir_{dataset_name}"

    try:
        # Run syncnet evaluation using calculate_scores_LRS.py
        cmd = [
            'python', 'calculate_scores_LRS.py',
            '--data_root', video_folder,
            '--tmp_dir', tmp_dir
        ]

        # Suppress warnings in the child interpreter as well.
        env = os.environ.copy()
        env['PYTHONWARNINGS'] = 'ignore'

        print(f"Running command: {' '.join(cmd)}")
        result = subprocess.run(cmd, capture_output=True, text=True, env=env)

        print("STDOUT:", result.stdout)
        if result.stderr:
            print("STDERR:", result.stderr)
        # A failed run would otherwise only show up as "Not found" below.
        if result.returncode != 0:
            print(f"Warning: calculate_scores_LRS.py exited with status {result.returncode}")

        # Summary label -> parsed value. A later matching line overrides an
        # earlier one; an unparsable line leaves the previous value in place.
        parsed = {
            'Average Confidence:': None,
            'Average Minimum Distance:': None,
        }
        for line in result.stdout.split('\n'):
            for label in parsed:
                if label in line:
                    try:
                        parsed[label] = float(line.split(label)[1].strip())
                    except ValueError:
                        pass

        avg_confidence = parsed['Average Confidence:']
        avg_min_distance = parsed['Average Minimum Distance:']

        # Compare against None explicitly: a score of exactly 0.0 is a valid
        # result and must not be reported as missing.
        print(f"\n{dataset_name} Results:")
        if avg_confidence is not None:
            print(f"  Average Confidence (LSE-C): {avg_confidence:.4f}")
        else:
            print("  LSE-C: Not found")
        if avg_min_distance is not None:
            print(f"  Average Minimum Distance (LSE-D): {avg_min_distance:.4f}")
        else:
            print("  LSE-D: Not found")

        return avg_confidence, avg_min_distance

    except Exception as e:
        print(f"Error running syncnet: {e}")
        return None, None
    finally:
        # Clean up temporary directory
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)

# Define video folders to evaluate
# NOTE(review): absolute, user-specific paths — the cell only runs unchanged
# on the original machine layout.
video_folders = [
    "/home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/muse-exp/MuseTalk/results/test_raw/v15/RAW",
    "/home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/muse-exp/MuseTalk/results/test_vctk/v15/VCTK"
]

# Labels paired positionally with video_folders (the two lists are zipped below).
dataset_names = ["RAW", "VCTK"]

# One dict per video file; becomes the rows of the detailed results DataFrame.
all_results = []
# Score lists keyed by "<dataset>_<tts_method>".
lse_d_scores = defaultdict(list)
lse_c_scores = defaultdict(list)
# Keyed by "<dataset>_<speaker_id>"; the inner lists used below are
# 'lse_d', 'lse_c' and 'tts_methods'.
speaker_scores = defaultdict(lambda: defaultdict(list))

print("Starting LSE evaluation using calculate_scores_LRS.py...")
print("=" * 60)

# Score each dataset folder once, then fan the folder-level result out to one
# record per video so it can be grouped by TTS method and by speaker.
for dataset_name, video_folder in zip(dataset_names, video_folders):
    if not os.path.exists(video_folder):
        print(f"Warning: Video folder not found: {video_folder}")
        continue
    
    print(f"\nProcessing {dataset_name} dataset...")
    print(f"Folder: {video_folder}")
    
    # Get all mp4 files
    video_files = glob.glob(os.path.join(video_folder, "*.mp4"))
    print(f"Found {len(video_files)} video files")
    
    if len(video_files) == 0:
        print("No video files found, skipping...")
        continue
    
    # Show first few files
    print("Sample files:")
    for i, vf in enumerate(video_files[:5]):
        print(f"  {i+1}. {os.path.basename(vf)}")
    
    # Calculate LSE scores for this dataset
    lse_c, lse_d = calculate_lse_batch(video_folder, dataset_name)
    
    if lse_c is not None and lse_d is not None:
        print(f"\n{dataset_name} Overall Scores:")
        print(f"  LSE-C (Confidence): {lse_c:.4f}")
        print(f"  LSE-D (Distance): {lse_d:.4f}")
        
        # Process each video file to get individual info
        # NOTE(review): no per-video score is available here — every record
        # below carries the same folder-level lse_c / lse_d, so grouping these
        # records by TTS method or speaker cannot reveal any difference
        # between groups within one dataset.
        for video_file in video_files:
            speaker_id, tts_method = extract_info_from_filename(video_file)
            
            # Store individual result (using overall scores for the dataset)
            result = {
                'dataset': dataset_name,
                'speaker_id': speaker_id,
                'tts_method': tts_method,
                'lse_d_score': lse_d,
                'lse_c_score': lse_c,
                'video_file': os.path.basename(video_file)
            }
            all_results.append(result)
            
            # Store for averaging by TTS method
            key = f"{dataset_name}_{tts_method}"
            lse_d_scores[key].append(lse_d)
            lse_c_scores[key].append(lse_c)
            
            # Store for averaging by speaker
            speaker_key = f"{dataset_name}_{speaker_id}"
            speaker_scores[speaker_key]['lse_d'].append(lse_d)
            speaker_scores[speaker_key]['lse_c'].append(lse_c)
            speaker_scores[speaker_key]['tts_methods'].append(tts_method)
    else:
        print(f"Failed to get LSE scores for {dataset_name}")

print("\n" + "=" * 60)
print("LSE EVALUATION COMPLETE")
print("=" * 60)

# Convert to DataFrame
# One row per video file, built from the records collected in all_results.
df = pd.DataFrame(all_results)

# Display results
if len(df) > 0:
    print("\nIndividual Results:")
    print(df[['dataset', 'speaker_id', 'tts_method', 'lse_d_score', 'lse_c_score']].to_string(index=False))
    
    # Averages per dataset and TTS method
    print("\n" + "=" * 60)
    print("AVERAGES PER DATASET AND TTS METHOD")
    print("=" * 60)
    
    # Only the three known TTS labels are summarised; records labelled
    # "Unknown" are not included in this table.
    tts_summary_data = []
    for dataset in ['RAW', 'VCTK']:
        for tts_method in ['CosyVoice', 'XTTSv2', 'YourTTS']:
            key = f"{dataset}_{tts_method}"
            if key in lse_d_scores and len(lse_d_scores[key]) > 0:
                # np.std uses ddof=0 (population standard deviation).
                avg_lse_d = np.mean(lse_d_scores[key])
                std_lse_d = np.std(lse_d_scores[key])
                avg_lse_c = np.mean(lse_c_scores[key])
                std_lse_c = np.std(lse_c_scores[key])
                count = len(lse_d_scores[key])
                
                print(f"\n{dataset} - {tts_method} ({count} samples):")
                print(f"  LSE-D: {avg_lse_d:.4f} ± {std_lse_d:.4f}")
                print(f"  LSE-C: {avg_lse_c:.4f} ± {std_lse_c:.4f}")
                
                tts_summary_data.append({
                    'Dataset': dataset,
                    'TTS_Method': tts_method,
                    'Count': count,
                    'LSE_D_Mean': avg_lse_d,
                    'LSE_D_Std': std_lse_d,
                    'LSE_C_Mean': avg_lse_c,
                    'LSE_C_Std': std_lse_c
                })
    
    # Averages per speaker
    print("\n" + "=" * 60)
    print("AVERAGES PER SPEAKER")
    print("=" * 60)
    
    speaker_summary_data = []
    for speaker_key in sorted(speaker_scores.keys()):
        if len(speaker_scores[speaker_key]['lse_d']) > 0:
            avg_lse_d = np.mean(speaker_scores[speaker_key]['lse_d'])
            std_lse_d = np.std(speaker_scores[speaker_key]['lse_d'])
            avg_lse_c = np.mean(speaker_scores[speaker_key]['lse_c'])
            std_lse_c = np.std(speaker_scores[speaker_key]['lse_c'])
            count = len(speaker_scores[speaker_key]['lse_d'])
            
            # set() drops duplicates but does not guarantee a stable order
            # for the joined TTS_Methods string.
            tts_methods = list(set(speaker_scores[speaker_key]['tts_methods']))
            # speaker_key is "<dataset>_<speaker_id>"; these two assignments
            # rebind the names used as loop variables in the evaluation loop.
            dataset_name = speaker_key.split('_')[0]
            speaker_id = '_'.join(speaker_key.split('_')[1:])
            
            print(f"\n{speaker_key} ({count} samples, TTS: {', '.join(tts_methods)}):")
            print(f"  LSE-D: {avg_lse_d:.4f} ± {std_lse_d:.4f}")
            print(f"  LSE-C: {avg_lse_c:.4f} ± {std_lse_c:.4f}")
            
            speaker_summary_data.append({
                'Dataset': dataset_name,
                'Speaker_ID': speaker_id,
                'Count': count,
                'LSE_D_Mean': avg_lse_d,
                'LSE_D_Std': std_lse_d,
                'LSE_C_Mean': avg_lse_c,
                'LSE_C_Std': std_lse_c,
                'TTS_Methods': ', '.join(tts_methods)
            })
    
    # Create and display summary tables
    print("\n" + "=" * 60)
    print("SUMMARY TABLES")
    print("=" * 60)
    
    # TTS Method Summary
    tts_summary_df = pd.DataFrame(tts_summary_data)
    print("\nSummary by Dataset and TTS Method:")
    print(tts_summary_df.round(4).to_string(index=False))
    
    # Speaker Summary
    speaker_summary_df = pd.DataFrame(speaker_summary_data)
    print("\nSummary by Speaker:")
    print(speaker_summary_df.round(4).to_string(index=False))
    
    # Overall statistics per dataset
    print("\n" + "=" * 60)
    print("OVERALL STATISTICS PER DATASET")
    print("=" * 60)
    
    for dataset in ['RAW', 'VCTK']:
        dataset_results = df[df['dataset'] == dataset]
        if len(dataset_results) > 0:
            # pandas .std() defaults to ddof=1 (sample standard deviation),
            # unlike the np.std calls in the summaries above.
            avg_lse_d = dataset_results['lse_d_score'].mean()
            std_lse_d = dataset_results['lse_d_score'].std()
            avg_lse_c = dataset_results['lse_c_score'].mean()
            std_lse_c = dataset_results['lse_c_score'].std()
            
            print(f"\n{dataset} Dataset:")
            print(f"  Total samples: {len(dataset_results)}")
            print(f"  LSE-D: {avg_lse_d:.4f} ± {std_lse_d:.4f}")
            print(f"  LSE-C: {avg_lse_c:.4f} ± {std_lse_c:.4f}")
    
    # Save results to CSV
    # NOTE(review): absolute, user-specific output paths; existing files are overwritten.
    output_path = "/home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/evaluation_results_lse.csv"
    df.to_csv(output_path, index=False)
    print(f"\nDetailed results saved to: {output_path}")
    
    # Save TTS summary
    tts_summary_output_path = "/home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/evaluation_summary_lse_tts.csv"
    tts_summary_df.to_csv(tts_summary_output_path, index=False)
    print(f"TTS summary saved to: {tts_summary_output_path}")
    
    # Save speaker summary
    speaker_summary_output_path = "/home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/evaluation_summary_lse_speaker.csv"
    speaker_summary_df.to_csv(speaker_summary_output_path, index=False)
    print(f"Speaker summary saved to: {speaker_summary_output_path}")

else:
    print("No results found!")

# NOTE(review): printed unconditionally, i.e. also after "No results found!".
print("\nLSE evaluation completed successfully!")

Starting LSE evaluation using calculate_scores_LRS.py...

Processing RAW dataset...
Folder: /home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/muse-exp/MuseTalk/results/test_raw/v15/RAW
Found 60 video files
Sample files:
  1. A015_02_DDK_PATAKA_color_25fps_6sec_CV_A015_PATAKA_cloned.mp4
  2. S011_02_BBP_NORMAL_color_25fps_6sec_YT_S011_BBP_cloned.mp4
  3. A016_02_BBP_NORMAL_color_25fps_6sec_YT_A016_BBP_cloned.mp4
  4. A017_02_BBP_NORMAL_color_25fps_6sec_CV_A017_BBP_cloned.mp4
  5. A010_02_BBP_NORMAL_color_25fps_6sec_XT_A010_BBP_cloned.mp4
Calculating LSE scores for RAW videos in: /home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/muse-exp/MuseTalk/results/test_raw/v15/RAW
Running command: python calculate_scores_LRS.py --data_root /home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/muse-exp/MuseTalk/results/test_raw/v15/RAW --tmp_dir tmp_dir_RAW
STDOUT: Average Confidence: 2.7150182565053305
Average Minimum Distance: 11.33853661219279

STDE

In [14]:
import warnings
import os
import pandas as pd
import numpy as np
import subprocess
import glob
import shutil
from collections import defaultdict

# Suppress warnings in this kernel process and publish the same preference
# through the environment (calculate_lse_batch copies os.environ for the
# scoring subprocess).
warnings.filterwarnings('ignore')
os.environ['PYTHONWARNINGS'] = 'ignore'

def extract_info_from_edtalk_filename(filename):
    """Return ``(speaker_id, tts_method, is_avatar)`` for an EDTalk video name.

    Only the base name is inspected. The TTS engine is recognised by a tag
    embedded in the name (``_CV_``, ``_XT_`` or ``_YT_``, else ``"Unknown"``),
    the speaker ID is the text before the first underscore, and
    ``is_avatar`` is True when the name contains ``_avatar_``.
    """
    name = os.path.basename(filename)

    # Tags are tried in this order and the first one present wins.
    tag_to_method = (
        ("_CV_", "CosyVoice"),
        ("_XT_", "XTTSv2"),
        ("_YT_", "YourTTS"),
    )
    tts_method = next(
        (method for tag, method in tag_to_method if tag in name),
        "Unknown",
    )

    # Leading field of the name, e.g. A002, A006, S001.
    speaker_id = name.partition('_')[0]

    return speaker_id, tts_method, "_avatar_" in name

def create_temp_folder_with_512_files(video_folder, dataset_name):
    """Stage the ``*_512.mp4`` files of a folder in a fresh scratch directory.

    The scratch directory is ``temp_512_<dataset_name>`` relative to the
    current working directory; any existing directory of that name is
    removed first.

    Returns:
        ``(temp_folder, video_files_512)`` — the scratch directory name and
        the list of original paths that were copied into it.
    """
    staging_dir = f"temp_512_{dataset_name}"

    # Start from an empty directory: drop leftovers from an earlier run.
    if os.path.exists(staging_dir):
        shutil.rmtree(staging_dir)
    os.makedirs(staging_dir, exist_ok=True)

    # Copy every matching file, keeping its base name.
    sources = glob.glob(os.path.join(video_folder, "*_512.mp4"))
    for src in sources:
        shutil.copy2(src, os.path.join(staging_dir, os.path.basename(src)))

    print(f"Created temp folder with {len(sources)} files")
    return staging_dir, sources

def calculate_lse_batch(video_folder, dataset_name):
    """Score the ``*_512.mp4`` videos of a folder with ``calculate_scores_LRS.py``.

    The matching files are first staged in a scratch folder by
    ``create_temp_folder_with_512_files``; the scoring script is then run on
    that folder as a subprocess and its two summary lines are parsed from
    stdout. Both the staging folder and ``tmp_dir_<dataset_name>`` are removed
    before returning, including when no matching file exists.

    Args:
        video_folder: Directory searched for ``*_512.mp4`` files.
        dataset_name: Label used in log messages and scratch directory names.

    Returns:
        ``(avg_confidence, avg_min_distance, video_files_512)``. The first two
        are floats (LSE-C, LSE-D) or ``None`` when the corresponding summary
        line is missing or not numeric. ``(None, None, [])`` is returned when
        there is nothing to score or running the script raises.
    """
    print(f"Calculating LSE scores for {dataset_name} videos in: {video_folder}")

    # Create temp folder with only 512 files
    temp_512_folder, video_files_512 = create_temp_folder_with_512_files(video_folder, dataset_name)

    # Create temporary directory for calculation
    tmp_dir = f"tmp_dir_{dataset_name}"

    try:
        # Checked inside the try so that the (empty) staging folder is still
        # removed by the finally clause instead of being left behind.
        if len(video_files_512) == 0:
            print(f"No 512 resolution files found in {video_folder}")
            return None, None, []

        # Run syncnet evaluation using calculate_scores_LRS.py on temp folder
        cmd = [
            'python', 'calculate_scores_LRS.py',
            '--data_root', temp_512_folder,
            '--tmp_dir', tmp_dir
        ]

        # Suppress warnings in the child interpreter as well.
        env = os.environ.copy()
        env['PYTHONWARNINGS'] = 'ignore'

        print(f"Running command: {' '.join(cmd)}")
        result = subprocess.run(cmd, capture_output=True, text=True, env=env)

        print("STDOUT:", result.stdout)
        if result.stderr:
            print("STDERR:", result.stderr)
        # A failed run would otherwise only show up as "Not found" below.
        if result.returncode != 0:
            print(f"Warning: calculate_scores_LRS.py exited with status {result.returncode}")

        # Summary label -> parsed value. A later matching line overrides an
        # earlier one; an unparsable line leaves the previous value in place.
        parsed = {
            'Average Confidence:': None,
            'Average Minimum Distance:': None,
        }
        for line in result.stdout.split('\n'):
            for label in parsed:
                if label in line:
                    try:
                        parsed[label] = float(line.split(label)[1].strip())
                    except ValueError:
                        pass

        avg_confidence = parsed['Average Confidence:']
        avg_min_distance = parsed['Average Minimum Distance:']

        # Compare against None explicitly: a score of exactly 0.0 is a valid
        # result and must not be reported as missing.
        print(f"\n{dataset_name} Results:")
        if avg_confidence is not None:
            print(f"  Average Confidence (LSE-C): {avg_confidence:.4f}")
        else:
            print("  LSE-C: Not found")
        if avg_min_distance is not None:
            print(f"  Average Minimum Distance (LSE-D): {avg_min_distance:.4f}")
        else:
            print("  LSE-D: Not found")

        return avg_confidence, avg_min_distance, video_files_512

    except Exception as e:
        print(f"Error running syncnet: {e}")
        return None, None, []
    finally:
        # Clean up temporary directories
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
        if os.path.exists(temp_512_folder):
            shutil.rmtree(temp_512_folder)

# Define video folders to evaluate for EDTalk
# NOTE(review): absolute, user-specific paths — the cell only runs unchanged
# on the original machine layout.
video_folders = [
    # Non-avatar (v15) results
    "/home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/edtalk-exp/EDTalk/results/test_raw/v15/RAW",
    "/home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/edtalk-exp/EDTalk/results/test_vctk/v15/VCTK",
    # Avatar results
    "/home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/edtalk-exp/EDTalk/results/test_raw/avatar/RAW",
    "/home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/edtalk-exp/EDTalk/results/test_vctk/avatar/VCTK"
]

# Labels paired positionally with video_folders, in "<dataset>_<variant>"
# form; the code below splits them on '_' and tests for the substring "avatar".
dataset_names = ["RAW_v15", "VCTK_v15", "RAW_avatar", "VCTK_avatar"]

# One dict per 512-resolution video; becomes the rows of the detailed results DataFrame.
all_results = []
# Score lists keyed by "<dataset>_<variant>_<tts_method>".
lse_d_scores = defaultdict(list)
lse_c_scores = defaultdict(list)
# Keyed by "<dataset>_<variant>_<speaker_id>"; the inner lists used below are
# 'lse_d', 'lse_c' and 'tts_methods'.
speaker_scores = defaultdict(lambda: defaultdict(list))

print("Starting LSE evaluation for EDTalk results...")
print("=" * 70)

# Score each folder once (512-resolution files only), then fan the
# folder-level result out to one record per video for later grouping.
for dataset_name, video_folder in zip(dataset_names, video_folders):
    if not os.path.exists(video_folder):
        print(f"Warning: Video folder not found: {video_folder}")
        continue
    
    print(f"\nProcessing {dataset_name} dataset...")
    print(f"Folder: {video_folder}")
    
    # Calculate LSE scores for this dataset (512 files only)
    lse_c, lse_d, video_files_512 = calculate_lse_batch(video_folder, dataset_name)
    
    if lse_c is not None and lse_d is not None and len(video_files_512) > 0:
        print(f"\n{dataset_name} Overall Scores:")
        print(f"  LSE-C (Confidence): {lse_c:.4f}")
        print(f"  LSE-D (Distance): {lse_d:.4f}")
        print(f"  Total 512 res files processed: {len(video_files_512)}")
        
        # Show first few files
        print("Sample files processed:")
        for i, vf in enumerate(video_files_512[:5]):
            print(f"  {i+1}. {os.path.basename(vf)}")
        
        # Process each 512 video file to get individual info
        # NOTE(review): no per-video score is available here — every record
        # below carries the same folder-level lse_c / lse_d, so grouping these
        # records by TTS method or speaker cannot reveal any difference
        # between groups within one folder.
        for video_file in video_files_512:
            speaker_id, tts_method, is_avatar = extract_info_from_edtalk_filename(video_file)
            
            # Determine data type
            # Derived from the folder label, independently of the
            # filename-based is_avatar flag stored alongside it.
            data_type = "Avatar" if "avatar" in dataset_name else "V15"
            base_dataset = dataset_name.split('_')[0]  # RAW or VCTK
            
            # Store individual result
            result = {
                'dataset': base_dataset,
                'data_type': data_type,
                'speaker_id': speaker_id,
                'tts_method': tts_method,
                'is_avatar': is_avatar,
                'lse_d_score': lse_d,
                'lse_c_score': lse_c,
                'video_file': os.path.basename(video_file)
            }
            all_results.append(result)
            
            # Store for averaging by dataset and TTS method
            # Keys use the lower-case variant from dataset_name ("v15" /
            # "avatar"), not the capitalised data_type stored in the record.
            key = f"{dataset_name}_{tts_method}"
            lse_d_scores[key].append(lse_d)
            lse_c_scores[key].append(lse_c)
            
            # Store for averaging by speaker
            speaker_key = f"{dataset_name}_{speaker_id}"
            speaker_scores[speaker_key]['lse_d'].append(lse_d)
            speaker_scores[speaker_key]['lse_c'].append(lse_c)
            speaker_scores[speaker_key]['tts_methods'].append(tts_method)
    else:
        print(f"Failed to get LSE scores for {dataset_name} or no files found")

print("\n" + "=" * 70)
print("EDTalk LSE EVALUATION COMPLETE")
print("=" * 70)

# Convert to DataFrame
# One row per 512-resolution video, built from the records in all_results.
df = pd.DataFrame(all_results)

# Display results
if len(df) > 0:
    print("\nIndividual Results:")
    print(df[['dataset', 'data_type', 'speaker_id', 'tts_method', 'lse_d_score', 'lse_c_score']].to_string(index=False))
    
    # Averages per dataset, data_type and TTS method
    print("\n" + "=" * 70)
    print("AVERAGES PER DATASET, DATA TYPE AND TTS METHOD")
    print("=" * 70)
    
    # Lower-case 'v15' / 'avatar' here match the accumulator keys built from
    # dataset_names; records labelled "Unknown" are not summarised.
    tts_summary_data = []
    for dataset in ['RAW', 'VCTK']:
        for data_type in ['v15', 'avatar']:
            for tts_method in ['CosyVoice', 'XTTSv2', 'YourTTS']:
                key = f"{dataset}_{data_type}_{tts_method}"
                if key in lse_d_scores and len(lse_d_scores[key]) > 0:
                    # np.std uses ddof=0 (population standard deviation).
                    avg_lse_d = np.mean(lse_d_scores[key])
                    std_lse_d = np.std(lse_d_scores[key])
                    avg_lse_c = np.mean(lse_c_scores[key])
                    std_lse_c = np.std(lse_c_scores[key])
                    count = len(lse_d_scores[key])
                    
                    print(f"\n{dataset} - {data_type} - {tts_method} ({count} samples):")
                    print(f"  LSE-D: {avg_lse_d:.4f} ± {std_lse_d:.4f}")
                    print(f"  LSE-C: {avg_lse_c:.4f} ± {std_lse_c:.4f}")
                    
                    tts_summary_data.append({
                        'Dataset': dataset,
                        'Data_Type': data_type,
                        'TTS_Method': tts_method,
                        'Count': count,
                        'LSE_D_Mean': avg_lse_d,
                        'LSE_D_Std': std_lse_d,
                        'LSE_C_Mean': avg_lse_c,
                        'LSE_C_Std': std_lse_c
                    })
    
    # Averages per speaker
    print("\n" + "=" * 70)
    print("AVERAGES PER SPEAKER")
    print("=" * 70)
    
    speaker_summary_data = []
    for speaker_key in sorted(speaker_scores.keys()):
        if len(speaker_scores[speaker_key]['lse_d']) > 0:
            avg_lse_d = np.mean(speaker_scores[speaker_key]['lse_d'])
            std_lse_d = np.std(speaker_scores[speaker_key]['lse_d'])
            avg_lse_c = np.mean(speaker_scores[speaker_key]['lse_c'])
            std_lse_c = np.std(speaker_scores[speaker_key]['lse_c'])
            count = len(speaker_scores[speaker_key]['lse_d'])
            
            # set() drops duplicates but does not guarantee a stable order
            # for the joined TTS_Methods string.
            tts_methods = list(set(speaker_scores[speaker_key]['tts_methods']))
            # speaker_key is "<dataset>_<variant>_<speaker_id>"; dataset_name
            # and speaker_id rebind names used in the evaluation loop.
            parts = speaker_key.split('_')
            dataset_name = parts[0]
            data_type = parts[1]
            speaker_id = '_'.join(parts[2:])
            
            print(f"\n{speaker_key} ({count} samples, TTS: {', '.join(tts_methods)}):")
            print(f"  LSE-D: {avg_lse_d:.4f} ± {std_lse_d:.4f}")
            print(f"  LSE-C: {avg_lse_c:.4f} ± {std_lse_c:.4f}")
            
            speaker_summary_data.append({
                'Dataset': dataset_name,
                'Data_Type': data_type,
                'Speaker_ID': speaker_id,
                'Count': count,
                'LSE_D_Mean': avg_lse_d,
                'LSE_D_Std': std_lse_d,
                'LSE_C_Mean': avg_lse_c,
                'LSE_C_Std': std_lse_c,
                'TTS_Methods': ', '.join(tts_methods)
            })
    
    # Create and display summary tables
    print("\n" + "=" * 70)
    print("SUMMARY TABLES")
    print("=" * 70)
    
    # TTS Method Summary
    tts_summary_df = pd.DataFrame(tts_summary_data)
    print("\nSummary by Dataset, Data Type and TTS Method:")
    print(tts_summary_df.round(4).to_string(index=False))
    
    # Speaker Summary
    speaker_summary_df = pd.DataFrame(speaker_summary_data)
    print("\nSummary by Speaker:")
    print(speaker_summary_df.round(4).to_string(index=False))
    
    # Overall statistics per dataset and data type
    print("\n" + "=" * 70)
    print("OVERALL STATISTICS PER DATASET AND DATA TYPE")
    print("=" * 70)
    
    # Capitalised 'V15' / 'Avatar' here match the data_type column of df.
    for dataset in ['RAW', 'VCTK']:
        for data_type in ['V15', 'Avatar']:
            dataset_results = df[(df['dataset'] == dataset) & (df['data_type'] == data_type)]
            if len(dataset_results) > 0:
                # pandas .std() defaults to ddof=1 (sample standard
                # deviation), unlike the np.std calls in the summaries above.
                avg_lse_d = dataset_results['lse_d_score'].mean()
                std_lse_d = dataset_results['lse_d_score'].std()
                avg_lse_c = dataset_results['lse_c_score'].mean()
                std_lse_c = dataset_results['lse_c_score'].std()
                
                print(f"\n{dataset} - {data_type} Dataset (512 resolution):")
                print(f"  Total samples: {len(dataset_results)}")
                print(f"  LSE-D: {avg_lse_d:.4f} ± {std_lse_d:.4f}")
                print(f"  LSE-C: {avg_lse_c:.4f} ± {std_lse_c:.4f}")
    
    # Comparison between Avatar and V15
    print("\n" + "=" * 70)
    print("AVATAR vs V15 COMPARISON")
    print("=" * 70)
    
    comparison_data = []
    for dataset in ['RAW', 'VCTK']:
        for tts_method in ['CosyVoice', 'XTTSv2', 'YourTTS']:
            v15_results = df[(df['dataset'] == dataset) & (df['data_type'] == 'V15') & (df['tts_method'] == tts_method)]
            avatar_results = df[(df['dataset'] == dataset) & (df['data_type'] == 'Avatar') & (df['tts_method'] == tts_method)]
            
            # Only compared when both variants have rows for this combination.
            if len(v15_results) > 0 and len(avatar_results) > 0:
                v15_lse_d = v15_results['lse_d_score'].mean()
                v15_lse_c = v15_results['lse_c_score'].mean()
                avatar_lse_d = avatar_results['lse_d_score'].mean()
                avatar_lse_c = avatar_results['lse_c_score'].mean()
                
                # Sign convention: both "improvement" values are positive when
                # Avatar has the lower distance / the higher confidence.
                print(f"\n{dataset} - {tts_method}:")
                print(f"  V15    - LSE-D: {v15_lse_d:.4f}, LSE-C: {v15_lse_c:.4f}")
                print(f"  Avatar - LSE-D: {avatar_lse_d:.4f}, LSE-C: {avatar_lse_c:.4f}")
                print(f"  Improvement (D): {v15_lse_d - avatar_lse_d:+.4f}")
                print(f"  Improvement (C): {avatar_lse_c - v15_lse_c:+.4f}")
                
                comparison_data.append({
                    'Dataset': dataset,
                    'TTS_Method': tts_method,
                    'V15_LSE_D': v15_lse_d,
                    'V15_LSE_C': v15_lse_c,
                    'Avatar_LSE_D': avatar_lse_d,
                    'Avatar_LSE_C': avatar_lse_c,
                    'LSE_D_Improvement': v15_lse_d - avatar_lse_d,
                    'LSE_C_Improvement': avatar_lse_c - v15_lse_c
                })
    
    comparison_df = pd.DataFrame(comparison_data)
    print("\nDetailed Avatar vs V15 Comparison:")
    print(comparison_df.round(4).to_string(index=False))
    
    # Save results to CSV
    # NOTE(review): absolute, user-specific output paths; existing files are overwritten.
    output_path = "/home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/evaluation_results_lse_edtalk.csv"
    df.to_csv(output_path, index=False)
    print(f"\nDetailed results saved to: {output_path}")
    
    # Save TTS summary
    tts_summary_output_path = "/home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/evaluation_summary_lse_edtalk_tts.csv"
    tts_summary_df.to_csv(tts_summary_output_path, index=False)
    print(f"TTS summary saved to: {tts_summary_output_path}")
    
    # Save speaker summary
    speaker_summary_output_path = "/home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/evaluation_summary_lse_edtalk_speaker.csv"
    speaker_summary_df.to_csv(speaker_summary_output_path, index=False)
    print(f"Speaker summary saved to: {speaker_summary_output_path}")
    
    # Save comparison
    comparison_output_path = "/home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/evaluation_comparison_avatar_v15.csv"
    comparison_df.to_csv(comparison_output_path, index=False)
    print(f"Avatar vs V15 comparison saved to: {comparison_output_path}")

else:
    print("No results found!")

# NOTE(review): printed unconditionally, i.e. also after "No results found!".
print("\nEDTalk LSE evaluation completed successfully!")

Starting LSE evaluation for EDTalk results...

Processing RAW_v15 dataset...
Folder: /home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/edtalk-exp/EDTalk/results/test_raw/v15/RAW
Calculating LSE scores for RAW_v15 videos in: /home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/edtalk-exp/EDTalk/results/test_raw/v15/RAW
Created temp folder with 60 files
Running command: python calculate_scores_LRS.py --data_root temp_512_RAW_v15 --tmp_dir tmp_dir_RAW_v15
STDOUT: Average Confidence: 2.967564654350281
Average Minimum Distance: 9.928580323855082

STDERR: 
  0%|          | 0/60 [00:00<?, ?it/s]
Avg Confidence: 0.97, Avg Minimum Dist: 11.472:   0%|          | 0/60 [00:03<?, ?it/s]
Avg Confidence: 0.97, Avg Minimum Dist: 11.472:   0%|          | 0/60 [00:03<?, ?it/s]
Avg Confidence: 0.97, Avg Minimum Dist: 11.472:   2%|▏         | 1/60 [00:03<03:28,  3.53s/it]
Avg Confidence: 2.712, Avg Minimum Dist: 9.642:   2%|▏         | 1/60 [00:06<03:28,  3.53s/it]
Avg Confi

In [15]:
# Disease-based LSE-C Evaluation Script
import pandas as pd
import numpy as np
import os

def load_lse_results():
    """Read the per-video LSE result CSVs that exist on disk.

    Returns:
        A dict with up to two entries, ``'musetalk'`` and ``'edtalk'``, each a
        DataFrame with an added ``model`` column. The MuseTalk frame also
        gets ``data_type`` set to ``'V15'``. A source whose CSV file is
        missing is left out of the dict.
    """
    # (result key, model label, CSV path, extra constant columns)
    sources = (
        (
            'musetalk',
            'MuseTalk',
            "/home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/evaluation_results_lse.csv",
            {'data_type': 'V15'},  # MuseTalk only has V15 type
        ),
        (
            'edtalk',
            'EDTalk',
            # Contains both V15 and Avatar rows, so no data_type override.
            "/home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video/evaluation_results_lse_edtalk.csv",
            {},
        ),
    )

    results = {}
    for key, model, csv_path, constant_columns in sources:
        if not os.path.exists(csv_path):
            continue
        frame = pd.read_csv(csv_path)
        frame['model'] = model
        for column, value in constant_columns.items():
            frame[column] = value
        results[key] = frame
        print(f"Loaded {model} LSE results: {len(frame)} records")

    return results

def get_disease_category(speaker_id):
    """Map a speaker ID to a disease label based on its leading letter.

    IDs starting with ``'A'`` map to ``'ALS'``, IDs starting with ``'S'`` map
    to ``'Stroke'``, and anything else maps to ``'Unknown'``.
    """
    prefix_to_disease = (('A', 'ALS'), ('S', 'Stroke'))
    for prefix, disease in prefix_to_disease:
        if speaker_id.startswith(prefix):
            return disease
    return 'Unknown'

def calculate_lse_disease_metrics(df, model_name):
    """Summarise LSE scores per dataset, data type, disease and TTS method.

    Args:
        df: Per-video results with the columns ``dataset``, ``data_type``,
            ``speaker_id``, ``tts_method``, ``lse_d_score`` and
            ``lse_c_score``. The frame is not modified.
        model_name: Label written to the ``Model`` field of every summary row.

    Returns:
        A list of dicts, one per non-empty (dataset, data_type, disease,
        tts_method) group, holding the group size and the mean / standard
        deviation of both scores. The standard deviation is pandas' default
        sample estimate, so it is NaN for a single-row group. Only the
        'ALS' and 'Stroke' categories and the three known TTS methods are
        reported.
    """
    summary_data = []

    # Derive the disease column on a new frame instead of writing it into the
    # caller's DataFrame.
    labelled = df.assign(disease=df['speaker_id'].apply(get_disease_category))

    # TTS methods to evaluate
    tts_methods = ['CosyVoice', 'XTTSv2', 'YourTTS']

    # Unique (dataset, data_type) pairs, in order of first appearance.
    unique_combinations = labelled[['dataset', 'data_type']].drop_duplicates()

    for dataset, data_type in unique_combinations.itertuples(index=False, name=None):
        combo_data = labelled[
            (labelled['dataset'] == dataset) & (labelled['data_type'] == data_type)
        ]

        for disease in ['ALS', 'Stroke']:
            disease_data = combo_data[combo_data['disease'] == disease]
            if len(disease_data) == 0:
                continue

            for tts_method in tts_methods:
                tts_data = disease_data[disease_data['tts_method'] == tts_method]
                if len(tts_data) == 0:
                    continue

                summary_data.append({
                    'Model': model_name,
                    'Dataset': dataset,
                    'Data_Type': data_type,
                    'TTS_Method': tts_method,
                    'Disease': disease,
                    'Count': len(tts_data),
                    'LSE_D_Mean': tts_data['lse_d_score'].mean(),
                    'LSE_D_Std': tts_data['lse_d_score'].std(),
                    'LSE_C_Mean': tts_data['lse_c_score'].mean(),
                    'LSE_C_Std': tts_data['lse_c_score'].std()
                })

    return summary_data

# Directory the three summary CSVs were originally hard-coded to; kept as the
# default so existing calls keep writing to the same place.
_LSE_DEFAULT_OUTPUT_DIR = "/home/is/lathifgalih-k/research_naist/multimodal-clone/clone-video"

# Fixed reporting orders shared by every section of the report.
_LSE_MODELS = ['MuseTalk', 'EDTalk']
_LSE_DATASETS = ['RAW', 'VCTK']
_LSE_DISEASES = ['ALS', 'Stroke']


def _print_banner(title, leading_blank=True):
    """Print a section title framed by two 80-character '=' rules."""
    print(("\n" if leading_blank else "") + "=" * 80)
    print(title)
    print("=" * 80)


def _rows_where(frame, **column_values):
    """Return the rows of ``frame`` whose columns equal all the given values."""
    for column, value in column_values.items():
        frame = frame[frame[column] == value]
    return frame


def _data_types_for(model):
    """Return the data types reported for a model (MuseTalk only has V15)."""
    return ['V15'] if model == 'MuseTalk' else ['V15', 'Avatar']


def _print_paper_summary(final_df):
    """Print, per model / dataset / data type / disease, the TTS-averaged scores."""
    for model in _LSE_MODELS:
        model_data = _rows_where(final_df, Model=model)
        if len(model_data) == 0:
            continue
        print(f"\n{model}:")

        for dataset in _LSE_DATASETS:
            dataset_data = _rows_where(model_data, Dataset=dataset)
            if len(dataset_data) == 0:
                continue
            print(f"  {dataset} Dataset:")

            for data_type in _data_types_for(model):
                dtype_data = _rows_where(dataset_data, Data_Type=data_type)
                if len(dtype_data) == 0:
                    continue
                print(f"    {data_type}:")

                for disease in _LSE_DISEASES:
                    disease_data = _rows_where(dtype_data, Disease=disease)
                    if len(disease_data) == 0:
                        continue
                    # Unweighted mean of the per-TTS-method means
                    avg_lse_c = disease_data['LSE_C_Mean'].mean()
                    avg_lse_d = disease_data['LSE_D_Mean'].mean()
                    total_samples = disease_data['Count'].sum()

                    print(f"      {disease} (Total: {total_samples} samples):")
                    print(f"        Avg LSE-C across TTS: {avg_lse_c:.4f}")
                    print(f"        Avg LSE-D across TTS: {avg_lse_d:.4f}")


def _build_simplified_table(final_df):
    """Build one row per (model, dataset, data type) with ALS/Stroke averages."""
    table_data = []
    for model in _LSE_MODELS:
        for dataset in _LSE_DATASETS:
            for data_type in _data_types_for(model):
                subset = _rows_where(
                    final_df, Model=model, Dataset=dataset, Data_Type=data_type
                )
                if len(subset) == 0:
                    continue

                row = {
                    'Dataset': dataset,
                    'Model': f"{model}_{data_type}" if model == 'EDTalk' else model,
                }
                for disease in _LSE_DISEASES:
                    disease_rows = _rows_where(subset, Disease=disease)
                    has_rows = len(disease_rows) > 0
                    # NOTE(review): a disease with no rows is reported as 0,
                    # which is indistinguishable from a measured score of 0 in
                    # the saved CSV. Kept as-is for compatibility.
                    row[f'{disease}_LSE_C'] = disease_rows['LSE_C_Mean'].mean() if has_rows else 0
                    row[f'{disease}_LSE_D'] = disease_rows['LSE_D_Mean'].mean() if has_rows else 0
                    row[f'{disease}_Count'] = disease_rows['Count'].sum() if has_rows else 0
                table_data.append(row)

    return pd.DataFrame(table_data)


def _print_edtalk_comparison(final_df):
    """Print the Avatar-minus-V15 LSE-C difference for EDTalk per dataset and disease."""
    edtalk_data = _rows_where(final_df, Model='EDTalk')
    comparison_results = []

    for dataset in _LSE_DATASETS:
        for disease in _LSE_DISEASES:
            v15_data = _rows_where(
                edtalk_data, Dataset=dataset, Data_Type='V15', Disease=disease
            )
            avatar_data = _rows_where(
                edtalk_data, Dataset=dataset, Data_Type='Avatar', Disease=disease
            )
            # A comparison needs both variants to be present
            if len(v15_data) == 0 or len(avatar_data) == 0:
                continue

            v15_lse_c = v15_data['LSE_C_Mean'].mean()
            avatar_lse_c = avatar_data['LSE_C_Mean'].mean()
            improvement = avatar_lse_c - v15_lse_c

            print(f"\n{dataset} - {disease}:")
            print(f"  V15:    LSE-C = {v15_lse_c:.4f}")
            print(f"  Avatar: LSE-C = {avatar_lse_c:.4f}")
            print(f"  Improvement: {improvement:+.4f}")

            comparison_results.append({
                'Dataset': dataset,
                'Disease': disease,
                'V15_LSE_C': v15_lse_c,
                'Avatar_LSE_C': avatar_lse_c,
                'Improvement': improvement
            })

    if comparison_results:
        comparison_df = pd.DataFrame(comparison_results)
        print("\nAvatar vs V15 Comparison Summary:")
        print(comparison_df.round(4).to_string(index=False))


def _build_research_table(final_df):
    """Build the LSE-C-only rows for MuseTalk, EDTalk (V15) and Avatar-EDTalk."""
    # (label in the table, filter selecting that variant's summary rows).
    # MuseTalk is intentionally not filtered by Data_Type.
    variants = [
        ('MuseTalk', {'Model': 'MuseTalk'}),
        ('EDTalk', {'Model': 'EDTalk', 'Data_Type': 'V15'}),
        ('Avatar-EDTalk', {'Model': 'EDTalk', 'Data_Type': 'Avatar'}),
    ]

    research_table_data = []
    for label, criteria in variants:
        variant_data = _rows_where(final_df, **criteria)
        for dataset in _LSE_DATASETS:
            dataset_data = _rows_where(variant_data, Dataset=dataset)
            if len(dataset_data) == 0:
                continue

            row = {'Dataset': dataset, 'Model': label}
            for disease in _LSE_DISEASES:
                mean_lse_c = _rows_where(dataset_data, Disease=disease)['LSE_C_Mean'].mean()
                # The mean of an empty selection is NaN; it is reported as 0.
                row[f'{disease}_LSE_C'] = mean_lse_c if not np.isnan(mean_lse_c) else 0
            research_table_data.append(row)

    return research_table_data


def main_lse_disease_evaluation(output_dir=_LSE_DEFAULT_OUTPUT_DIR):
    """Generate the disease-based (ALS vs Stroke) LSE evaluation report.

    Loads the per-video LSE results via ``load_lse_results()``, summarises
    them with ``calculate_lse_disease_metrics`` for MuseTalk and EDTalk,
    prints several report tables, and writes three CSV files.

    Args:
        output_dir: Directory receiving
            ``lse_disease_based_evaluation_summary.csv``,
            ``simplified_lse_disease_table.csv`` and
            ``final_lse_research_table.csv``. Defaults to the directory that
            used to be hard-coded, so calling without arguments behaves as
            before. The directory must already exist.

    Returns:
        None. Results are printed and written to disk. Nothing is returned,
        so a call as the last line of a cell displays no extra cell output.
    """
    # Local import so the function works regardless of which cell imported os.
    import os

    _print_banner("LSE-C DISEASE-BASED EVALUATION (ALS vs STROKE)", leading_blank=False)

    # Load all LSE results
    results = load_lse_results()

    all_summaries = []
    for result_key, model_name in (('musetalk', 'MuseTalk'), ('edtalk', 'EDTalk')):
        if result_key in results:
            print(f"\nProcessing {model_name} LSE results...")
            summaries = calculate_lse_disease_metrics(results[result_key], model_name)
            all_summaries.extend(summaries)
            print(f"Generated {len(summaries)} entries")

    if not all_summaries:
        print("No LSE evaluation results found!")
        return

    # Full per-(model, dataset, data type, disease, TTS method) table,
    # sorted for readability
    final_df = pd.DataFrame(all_summaries)
    final_df = final_df.sort_values(['Model', 'Dataset', 'Data_Type', 'Disease', 'TTS_Method'])

    _print_banner("FINAL LSE RESEARCH TABLE - DISEASE-BASED METRICS")
    print(final_df.round(4).to_string(index=False))

    output_path = os.path.join(output_dir, "lse_disease_based_evaluation_summary.csv")
    final_df.to_csv(output_path, index=False)
    print(f"\nLSE results saved to: {output_path}")

    # Summary statistics for the research paper
    _print_banner("LSE RESEARCH PAPER SUMMARY")
    _print_paper_summary(final_df)

    # Simplified table, averaged across TTS methods
    _print_banner("SIMPLIFIED LSE TABLE FOR RESEARCH PAPER")
    table_df = _build_simplified_table(final_df)
    if len(table_df) > 0:
        print("\nSIMPLIFIED LSE TABLE (Averaged across TTS methods):")
        print(table_df.round(4).to_string(index=False))

        simple_output_path = os.path.join(output_dir, "simplified_lse_disease_table.csv")
        table_df.to_csv(simple_output_path, index=False)
        print(f"\nSimplified LSE table saved to: {simple_output_path}")

    # Comparison between Avatar and V15 for EDTalk
    _print_banner("EDTALK: AVATAR vs V15 COMPARISON (LSE-C)")
    _print_edtalk_comparison(final_df)

    # Final table matching the research table structure
    _print_banner("FINAL TABLE FORMAT FOR RESEARCH PAPER (LSE-C only)")
    research_table_data = _build_research_table(final_df)
    if research_table_data:
        research_table_df = pd.DataFrame(research_table_data)
        print("\nFINAL RESEARCH TABLE (LSE-C values):")
        print(research_table_df.round(4).to_string(index=False))

        research_table_path = os.path.join(output_dir, "final_lse_research_table.csv")
        research_table_df.to_csv(research_table_path, index=False)
        print(f"\nFinal research table saved to: {research_table_path}")

# Run the LSE disease-based evaluation when this cell executes: the call
# prints the report tables and writes the summary CSV files to disk.
main_lse_disease_evaluation()

LSE-C DISEASE-BASED EVALUATION (ALS vs STROKE)
Loaded MuseTalk LSE results: 120 records
Loaded EDTalk LSE results: 240 records

Processing MuseTalk LSE results...
Generated 12 entries

Processing EDTalk LSE results...
Generated 24 entries

FINAL LSE RESEARCH TABLE - DISEASE-BASED METRICS
   Model Dataset Data_Type TTS_Method Disease  Count  LSE_D_Mean  LSE_D_Std  LSE_C_Mean  LSE_C_Std
  EDTalk     RAW    Avatar  CosyVoice     ALS      9      9.0959        0.0      4.4569        0.0
  EDTalk     RAW    Avatar     XTTSv2     ALS      9      9.0959        0.0      4.4569        0.0
  EDTalk     RAW    Avatar    YourTTS     ALS      9      9.0959        0.0      4.4569        0.0
  EDTalk     RAW    Avatar  CosyVoice  Stroke     11      9.0959        0.0      4.4569        0.0
  EDTalk     RAW    Avatar     XTTSv2  Stroke     11      9.0959        0.0      4.4569        0.0
  EDTalk     RAW    Avatar    YourTTS  Stroke     11      9.0959        0.0      4.4569        0.0
  EDTalk     RAW  