# Advanced Model Training Pipeline
Bu notebook farklı model mimarileri ve loss fonksiyonları ile kapsamlı model eğitimi yapar.

In [1]:
# Configuration for the training pipeline
config = {
    "latent_dir": "latent_representations",
    "output_dir": "advanced_vae_models",
    "chunk_size": 10,
    "max_samples": None,  # None = tüm veri
    "run_all_configs": True,  # Tüm konfigürasyonları çalıştır
    "run_comparison": True,  # Model karşılaştırması yap
    "run_fine_tuning": True,  # Fine-tuning yap
}

# Fine-tuning configuration
fine_tune_config = {
    "lr": 1e-5,  # Düşük learning rate
    "epochs": 50,  # Daha az epoch
    "freeze_encoder": False,  # Encoder'ı dondurma
    "freeze_decoder": False,  # Decoder'ı dondurma
}

print("Configuration loaded successfully!")
print(f"Output directory: {config['output_dir']}")
print(f"Using chunk size: {config['chunk_size']}s")

Configuration loaded successfully!
Output directory: advanced_vae_models
Using chunk size: 10s


In [2]:
config

{'latent_dir': 'latent_representations',
 'output_dir': 'advanced_vae_models',
 'chunk_size': 10,
 'max_samples': None,
 'run_all_configs': True,
 'run_comparison': True,
 'run_fine_tuning': True}

# Required Libraries

In [3]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import warnings

# Import our advanced training utilities
import utils._advanced_training
from utils._advanced_training import *

%matplotlib inline
warnings.filterwarnings('ignore')
plt.style.use('seaborn-v0_8-whitegrid')

print(f"Using PyTorch {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

Using PyTorch 2.7.0
CUDA available: False


# Model Configuration Templates

## Available Model Architectures
- **Standard**: Geliştirilmiş standart VAE mimarisi
- **Residual**: Residual bağlantılı derin ağ
- **Deep**: Daha derin çok katmanlı ağ
- **Hierarchical**: Hiyerarşik VAE mimarisi

## Available Loss Functions
- **beta_vae**: Standard Beta-VAE loss
- **annealed**: Linearly annealed KL term
- **cyclical**: Cyclical annealing of KL term  
- **emotion_weighted**: Emotion-aware weighted loss
- **hierarchical**: Hierarchical VAE loss

In [8]:
# Show available configurations
print("Available pre-configured model templates:")
print("=" * 50)

for config_name, config in CONFIG_TEMPLATES.items():
    model_config = config['model_config']
    training_config = config['training_config']
    
    print(f"\n{config_name.upper()}:")
    print(f"  Architecture: {model_config['architecture']}")
    print(f"  Hidden dims: {model_config['hidden_dims']}")
    print(f"  Loss type: {training_config['loss_type']}")
    print(f"  Optimizer: {training_config['optimizer']}")
    print(f"  Learning rate: {training_config['lr']}")
    print(f"  Batch size: {training_config['batch_size']}")
    print(f"  Epochs: {training_config['epochs']}")

Available pre-configured model templates:

STANDARD_BETA:
  Architecture: standard
  Hidden dims: [512, 256, 128]
  Loss type: beta_vae
  Optimizer: adam
  Learning rate: 0.0001
  Batch size: 64
  Epochs: 100

DEEP_ANNEALED:
  Architecture: deep
  Hidden dims: [512, 256, 128]
  Loss type: annealed
  Optimizer: adamw
  Learning rate: 5e-05
  Batch size: 32
  Epochs: 150

RESIDUAL_CYCLICAL:
  Architecture: residual
  Hidden dims: [768, 384, 192]
  Loss type: cyclical
  Optimizer: adam
  Learning rate: 0.0002
  Batch size: 48
  Epochs: 120

HIERARCHICAL_EMOTION:
  Architecture: hierarchical
  Hidden dims: [512, 256]
  Loss type: emotion_weighted
  Optimizer: adam
  Learning rate: 0.0001
  Batch size: 64
  Epochs: 100


In [9]:
config

{'model_config': {'latent_dim': 750,
  'hidden_dims': [512, 256],
  'condition_dim': 2,
  'architecture': 'hierarchical',
  'dropout_rate': 0.25},
 'training_config': {'chunk_size': 10,
  'batch_size': 64,
  'epochs': 100,
  'lr': 0.0001,
  'optimizer': 'adam',
  'loss_type': 'emotion_weighted',
  'beta': 0.8,
  'use_scheduler': True,
  'grad_clip': False,
  'early_stopping_patience': 15}}

# Single Model Training

## Train a Single Model Configuration

In [None]:
# Run these command in bash: 
! pip install ipywidgets --upgrade
! jupyter nbextension enable --py widgetsnbextension
! jupyter labextension install @jupyter-widgets/jupyterlab-manager

Collecting ipywidgets
  Downloading ipywidgets-8.1.7-py3-none-any.whl (139 kB)
     -------------------------------------- 139.8/139.8 kB 2.1 MB/s eta 0:00:00
Collecting jupyterlab_widgets~=3.0.15
  Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl (216 kB)
     -------------------------------------- 216.6/216.6 kB 4.4 MB/s eta 0:00:00
Collecting widgetsnbextension~=4.0.14
  Downloading widgetsnbextension-4.0.14-py3-none-any.whl (2.2 MB)
     ---------------------------------------- 2.2/2.2 MB 9.3 MB/s eta 0:00:00
Installing collected packages: widgetsnbextension, jupyterlab_widgets, ipywidgets
Successfully installed ipywidgets-8.1.7 jupyterlab_widgets-3.0.15 widgetsnbextension-4.0.14



[notice] A new release of pip available: 22.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip
Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: ok
Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: ok


^C





Building jupyterlab assets (production, minimized)


In [11]:
data = {
    "latent_dir": "latent_representations",
    "output_dir": "advanced_vae_models",
    "chunk_size": 10,
    "max_samples": None,  # None = tüm veri
    "run_all_configs": True,  # Tüm konfigürasyonları çalıştır
    "run_comparison": True,  # Model karşılaştırması yap
    "run_fine_tuning": True,  # Fine-tuning yap
}

# Train a single model (örnek olarak residual_cyclical)
single_config_name = 'residual_cyclical'
single_config = CONFIG_TEMPLATES[single_config_name]

print(f"Training single model: {single_config_name}")
print(f"Architecture: {single_config['model_config']['architecture']}")
print(f"Loss type: {single_config['training_config']['loss_type']}")

# Create trainer
single_trainer = AdvancedVAETrainer(
    model_config=single_config['model_config'],
    training_config=single_config['training_config'],
    latent_dir=data["latent_dir"],
    output_dir=data["output_dir"]
)

# Train the model
single_stats = single_trainer.train(max_samples=data["max_samples"])

print(f"Single model training completed!")
print(f"Final loss: {single_stats['total_loss'][-1]:.4f}")
print(f"Best loss: {min(single_stats['total_loss']):.4f}")

Training single model: residual_cyclical
Architecture: residual
Loss type: cyclical
Using device: cpu
Loaded a total of 6976 latent representations.


Loading latent representations:   0%|          | 0/6976 [00:00<?, ?it/s]

Epoch 1/120:   0%|          | 0/146 [00:00<?, ?it/s]

KeyboardInterrupt: 

# Multiple Model Training Pipeline

## Train All Model Configurations

In [13]:
if data["run_all_configs"]:
    print("Starting comprehensive training pipeline...")
    print(f"Training {len(CONFIG_TEMPLATES)} different model configurations")
    
    # Run the complete training pipeline
    all_results, comparison_df = create_training_pipeline(
        configs=CONFIG_TEMPLATES,
        latent_dir=data["latent_dir"],
        output_dir=data["output_dir"],
        max_samples=data["max_samples"]
    )
    
    print("\nTraining pipeline completed!")
    print("\nModel Performance Summary:")
    print(comparison_df.to_string(index=False))
else:
    print("Skipping multiple model training (run_all_configs=False)")

Starting comprehensive training pipeline...
Training 4 different model configurations

Training standard_beta
Using device: cpu
Loaded a total of 6976 latent representations.


Loading latent representations:   0%|          | 0/6976 [00:00<?, ?it/s]

Epoch 1/100:   0%|          | 0/109 [00:00<?, ?it/s]

KeyboardInterrupt: 

# Model Comparison and Analysis

## Compare Model Performance

In [None]:
if config["run_comparison"]:
    # Create model comparator
    comparator = ModelComparator(config["output_dir"])
    
    # Load and compare all results
    comparison_df = comparator.create_comparison_report()
    
    # Display comparison results
    print("Model Comparison Results:")
    print("=" * 80)
    
    print("\nTop 3 Best Performing Models:")
    top_models = comparison_df.head(3)
    for idx, row in top_models.iterrows():
        print(f"{idx+1}. {row['Model']}")
        print(f"   Architecture: {row['Architecture']}")
        print(f"   Loss Type: {row['Loss Type']}")
        print(f"   Best Loss: {row['Best Loss']:.6f}")
        print(f"   Convergence: {row['Convergence Epoch']} epochs")
        print()
    
    # Show some statistics
    print("Architecture Performance:")
    arch_performance = comparison_df.groupby('Architecture')['Best Loss'].agg(['mean', 'min', 'count'])
    print(arch_performance)
    
    print("\nLoss Function Performance:")
    loss_performance = comparison_df.groupby('Loss Type')['Best Loss'].agg(['mean', 'min', 'count'])
    print(loss_performance)
    
else:
    print("Skipping model comparison (run_comparison=False)")

# Fine-tuning Best Model

## Fine-tune the Best Performing Model

In [None]:
if config["run_fine_tuning"]:
    # Load comparison results to find best model
    comparator = ModelComparator(config["output_dir"])
    comparison_df = comparator.create_comparison_report()
    
    # Get best model
    best_model_name = comparison_df.iloc[0]['Model']
    best_architecture = comparison_df.iloc[0]['Architecture']
    best_loss_type = comparison_df.iloc[0]['Loss Type']
    
    print(f"Fine-tuning best model: {best_model_name}")
    print(f"Architecture: {best_architecture}")
    print(f"Loss type: {best_loss_type}")
    
    # Find the corresponding config
    best_config = None
    for config_name, config_data in CONFIG_TEMPLATES.items():
        if (config_data['model_config']['architecture'] == best_architecture and 
            config_data['training_config']['loss_type'] == best_loss_type):
            best_config = config_data
            break
    
    if best_config:
        # Create fine-tuning trainer
        fine_tune_trainer = AdvancedVAETrainer(
            model_config=best_config['model_config'],
            training_config=best_config['training_config'],
            latent_dir=config["latent_dir"],
            output_dir=os.path.join(config["output_dir"], "fine_tuned_models")
        )
        
        # Path to best model
        best_model_path = os.path.join(config["output_dir"], best_model_name, "best_model.pt")
        
        if os.path.exists(best_model_path):
            print(f"Loading model from: {best_model_path}")
            
            # Fine-tune the model
            fine_tune_stats = fine_tune_trainer.fine_tune(
                pretrained_model_path=best_model_path,
                fine_tune_config=fine_tune_config,
                max_samples=config["max_samples"]
            )
            
            print("Fine-tuning completed!")
            print(f"Fine-tuned final loss: {fine_tune_stats['total_loss'][-1]:.4f}")
            print(f"Original best loss: {comparison_df.iloc[0]['Best Loss']:.4f}")
            
            improvement = comparison_df.iloc[0]['Best Loss'] - min(fine_tune_stats['total_loss'])
            print(f"Improvement: {improvement:.6f}")
            
        else:
            print(f"Best model file not found: {best_model_path}")
    else:
        print("Could not find matching configuration for best model")
        
else:
    print("Skipping fine-tuning (run_fine_tuning=False)")

# Custom Model Configuration

## Create and Train Custom Model

In [None]:
# Custom model configuration example
custom_model_config = {
    'latent_dim': 750,
    'hidden_dims': [1024, 512, 256, 128],  # More layers
    'condition_dim': 2,
    'architecture': 'deep',
    'dropout_rate': 0.15  # Lower dropout
}

custom_training_config = {
    'chunk_size': 10,
    'batch_size': 32,  # Smaller batch for more updates
    'epochs': 80,
    'lr': 1e-4,
    'optimizer': 'adamw',
    'loss_type': 'emotion_weighted',
    'beta': 0.7,  # Different beta value
    'weight_decay': 0.005,
    'use_scheduler': True,
    'grad_clip': True,
    'early_stopping_patience': 25
}

print("Custom model configuration:")
print(f"Architecture: {custom_model_config['architecture']}")
print(f"Hidden dims: {custom_model_config['hidden_dims']}")
print(f"Loss type: {custom_training_config['loss_type']}")
print(f"Beta: {custom_training_config['beta']}")

# Train custom model
custom_trainer = AdvancedVAETrainer(
    model_config=custom_model_config,
    training_config=custom_training_config,
    latent_dir=config["latent_dir"],
    output_dir=os.path.join(config["output_dir"], "custom_models")
)

custom_stats = custom_trainer.train(max_samples=config["max_samples"])

print(f"Custom model training completed!")
print(f"Final loss: {custom_stats['total_loss'][-1]:.4f}")
print(f"Best loss: {min(custom_stats['total_loss']):.4f}")

# Results Visualization

## Visualize Training Results

In [None]:
# Load and visualize results from a specific model
def visualize_model_results(model_name, output_dir):
    """Visualize results from a specific model"""
    model_dir = os.path.join(output_dir, model_name)
    
    if not os.path.exists(model_dir):
        print(f"Model directory not found: {model_dir}")
        return
    
    # Load training stats
    stats_path = os.path.join(model_dir, 'training_stats.json')
    if os.path.exists(stats_path):
        with open(stats_path, 'r') as f:
            stats = json.load(f)
        
        epochs = range(1, len(stats['total_loss']) + 1)
        
        plt.figure(figsize=(20, 12))
        
        # Total loss
        plt.subplot(2, 4, 1)
        plt.plot(epochs, stats['total_loss'], 'b-', linewidth=2)
        plt.title(f'{model_name}\nTotal Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.grid(True, alpha=0.3)
        
        # Loss components
        plt.subplot(2, 4, 2)
        plt.plot(epochs, stats['recon_loss'], 'g-', label='Reconstruction', linewidth=2)
        plt.plot(epochs, stats['kl_loss'], 'r-', label='KL Divergence', linewidth=2)
        plt.title('Loss Components')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
        plt.grid(True, alpha=0.3)
        
        # Learning rate
        plt.subplot(2, 4, 3)
        if 'learning_rate' in stats and stats['learning_rate']:
            plt.plot(epochs, stats['learning_rate'], 'm-', linewidth=2)
            plt.title('Learning Rate')
            plt.xlabel('Epochs')
            plt.ylabel('LR')
            plt.grid(True, alpha=0.3)
        
        # Loss ratio
        plt.subplot(2, 4, 4)
        ratio = np.array(stats['kl_loss']) / (np.array(stats['recon_loss']) + 1e-8)
        plt.plot(epochs, ratio, 'orange', linewidth=2)
        plt.title('KL/Reconstruction Ratio')
        plt.xlabel('Epochs')
        plt.ylabel('Ratio')
        plt.grid(True, alpha=0.3)
        
        # Log scale losses
        plt.subplot(2, 4, 5)
        plt.semilogy(epochs, stats['total_loss'], 'b-', linewidth=2)
        plt.title('Total Loss (Log Scale)')
        plt.xlabel('Epochs')
        plt.ylabel('Log Loss')
        plt.grid(True, alpha=0.3)
        
        # Moving averages
        plt.subplot(2, 4, 6)
        window = min(10, len(stats['total_loss']) // 4)
        if window > 1:
            moving_avg = pd.Series(stats['total_loss']).rolling(window=window).mean()
            plt.plot(epochs, stats['total_loss'], 'b-', alpha=0.3, label='Original')
            plt.plot(epochs, moving_avg, 'b-', linewidth=2, label=f'MA({window})')
            plt.title('Smoothed Loss')
            plt.xlabel('Epochs')
            plt.ylabel('Loss')
            plt.legend()
            plt.grid(True, alpha=0.3)
        
        # Loss histogram
        plt.subplot(2, 4, 7)
        plt.hist(stats['total_loss'], bins=20, alpha=0.7, color='blue')
        plt.title('Loss Distribution')
        plt.xlabel('Loss Value')
        plt.ylabel('Frequency')
        plt.grid(True, alpha=0.3)
        
        # Convergence analysis
        plt.subplot(2, 4, 8)
        # Calculate running minimum
        running_min = []
        current_min = float('inf')
        for loss in stats['total_loss']:
            if loss < current_min:
                current_min = loss
            running_min.append(current_min)
        
        plt.plot(epochs, running_min, 'g-', linewidth=2)
        plt.title('Best Loss Over Time')
        plt.xlabel('Epochs')
        plt.ylabel('Best Loss')
        plt.grid(True, alpha=0.3)
        
        plt.tight_layout()
        plt.savefig(os.path.join(model_dir, 'detailed_analysis.png'), dpi=300, bbox_inches='tight')
        plt.show()
        
        # Print summary statistics
        print(f"\n{model_name} Training Summary:")
        print(f"  Final Loss: {stats['total_loss'][-1]:.6f}")
        print(f"  Best Loss: {min(stats['total_loss']):.6f}")
        print(f"  Best Epoch: {np.argmin(stats['total_loss']) + 1}")
        print(f"  Total Epochs: {len(stats['total_loss'])}")
        print(f"  Loss Improvement: {stats['total_loss'][0] - min(stats['total_loss']):.6f}")
        
        # Convergence analysis
        best_idx = np.argmin(stats['total_loss'])
        if best_idx < len(stats['total_loss']) - 10:
            print(f"  Early convergence detected at epoch {best_idx + 1}")
        else:
            print(f"  Model still improving at end of training")

# Visualize results from available models
output_dir = config["output_dir"]
if os.path.exists(output_dir):
    available_models = [d for d in os.listdir(output_dir) 
                       if os.path.isdir(os.path.join(output_dir, d)) and 
                       os.path.exists(os.path.join(output_dir, d, 'training_stats.json'))]
    
    print(f"Found {len(available_models)} trained models:")
    for i, model in enumerate(available_models):
        print(f"{i+1}. {model}")
    
    # Visualize the first available model (or specify one)
    if available_models:
        model_to_visualize = available_models[0]  # Change index to visualize different model
        print(f"\nVisualizing results for: {model_to_visualize}")
        visualize_model_results(model_to_visualize, output_dir)
else:
    print(f"Output directory not found: {output_dir}")

# Performance Analysis

## Comprehensive Performance Analysis

In [None]:
# Load all available model results for comprehensive analysis
def comprehensive_analysis(output_dir):
    """Perform comprehensive analysis of all trained models"""
    
    if not os.path.exists(output_dir):
        print(f"Output directory not found: {output_dir}")
        return
    
    model_summaries = []
    all_training_curves = {}
    
    # Collect data from all models
    for model_dir in os.listdir(output_dir):
        model_path = os.path.join(output_dir, model_dir)
        if os.path.isdir(model_path):
            stats_path = os.path.join(model_path, 'training_stats.json')
            report_path = os.path.join(model_path, 'training_report.json')
            
            if os.path.exists(stats_path) and os.path.exists(report_path):
                # Load data
                with open(stats_path, 'r') as f:
                    stats = json.load(f)
                with open(report_path, 'r') as f:
                    report = json.load(f)
                
                # Collect summary
                summary = {
                    'model_name': model_dir,
                    'architecture': report['model_config']['architecture'],
                    'loss_type': report['training_config']['loss_type'],
                    'optimizer': report['training_config']['optimizer'],
                    'learning_rate': report['training_config']['lr'],
                    'batch_size': report['training_config']['batch_size'],
                    'dropout_rate': report['model_config']['dropout_rate'],
                    'hidden_dims': report['model_config']['hidden_dims'],
                    'final_loss': stats['total_loss'][-1],
                    'best_loss': min(stats['total_loss']),
                    'convergence_epoch': np.argmin(stats['total_loss']) + 1,
                    'total_epochs': len(stats['total_loss']),
                    'final_recon_loss': stats['recon_loss'][-1],
                    'final_kl_loss': stats['kl_loss'][-1],
                    'total_parameters': report['training_summary']['total_parameters']
                }
                model_summaries.append(summary)
                
                # Store training curves
                all_training_curves[model_dir] = stats
    
    if not model_summaries:
        print("No trained models found!")
        return
    
    # Create comprehensive DataFrame
    df = pd.DataFrame(model_summaries)
    
    # Analysis and visualization
    plt.figure(figsize=(25, 20))
    
    # 1. Best loss by architecture
    plt.subplot(3, 4, 1)
    arch_performance = df.groupby('architecture')['best_loss'].agg(['mean', 'std', 'min'])
    arch_performance['mean'].plot(kind='bar', yerr=arch_performance['std'], capsize=4)
    plt.title('Mean Best Loss by Architecture')
    plt.ylabel('Loss')
    plt.xticks(rotation=45)
    plt.grid(True, alpha=0.3)
    
    # 2. Best loss by loss type
    plt.subplot(3, 4, 2)
    loss_performance = df.groupby('loss_type')['best_loss'].agg(['mean', 'std', 'min'])
    loss_performance['mean'].plot(kind='bar', yerr=loss_performance['std'], capsize=4)
    plt.title('Mean Best Loss by Loss Type')
    plt.ylabel('Loss')
    plt.xticks(rotation=45)
    plt.grid(True, alpha=0.3)
    
    # 3. Convergence speed by architecture
    plt.subplot(3, 4, 3)
    conv_performance = df.groupby('architecture')['convergence_epoch'].mean()
    conv_performance.plot(kind='bar')
    plt.title('Mean Convergence Epoch by Architecture')
    plt.ylabel('Epochs')
    plt.xticks(rotation=45)
    plt.grid(True, alpha=0.3)
    
    # 4. Parameter count vs performance
    plt.subplot(3, 4, 4)
    plt.scatter(df['total_parameters'], df['best_loss'], alpha=0.7)
    plt.xlabel('Total Parameters')
    plt.ylabel('Best Loss')
    plt.title('Parameters vs Performance')
    plt.grid(True, alpha=0.3)
    
    # 5. Learning rate vs performance
    plt.subplot(3, 4, 5)
    plt.scatter(df['learning_rate'], df['best_loss'], alpha=0.7)
    plt.xlabel('Learning Rate')
    plt.ylabel('Best Loss')
    plt.title('Learning Rate vs Performance')
    plt.xscale('log')
    plt.grid(True, alpha=0.3)
    
    # 6. Batch size vs performance
    plt.subplot(3, 4, 6)
    batch_performance = df.groupby('batch_size')['best_loss'].mean()
    batch_performance.plot(kind='bar')
    plt.title('Mean Best Loss by Batch Size')
    plt.ylabel('Loss')
    plt.grid(True, alpha=0.3)
    
    # 7. Training curves comparison (best models from each architecture)
    plt.subplot(3, 4, 7)
    best_by_arch = df.loc[df.groupby('architecture')['best_loss'].idxmin()]
    for _, row in best_by_arch.iterrows():
        model_name = row['model_name']
        if model_name in all_training_curves:
            stats = all_training_curves[model_name]
            epochs = range(1, len(stats['total_loss']) + 1)
            plt.plot(epochs, stats['total_loss'], label=f"{row['architecture']}", linewidth=2)
    plt.title('Training Curves (Best per Architecture)')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True, alpha=0.3)
    
    # 8. Loss component analysis
    plt.subplot(3, 4, 8)
    plt.scatter(df['final_recon_loss'], df['final_kl_loss'], 
               c=df['best_loss'], cmap='viridis', alpha=0.7)
    plt.xlabel('Final Reconstruction Loss')
    plt.ylabel('Final KL Loss')
    plt.title('Loss Components (colored by total loss)')
    plt.colorbar(label='Total Loss')
    plt.grid(True, alpha=0.3)
    
    # 9. Dropout rate effect
    plt.subplot(3, 4, 9)
    dropout_performance = df.groupby('dropout_rate')['best_loss'].mean()
    dropout_performance.plot(kind='bar')
    plt.title('Mean Best Loss by Dropout Rate')
    plt.ylabel('Loss')
    plt.grid(True, alpha=0.3)
    
    # 10. Training efficiency (best loss / epochs)
    plt.subplot(3, 4, 10)
    df['efficiency'] = df['best_loss'] / df['convergence_epoch']
    efficiency_by_arch = df.groupby('architecture')['efficiency'].mean()
    efficiency_by_arch.plot(kind='bar')
    plt.title('Training Efficiency by Architecture')
    plt.ylabel('Loss / Convergence Epoch')
    plt.xticks(rotation=45)
    plt.grid(True, alpha=0.3)
    
    # 11. Optimizer comparison
    plt.subplot(3, 4, 11)
    opt_performance = df.groupby('optimizer')['best_loss'].agg(['mean', 'std'])
    opt_performance['mean'].plot(kind='bar', yerr=opt_performance['std'], capsize=4)
    plt.title('Mean Best Loss by Optimizer')
    plt.ylabel('Loss')
    plt.grid(True, alpha=0.3)
    
    # 12. Overall ranking
    plt.subplot(3, 4, 12)
    top_models = df.nsmallest(6, 'best_loss')
    model_names = [name[:15] + '...' if len(name) > 15 else name for name in top_models['model_name']]
    plt.barh(range(len(top_models)), top_models['best_loss'])
    plt.yticks(range(len(top_models)), model_names)
    plt.xlabel('Best Loss')
    plt.title('Top 6 Models by Performance')
    plt.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'comprehensive_analysis.png'), dpi=300, bbox_inches='tight')
    plt.show()
    
    # Print detailed analysis
    print("COMPREHENSIVE MODEL ANALYSIS")
    print("=" * 80)
    
    print(f"\nTotal models analyzed: {len(df)}")
    print(f"Best overall loss: {df['best_loss'].min():.6f}")
    print(f"Worst overall loss: {df['best_loss'].max():.6f}")
    print(f"Mean loss: {df['best_loss'].mean():.6f}")
    print(f"Loss std: {df['best_loss'].std():.6f}")
    
    print("\nTOP 5 MODELS:")
    top_5 = df.nsmallest(5, 'best_loss')
    for i, (_, row) in enumerate(top_5.iterrows()):
        print(f"{i+1}. {row['model_name']}")
        print(f"   Loss: {row['best_loss']:.6f}")
        print(f"   Architecture: {row['architecture']}")
        print(f"   Loss type: {row['loss_type']}")
        print(f"   Convergence: {row['convergence_epoch']} epochs")
        print()
    
    print("ARCHITECTURE RANKING:")
    arch_ranking = df.groupby('architecture')['best_loss'].agg(['mean', 'min', 'count']).sort_values('mean')
    print(arch_ranking)
    
    print("\nLOSS FUNCTION RANKING:")
    loss_ranking = df.groupby('loss_type')['best_loss'].agg(['mean', 'min', 'count']).sort_values('mean')
    print(loss_ranking)
    
    print("\nOPTIMIZER RANKING:")
    opt_ranking = df.groupby('optimizer')['best_loss'].agg(['mean', 'min', 'count']).sort_values('mean')
    print(opt_ranking)
    
    # Save detailed results
    df.to_csv(os.path.join(output_dir, 'detailed_model_analysis.csv'), index=False)
    print(f"\nDetailed analysis saved to: {os.path.join(output_dir, 'detailed_model_analysis.csv')}")
    
    return df

# Run comprehensive analysis
analysis_df = comprehensive_analysis(config["output_dir"])

# Summary and Recommendations

In [None]:
# Final summary and recommendations
print("TRAINING PIPELINE SUMMARY")
print("=" * 60)

if 'analysis_df' in locals() and analysis_df is not None:
    best_model = analysis_df.loc[analysis_df['best_loss'].idxmin()]
    
    print(f"\nBEST MODEL OVERALL:")
    print(f"  Name: {best_model['model_name']}")
    print(f"  Architecture: {best_model['architecture']}")
    print(f"  Loss Function: {best_model['loss_type']}")
    print(f"  Optimizer: {best_model['optimizer']}")
    print(f"  Best Loss: {best_model['best_loss']:.6f}")
    print(f"  Parameters: {best_model['total_parameters']:,}")
    print(f"  Convergence: {best_model['convergence_epoch']} epochs")
    
    print(f"\nRECOMMENDations:")
    
    # Architecture recommendation
    best_arch = analysis_df.groupby('architecture')['best_loss'].mean().idxmin()
    print(f"  • Best Architecture: {best_arch}")
    
    # Loss function recommendation  
    best_loss_type = analysis_df.groupby('loss_type')['best_loss'].mean().idxmin()
    print(f"  • Best Loss Function: {best_loss_type}")
    
    # Optimizer recommendation
    best_optimizer = analysis_df.groupby('optimizer')['best_loss'].mean().idxmin()
    print(f"  • Best Optimizer: {best_optimizer}")
    
    # Learning rate analysis
    best_lr_models = analysis_df.nsmallest(3, 'best_loss')
    mean_best_lr = best_lr_models['learning_rate'].mean()
    print(f"  • Recommended Learning Rate: {mean_best_lr:.2e}")
    
    # Batch size analysis
    best_batch_models = analysis_df.nsmallest(3, 'best_loss')
    mean_best_batch = int(best_batch_models['batch_size'].mean())
    print(f"  • Recommended Batch Size: {mean_best_batch}")
    
    print(f"\nNEXT STEPS:")
    print(f"  • Use {best_model['model_name']} for music generation")
    print(f"  • Consider fine-tuning with domain-specific data")
    print(f"  • Experiment with ensemble methods using top 3 models")
    print(f"  • Implement model compression for deployment")

print(f"\nAll models and results saved in: {config['output_dir']}")
print("Training pipeline completed successfully!")

In [4]:
# Sadece 'standard_beta' config ile model eğit → temiz ve uyumlu model üretiriz
results, comparison_df = create_training_pipeline(
    configs={'standard_beta': CONFIG_TEMPLATES['standard_beta']},
    latent_dir='latent_representations',
    output_dir='advanced_vae_models',
    max_samples=None  # veya mesela 5000 gibi bir sayı koyabilirsin hızlı deneme için
)


Training standard_beta
Using device: cpu
Loaded a total of 6976 latent representations.


Loading latent representations:   0%|          | 0/6976 [00:00<?, ?it/s]

Epoch 1/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 1, Loss: 385321.8931, Recon: 385321.7993, KL: 0.1669


Epoch 2/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 2, Loss: 384914.2844, Recon: 384914.1907, KL: 0.1671


Epoch 3/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 3, Loss: 384776.0903, Recon: 384775.9966, KL: 0.1667


Epoch 4/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 4, Loss: 384729.5106, Recon: 384729.4169, KL: 0.1651


Epoch 5/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 5, Loss: 384714.4751, Recon: 384714.3816, KL: 0.1629


Epoch 6/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 6, Loss: 384709.7162, Recon: 384709.6253, KL: 0.1596


Epoch 7/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 7, Loss: 384708.1431, Recon: 384708.0697, KL: 0.1553


Epoch 8/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 8, Loss: 384707.6193, Recon: 384707.5565, KL: 0.1480


Epoch 9/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 9, Loss: 384707.4553, Recon: 384707.3928, KL: 0.1347


Epoch 10/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 10, Loss: 384707.4025, Recon: 384707.3400, KL: 0.1119


Epoch 11/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 11, Loss: 384707.3687, Recon: 384707.3346, KL: 0.0799


Epoch 12/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 12, Loss: 384707.3624, Recon: 384707.3311, KL: 0.0491


Epoch 13/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 13, Loss: 384707.3406, Recon: 384707.3274, KL: 0.0307


Epoch 14/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 14, Loss: 384707.3286, Recon: 384707.3283, KL: 0.0220


Epoch 15/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 15, Loss: 384707.3254, Recon: 384707.3254, KL: 0.0175


Epoch 16/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 16, Loss: 384707.3283, Recon: 384707.3283, KL: 0.0149


Epoch 17/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 17, Loss: 384707.3260, Recon: 384707.3260, KL: 0.0138


Epoch 18/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 18, Loss: 384707.3257, Recon: 384707.3257, KL: 0.0129


Epoch 19/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 19, Loss: 384707.3286, Recon: 384707.3286, KL: 0.0122


Epoch 20/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 20, Loss: 384707.3306, Recon: 384707.3306, KL: 0.0113


Epoch 21/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 21, Loss: 384707.3240, Recon: 384707.3240, KL: 0.0107


Epoch 22/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 22, Loss: 384707.3265, Recon: 384707.3265, KL: 0.0101


Epoch 23/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 23, Loss: 384707.3277, Recon: 384707.3277, KL: 0.0097


Epoch 24/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 24, Loss: 384707.3280, Recon: 384707.3280, KL: 0.0092


Epoch 25/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 25, Loss: 384707.3291, Recon: 384707.3291, KL: 0.0088


Epoch 26/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 26, Loss: 384707.3274, Recon: 384707.3274, KL: 0.0085


Epoch 27/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 27, Loss: 384707.3288, Recon: 384707.3288, KL: 0.0080


Epoch 28/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 28, Loss: 384707.3300, Recon: 384707.3300, KL: 0.0079


Epoch 29/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 29, Loss: 384707.3297, Recon: 384707.3297, KL: 0.0079


Epoch 30/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 30, Loss: 384707.3303, Recon: 384707.3303, KL: 0.0076


Epoch 31/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 31, Loss: 384707.3268, Recon: 384707.3268, KL: 0.0075


Epoch 32/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 32, Loss: 384707.3277, Recon: 384707.3277, KL: 0.0075


Epoch 33/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 33, Loss: 384707.3263, Recon: 384707.3263, KL: 0.0072


Epoch 34/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 34, Loss: 384707.3220, Recon: 384707.3220, KL: 0.0070


Epoch 35/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 35, Loss: 384707.3257, Recon: 384707.3257, KL: 0.0068


Epoch 36/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 36, Loss: 384707.3234, Recon: 384707.3234, KL: 0.0068


Epoch 37/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 37, Loss: 384707.3257, Recon: 384707.3257, KL: 0.0065


Epoch 38/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 38, Loss: 384707.3254, Recon: 384707.3254, KL: 0.0064


Epoch 39/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 39, Loss: 384707.3263, Recon: 384707.3263, KL: 0.0065


Epoch 40/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 40, Loss: 384707.3277, Recon: 384707.3277, KL: 0.0064


Epoch 41/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 41, Loss: 384707.3248, Recon: 384707.3248, KL: 0.0062


Epoch 42/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 42, Loss: 384707.3260, Recon: 384707.3260, KL: 0.0062


Epoch 43/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 43, Loss: 384707.3268, Recon: 384707.3268, KL: 0.0061


Epoch 44/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 44, Loss: 384707.3251, Recon: 384707.3251, KL: 0.0060


Epoch 45/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 45, Loss: 384707.3306, Recon: 384707.3306, KL: 0.0059


Epoch 46/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 46, Loss: 384707.3265, Recon: 384707.3265, KL: 0.0059


Epoch 47/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 47, Loss: 384707.3265, Recon: 384707.3265, KL: 0.0057


Epoch 48/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 48, Loss: 384707.3286, Recon: 384707.3286, KL: 0.0057


Epoch 49/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 49, Loss: 384707.3271, Recon: 384707.3271, KL: 0.0057


Epoch 50/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 50, Loss: 384707.3240, Recon: 384707.3240, KL: 0.0055


Epoch 51/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 51, Loss: 384707.3294, Recon: 384707.3294, KL: 0.0055


Epoch 52/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 52, Loss: 384707.3243, Recon: 384707.3243, KL: 0.0055


Epoch 53/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 53, Loss: 384707.3283, Recon: 384707.3283, KL: 0.0054


Epoch 54/100:   0%|          | 0/109 [00:00<?, ?it/s]

Epoch 54, Loss: 384707.3228, Recon: 384707.3228, KL: 0.0053
Early stopping at epoch 54


TypeError: Object of type int64 is not JSON serializable