In [None]:
"""
Analyze computation times from CSV database
"""

import pandas as pd
from pathlib import Path

def analyze_timings(csv_path='database_navi.csv'):
    """
    Analyze and display timing statistics from CSV.

    Prints a report to stdout containing the number of calculations, the
    summed and averaged timings, the fastest/slowest calculation, the share
    of time spent in relaxation vs. NEB and, when the optional
    ``diffusing_element`` column is present, the average time per element.

    Problems with the input (missing file, empty file, no rows, missing
    timing columns) are reported with a printed message and an early
    return rather than an exception.

    Parameters:
    -----------
    csv_path : str
        Path to CSV file. The file must provide the columns
        ``total_time_s``, ``initial_relax_time_s``, ``final_relax_time_s``
        and ``neb_time_s``.

    Returns:
    --------
    None
    """
    required_columns = (
        'total_time_s',
        'initial_relax_time_s',
        'final_relax_time_s',
        'neb_time_s',
    )

    csv_file = Path(csv_path)

    # is_file() rather than exists(): a directory at this path cannot be read
    # as CSV and would otherwise fail later inside pandas.
    if not csv_file.is_file():
        print(f"❌ CSV file not found: {csv_path}")
        return

    # Load CSV. A zero-byte file makes pandas raise instead of returning an
    # empty frame, so it is mapped onto the same "no data" message.
    try:
        df = pd.read_csv(csv_file)
    except pd.errors.EmptyDataError:
        print("❌ No data in CSV")
        return

    if df.empty:
        print("❌ No data in CSV")
        return

    # Validate the schema up front so that a missing column yields one clear
    # message instead of a KeyError in the middle of a half-printed report.
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        print(f"❌ Missing required column(s) in CSV: {', '.join(missing)}")
        return

    print("\n" + "="*70)
    print("TIMING ANALYSIS")
    print("="*70)

    # Total calculations
    n_calcs = len(df)
    print(f"\nTotal calculations: {n_calcs}")

    # Total times
    total_time_s = df['total_time_s'].sum()
    total_time_h = total_time_s / 3600
    total_time_d = total_time_h / 24

    print("\nTotal computation time:")
    print(f"  {total_time_s:.1f} seconds")
    print(f"  {total_time_h:.2f} hours")
    print(f"  {total_time_d:.3f} days")

    # Average times
    avg_total = df['total_time_s'].mean()
    avg_initial_relax = df['initial_relax_time_s'].mean()
    avg_final_relax = df['final_relax_time_s'].mean()
    avg_neb = df['neb_time_s'].mean()

    print("\nAverage times per calculation:")
    print(f"  Total:           {avg_total:.1f} s  ({avg_total/60:.1f} min)")
    print(f"  Initial relax:   {avg_initial_relax:.1f} s")
    print(f"  Final relax:     {avg_final_relax:.1f} s")
    print(f"  NEB:             {avg_neb:.1f} s")

    # Min/Max
    min_total = df['total_time_s'].min()
    max_total = df['total_time_s'].max()

    print("\nTime range:")
    print(f"  Fastest: {min_total:.1f} s ({min_total/60:.1f} min)")
    print(f"  Slowest: {max_total:.1f} s ({max_total/60:.1f} min)")

    # Breakdown percentage
    total_relax_time = (
        df['initial_relax_time_s'].sum() + df['final_relax_time_s'].sum()
    )
    total_neb_time = df['neb_time_s'].sum()

    # A zero total (e.g. all timings are 0 or missing) has no meaningful
    # percentage split; show "n/a" instead of dividing by zero.
    if total_time_s > 0:
        relax_share = f"{total_relax_time / total_time_s * 100:.1f}%"
        neb_share = f"{total_neb_time / total_time_s * 100:.1f}%"
    else:
        relax_share = neb_share = "n/a"

    print("\nTime breakdown:")
    print(f"  Relaxation: {total_relax_time/3600:.2f} h ({relax_share})")
    print(f"  NEB:        {total_neb_time/3600:.2f} h ({neb_share})")

    # Statistics by diffusing element (if available)
    if 'diffusing_element' in df.columns:
        print("\nBy diffusing element:")
        elem_stats = df.groupby('diffusing_element')['total_time_s'].agg(
            ['count', 'mean']
        )

        print(f"\n{'Element':<12} {'Count':<8} {'Avg (min)':<12}")
        print("-" * 40)
        # Walk the columns directly so the integer count is not upcast to
        # float the way it is when iterating mixed-dtype rows.
        for elem, count, mean_s in zip(
            elem_stats.index, elem_stats['count'], elem_stats['mean']
        ):
            print(f"{elem:<12} {int(count):<8} {mean_s / 60:<12.1f}")

    print("\n" + "="*70)


if __name__ == "__main__":
    # Script entry point: analyze the test database rather than relying on
    # the function's default path.
    analyze_timings(csv_path="database_test.csv")