In [None]:
"""
Excel Model Profiler - Phase 2: Model Optimization
Comprehensive analysis and optimization tool for Excel financial models
"""

In [None]:
import xlwings as xw
import time
import psutil
import json
import re
from pathlib import Path
from dataclasses import dataclass
from typing import Dict, List, Any, Optional, Tuple
from datetime import datetime
import pandas as pd
import openpyxl
from collections import defaultdict

In [None]:
@dataclass
class PerformanceMetrics:
    """
    Timing and memory readings captured around a single profiled operation.

    Times are stored in seconds and memory readings in megabytes; `duration`
    and `memory_delta` are supplied by the caller rather than derived here.
    """
    operation_name: str
    start_time: float
    end_time: float
    duration: float
    memory_before: float
    memory_after: float
    memory_delta: float

    def to_dict(self) -> Dict:
        """Return a report-friendly dict: duration rounded to 4 places, memory to 2."""
        summary: Dict[str, Any] = {'operation': self.operation_name}
        summary['duration_seconds'] = round(self.duration, 4)

        # All memory figures share the same two-decimal precision.
        memory_readings = (
            ('memory_before_mb', self.memory_before),
            ('memory_after_mb', self.memory_after),
            ('memory_delta_mb', self.memory_delta),
        )
        for label, reading in memory_readings:
            summary[label] = round(reading, 2)

        return summary

In [None]:
class ExcelModelProfiler:
    """
    Comprehensive Excel Model Performance Profiler
    Analyzes and optimizes Excel financial models

    Drives a hidden Excel instance through xlwings to time macros, Goal Seek
    and recalculation, samples worksheet contents, and scans the file's
    formulas for volatile functions with openpyxl. Use it as a context
    manager so the Excel instance is always shut down:

        with ExcelModelProfiler("model.xlsm") as profiler:
            summary = profiler.run_full_profile()
    """

    # Upper-case substrings that identify volatile Excel functions in a formula.
    VOLATILE_FUNCTIONS = ('NOW()', 'TODAY()', 'INDIRECT(', 'OFFSET(', 'RAND()', 'RANDBETWEEN(')

    # Upper bound on the number of cells inspected per sheet during structure analysis.
    STRUCTURE_SAMPLE_LIMIT = 1000

    def __init__(self, excel_file_path: str):
        """
        Store the workbook path and initialise empty result containers.

        Excel itself is not started here; that happens in `_open_excel()`
        (called by `__enter__`).
        """
        self.excel_file_path = Path(excel_file_path)
        self.app = None
        self.wb = None
        self.metrics = []
        self.analysis_results = {}
        self.optimization_recommendations = []

        # Configuration
        # NOTE: 'enable_memory_monitoring' is recorded in the summary but is not
        # consulted by any method in this class.
        self.profile_config = {
            'enable_macro_profiling': True,
            'enable_formula_analysis': True,
            'enable_memory_monitoring': True,
            'enable_volatility_audit': True,
            'create_backup': True
        }

        print(f"🔬 Excel Model Profiler initialized for: {self.excel_file_path.name}")

    def __enter__(self):
        """Context manager entry: open Excel and the workbook, return self."""
        self._open_excel()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: close Excel; exceptions are not suppressed."""
        self._close_excel()

    def _open_excel(self):
        """
        Open a hidden Excel application and the workbook.

        Raises:
            Exception: whatever xlwings raised; re-raised after the partially
                started Excel instance has been shut down.
        """
        try:
            self.app = xw.App(visible=False)
            self.wb = self.app.books.open(self.excel_file_path)
            print(f"✅ Excel workbook opened: {self.excel_file_path.name}")
        except Exception as e:
            print(f"❌ Failed to open Excel file: {e}")
            # __exit__ never runs when __enter__ fails, so clean up here to
            # avoid leaving an invisible Excel process behind.
            try:
                self._close_excel()
            except Exception as cleanup_error:
                print(f"⚠️  Cleanup after failed open also failed: {cleanup_error}")
            raise

    def _close_excel(self):
        """
        Close the workbook and quit Excel.

        The application is quit even if closing the workbook raises, and both
        references are reset so a second call is a no-op.
        """
        try:
            if self.wb is not None:
                self.wb.close()
        finally:
            self.wb = None
            if self.app is not None:
                try:
                    self.app.quit()
                finally:
                    self.app = None

    def _get_memory_usage(self) -> float:
        """
        Get current memory usage in MB.

        Reports the resident set size of the current Python process
        (`psutil.Process()` without a pid).
        NOTE(review): Excel presumably runs as a separate process, so these
        readings may not reflect Excel's own memory - confirm whether the
        Excel process should be measured instead.
        """
        process = psutil.Process()
        return process.memory_info().rss / (1024 * 1024)

    def _start_performance_timer(self, operation_name: str) -> Dict:
        """
        Start timing an operation.

        Returns:
            Dict with the operation name, the wall-clock start timestamp,
            a monotonic `perf_start` reading used for the duration, and the
            memory reading taken before the operation.
        """
        return {
            'name': operation_name,
            'start_time': time.time(),
            # perf_counter is monotonic and high-resolution; time.time() is
            # kept only as a human-meaningful timestamp.
            'perf_start': time.perf_counter(),
            'memory_before': self._get_memory_usage()
        }

    def _end_performance_timer(self, timer_data: Dict) -> "PerformanceMetrics":
        """
        End timing an operation, record and return its metrics.

        Args:
            timer_data: dict produced by `_start_performance_timer()`. Dicts
                without a 'perf_start' key are still accepted; the duration
                then falls back to the wall-clock difference.
        """
        perf_end = time.perf_counter()
        end_time = time.time()
        memory_after = self._get_memory_usage()

        perf_start = timer_data.get('perf_start')
        if perf_start is not None:
            duration = perf_end - perf_start
        else:
            duration = end_time - timer_data['start_time']

        metrics = PerformanceMetrics(
            operation_name=timer_data['name'],
            start_time=timer_data['start_time'],
            end_time=end_time,
            duration=duration,
            memory_before=timer_data['memory_before'],
            memory_after=memory_after,
            memory_delta=memory_after - timer_data['memory_before']
        )

        self.metrics.append(metrics)
        return metrics

    @staticmethod
    def _is_formula(formula_text: Any) -> bool:
        """Return True only for text that Excel treats as a formula (starts with '=')."""
        return isinstance(formula_text, str) and formula_text.startswith('=')

    @staticmethod
    def _convergence_error(achieved: Any, target: Any) -> float:
        """Return |achieved - target|, or infinity when no value was achieved."""
        if achieved is None:
            return float('inf')
        return abs(achieved - target)

    @classmethod
    def _find_volatile_functions(cls, formula_text: str) -> List[str]:
        """Return the VOLATILE_FUNCTIONS labels present in the formula (case-insensitive)."""
        upper_formula = formula_text.upper()
        return [label for label in cls.VOLATILE_FUNCTIONS if label in upper_formula]

    def profile_macro_performance(self) -> Dict:
        """
        Step 1: Profile macro performance by timing each macro call from Python.

        Each macro in a fixed list is run once through `self.wb.macro(name)()`.
        A failing macro is recorded as `{'error': ...}` and does not stop the
        remaining macros.

        Returns:
            Dict mapping macro name to its metrics dict (or error dict); the
            same dict is stored under `analysis_results['macro_performance']`.
        """
        print("\n🔍 Step 1: Profiling Macro Performance...")

        macro_results = {}

        # Test each macro individually
        macros_to_test = [
            'UpdateFinancialModel',
            'SolveForLeasePayment',
            'ResetModel'
        ]

        for macro_name in macros_to_test:
            print(f"   Testing macro: {macro_name}")

            try:
                # Time macro execution
                timer = self._start_performance_timer(f"Macro: {macro_name}")

                # Execute macro
                self.wb.macro(macro_name)()

                # Record results
                metrics = self._end_performance_timer(timer)
                macro_results[macro_name] = metrics.to_dict()

                print(f"   ✅ {macro_name}: {metrics.duration:.3f}s, Memory: {metrics.memory_delta:+.1f}MB")

            except Exception as e:
                print(f"   ❌ {macro_name} failed: {e}")
                macro_results[macro_name] = {'error': str(e)}

        self.analysis_results['macro_performance'] = macro_results
        return macro_results

    def profile_solver_execution(self) -> Dict:
        """
        Step 2: Profile Goal Seek and Solver operations.

        Runs Excel's Goal Seek on Results!B4 by changing Inputs!B14 until it
        matches the target read from Inputs!B13, and times the call. The
        changed cell is left at the value Goal Seek found; it is not restored.

        Returns:
            Dict with a 'goal_seek' entry holding timing, target/achieved
            values, the convergence error and before/after lease payment, or
            `{'error': ...}` if anything failed. Also stored under
            `analysis_results['solver_performance']`.
        """
        print("\n🎯 Step 2: Profiling Solver Execution...")

        solver_results = {}

        # Test Goal Seek operation (part of SolveForLeasePayment)
        try:
            print("   Testing Goal Seek performance...")

            # Get current values
            target_cell = self.wb.sheets['Results'].range('B4')  # IRR
            changing_cell = self.wb.sheets['Inputs'].range('B14')  # Lease Payment
            target_irr = self.wb.sheets['Inputs'].range('B13').value  # Target IRR

            original_lease = changing_cell.value

            # Time Goal Seek operation
            timer = self._start_performance_timer("Goal Seek Operation")

            # Perform Goal Seek
            target_cell.api.GoalSeek(Goal=target_irr, ChangingCell=changing_cell.api)

            metrics = self._end_performance_timer(timer)

            # Check convergence. Compare against None explicitly: an achieved
            # IRR of exactly 0.0 is a real result, not a missing one.
            achieved_irr = target_cell.value
            convergence_error = self._convergence_error(achieved_irr, target_irr)

            solver_results['goal_seek'] = {
                **metrics.to_dict(),
                'target_irr': target_irr,
                'achieved_irr': achieved_irr,
                'convergence_error': convergence_error,
                'converged': convergence_error < 0.0001,
                'original_lease_payment': original_lease,
                'optimized_lease_payment': changing_cell.value
            }

            print(f"   ✅ Goal Seek: {metrics.duration:.3f}s, Convergence: {convergence_error:.6f}")

        except Exception as e:
            print(f"   ❌ Goal Seek failed: {e}")
            solver_results['goal_seek'] = {'error': str(e)}

        self.analysis_results['solver_performance'] = solver_results
        return solver_results

    def profile_formula_calculation(self) -> Dict:
        """
        Step 3: Profile formula calculation timing for different sections.

        Each scenario is run three times; average, minimum and maximum
        durations are reported. A failing scenario is recorded as
        `{'error': ...}`.

        Returns:
            Dict keyed by the scenario name in snake_case; also stored under
            `analysis_results['calculation_performance']`.
        """
        print("\n📊 Step 3: Profiling Formula Calculations...")

        calculation_results = {}

        # Test different calculation scenarios.
        # The full-workbook scenario uses CalculateFull: Application.Calculate
        # only recalculates cells Excel considers dirty, so repeated runs
        # would measure almost nothing after the first one.
        scenarios = [
            ('Full Workbook Recalc', lambda: self.app.api.CalculateFull()),
            ('Inputs Sheet Calc', lambda: self.wb.sheets['Inputs'].api.Calculate()),
            ('Calculations Sheet Calc', lambda: self.wb.sheets['Calculations'].api.Calculate()),
            ('Results Sheet Calc', lambda: self.wb.sheets['Results'].api.Calculate())
        ]

        for scenario_name, calc_function in scenarios:
            print(f"   Testing: {scenario_name}")
            result_key = scenario_name.lower().replace(' ', '_')

            try:
                # Multiple runs for average timing
                durations = []

                for run in range(3):  # 3 runs for average
                    timer = self._start_performance_timer(f"{scenario_name} - Run {run+1}")
                    calc_function()
                    metrics = self._end_performance_timer(timer)
                    durations.append(metrics.duration)

                avg_duration = sum(durations) / len(durations)
                min_duration = min(durations)
                max_duration = max(durations)

                calculation_results[result_key] = {
                    'average_duration': round(avg_duration, 4),
                    'min_duration': round(min_duration, 4),
                    'max_duration': round(max_duration, 4),
                    'runs': durations
                }

                print(f"   ✅ {scenario_name}: Avg {avg_duration:.3f}s (min: {min_duration:.3f}s, max: {max_duration:.3f}s)")

            except Exception as e:
                print(f"   ❌ {scenario_name} failed: {e}")
                calculation_results[result_key] = {'error': str(e)}

        self.analysis_results['calculation_performance'] = calculation_results
        return calculation_results

    def analyze_workbook_structure(self) -> Dict:
        """
        Step 4: Analyze workbook structure for optimization opportunities.

        For every sheet, up to STRUCTURE_SAMPLE_LIMIT cells are inspected in
        column-major order starting at A1 (so on tall sheets the sample covers
        only the leftmost columns) and classified as formula, value or empty.
        The file size on disk and the sheet count are recorded as well.

        Returns:
            Dict with 'worksheets' and 'file_stats' filled in ('named_ranges'
            and 'formula_stats' are present but left empty); also stored
            under `analysis_results['structure_analysis']`.
        """
        print("\n🏗️  Step 4: Analyzing Workbook Structure...")

        structure_analysis = {
            'worksheets': {},
            'named_ranges': {},
            'file_stats': {},
            'formula_stats': {}
        }

        # Analyze worksheets
        for sheet in self.wb.sheets:
            print(f"   Analyzing sheet: {sheet.name}")

            try:
                used_range = sheet.used_range
                if used_range:
                    row_count = used_range.last_cell.row
                    col_count = used_range.last_cell.column
                    cell_count = row_count * col_count

                    # Count formulas
                    formulas = 0
                    values = 0
                    empty_cells = 0

                    # Sample cells for formula analysis (to avoid performance issues)
                    sample_size = min(self.STRUCTURE_SAMPLE_LIMIT, cell_count)

                    for i in range(sample_size):
                        row = (i % row_count) + 1
                        col = (i // row_count) + 1
                        try:
                            cell = sheet.range((row, col))

                            # Excel returns a constant's text from .formula, so
                            # only a leading '=' marks a real formula.
                            if self._is_formula(cell.formula):
                                formulas += 1
                            elif cell.value is not None:
                                values += 1
                            else:
                                empty_cells += 1
                        except Exception:
                            # Best effort: an unreadable cell is left out of
                            # all three counters.
                            continue

                    structure_analysis['worksheets'][sheet.name] = {
                        'dimensions': f"{row_count} x {col_count}",
                        'total_cells': cell_count,
                        'sample_size': sample_size,
                        'formulas_sampled': formulas,
                        'values_sampled': values,
                        'empty_cells_sampled': empty_cells,
                        'formula_percentage': round((formulas / sample_size) * 100, 2) if sample_size > 0 else 0
                    }

                else:
                    structure_analysis['worksheets'][sheet.name] = {
                        'status': 'empty'
                    }

            except Exception as e:
                structure_analysis['worksheets'][sheet.name] = {
                    'error': str(e)
                }

        # File size analysis
        sheet_total = len(self.wb.sheets)
        file_size = self.excel_file_path.stat().st_size / (1024 * 1024)  # MB
        structure_analysis['file_stats'] = {
            'file_size_mb': round(file_size, 2),
            'worksheet_count': sheet_total
        }

        print(f"   ✅ File size: {file_size:.2f}MB, Sheets: {sheet_total}")

        self.analysis_results['structure_analysis'] = structure_analysis
        return structure_analysis

    def audit_volatile_functions(self) -> Dict:
        """
        Step 5: Audit volatile functions that cause performance issues.

        Reads the workbook file from disk with openpyxl and checks every
        formula cell for the VOLATILE_FUNCTIONS substrings. A formula that
        uses several different volatile functions contributes one count per
        function, both to the per-sheet tally and to
        'total_volatile_formulas'. More than 50 hits adds a high-priority
        entry to `optimization_recommendations`.

        Returns:
            Dict with 'functions_found' (per sheet), 'total_volatile_formulas'
            and 'sheets_affected', plus 'error' if the scan failed; also
            stored under `analysis_results['volatile_audit']`.
        """
        print("\n⚡ Step 5: Auditing Volatile Functions...")

        volatile_audit = {
            'functions_found': {},
            'total_volatile_formulas': 0,
            'sheets_affected': []
        }

        # Use openpyxl for formula analysis (faster for reading formulas)
        wb_openpyxl = None
        try:
            wb_openpyxl = openpyxl.load_workbook(self.excel_file_path)

            for sheet_name in wb_openpyxl.sheetnames:
                sheet = wb_openpyxl[sheet_name]
                sheet_volatiles = defaultdict(int)

                print(f"   Scanning sheet: {sheet_name}")

                for row in sheet.iter_rows():
                    for cell in row:
                        if cell.data_type != 'f' or not cell.value:
                            continue  # not a formula cell

                        # Array formulas may come back as an object carrying
                        # the formula in `.text`; plain formulas are strings.
                        raw_formula = cell.value
                        formula_text = str(getattr(raw_formula, 'text', raw_formula))

                        for volatile_func in self._find_volatile_functions(formula_text):
                            sheet_volatiles[volatile_func] += 1
                            volatile_audit['total_volatile_formulas'] += 1

                if sheet_volatiles:
                    volatile_audit['functions_found'][sheet_name] = dict(sheet_volatiles)
                    if sheet_name not in volatile_audit['sheets_affected']:
                        volatile_audit['sheets_affected'].append(sheet_name)

        except Exception as e:
            print(f"   ⚠️  Could not analyze formulas with openpyxl: {e}")
            volatile_audit['error'] = str(e)
        finally:
            # Release the file handle even when the scan failed part-way.
            if wb_openpyxl is not None:
                wb_openpyxl.close()

        total_found = volatile_audit['total_volatile_formulas']
        affected_sheets = len(volatile_audit['sheets_affected'])

        print(f"   ✅ Found {total_found} volatile formulas across {affected_sheets} sheets")

        if total_found > 50:
            self.optimization_recommendations.append(
                f"HIGH PRIORITY: {total_found} volatile functions found - consider reducing for better performance"
            )

        self.analysis_results['volatile_audit'] = volatile_audit
        return volatile_audit

    def run_full_profile(self) -> Dict:
        """
        Run complete profiling analysis.

        Optionally copies the workbook file to `<stem>_profile_backup<suffix>`
        next to the original, then runs the profiling steps in order. Steps
        1, 3 and 5 honour their `profile_config` switches; the solver step and
        the structure analysis always run.

        Returns:
            Summary dict containing the completion timestamp, total time,
            file path, configuration, all analysis results, recommendations
            and the individual performance metrics.

        Raises:
            Exception: any error escaping a profiling step is printed and
                re-raised.
        """
        print(f"🚀 Starting Full Excel Model Profiling: {self.excel_file_path.name}")
        print("=" * 60)

        start_time = time.time()

        # Create backup if enabled
        if self.profile_config['create_backup']:
            backup_path = self.excel_file_path.parent / f"{self.excel_file_path.stem}_profile_backup{self.excel_file_path.suffix}"
            import shutil
            shutil.copy2(self.excel_file_path, backup_path)
            print(f"📁 Backup created: {backup_path.name}")

        # Run all profiling steps
        try:
            # Step 1: Macro Performance
            if self.profile_config['enable_macro_profiling']:
                self.profile_macro_performance()

            # Step 2: Solver Performance
            self.profile_solver_execution()

            # Step 3: Formula Calculations
            if self.profile_config['enable_formula_analysis']:
                self.profile_formula_calculation()

            # Step 4: Structure Analysis
            self.analyze_workbook_structure()

            # Step 5: Volatile Functions
            if self.profile_config['enable_volatility_audit']:
                self.audit_volatile_functions()

            # Generate summary
            total_time = time.time() - start_time

            profile_summary = {
                'profiling_completed': datetime.now().isoformat(),
                'total_profiling_time': round(total_time, 2),
                'excel_file': str(self.excel_file_path),
                # Snapshot, so later config changes don't alter this report.
                'config_used': dict(self.profile_config),
                'analysis_results': self.analysis_results,
                'optimization_recommendations': self.optimization_recommendations,
                'performance_metrics': [m.to_dict() for m in self.metrics]
            }

            print(f"\n🎉 Profiling Complete! Total time: {total_time:.2f}s")
            print(f"📊 Analysis results ready for optimization phase")

            return profile_summary

        except Exception as e:
            print(f"❌ Profiling failed: {e}")
            raise

In [None]:
def main():
    """
    Profile the lease model workbook and write the findings to a JSON file.

    Looks for "project_finance_lease_model.xlsm" in the current working
    directory; if it is missing, a message is printed and nothing else
    happens. Otherwise the full profile is run and its summary is written to
    "<workbook stem>_profiling_results.json".
    """
    workbook_name = "project_finance_lease_model.xlsm"
    workbook_path = Path(workbook_name)

    # Guard clause: nothing to profile without the workbook.
    if not workbook_path.exists():
        print(f"❌ Excel file not found: {workbook_name}")
        return

    report_name = workbook_path.stem + "_profiling_results.json"

    # The report is written while Excel is still open, inside the profiler context.
    with ExcelModelProfiler(workbook_name) as profiler:
        summary = profiler.run_full_profile()

        with open(report_name, 'w') as report_file:
            json.dump(summary, report_file, indent=2)

        print(f"💾 Results saved to: {report_name}")

In [None]:
# Run the profiler only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()