In [None]:
##################
# This code is the basis for optimizing the number of investments based on the fund's strategy
# ######################################################


from typing import Dict, List
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import logging
import math as math



# Import our new, separated modules
from parameters import PortfolioResult, CompanyResult,FundParameters
import engine as vcm

# This line correctly imports the fully-processed and validated
# parameters from your loader script.
from parameters_loader import fund_params as base_case_params

from analysis_utils import display_portfolio_deep_dive, display_company_journeys

def _follow_on_participation_rate(params) -> float:
    """Map the configured follow-on strategy type to a pro-rata participation rate."""
    strategy = params.follow_on_strategy
    if strategy.type == "passive":
        return strategy.passive_participation_rate
    if strategy.type == "pro_rata":
        return 1.0
    # "spray_and_pray" and any unrecognised strategy: no follow-on participation.
    return 0.0


def calculate_expected_investment_per_company_monte_carlo(
    params: FundParameters,
    rng: np.random.Generator = None,
    n_simulations: int = 10000,
    verbose: bool = False
) -> Dict:
    """
    Estimate the expected total capital deployed per company, by entry stage.

    For every candidate entry stage (all stages in ``params.stages_order`` except
    the last two) the function:

    1. draws ``n_simulations`` post-money valuations from the stage's lognormal
       distribution and sizes the initial cheque as
       ``valuation * initial ownership target``;
    2. walks forward through the later stages, obtaining each next-round
       valuation from ``quick_simulate_vectorized`` and adding a follow-on amount
       ``next valuation * target dilution * initial ownership * pro-rata rate``,
       weighted by the cumulative probability of reaching that round.

    Args:
        params: Fund parameters containing stage definitions and strategy.
        rng: Generator used to draw the initial valuations. When omitted, a
            generator seeded with 42 is created. It is not forwarded to
            ``quick_simulate_vectorized``.
        n_simulations: Number of sampled companies per entry stage.
        verbose: Whether to print intermediate statistics.

    Returns:
        Dictionary keyed by entry-stage name. Each value contains
        ``'initial_investment'`` (mean), ``'total_expected_investment'`` (mean),
        ``'follow_on_multiple'`` (ratio of the two means, 0 when the mean initial
        investment is not positive) and ``'simulation_stats'`` (medians and
        standard deviations of both quantities).
    """
    if rng is None:
        rng = np.random.default_rng(seed=42)

    results = {}
    stages = params.stages_order
    last_entry_idx = len(stages) - 2  # the final two stages are never entry points

    # The strategy does not depend on the entry stage, so resolve it once.
    strategy_type = params.follow_on_strategy.type
    current_pro_rata_rate = _follow_on_participation_rate(params)

    for initial_stage_idx in range(0, last_entry_idx):
        initial_stage = stages[initial_stage_idx]

        if verbose:
            print(f"\n=== Analyzing initial stage: {initial_stage} ===")

        # Sample initial valuations from lognormal distribution
        valuation_dist = params.stages[initial_stage].post_money_valuation_dist
        mu_log = valuation_dist.mu_log
        sigma_log = valuation_dist.sigma_log

        if verbose:
            print(f'Mu_log: {mu_log}, Sigma_log: {sigma_log}')

        valuation_sample = rng.lognormal(mean=mu_log, sigma=sigma_log, size=n_simulations)

        if verbose:
            print(f'Initial valuation - Mean: ${np.mean(valuation_sample):,.0f}, Median: ${np.median(valuation_sample):,.0f}')

        # Initial cheque = entry valuation * targeted ownership
        initial_ownership = params.initial_ownership_targets[initial_stage]
        initial_investment = valuation_sample * initial_ownership

        if verbose:
            print(f'Initial investment - Mean: ${np.mean(initial_investment):,.0f}')

        survival_rate = 1.0
        cumulative_investment = initial_investment.copy()

        if verbose:
            print(f'Follow-on strategy: {strategy_type}, Pro-rata rate: {current_pro_rata_rate}')
            print('\n--- Follow-on rounds simulation ---')

        # Simulate follow-on rounds
        for stage_idx in range(initial_stage_idx, last_entry_idx):
            current_stage = stages[stage_idx]

            if verbose:
                print(f'\nStage {stage_idx}: {current_stage}')

            old_valuation = valuation_sample

            # The simulator returns a bare float for a length-1 input; normalise
            # back to an array so n_simulations=1 keeps working on later rounds.
            valuation_sample = np.atleast_1d(
                quick_simulate_vectorized(
                    current_stage,
                    valuation_sample,
                    n_simulations=1
                )
            )

            valuation_multiple = valuation_sample / old_valuation

            if verbose:
                print(f'Valuation multiple - Mean: {np.mean(valuation_multiple):.2f}x, Median: {np.median(valuation_multiple):.2f}x')
                print(f'New valuation - Mean: ${np.mean(valuation_sample):,.0f}, Median: ${np.median(valuation_sample):,.0f}')

            # Probability of having survived every round up to and including this one
            survival_rate = survival_rate * params.stages[current_stage].prob_to_next_stage

            if verbose:
                print(f'Cumulative survival rate: {survival_rate:.3f}')

            target_dilution = params.stages[current_stage].target_dilution_pct

            # Survival-weighted follow-on amount for this round
            follow_on = (valuation_sample * target_dilution *
                         initial_ownership * current_pro_rata_rate * survival_rate)

            if verbose:
                print(f'Follow-on investment - Mean: ${np.mean(follow_on):,.0f}, Median: ${np.median(follow_on):,.0f}')

            cumulative_investment = follow_on + cumulative_investment

            if verbose:
                print(f'Cumulative investment - Mean: ${np.mean(cumulative_investment):,.0f}, Median: ${np.median(cumulative_investment):,.0f}')

        # Store results for this initial stage
        mean_initial = np.mean(initial_investment)
        mean_cumulative = np.mean(cumulative_investment)
        follow_on_multiple = mean_cumulative / mean_initial if mean_initial > 0 else 0

        results[initial_stage] = {
            'initial_investment': mean_initial,
            'total_expected_investment': mean_cumulative,
            'follow_on_multiple': follow_on_multiple,
            'simulation_stats': {
                'initial_investment_median': np.median(initial_investment),
                'total_expected_investment_median': np.median(cumulative_investment),
                'initial_investment_std': np.std(initial_investment),
                'total_expected_investment_std': np.std(cumulative_investment)
            }
        }

        if verbose:
            print(f'\n=== Final Results for {initial_stage} ===')
            print(f'Mean Initial Investment: ${mean_initial:,.0f}')
            print(f'Mean Total Expected Investment: ${mean_cumulative:,.0f}')
            # Reuse the guarded ratio so the summary matches the returned value.
            print(f'Follow-on Multiple: {follow_on_multiple:.2f}x')

    return results


def calculate_optimal_portfolio_size(
    params: FundParameters,
    rng: np.random.Generator = None,
    n_simulations: int = 10000
) -> Dict:
    """
    Size the portfolio by adding deals one at a time until the capital runs out.

    Years are taken in the order of ``params.dynamic_stage_allocation``. Within a
    year, up to ``params.max_deals_per_year`` deals are attempted; each deal's
    entry stage is drawn from that year's (normalised) allocation and costs the
    Monte Carlo expected total investment for that stage. A year stops at the
    first deal that would exceed the investable capital, and the whole process
    stops once not even the cheapest stage fits any more.

    Args:
        params: Fund parameters containing allocation strategy and constraints.
        rng: Generator used for the stage draws; a generator seeded with 42 is
            created when omitted.
        n_simulations: Sample count forwarded to the expected-investment
            Monte Carlo calculation.

    Returns:
        Dictionary with the portfolio sizing analysis: deal counts (total,
        conservative, per year, per stage), capital figures, the deal-by-deal
        timeline and the per-stage expected-investment breakdown.
    """
    rng = np.random.default_rng(seed=42) if rng is None else rng

    # Expected cost of one company, keyed by entry stage.
    per_stage = calculate_expected_investment_per_company_monte_carlo(
        params, n_simulations=n_simulations, verbose=False
    )

    budget = params.committed_capital * params.target_investable_capital_pct
    candidate_stages = list(per_stage.keys())

    deployed = 0.0
    deal_count = 0
    deals_by_year = {}
    deals_by_stage = {}
    timeline = []

    for yearly_plan in params.dynamic_stage_allocation:
        year = yearly_plan.year
        deals_by_year[year] = 0

        # Only stages that have both a cost estimate and an allocation this year.
        eligible = [s for s in candidate_stages if s in yearly_plan.allocation]
        weights = [yearly_plan.allocation[s] for s in eligible]

        weight_total = sum(weights)
        if not weight_total > 0:
            continue  # nothing to allocate this year
        probabilities = np.array(weights) / weight_total

        for _ in range(params.max_deals_per_year):
            stage = rng.choice(eligible, p=probabilities)
            cost = per_stage[stage]['total_expected_investment']

            if deployed + cost > budget:
                break  # this deal does not fit; stop deploying for the year

            deployed += cost
            deal_count += 1
            deals_by_year[year] += 1
            deals_by_stage[stage] = deals_by_stage.get(stage, 0) + 1

            timeline.append({
                'deal_number': deal_count,
                'year': year,
                'stage': stage,
                'deal_investment': cost,
                'cumulative_investment': deployed
            })

        # Stop entirely once even the cheapest stage can no longer be funded.
        cheapest = min(
            per_stage[s]['total_expected_investment'] for s in candidate_stages
        )
        if deployed + cheapest > budget:
            break

    average_cost = deployed / deal_count if deal_count > 0 else 0
    utilization = deployed / budget if budget > 0 else 0

    # Keep a 10% buffer on the deal count.
    conservative_count = int(deal_count * 0.9)

    return {
        'current_num_investments': params.num_investments,
        'available_capital': budget,
        'optimal_num_investments': deal_count,
        'conservative_num_investments': conservative_count,
        'total_expected_investment': deployed,
        'avg_expected_investment_per_company': average_cost,
        'expected_capital_utilization': utilization,
        'remaining_capital': budget - deployed,
        'deals_by_year': deals_by_year,
        'deals_by_stage': deals_by_stage,
        'investment_timeline': timeline,
        'stage_breakdown': per_stage
    }


def analyze_capital_allocation(params: FundParameters, rng: np.random.Generator = None):
    """
    Run the portfolio sizing analysis and print a human-readable report.

    The per-stage expected investments are not computed separately here:
    ``calculate_optimal_portfolio_size`` computes them itself and returns them
    under ``'stage_breakdown'``.

    Args:
        params: Fund parameters (capital, allocation schedule, strategy).
        rng: Generator forwarded to ``calculate_optimal_portfolio_size`` for the
            stage draws; a generator seeded with 42 is created when omitted.

    Returns:
        The analysis dictionary produced by ``calculate_optimal_portfolio_size``.
    """

    if rng is None:
        rng = np.random.default_rng(seed=42)

    # Pass by keyword: the sizing function's signature is
    # (params, rng, n_simulations), so positional extras would be misassigned.
    analysis = calculate_optimal_portfolio_size(params, rng=rng, n_simulations=10000)

    print("=== CAPITAL ALLOCATION ANALYSIS ===\n")
    print(f"Fund Size: ${params.committed_capital:,.0f}")
    print(f"Available for Investments: ${analysis['available_capital']:,.0f}")
    print(f"Target Investment %: {params.target_investable_capital_pct:.1%}")
    print(f"Max Deals per Year: {params.max_deals_per_year}\n")

    print("=== PORTFOLIO SIZING ===")
    print(f"Current Portfolio Size: {analysis['current_num_investments']} companies")
    print(f"Optimal Portfolio Size: {analysis['optimal_num_investments']} companies")
    print(f"Conservative Portfolio Size: {analysis['conservative_num_investments']} companies")
    print(f"Expected Capital Utilization: {analysis['expected_capital_utilization']:.1%}")
    print(f"Remaining Capital: ${analysis['remaining_capital']:,.0f}\n")

    print(f"Average Expected Investment per Company: ${analysis['avg_expected_investment_per_company']:,.0f}")
    print(f"Total Expected Investment: ${analysis['total_expected_investment']:,.0f}\n")

    print("=== DEALS BY YEAR ===")
    for year, deals in analysis['deals_by_year'].items():
        if deals > 0:
            print(f"Year {year}: {deals} deals")

    print("\n=== DEALS BY STAGE ===")
    # deals_by_stage only has entries when at least one deal was made, so the
    # division below never sees a zero total.
    for stage, deals in analysis['deals_by_stage'].items():
        print(f"{stage}: {deals} deals ({deals/analysis['optimal_num_investments']:.1%})")

    print("\n=== BY STAGE BREAKDOWN ===")
    for stage, data in analysis['stage_breakdown'].items():
        print(f"{stage}:")
        print(f"  Initial Investment: ${data['initial_investment']:,.0f}")
        print(f"  Total Expected: ${data['total_expected_investment']:,.0f}")
        print(f"  Follow-on Multiple: {data['follow_on_multiple']:.2f}x")

    return analysis


def quick_simulate_vectorized(stage: str, from_valuations, n_simulations: int = 1,
                              rng: np.random.Generator = None):
    """
    Simulate next-round valuations for one or many current valuations.

    The log of the valuation multiple is modelled per stage as
    ``alpha - beta * log(valuation) + noise`` with normally distributed noise of
    standard deviation ``residual_std``; the next valuation is
    ``valuation * exp(log multiple)``.

    Parameters:
    stage: Stage name; one of 'Pre-Seed', 'Seed', 'Series A', 'Series B'
    from_valuations: Single valuation (float) or array of valuations
    n_simulations: Number of simulations per valuation
    rng: Optional generator supplying the noise. When omitted, a fresh generator
         seeded with 42 is created on every call, so repeated calls reuse the
         same noise draws. Pass a shared generator to get independent draws
         across calls.

    Returns:
    - If n_simulations=1: 1D array of next valuations (same length as input)
    - If n_simulations>1: 2D array of shape (len(from_valuations), n_simulations)
    - If the input holds a single valuation (scalar or length-1 sequence):
      a float when n_simulations=1, otherwise a 1D array of n_simulations values

    Raises:
    ValueError: if ``stage`` has no fitted model parameters
    """
    if rng is None:
        # Historical default: deterministic output for a given input.
        rng = np.random.default_rng(seed=42)

    model_params = {
        'Pre-Seed': {'alpha': 7.9438, 'beta': 0.4823, 'residual_std': 1.0643},
        'Seed': {'alpha': 9.7765, 'beta': 0.5139, 'residual_std': 0.7872},
        'Series A': {'alpha': 7.4050, 'beta': 0.3594, 'residual_std': 0.7012},
        'Series B': {'alpha': 6.7335, 'beta': 0.3115, 'residual_std': 0.8112}
    }

    if stage not in model_params:
        raise ValueError(f"Stage '{stage}' not supported. Available: {list(model_params.keys())}")

    params = model_params[stage]

    # Convert input to numpy array for consistent handling
    from_valuations = np.atleast_1d(from_valuations)
    is_single_input = len(from_valuations) == 1

    # Expected log(multiple) for each valuation
    expected_log_multiples = params['alpha'] - params['beta'] * np.log(from_valuations)

    if n_simulations == 1:
        # One draw per valuation -> 1D result
        noise = rng.normal(0, params['residual_std'], size=len(from_valuations))
        next_valuations = from_valuations * np.exp(expected_log_multiples + noise)

        # Return scalar if single input, otherwise return 1D array
        return float(next_valuations[0]) if is_single_input else next_valuations

    # Several draws per valuation -> one row per valuation, one column per draw
    noise = rng.normal(0, params['residual_std'],
                       size=(len(from_valuations), n_simulations))
    log_multiples = expected_log_multiples.reshape(-1, 1) + noise
    next_valuations = from_valuations.reshape(-1, 1) * np.exp(log_multiples)

    # Return 1D array if single input, otherwise return 2D array
    return next_valuations[0] if is_single_input else next_valuations

✅ Configuration file successfully validated against schema.
✅ Configuration file successfully passed logical validation.
✅ FundParameters object created successfully.


In [3]:
# Per-stage expected investment for the base case; the result is not stored here.
calculate_expected_investment_per_company_monte_carlo(base_case_params)

# Deal-by-deal portfolio sizing for the base case (default generator and sample count).
calculate_optimal_portfolio_size(base_case_params)

{'current_num_investments': 20,
 'available_capital': 40000000.0,
 'optimal_num_investments': 12,
 'conservative_num_investments': 10,
 'total_expected_investment': 39359783.69027105,
 'avg_expected_investment_per_company': 3279981.974189254,
 'expected_capital_utilization': 0.9839945922567762,
 'remaining_capital': 640216.3097289503,
 'deals_by_year': {1: 6, 2: 6},
 'deals_by_stage': {'Seed': 8, 'Pre-Seed': 4},
 'investment_timeline': [{'deal_number': 1,
   'year': 1,
   'stage': 'Seed',
   'deal_investment': 3821264.82962178,
   'cumulative_investment': 3821264.82962178},
  {'deal_number': 2,
   'year': 1,
   'stage': 'Pre-Seed',
   'deal_investment': 2197416.2633242025,
   'cumulative_investment': 6018681.092945983},
  {'deal_number': 3,
   'year': 1,
   'stage': 'Seed',
   'deal_investment': 3821264.82962178,
   'cumulative_investment': 9839945.922567762},
  {'deal_number': 4,
   'year': 1,
   'stage': 'Seed',
   'deal_investment': 3821264.82962178,
   'cumulative_investment': 1366