# SmartPave Analytics: Cost Optimization & Funding Allocation

## Overview
This notebook implements optimization algorithms to allocate maintenance funding across road segments for maximum impact and cost efficiency.

## Objectives
- Implement funding allocation optimization
- Calculate ROI for different strategies
- Compare maintenance approaches
- Generate funding recommendations
- Create cost-benefit analysis

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.optimize import minimize
from sklearn.ensemble import RandomForestRegressor
import warnings
warnings.filterwarnings('ignore')

# Set up plotting
# `import matplotlib.pyplot as plt` binds only the name `plt`, so the top-level
# package has to be imported explicitly before `matplotlib.use` can be called.
import matplotlib

plt.style.use('default')
# Select the non-interactive Agg backend so figures render without a display.
matplotlib.use('Agg')

print("🚀 SmartPave Cost Optimization System")
print("="*50)


In [None]:
# Load Data from Snowflake
print("📊 Loading data from Snowflake...")

# Pull both source tables into pandas DataFrames.
# NOTE(review): `session` is not defined anywhere in this file — presumably an
# active Snowpark session supplied by the notebook runtime; confirm.
features_query = "SELECT * FROM DOT_workshop_test.smartpave_analytics.pavement_features"
maintenance_query = "SELECT * FROM DOT_workshop_test.smartpave_analytics.maintenance_history"
features_df = session.sql(features_query).to_pandas()
maintenance_df = session.sql(maintenance_query).to_pandas()

# Report the (rows, columns) shape of each table.
print("Features data:", features_df.shape)
print("Maintenance data:", maintenance_df.shape)

# Preview the first few feature rows.
print("\nSample features data:", features_df.head(), sep="\n")


In [None]:
# Cost Optimization Algorithms
print("🔧 Implementing Cost Optimization Algorithms...")

class CostOptimizer:
    """Score road segments and greedily allocate a maintenance budget.

    Every score is derived from the most recent row (by ``DATE``) that
    ``features_df`` holds for a segment. ``maintenance_df`` is stored on the
    instance but is not read by any method of this class.
    """

    # Ranking strategies accepted by optimize_funding_allocation.
    STRATEGIES = ('priority', 'roi', 'cost_efficient')

    # Base maintenance cost per road type; unknown types use DEFAULT_COST.
    BASE_COSTS = {
        'Highway': 50000,
        'Arterial': 30000,
        'Collector': 20000,
        'Local': 10000,
    }

    # Cost multiplier per maintenance type; unknown types use 1.0.
    TYPE_MULTIPLIERS = {
        'preventive': 0.5,
        'standard': 1.0,
        'rehabilitation': 2.0,
        'reconstruction': 5.0,
    }

    # Cost used for unknown segments and unknown road types.
    DEFAULT_COST = 10000

    # Benefit credited per priority point when computing ROI.
    BENEFIT_PER_PRIORITY_POINT = 1000

    # Floor for the condition multiplier. Without it a condition score of 10
    # or more drives the estimated cost to zero or below, which breaks the
    # greedy allocation and divides by zero in the 'cost_efficient' ranking.
    MIN_CONDITION_MULTIPLIER = 0.1

    _SCORE_COLUMNS = ['segment_id', 'priority_score', 'estimated_cost', 'roi']
    _ALLOCATION_COLUMNS = _SCORE_COLUMNS + ['allocation']

    def __init__(self, features_df, maintenance_df):
        """Store the input frames and the unique ``SEGMENT_ID`` values."""
        self.features_df = features_df
        self.maintenance_df = maintenance_df
        self.segments = features_df['SEGMENT_ID'].unique()
        self.n_segments = len(self.segments)
        # Lazily built cache: segment id -> latest feature row.
        self._latest_source = None
        self._latest_by_segment = {}

    def _latest_record(self, segment_id):
        """Return the most recent feature row for a segment, or None if absent.

        The lookup table is built once per ``features_df`` object instead of
        filtering and sorting the whole frame on every call. It is rebuilt when
        ``self.features_df`` is rebound to another object; in-place edits of
        the same frame are not detected.
        """
        if getattr(self, '_latest_source', None) is not self.features_df:
            # Stable sort keeps input order for equal dates; rows with a
            # missing DATE sort first so they never count as "latest".
            ordered = self.features_df.sort_values(
                'DATE', kind='stable', na_position='first'
            )
            latest = ordered.drop_duplicates(subset='SEGMENT_ID', keep='last')
            self._latest_by_segment = dict(
                zip(latest['SEGMENT_ID'].tolist(),
                    (row for _, row in latest.iterrows()))
            )
            self._latest_source = self.features_df
        return self._latest_by_segment.get(segment_id)

    @staticmethod
    def _numeric(record, columns, default):
        """Return the first usable value among ``columns`` as a float.

        A column that is absent, None or NaN is skipped; ``default`` is
        returned when no column yields a value. Converting through ``float``
        also lets non-float numerics such as ``decimal.Decimal`` take part in
        the float arithmetic of the scoring formulas.
        """
        for column in columns:
            if column in record.index:
                value = record[column]
                if value is not None and not pd.isna(value):
                    return float(value)
        return default

    @classmethod
    def _roi(cls, priority, cost):
        """Return (benefit - cost) / cost, or 0 when cost is not positive."""
        benefit = priority * cls.BENEFIT_PER_PRIORITY_POINT
        return (benefit - cost) / cost if cost > 0 else 0

    def calculate_priority_score(self, segment_id):
        """Calculate priority score based on condition, traffic, and risk factors.

        Returns 0 for a segment with no feature rows. Missing or NaN inputs
        fall back to condition 5, traffic 0 and risk 0.
        """
        record = self._latest_record(segment_id)
        if record is None:
            return 0

        # Priority factors (higher = more urgent)
        condition_factor = 10 - self._numeric(record, ('CONDITION_SCORE',), 5)
        traffic_factor = self._numeric(record, ('TRAFFIC_VOLUME',), 0) / 1000
        # The other columns are read in upper case, so accept 'RISK_SCORE' as
        # well as the original lower-case 'risk_score' key.
        risk_factor = self._numeric(record, ('risk_score', 'RISK_SCORE'), 0)

        # Weighted priority score
        return condition_factor * 0.4 + traffic_factor * 0.3 + risk_factor * 0.3

    def estimate_maintenance_cost(self, segment_id, maintenance_type='standard'):
        """Estimate maintenance cost for a segment.

        Returns DEFAULT_COST for a segment with no feature rows. Otherwise the
        road-type base cost is scaled by a condition multiplier (floored at
        MIN_CONDITION_MULTIPLIER), a traffic multiplier and the
        maintenance-type multiplier.
        """
        record = self._latest_record(segment_id)
        if record is None:
            return self.DEFAULT_COST

        road_type = record.get('ROAD_TYPE', 'Local')
        base_cost = self.BASE_COSTS.get(road_type, self.DEFAULT_COST)

        # 20% change per condition point away from 5, never below the floor.
        condition = self._numeric(record, ('CONDITION_SCORE',), 5)
        condition_multiplier = max(
            1 + (5 - condition) * 0.2, self.MIN_CONDITION_MULTIPLIER
        )

        # 10% increase per 10K traffic
        traffic = self._numeric(record, ('TRAFFIC_VOLUME',), 0)
        traffic_multiplier = 1 + (traffic / 10000) * 0.1

        type_multiplier = self.TYPE_MULTIPLIERS.get(maintenance_type, 1.0)

        return base_cost * condition_multiplier * traffic_multiplier * type_multiplier

    def calculate_roi(self, segment_id, maintenance_type='standard'):
        """Calculate Return on Investment for maintenance.

        ROI = (benefit - cost) / cost, where benefit is the priority score
        times BENEFIT_PER_PRIORITY_POINT. Returns 0 when cost is not positive.
        """
        cost = self.estimate_maintenance_cost(segment_id, maintenance_type)
        priority = self.calculate_priority_score(segment_id)
        return self._roi(priority, cost)

    def _score_segments(self):
        """Return one row per segment with priority, standard cost and ROI."""
        rows = []
        for segment_id in self.segments:
            priority = self.calculate_priority_score(segment_id)
            cost = self.estimate_maintenance_cost(segment_id)
            rows.append({
                'segment_id': segment_id,
                'priority_score': priority,
                'estimated_cost': cost,
                'roi': self._roi(priority, cost),
            })
        # Explicit columns keep the frame sortable when there are no segments.
        return pd.DataFrame(rows, columns=self._SCORE_COLUMNS)

    def optimize_funding_allocation(self, total_budget, strategy='priority'):
        """Optimize funding allocation across segments.

        Segments are ranked by the chosen strategy and funded in order. The
        first segment that does not fit receives whatever budget is left, and
        allocation then stops.

        Args:
            total_budget: Amount available to allocate.
            strategy: One of STRATEGIES: 'priority' (priority score), 'roi'
                (return on investment) or 'cost_efficient' (priority per
                dollar of estimated cost).

        Returns:
            Tuple of (allocations DataFrame, remaining budget). The DataFrame
            always has the columns segment_id, priority_score, estimated_cost,
            roi and allocation, even when no segment is funded.

        Raises:
            ValueError: If ``strategy`` is not one of STRATEGIES.
        """
        if strategy not in self.STRATEGIES:
            raise ValueError(
                f"Unknown strategy {strategy!r}; expected one of {self.STRATEGIES}"
            )

        print(f"💰 Optimizing funding allocation with ${total_budget:,.0f} budget...")

        segment_df = self._score_segments()

        if strategy == 'priority':
            sort_column = 'priority_score'
        elif strategy == 'roi':
            sort_column = 'roi'
        else:
            segment_df['priority_per_dollar'] = (
                segment_df['priority_score'] / segment_df['estimated_cost']
            )
            sort_column = 'priority_per_dollar'

        # Highest first; a stable sort makes the order of ties deterministic.
        ranked = segment_df.sort_values(sort_column, ascending=False, kind='stable')

        allocated_segments = []
        remaining_budget = total_budget

        # to_dict('records') keeps each column's own type, unlike iterrows().
        for candidate in ranked.to_dict('records'):
            if remaining_budget <= 0:
                break
            cost = candidate['estimated_cost']
            allocation = min(cost, remaining_budget)
            allocated_segments.append({
                'segment_id': candidate['segment_id'],
                'priority_score': candidate['priority_score'],
                'estimated_cost': cost,
                'roi': candidate['roi'],
                'allocation': allocation,
            })
            remaining_budget -= allocation
            if allocation < cost:
                # Partial allocation used up the budget.
                break

        allocations = pd.DataFrame(allocated_segments, columns=self._ALLOCATION_COLUMNS)
        return allocations, remaining_budget

# Build the optimizer from the two loaded tables and report how many
# distinct segments it found.
optimizer = CostOptimizer(features_df=features_df, maintenance_df=maintenance_df)
print(f"✅ Optimizer initialized for {optimizer.n_segments} segments")


In [None]:
# Compare Optimization Strategies
print("📊 Comparing Optimization Strategies...")

# Budget scenarios ($1M, $2M, $5M) and the ranking strategies to evaluate
budgets = [1000000, 2000000, 5000000]
strategies = ['priority', 'roi', 'cost_efficient']

# results[budget][strategy] -> summary metrics plus the allocation DataFrame
results = {}

for budget in budgets:
    print(f"\n💰 Budget: ${budget:,}")
    print("-" * 40)

    budget_results = {}

    for strategy in strategies:
        allocation, remaining = optimizer.optimize_funding_allocation(budget, strategy)

        # Summarise this run; averages are taken over the funded segments.
        outcome = {
            'total_allocated': allocation['allocation'].sum(),
            'remaining_budget': remaining,
            'segments_served': len(allocation),
            'avg_priority': allocation['priority_score'].mean(),
            'avg_roi': allocation['roi'].mean(),
            'allocation': allocation,
        }
        budget_results[strategy] = outcome

        print(f"  {strategy.upper()}:")
        print(f"    Segments served: {outcome['segments_served']}")
        print(f"    Total allocated: ${outcome['total_allocated']:,.0f}")
        print(f"    Remaining budget: ${outcome['remaining_budget']:,.0f}")
        print(f"    Avg priority: {outcome['avg_priority']:.2f}")
        print(f"    Avg ROI: {outcome['avg_roi']:.2f}")

    results[budget] = budget_results

print("\n✅ Strategy comparison complete!")


In [None]:
# Generate Visualizations and Recommendations
print("📈 Generating Visualizations and Recommendations...")

# Flatten results[budget][strategy] into one row per (budget, strategy) pair
summary_data = [
    {
        'Budget': f"${budget/1000000:.0f}M",
        'Strategy': strategy.title(),
        'Segments Served': result['segments_served'],
        'Total Allocated': result['total_allocated'],
        'Avg Priority': result['avg_priority'],
        'Avg ROI': result['avg_roi'],
    }
    for budget, budget_results in results.items()
    for strategy, result in budget_results.items()
]
summary_df = pd.DataFrame(summary_data)

# Visualization 1: one grouped bar chart per metric on a 2x2 grid
plt.figure(figsize=(12, 8))

panels = [
    ('Segments Served', 'Segments Served by Strategy and Budget'),
    ('Total Allocated', 'Total Allocated by Strategy and Budget'),
    ('Avg Priority', 'Average Priority Score by Strategy and Budget'),
    ('Avg ROI', 'Average ROI by Strategy and Budget'),
]
for position, (metric, title) in enumerate(panels, start=1):
    plt.subplot(2, 2, position)
    sns.barplot(data=summary_df, x='Budget', y=metric, hue='Strategy')
    plt.title(title)
    plt.xticks(rotation=45)

plt.tight_layout()
plt.savefig('/tmp/cost_optimization_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

print("📊 Visualization 1: Cost Optimization Analysis")
print("   - Segments served, total allocated, priority scores, and ROI by strategy")


In [None]:
# Generate Funding Recommendations
print("🎯 Generating Funding Recommendations...")

# Weights for ranking strategies within a budget:
# segments served (40%) + avg priority (30%) + avg ROI (30%)
score_weights = {'segments_served': 0.4, 'avg_priority': 0.3, 'avg_roi': 0.3}


def _min_max_scale(values_by_strategy):
    """Rescale one metric to [0, 1] across strategies.

    NaN values score 0, and a metric that is identical for every strategy
    scores 0 for all of them, so it does not affect the ranking.
    """
    finite = [value for value in values_by_strategy.values() if pd.notna(value)]
    low = min(finite, default=0.0)
    span = max(finite, default=0.0) - low
    return {
        name: (value - low) / span if span > 0 and pd.notna(value) else 0.0
        for name, value in values_by_strategy.items()
    }


# Find best strategy for each budget
recommendations = {}

for budget in budgets:
    budget_results = results[budget]

    # The three metrics are on different scales (a segment count, a priority
    # score and a ratio). Each is scaled to [0, 1] across the strategies first
    # so that the weights express the intended 40/30/30 split instead of
    # letting the segment count dominate the sum.
    scaled = {
        metric: _min_max_scale(
            {strategy: result[metric] for strategy, result in budget_results.items()}
        )
        for metric in score_weights
    }

    # Score each strategy (higher is better, range 0 to 1)
    strategy_scores = {
        strategy: sum(
            weight * scaled[metric][strategy]
            for metric, weight in score_weights.items()
        )
        for strategy in budget_results
    }

    best_strategy = max(strategy_scores, key=strategy_scores.get)

    recommendations[budget] = {
        'best_strategy': best_strategy,
        'score': strategy_scores[best_strategy],
        'result': budget_results[best_strategy]
    }

# Display recommendations
print("\n" + "="*60)
print("🎯 FUNDING ALLOCATION RECOMMENDATIONS")
print("="*60)

for budget, rec in recommendations.items():
    print(f"\n💰 BUDGET: ${budget:,}")
    print(f"🏆 RECOMMENDED STRATEGY: {rec['best_strategy'].upper()}")
    print(f"📊 PERFORMANCE SCORE: {rec['score']:.2f}")
    print(f"🛣️  SEGMENTS SERVED: {rec['result']['segments_served']}")
    print(f"💵 TOTAL ALLOCATED: ${rec['result']['total_allocated']:,.0f}")
    print(f"⚠️  AVG PRIORITY: {rec['result']['avg_priority']:.2f}")
    print(f"📈 AVG ROI: {rec['result']['avg_roi']:.2f}")

# Top priority segments for immediate action
print(f"\n🚨 TOP PRIORITY SEGMENTS FOR IMMEDIATE ACTION")
print("-" * 50)

# Score every segment, then keep the 10 with the highest priority score
all_segments = [
    {
        'segment_id': segment_id,
        'priority_score': optimizer.calculate_priority_score(segment_id),
        'estimated_cost': optimizer.estimate_maintenance_cost(segment_id),
        'roi': optimizer.calculate_roi(segment_id),
    }
    for segment_id in optimizer.segments
]

top_segments = pd.DataFrame(all_segments).sort_values('priority_score', ascending=False).head(10)

# to_dict('records') keeps each column's own type, unlike iterrows()
for i, row in enumerate(top_segments.to_dict('records'), 1):
    print(f"{i:2d}. {row['segment_id']} - Priority: {row['priority_score']:.2f}, "
          f"Cost: ${row['estimated_cost']:,.0f}, ROI: {row['roi']:.2f}")

print(f"\n✅ COST OPTIMIZATION COMPLETE!")
print(f"📊 Analyzed {len(optimizer.segments)} road segments")
print(f"💰 Tested {len(budgets)} budget scenarios")
print(f"🎯 Generated {len(strategies)} strategy comparisons")
