In [None]:
"""
NW Realite - Property Valuation Analysis & AI Report Generator
Comprehensive metrics calculation, AI-powered insights, and PDF export
"""

# !pip install pandas numpy matplotlib seaborn supabase python-dotenv google-generativeai reportlab

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import json
import warnings
import os
from supabase import create_client, Client
import google.generativeai as genai
from reportlab.lib.pagesizes import letter, A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, Image
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
from io import BytesIO
import base64

# Install a blanket "ignore" filter so library warnings do not clutter the
# notebook output.
warnings.filterwarnings(action='ignore')

# Reaching this line means every import above resolved.
print("‚úì All dependencies imported successfully")


In [None]:

# Supabase Configuration
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# Gemini Configuration
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# os.getenv returns None for unset variables. Report the missing names here
# (names only, never values) so a credential problem is visible in this cell
# instead of surfacing later as an opaque client error.
_missing_env_vars = [
    _name
    for _name, _value in (
        ("SUPABASE_URL", SUPABASE_URL),
        ("SUPABASE_KEY", SUPABASE_KEY),
        ("GEMINI_API_KEY", GEMINI_API_KEY),
    )
    if not _value
]
if _missing_env_vars:
    print(f" WARNING: environment variables not set: {', '.join(_missing_env_vars)}")

genai.configure(api_key=GEMINI_API_KEY)

# Report Configuration
COMPANY_NAME = "NW Realite - Property Valuation Analysis Report"
REPORT_DATE = datetime.now().strftime("%B %d, %Y")
TABLE_NAME = "valuations_2025_clean_v2"

print(f" Configuration loaded for {COMPANY_NAME}")
print(f" Report date: {REPORT_DATE}")


In [None]:
def fetch_valuation_data(page_size=1000):
    """Fetch all property valuation rows from Supabase.

    Rows are requested page by page. A single ``select("*")`` response is
    capped by the API's maximum-rows setting (commonly 1000 - confirm for this
    project), so an unpaged request can silently return only part of a larger
    table.

    Args:
        page_size: Rows requested per round trip. Must be at least 1. Keep it
            at or below the server's maximum-rows setting; a larger value makes
            the first (capped) page look like the last one.

    Returns:
        A DataFrame built from every fetched row, or None when the request
        fails (the error is printed).

    Raises:
        ValueError: If ``page_size`` is smaller than 1.
    """
    if page_size < 1:
        raise ValueError("page_size must be at least 1")

    try:
        supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

        rows = []
        offset = 0
        while True:
            response = (
                supabase.table(TABLE_NAME)
                .select("*")
                .range(offset, offset + page_size - 1)  # both bounds inclusive
                .execute()
            )
            batch = response.data or []
            rows.extend(batch)
            # A short (or empty) page means the table has been exhausted.
            if len(batch) < page_size:
                break
            offset += page_size

        df = pd.DataFrame(rows)
        print(f"‚úì Successfully fetched {len(df)} records from Supabase")
        return df
    except Exception as e:
        # Best-effort by design: report the failure and let the caller decide.
        print(f"‚úó Error fetching data: {e}")
        return None

# Fetch data
df = fetch_valuation_data()

# fetch_valuation_data returns None when the request failed.
if df is None:
    print("Failed to fetch data. Please check your credentials.")
else:
    print(f"\nDataset Shape: {df.shape}")
    print(f"Columns: {df.columns.tolist()}")


In [None]:
def prepare_data(df):
    """Return a cleaned copy of the valuation data with derived features.

    The input frame is not modified; all work happens on a copy, so re-running
    the cell always starts from the same input.

    Args:
        df: Raw valuation records. Reads the columns ``inspection_date``,
            ``valuation_date``, ``created_at``, ``market_value_amount``,
            ``plot_area_acres``, ``client_name`` and ``encumbrance_category``.

    Returns:
        A new DataFrame with the three date columns parsed and these columns
        added: ``inspection_year``, ``inspection_month``,
        ``inspection_month_name``, ``inspection_quarter``,
        ``inspection_day_of_week``, ``days_to_valuation``, ``value_per_acre``,
        ``value_category``, ``land_size_category``, ``client_type`` and
        ``has_encumbrance``.
    """
    # Copy first: assigning columns below would otherwise mutate the caller's frame.
    df = df.copy()

    # Convert dates
    for date_col in ('inspection_date', 'valuation_date', 'created_at'):
        df[date_col] = pd.to_datetime(df[date_col])

    # Extract time features
    inspected = df['inspection_date'].dt
    df['inspection_year'] = inspected.year
    df['inspection_month'] = inspected.month
    df['inspection_month_name'] = inspected.strftime('%b %Y')
    df['inspection_quarter'] = inspected.quarter
    df['inspection_day_of_week'] = inspected.day_name()

    # Calculate days between inspection and valuation
    df['days_to_valuation'] = (df['valuation_date'] - df['inspection_date']).dt.days

    # A zero plot area is treated as missing so it can neither divide nor be binned.
    acres = df['plot_area_acres'].replace(0, np.nan)

    # Value per acre (NaN where the area is zero/missing)
    df['value_per_acre'] = df['market_value_amount'] / acres

    # Property value categories. Bins are right-closed, e.g. (0, 10M] -> 'Under 10M'.
    df['value_category'] = pd.cut(
        df['market_value_amount'],
        bins=[0, 10_000_000, 50_000_000, 100_000_000, 500_000_000, float('inf')],
        labels=['Under 10M', '10M-50M', '50M-100M', '100M-500M', 'Above 500M']
    )

    # Land size categories. Bins are left-closed so the labels hold exactly:
    # [0, 1) -> '<1 Acre', [1, 5) -> '1-5 Acres', ..., [50, inf) -> '50+ Acres'.
    # (Right-closed bins would label a 1.0-acre plot '<1 Acre' and a 50-acre
    # plot '10-50 Acres'.)
    df['land_size_category'] = pd.cut(
        acres,
        bins=[0, 1, 5, 10, 50, float('inf')],
        labels=['<1 Acre', '1-5 Acres', '5-10 Acres', '10-50 Acres', '50+ Acres'],
        right=False
    )

    # Client type extraction: first matching keyword wins, so a name containing
    # both 'bank' and 'ltd' is classified as a Bank.
    def categorize_client(name):
        name_lower = str(name).lower()
        if 'bank' in name_lower:
            return 'Bank'
        if 'sacco' in name_lower:
            return 'Sacco'
        if 'limited' in name_lower or 'ltd' in name_lower:
            return 'Corporate'
        return 'Individual'

    df['client_type'] = df['client_name'].apply(categorize_client)

    # Has encumbrance (simplified): the literal string 'None' means "no
    # charges"; anything else, including missing values, is 'Unknown'.
    encumbrance = df['encumbrance_category']
    df['has_encumbrance'] = np.select(
        [(encumbrance == 'Has Charges').to_numpy(), (encumbrance == 'None').to_numpy()],
        ['Yes', 'No'],
        default='Unknown'
    )

    print("‚úì Data preparation complete")
    return df

# Prepare data
df = prepare_data(df)

# Summarise the derived columns for the reader (static text, no interpolation).
print("\nNew derived columns added:")
print("- Time features: inspection_year, inspection_month, inspection_quarter")
print("- Value metrics: value_per_acre, value_category")
print("- Categories: land_size_category, client_type, has_encumbrance")


In [None]:
# ===== INTERNAL METRICS DATA PREPARATION =====

def prepare_internal_data(df):
    """Return a copy of the prepared data with internal operational columns.

    The input frame is not modified. Classifications are computed column-wise,
    so an empty frame is handled the same way as a populated one.

    Args:
        df: Frame produced by ``prepare_data``. Reads ``days_to_valuation``,
            ``client_name``, ``sublocation``, ``location_county``,
            ``property_inspector``, ``inspection_date``, ``valuation_date``,
            ``plot_area_acres``, ``market_value_amount`` and, when present,
            ``encumbrance_category``.

    Returns:
        A new DataFrame with these columns added: ``turnaround_category``,
        ``client_frequency``, ``client_repeat_status``, ``nairobi_sublocation``,
        ``valuer_name``, ``workload_week``, ``workload_year_week``,
        ``report_age_days`` and ``property_complexity``.
    """
    # Copy first so the caller's frame is never mutated.
    df = df.copy()

    # 1. TURNAROUND TIME CATEGORIZATION
    # Conditions are tested in order: missing, <=10 days, <=30 days, otherwise delayed.
    days = df['days_to_valuation']
    df['turnaround_category'] = np.select(
        [days.isna().to_numpy(), (days <= 10).to_numpy(), (days <= 30).to_numpy()],
        ['Unknown', 'Fast Track', 'Standard'],
        default='Delayed'
    )

    # 2. CLIENT TYPE - REPEAT VS ONE-TIME
    # Count how many times each client appears
    client_counts = df.groupby('client_name').size()
    df['client_frequency'] = df['client_name'].map(client_counts)

    # Repeat (2+) or One-time (1); a missing frequency compares False -> One-time.
    df['client_repeat_status'] = np.where(
        (df['client_frequency'] > 1).to_numpy(), 'Repeat Client', 'One-time Client'
    )

    # 3. NAIROBI SUBLOCATIONS
    # Keep the sublocation only for Nairobi county rows
    df['nairobi_sublocation'] = df['sublocation'].where(
        df['location_county'] == 'Nairobi', 'Outside Nairobi'
    )

    # 4. VALUER NAME (ensure it's clean)
    df['valuer_name'] = df['property_inspector'].fillna('Unassigned')

    # 5. WORKLOAD PERIOD (for capacity planning)
    # Both columns come from the same ISO calendar so they always agree:
    # 2025-01-01 is ISO week 1 and therefore '2025-W01'. (strftime '%U' would
    # label it '2025-W00' while workload_week said 1.)
    iso_calendar = df['inspection_date'].dt.isocalendar()
    df['workload_week'] = iso_calendar['week']
    year_week = (
        iso_calendar['year'].astype(str)
        + '-W'
        + iso_calendar['week'].astype(str).str.zfill(2)
    )
    # Rows without an inspection date keep a missing label rather than a text placeholder.
    df['workload_year_week'] = year_week.where(df['inspection_date'].notna())

    # 6. REPORT AGE (days since valuation)
    df['report_age_days'] = (pd.Timestamp.now() - df['valuation_date']).dt.days

    # 7. PROPERTY COMPLEXITY INDICATOR
    # Score = size points (0-2) + value points (0-2) + 1 if the title has charges.
    acres = df['plot_area_acres']
    value = df['market_value_amount']
    size_points = np.select([(acres > 10).to_numpy(), (acres > 5).to_numpy()], [2, 1], default=0)
    value_points = np.select(
        [(value > 100_000_000).to_numpy(), (value > 50_000_000).to_numpy()], [2, 1], default=0
    )
    if 'encumbrance_category' in df.columns:
        charge_points = (df['encumbrance_category'] == 'Has Charges').to_numpy().astype(int)
    else:
        # The column is optional here; without it no row earns the charge point.
        charge_points = np.zeros(len(df), dtype=int)
    score = size_points + value_points + charge_points

    df['property_complexity'] = np.select(
        [score >= 4, score >= 2],
        ['High Complexity', 'Medium Complexity'],
        default='Low Complexity'
    )

    print("‚úì Internal metrics preparation complete")
    print("\nNew internal columns added:")
    print("- Operational: turnaround_category, valuer_name")
    print("- Client: client_repeat_status, client_frequency")
    print("- Geographic: nairobi_sublocation")
    # print(f"- Quality: data_completeness_score, data_quality_status")
    print("- Complexity: property_complexity")
    print("- Workload: workload_week, workload_year_week, report_age_days")

    return df

# Apply internal data preparation
df = prepare_internal_data(df)

# Display summary
_summary_rule = "=" * 60
print("\n" + _summary_rule)
print("INTERNAL METRICS SUMMARY")
print(_summary_rule)

print("\nTurnaround Distribution:")
print(df['turnaround_category'].value_counts())

print("\nClient Status:")
print(df['client_repeat_status'].value_counts())

# print(f"\nData Quality:")
# print(df['data_quality_status'].value_counts())

_active_valuers = df['valuer_name'].nunique()
_sublocation_count = df['nairobi_sublocation'].nunique()
print(f"\nActive Valuers: {_active_valuers}")
print(f"Nairobi Sublocations: {_sublocation_count}")

In [None]:
# ===== INTERNAL METRICS CALCULATION =====

def calculate_internal_metrics(df):
    """Calculate comprehensive internal operational metrics"""
    
    internal_metrics = {}
    
    # ===== 1. OPERATIONAL EFFICIENCY =====
    internal_metrics['operations'] = {
        'total_reports': len(df),
        'avg_turnaround_days': df['days_to_valuation'].mean(),
        'median_turnaround_days': df['days_to_valuation'].median(),
        'std_turnaround_days': df['days_to_valuation'].std(),
        'fast_track_count': len(df[df['turnaround_category'] == 'Fast Track']),
        'standard_count': len(df[df['turnaround_category'] == 'Standard']),
        'delayed_count': len(df[df['turnaround_category'] == 'Delayed']),
        'pct_fast_track': (len(df[df['turnaround_category'] == 'Fast Track']) / len(df) * 100),
        'pct_delayed': (len(df[df['turnaround_category'] == 'Delayed']) / len(df) * 100),
        'reports_at_risk': len(df[df['turnaround_category'] == 'Delayed'])
    }
    
    # ===== 2. VALUER PERFORMANCE =====
    valuer_stats = df.groupby('valuer_name').agg({
        'property_id': 'count',
        'days_to_valuation': ['mean', 'median'],
        'market_value_amount': 'sum',
    })
    valuer_stats.columns = ['reports_count', 'avg_turnaround', 'median_turnaround', 
                            'total_value_handled']
    
    # Turnaround breakdown by valuer
    valuer_turnaround = df.groupby(['valuer_name', 'turnaround_category']).size().unstack(fill_value=0)
    
    internal_metrics['valuers'] = {
        'active_valuers': df['valuer_name'].nunique(),
        'avg_reports_per_valuer': len(df) / df['valuer_name'].nunique(),
        'performance_by_valuer': valuer_stats.to_dict('index'),
        'turnaround_by_valuer': valuer_turnaround.to_dict('index'),
        'top_performer': valuer_stats['reports_count'].idxmax(),
        'fastest_valuer': valuer_stats['avg_turnaround'].idxmin(),
        'most_value_handled': valuer_stats['total_value_handled'].idxmax()
    }
    
    # ===== 3. CLIENT RETENTION =====
    repeat_clients = df[df['client_repeat_status'] == 'Repeat Client']
    onetime_clients = df[df['client_repeat_status'] == 'One-time Client']
    
    internal_metrics['client_retention'] = {
        'total_unique_clients': df['client_name'].nunique(),
        'repeat_client_reports': len(repeat_clients),
        'onetime_client_reports': len(onetime_clients),
        'repeat_rate': (len(repeat_clients) / len(df) * 100),
        'onetime_rate': (len(onetime_clients) / len(df) * 100),
        'avg_reports_per_client': df.groupby('client_name').size().mean(),
        'max_reports_per_client': df.groupby('client_name').size().max(),
        'top_repeat_client': df[df['client_repeat_status'] == 'Repeat Client'].groupby('client_name').size().idxmax()
    }
    
    # ===== 4. WORKLOAD DISTRIBUTION =====
    # By month
    monthly_workload = df.groupby('inspection_month_name').size().to_dict()
    
    # By week (recent 12 weeks)
    weekly_workload = df.groupby('workload_year_week').size().tail(12).to_dict()
    
    # By valuer and month
    valuer_monthly = df.groupby(['valuer_name', 'inspection_month_name']).size().unstack(fill_value=0)
    
    internal_metrics['workload'] = {
        'monthly_distribution': monthly_workload,
        'weekly_distribution': weekly_workload,
        'peak_month': max(monthly_workload, key=monthly_workload.get),
        'lowest_month': min(monthly_workload, key=monthly_workload.get),
        'avg_reports_per_month': df.groupby('inspection_month_name').size().mean(),
        'valuer_monthly_matrix': valuer_monthly.to_dict('index')
    }
    
    
    # ===== 6. GEOGRAPHIC COVERAGE =====
    counties_covered = df['location_county'].nunique()
    reports_per_county = df.groupby('location_county').size().to_dict()
    
    # Nairobi-specific
    nairobi_df = df[df['location_county'] == 'Nairobi']
    nairobi_sublocations = nairobi_df['nairobi_sublocation'].value_counts().to_dict() if len(nairobi_df) > 0 else {}
    
    # Valuer geographic specialization
    valuer_counties = df.groupby(['valuer_name', 'location_county']).size().unstack(fill_value=0)
    
    internal_metrics['geographic_coverage'] = {
        'counties_covered': counties_covered,
        'reports_per_county': reports_per_county,
        'nairobi_sublocations': nairobi_sublocations,
        'nairobi_report_count': len(nairobi_df),
        'valuer_county_specialization': valuer_counties.to_dict('index'),
        'most_covered_county': max(reports_per_county, key=reports_per_county.get)
    }
    
    # ===== 7. PROPERTY COMPLEXITY =====
    complexity_dist = df['property_complexity'].value_counts().to_dict()
    
    # Average turnaround by complexity
    turnaround_by_complexity = df.groupby('property_complexity')['days_to_valuation'].mean().to_dict()
    
    internal_metrics['complexity'] = {
        'distribution': complexity_dist,
        'avg_turnaround_by_complexity': turnaround_by_complexity,
        'high_complexity_count': len(df[df['property_complexity'] == 'High Complexity']),
        'pct_high_complexity': (len(df[df['property_complexity'] == 'High Complexity']) / len(df) * 100)
    }
    
    # ===== 8. CAPACITY & UTILIZATION =====
    current_month = df['inspection_date'].max().replace(day=1)
    last_month = current_month - timedelta(days=1)
    last_month = last_month.replace(day=1)
    
    current_month_reports = len(df[df['inspection_date'] >= current_month])
    last_month_reports = len(df[(df['inspection_date'] >= last_month) & 
                                 (df['inspection_date'] < current_month)])
    
    internal_metrics['capacity'] = {
        'current_month_volume': current_month_reports,
        'last_month_volume': last_month_reports,
        'mom_change': ((current_month_reports - last_month_reports) / last_month_reports * 100) if last_month_reports > 0 else 0,
        'avg_daily_completions': len(df) / df['valuation_date'].nunique(),
        'reports_per_valuer_per_month': (len(df) / df['valuer_name'].nunique()) / df['inspection_month'].nunique()
    }
    
    print("‚úì Internal metrics calculated successfully")
    return internal_metrics


# Calculate internal metrics
internal_metrics = calculate_internal_metrics(df)

# Short aliases for the sections printed below.
_ops = internal_metrics['operations']
_valuers = internal_metrics['valuers']
_retention = internal_metrics['client_retention']
_wide_rule = "=" * 80

print("\n" + _wide_rule)
print("INTERNAL METRICS SUMMARY")
print(_wide_rule)
print(f"\nOperations: {len(_ops)} metrics")
print(f"Valuers: {len(_valuers)} metrics")
print(f"Client Retention: {len(_retention)} metrics")
print(f"Workload: {len(internal_metrics['workload'])} metrics")
# print(f"Data Quality: {len(internal_metrics['data_quality'])} metrics")
print(f"Geographic Coverage: {len(internal_metrics['geographic_coverage'])} metrics")
print(f"Complexity: {len(internal_metrics['complexity'])} metrics")
print(f"Capacity: {len(internal_metrics['capacity'])} metrics")

# Display key highlights
print("\n" + _wide_rule)
print("KEY OPERATIONAL HIGHLIGHTS")
print(_wide_rule)

print("\nüéØ Turnaround Performance:")
print(f"   - Average: {_ops['avg_turnaround_days']:.1f} days")
print(f"   - Fast Track: {_ops['pct_fast_track']:.1f}%")
print(f"   - Delayed: {_ops['pct_delayed']:.1f}%")

print("\nüë• Team Performance:")
print(f"   - Active Valuers: {_valuers['active_valuers']}")
print(f"   - Avg Reports/Valuer: {_valuers['avg_reports_per_valuer']:.1f}")
print(f"   - Top Performer: {_valuers['top_performer']}")

print("\n Client Retention:")
print(f"   - Repeat Client Rate: {_retention['repeat_rate']:.1f}%")
print(f"   - Unique Clients: {_retention['total_unique_clients']}")

# print(f"\n Data Quality:")
# print(f"   - Average Completeness: {internal_metrics['data_quality']['avg_completeness_score']:.1f}%")
# print(f"   - Complete Reports: {internal_metrics['data_quality']['pct_complete']:.1f}%")

In [None]:
def calculate_all_metrics(df):
    """Calculate comprehensive metrics for the report"""
    
    metrics = {}
    
    # ===== 1. PORTFOLIO OVERVIEW =====
    metrics['portfolio'] = {
        'total_properties': len(df),
        'total_value': df['market_value_amount'].sum(),
        'average_value': df['market_value_amount'].mean(),
        'median_value': df['market_value_amount'].median(),
        'std_value': df['market_value_amount'].std(),
        'total_acres': df['plot_area_acres'].sum(),
        'total_hectares': df['plot_area_hectares'].sum(),
        'average_acres': df['plot_area_acres'].mean(),
        'cv': (df['market_value_amount'].std() / df['market_value_amount'].mean()) * 100
    }
    
    # ===== 2. TEMPORAL METRICS =====
    current_month = df['inspection_date'].max().replace(day=1)
    last_month = current_month - timedelta(days=1)
    last_month = last_month.replace(day=1)
    
    current_month_props = len(df[df['inspection_date'] >= current_month])
    last_month_props = len(df[(df['inspection_date'] >= last_month) & 
                               (df['inspection_date'] < current_month)])
    
    current_month_value = df[df['inspection_date'] >= current_month]['market_value_amount'].sum()
    last_month_value = df[(df['inspection_date'] >= last_month) & 
                          (df['inspection_date'] < current_month)]['market_value_amount'].sum()
    
    metrics['temporal'] = {
        'mom_growth_count': ((current_month_props - last_month_props) / last_month_props * 100) if last_month_props > 0 else 0,
        'mom_growth_value': ((current_month_value - last_month_value) / last_month_value * 100) if last_month_value > 0 else 0,
        'ytd_properties': len(df[df['inspection_date'].dt.year == datetime.now().year]),
        'ytd_value': df[df['inspection_date'].dt.year == datetime.now().year]['market_value_amount'].sum(),
        'avg_days_to_valuation': df['days_to_valuation'].mean(),
        'median_days_to_valuation': df['days_to_valuation'].median(),
        'current_month_props': current_month_props,
        'last_month_props': last_month_props
    }
    
    # ===== 3. CLIENT METRICS =====
    client_breakdown = df.groupby('client_type').agg({
        'property_id': 'count',
        'market_value_amount': 'sum'
    }).to_dict('index')
    
    top_clients = df.groupby('client_name').agg({
        'property_id': 'count',
        'market_value_amount': 'sum'
    }).sort_values('market_value_amount', ascending=False).head(5)
    
    total_value = df['market_value_amount'].sum()
    top5_value = top_clients['market_value_amount'].sum()
    
    metrics['clients'] = {
        'by_type': client_breakdown,
        'top_5_clients': top_clients.to_dict('index'),
        'unique_clients': df['client_name'].nunique(),
        'concentration_ratio': (top5_value / total_value * 100) if total_value > 0 else 0,
        'avg_value_per_client': df.groupby('client_name')['market_value_amount'].sum().mean()
    }
    
    # ===== 4. GEOGRAPHIC METRICS =====
    county_analysis = df.groupby('location_county').agg({
        'property_id': 'count',
        'market_value_amount': ['sum', 'mean']
    })
    county_analysis.columns = ['count', 'total_value', 'avg_value']
    county_analysis = county_analysis.sort_values('total_value', ascending=False)
    
    metrics['geographic'] = {
        'num_counties': df['location_county'].nunique(),
        'top_5_counties': county_analysis.head(5).to_dict('index'),
        'concentration': (county_analysis.head(5)['total_value'].sum() / total_value * 100) if total_value > 0 else 0,
        'highest_avg_county': county_analysis['avg_value'].idxmax(),
        'most_active_county': county_analysis['count'].idxmax()
    }
    
    # ===== 5. LAND USE METRICS =====
    land_use_analysis = df.groupby('land_use').agg({
        'property_id': 'count',
        'market_value_amount': ['sum', 'mean']
    })
    land_use_analysis.columns = ['count', 'total_value', 'avg_value']
    land_use_analysis['pct_value'] = (land_use_analysis['total_value'] / total_value * 100)
    
    metrics['land_use'] = {
        'breakdown': land_use_analysis.to_dict('index'),
        'dominant': land_use_analysis['total_value'].idxmax(),
        'highest_avg': land_use_analysis['avg_value'].idxmax()
    }
    
    # ===== 6. VALUE DISTRIBUTION =====
    value_cat_dist = df['value_category'].value_counts().to_dict()
    metrics['value_distribution'] = {
        'by_category': value_cat_dist,
        'top_10_pct_value': df.nlargest(int(len(df) * 0.1), 'market_value_amount')['market_value_amount'].sum() / total_value * 100
    }
    
    # ===== 7. RISK & ENCUMBRANCE =====
    encumbrance_analysis = df.groupby('encumbrance_category').agg({
        'property_id': 'count',
        'market_value_amount': ['sum', 'mean']
    })
    encumbrance_analysis.columns = ['count', 'total_value', 'avg_value']
    
    metrics['risk'] = {
        'encumbrance_breakdown': encumbrance_analysis.to_dict('index'),
        'pct_encumbered': (len(df[df['encumbrance_category'] == 'Has Charges']) / len(df) * 100),
        'value_at_risk': df[df['encumbrance_category'] == 'Has Charges']['market_value_amount'].sum(),
        'pct_value_encumbered': (df[df['encumbrance_category'] == 'Has Charges']['market_value_amount'].sum() / total_value * 100)
    }
    
    # ===== 8. TENURE METRICS =====
    tenure_analysis = df.groupby('tenure_type').agg({
        'property_id': 'count',
        'market_value_amount': ['sum', 'mean']
    })
    tenure_analysis.columns = ['count', 'total_value', 'avg_value']
    
    metrics['tenure'] = tenure_analysis.to_dict('index')
    
    # ===== 9. VALUE PER ACRE =====
    vpa_df = df[df['value_per_acre'].notna()]
    
    vpa_by_county = vpa_df.groupby('location_county')['value_per_acre'].mean().sort_values(ascending=False).head(5)
    vpa_by_landuse = vpa_df.groupby('land_use')['value_per_acre'].mean().sort_values(ascending=False)
    
    metrics['value_per_acre'] = {
        'overall_avg': vpa_df['value_per_acre'].mean(),
        'overall_median': vpa_df['value_per_acre'].median(),
        'by_county_top5': vpa_by_county.to_dict(),
        'by_land_use': vpa_by_landuse.to_dict()
    }
    
    # ===== 10. PROPERTY CHARACTERISTICS =====
    metrics['characteristics'] = {
        'soil_type': df['soil_type'].value_counts().to_dict(),
        'plot_shape': df['plot_shape'].value_counts().to_dict(),
        'gradient': df['gradient'].value_counts().to_dict(),
        'most_common_soil': df['soil_type'].mode()[0] if not df['soil_type'].mode().empty else 'N/A',
        'most_common_shape': df['plot_shape'].mode()[0] if not df['plot_shape'].mode().empty else 'N/A'
    }
    
    # ===== 11. PERFORMANCE METRICS =====
    metrics['performance'] = {
        'num_valuers': df['valuer_name'].nunique(),
        'avg_properties_per_valuer': len(df) / df['valuer_name'].nunique(),
        'most_active_valuer': df['valuer_name'].value_counts().index[0],
        'monthly_avg_inspections': len(df) / df['inspection_month'].nunique()
    }
    
    print("‚úì All metrics calculated successfully")
    return metrics

# Calculate metrics
metrics = calculate_all_metrics(df)

# One line per section: display label -> key in the metrics dict.
print("\n METRICS SUMMARY:")
for _label, _section in (
    ("Portfolio", 'portfolio'),
    ("Temporal", 'temporal'),
    ("Clients", 'clients'),
    ("Geographic", 'geographic'),
    ("Land Use", 'land_use'),
    ("Risk", 'risk'),
):
    print(f"{_label}: {len(metrics[_section])} metrics")


In [None]:
def create_summary_charts(df, metrics):
    """Render the six-panel valuation dashboard and return it as PNG bytes.

    Panels, row by row: top 10 counties by total value, land-use share of
    value, total value by client type, monthly inspection counts,
    encumbrance counts, and property value category counts.

    Args:
        df: Prepared valuation frame (reads ``location_county``, ``land_use``,
            ``client_type``, ``inspection_date``, ``encumbrance_category``,
            ``value_category`` and ``market_value_amount``).
        metrics: Accepted for interface symmetry; not read by this function.

    Returns:
        A BytesIO holding the PNG image, rewound to position 0.
    """
    # Global plotting defaults
    plt.style.use('seaborn-v0_8-whitegrid')
    sns.set_palette("Set2")

    # 2 x 3 grid, unpacked into one named axis per panel
    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
    fig.suptitle('NW Realite - Property Valuation Dashboard', fontsize=16, fontweight='bold', y=0.995)
    ax_county, ax_land_use, ax_client = axes[0, 0], axes[0, 1], axes[0, 2]
    ax_trend, ax_encumbrance, ax_value_cat = axes[1, 0], axes[1, 1], axes[1, 2]

    def total_value_by(column):
        """Total market value per group, largest first."""
        return df.groupby(column)['market_value_amount'].sum().sort_values(ascending=False)

    # 1. Top 10 Counties by Value (horizontal bars, largest at the top)
    county_value = total_value_by('location_county').head(10)
    county_slots = range(len(county_value))
    ax_county.barh(county_slots, county_value.values / 1_000_000, color='steelblue')
    ax_county.set_yticks(county_slots)
    ax_county.set_yticklabels(county_value.index, fontsize=9)
    ax_county.set_xlabel('Total Value (KShs Millions)', fontsize=9)
    ax_county.set_title('Top 10 Counties by Total Value', fontsize=10, fontweight='bold')
    ax_county.invert_yaxis()

    # 2. Land Use Distribution
    land_use_value = total_value_by('land_use')
    # Named pie_palette so it does not shadow the imported reportlab `colors` module.
    pie_palette = sns.color_palette('pastel')[:len(land_use_value)]
    ax_land_use.pie(land_use_value, labels=land_use_value.index, autopct='%1.1f%%',
                    colors=pie_palette, startangle=90)
    ax_land_use.set_title('Portfolio by Land Use', fontsize=10, fontweight='bold')

    # 3. Client Type Distribution
    client_type_value = total_value_by('client_type')
    client_slots = range(len(client_type_value))
    ax_client.bar(client_slots, client_type_value.values / 1_000_000,
                  color='coral', edgecolor='black')
    ax_client.set_xticks(client_slots)
    ax_client.set_xticklabels(client_type_value.index, fontsize=9)
    ax_client.set_ylabel('Total Value (KShs Millions)', fontsize=9)
    ax_client.set_title('Total Value by Client Type', fontsize=10, fontweight='bold')
    ax_client.grid(axis='y', alpha=0.3)

    # 4. Monthly Inspection Trend (period index converted to timestamps for plotting)
    monthly_trend = df.groupby(df['inspection_date'].dt.to_period('M')).size()
    monthly_trend.index = monthly_trend.index.to_timestamp()
    ax_trend.plot(monthly_trend.index, monthly_trend.values, marker='o',
                  linewidth=2, markersize=6, color='green')
    ax_trend.set_xlabel('Month', fontsize=9)
    ax_trend.set_ylabel('Properties Inspected', fontsize=9)
    ax_trend.set_title('Monthly Inspection Trend', fontsize=10, fontweight='bold')
    ax_trend.tick_params(axis='x', rotation=45)
    ax_trend.grid(alpha=0.3)

    # 5. Encumbrance Status (fixed colour per known status, blue for anything else)
    encumbrance_counts = df['encumbrance_category'].value_counts()
    status_colours = {'None': 'green', 'Has Charges': 'orange', 'Unascertained': 'gray'}
    bar_colours = [status_colours.get(status, 'blue') for status in encumbrance_counts.index]
    encumbrance_slots = range(len(encumbrance_counts))
    ax_encumbrance.bar(encumbrance_slots, encumbrance_counts.values,
                       color=bar_colours, edgecolor='black')
    ax_encumbrance.set_xticks(encumbrance_slots)
    ax_encumbrance.set_xticklabels(encumbrance_counts.index, fontsize=9)
    ax_encumbrance.set_ylabel('Count', fontsize=9)
    ax_encumbrance.set_title('Encumbrance Status', fontsize=10, fontweight='bold')
    ax_encumbrance.grid(axis='y', alpha=0.3)

    # 6. Value Distribution (sorted by category index)
    value_cat_counts = df['value_category'].value_counts().sort_index()
    value_slots = range(len(value_cat_counts))
    ax_value_cat.bar(value_slots, value_cat_counts.values,
                     color='mediumpurple', edgecolor='black')
    ax_value_cat.set_xticks(value_slots)
    ax_value_cat.set_xticklabels(value_cat_counts.index, fontsize=8, rotation=45, ha='right')
    ax_value_cat.set_ylabel('Count', fontsize=9)
    ax_value_cat.set_title('Property Value Categories', fontsize=10, fontweight='bold')
    ax_value_cat.grid(axis='y', alpha=0.3)

    fig.tight_layout()

    # Save to an in-memory buffer, rewind it for the reader, then close the figure.
    img_buffer = BytesIO()
    fig.savefig(img_buffer, format='png', dpi=150, bbox_inches='tight')
    img_buffer.seek(0)
    plt.close(fig)

    print("‚úì Summary visualizations created")
    return img_buffer

# Create charts: PNG bytes of the dashboard, held in memory.
chart_buffer = create_summary_charts(df=df, metrics=metrics)


In [None]:
# ===== INTERNAL AI REPORT GENERATOR =====
# Report only whether the key is present. Notebook outputs are saved with the
# file, so printing the key itself would leak the secret to anyone who sees it.
print(f"API KEY: {'configured' if GEMINI_API_KEY else 'MISSING'}")

def generate_internal_ai_report(internal_metrics, df):
    """Ask Gemini to write the internal operations report as plain text.

    Args:
        internal_metrics: Nested dict of internal metrics. The sections read
            here are 'operations', 'valuers', 'client_retention', 'workload',
            'geographic_coverage', 'complexity' and 'capacity'.
        df: Valuation DataFrame; not read by this function.

    Returns:
        The model's report text with every '*' and '#' removed, or None when
        the request (or reading its response) raised an exception.
    """
    # Client setup happens first, before any metric is read.
    genai.configure(api_key=GEMINI_API_KEY)
    gemini = genai.GenerativeModel("gemini-2.5-flash")

    # Section aliases keep the data-summary template below short.
    ops = internal_metrics['operations']
    valuers = internal_metrics['valuers']
    retention = internal_metrics['client_retention']
    workload = internal_metrics['workload']
    coverage = internal_metrics['geographic_coverage']
    complexity = internal_metrics['complexity']
    capacity = internal_metrics['capacity']

    # Three valuers with the most reports, highest first (stable sort keeps
    # the original order among ties).
    reports_by_valuer = [
        (name, stats['reports_count'])
        for name, stats in valuers['performance_by_valuer'].items()
    ]
    reports_by_valuer.sort(key=lambda pair: pair[1], reverse=True)
    top_three_json = json.dumps(dict(reports_by_valuer[:3]), indent=2)

    # Only the first six entries of the monthly distribution are sent.
    monthly_sample_json = json.dumps(
        dict(list(workload['monthly_distribution'].items())[:6]), indent=2
    )
    turnaround_by_complexity_json = json.dumps(
        complexity['avg_turnaround_by_complexity'], indent=2
    )

    # Data summary that is embedded verbatim into the prompt.
    summary_block = f"""
# NW REALITE INTERNAL OPERATIONS DATA SUMMARY - 2025

## Operational Efficiency
- Total Reports Completed: {ops['total_reports']}
- Average Turnaround Time: {ops['avg_turnaround_days']:.1f} days
- Median Turnaround Time: {ops['median_turnaround_days']:.1f} days
- Fast Track Reports: {ops['fast_track_count']} ({ops['pct_fast_track']:.1f}%)
- Standard Reports: {ops['standard_count']}
- Delayed Reports: {ops['delayed_count']} ({ops['pct_delayed']:.1f}%)
- Reports Currently At Risk: {ops['reports_at_risk']}

## Team Performance
- Active Valuers: {valuers['active_valuers']}
- Average Reports per Valuer: {valuers['avg_reports_per_valuer']:.1f}
- Top Performer (Volume): {valuers['top_performer']}
- Fastest Valuer (Turnaround): {valuers['fastest_valuer']}
- Highest Value Handler: {valuers['most_value_handled']}

Top 3 Valuers by Reports:
{top_three_json}

## Client Retention & Engagement
- Total Unique Clients: {retention['total_unique_clients']}
- Repeat Client Reports: {retention['repeat_client_reports']} ({retention['repeat_rate']:.1f}%)
- One-time Client Reports: {retention['onetime_client_reports']} ({retention['onetime_rate']:.1f}%)
- Average Reports per Client: {retention['avg_reports_per_client']:.1f}
- Maximum Reports per Client: {retention['max_reports_per_client']}

## Workload Distribution
- Peak Month: {workload['peak_month']}
- Lowest Month: {workload['lowest_month']}
- Average Reports per Month: {workload['avg_reports_per_month']:.1f}

Monthly Distribution (Sample):
{monthly_sample_json}


## Geographic Coverage
- Counties Covered: {coverage['counties_covered']}
- Most Covered County: {coverage['most_covered_county']}
- Nairobi Reports: {coverage['nairobi_report_count']}
- Nairobi Sublocations Active: {len(coverage['nairobi_sublocations'])}

## Property Complexity Analysis
- High Complexity Properties: {complexity['high_complexity_count']} ({complexity['pct_high_complexity']:.1f}%)
- Average Turnaround by Complexity:
{turnaround_by_complexity_json}

## Capacity & Growth
- Current Month Volume: {capacity['current_month_volume']}
- Last Month Volume: {capacity['last_month_volume']}
- Month-over-Month Change: {capacity['mom_change']:.1f}%
- Average Daily Completions: {capacity['avg_daily_completions']:.1f}
- Reports per Valuer per Month: {capacity['reports_per_valuer_per_month']:.1f}
"""

    # Instructions, the data summary, and the required report skeleton.
    prompt = f"""
You are Samuel Wanyua, Data Analytics Engineer & Business Intelligence at NW Realite. Create an INTERNAL OPERATIONS REPORT for management 
focused on team performance, operational efficiency, and process improvements.

IMPORTANT FORMATTING:
- NO markdown formatting (no *, **, #)
- USE LAYMAN'S LANGUAGE PLEASE! USE LAYMAN'S LANGUAGE PLEASE!
- Use numbered sections with clear headers
- Simple dashes (-) for bullet points
- Plain text paragraphs
- Consistent font size throughout
- Date: 8th December 2025
- Prepared by Samuel Wanyua, Data Analytics Engineer & Business Intelligence, NW Realite

{summary_block}

STRUCTURE:

INTERNAL OPERATIONS REPORT
Date: 8th December 2025
Prepared by Samuel Wanyua, Data Analytics Engineer & Business Intelligence, NW Realite

EXECUTIVE SUMMARY
Write 2-3 paragraphs covering overall operational performance, key achievements, and critical areas needing attention.

1. OPERATIONAL EFFICIENCY ANALYSIS
- Turnaround time performance and trends
- Process bottlenecks and delays
- Comparison against industry standards (assume 7-14 days is standard)
Write in 2-3 paragraphs.

2. TEAM PERFORMANCE REVIEW
- Individual valuer productivity analysis
- Workload distribution and balance
- Performance consistency across the team
- Top performers and those needing support
Write in 2-3 paragraphs.

3. CLIENT RETENTION & RELATIONSHIP MANAGEMENT
- Repeat business analysis
- Client loyalty indicators
- Revenue concentration risks
- Opportunities for client development
Write in 2 paragraphs.

4. CAPACITY PLANNING & RESOURCE ALLOCATION
- Current capacity utilization
- Growth trends and projections
- Geographic coverage gaps
- Staffing recommendations
Write in 2 paragraphs.

5. STRATEGIC RECOMMENDATIONS FOR MANAGEMENT
Provide 7-10 specific, actionable recommendations numbered as:
1. [Recommendation]
2. [Recommendation]
etc.

Focus on: process improvements, training needs, resource allocation, quality control, client retention strategies, and efficiency gains.

6. PRIORITY ACTION ITEMS
List 5 immediate actions that need attention in the next 30 days, numbered as:
1. [Action item]
2. [Action item]
etc.

CRITICAL: Use plain text only. No markdown. No asterisks. No special formatting. Consistent font throughout.
Write professionally and data-driven, focused on helping management make operational decisions.
"""

    try:
        report_text = gemini.generate_content(prompt).text

        # The model may still emit markdown; strip the marker characters.
        for marker in ('**', '*', '##', '#'):
            report_text = report_text.replace(marker, '')

        print("‚úì Internal AI report generated successfully")
        return report_text

    except Exception as e:
        # Best effort: report the failure and let the caller handle None.
        print(f"‚úó Error generating internal AI report: {e}")
        return None


# Generate internal AI report
internal_ai_report = generate_internal_ai_report(internal_metrics, df)

if not internal_ai_report:
    # None (generation failed) or empty text: nothing to preview or save.
    print("Failed to generate internal report. Check your Gemini API key.")
else:
    # Short preview so the cell output stays readable.
    print(
        "\n" + "=" * 80,
        "INTERNAL AI REPORT PREVIEW (First 500 characters):",
        "=" * 80,
        sep="\n",
    )
    print(f"{internal_ai_report[:500]}...")

    # Keep the full text on disk next to the notebook.
    with open('internal_operations_report_2025.txt', 'w', encoding='utf-8') as f:
        f.write(internal_ai_report)
    print("\n‚úì Internal report saved to: internal_operations_report_2025.txt")

In [None]:
def generate_ai_report(metrics, df):
    """Use Gemini 2.5 Flash to generate the stakeholder (external) report.

    Args:
        metrics: Nested metrics dict. The sections read here are 'portfolio',
            'temporal', 'clients', 'geographic', 'land_use', 'risk',
            'value_per_acre' and 'performance'.
        df: Valuation DataFrame; not read by this function.

    Returns:
        The model's report text with every '*' and '#' removed, or None when
        the request (or reading its response) raised an exception.
    """
    # Initialize Gemini model
    genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
    model = genai.GenerativeModel("gemini-2.5-flash")

    # Single source for the report date. The prompt previously asked for
    # "2nd December 2025" in the formatting rules and "8th Dec 2025" in the
    # structure section; the 8th matches the internal operations report.
    report_date = "8th December 2025"

    # Prepare metrics summary for AI
    metrics_summary = f"""
# PROPERTY VALUATION DATA SUMMARY

## Portfolio Overview
- Total Properties: {metrics['portfolio']['total_properties']}
- Total Market Value: KShs {metrics['portfolio']['total_value']:,.0f}
- Average Property Value: KShs {metrics['portfolio']['average_value']:,.0f}
- Median Property Value: KShs {metrics['portfolio']['median_value']:,.0f}
- Total Land Area: {metrics['portfolio']['total_acres']:.2f} acres
- Coefficient of Variation: {metrics['portfolio']['cv']:.1f}%

## Growth Metrics
- MoM Growth (Properties): {metrics['temporal']['mom_growth_count']:.1f}%
- MoM Growth (Value): {metrics['temporal']['mom_growth_value']:.1f}%
- YTD Properties: {metrics['temporal']['ytd_properties']}
- YTD Value: KShs {metrics['temporal']['ytd_value']:,.0f}
- Avg Days to Valuation: {metrics['temporal']['avg_days_to_valuation']:.1f} days

## Client Segmentation
{json.dumps(metrics['clients']['by_type'], indent=2)}
- Unique Clients: {metrics['clients']['unique_clients']}
- Top 5 Client Concentration: {metrics['clients']['concentration_ratio']:.1f}%

## Geographic Distribution
- Counties Covered: {metrics['geographic']['num_counties']}
- Geographic Concentration (Top 5): {metrics['geographic']['concentration']:.1f}%
- Most Active County: {metrics['geographic']['most_active_county']}

## Land Use Breakdown
{json.dumps(metrics['land_use']['breakdown'], indent=2)}

## Risk Metrics
- Properties with Encumbrances: {metrics['risk']['pct_encumbered']:.1f}%
- Value at Risk: KShs {metrics['risk']['value_at_risk']:,.0f}
- % Portfolio Value Encumbered: {metrics['risk']['pct_value_encumbered']:.1f}%

## Value per Acre
- Overall Average: KShs {metrics['value_per_acre']['overall_avg']:,.0f}/acre
- Overall Median: KShs {metrics['value_per_acre']['overall_median']:,.0f}/acre

## Performance
- Number of Valuers: {metrics['performance']['num_valuers']}
- Most Active Valuer: {metrics['performance']['most_active_valuer']}
- Monthly Avg Inspections: {metrics['performance']['monthly_avg_inspections']:.1f}
"""

    prompt = f"""
You are Samuel Wanyua, a Data Analytics Engineer & Business Intelligence at NW Realite. Based on the comprehensive data summary below, 
create a detailed 2-page executive report for stakeholders. 

IMPORTANT FORMATTING INSTRUCTIONS:
- Do NOT use markdown asterisks (*) for emphasis
- USE LAYMAN'S LANGUAGE PLEASE! USE LAYMAN'S LANGUAGE PLEASE!
- Use clear section headers with numbers
- Write in plain text paragraphs
- For bullet points, use simple dashes (-)
- Keep the tone professional and data-driven
- Date: {report_date}
- Reference that the report is prepared by Samuel Wanyua, Data Analytics Engineer & Business Intelligence

{metrics_summary}

The report should be structured as follows:
Title
date - {report_date}
then  Prepared by Samuel Wanyua, Data Analytics Engineer & Business Intelligence, NW Realite

EXECUTIVE SUMMARY

1. Portfolio Overview
Provide a high-level summary in 2-3 paragraphs covering portfolio size, total value, and key characteristics.

2. Key Performance Indicators
List the most critical metrics in bullet points (use simple dashes).

3. Growth & Trends Analysis
Analyze growth trends, seasonal patterns, and inspection efficiency in 1-2 paragraphs.

4. Client & Geographic Insights
Discuss client concentration, diversification, and geographic distribution in 1-2 paragraphs.

DETAILED ANALYSIS & RECOMMENDATIONS

5. Risk Assessment
Analyze encumbrance levels, value at risk, and portfolio risk profile in 2 paragraphs.

6. Land Use & Value Analysis
Discuss land use distribution, value per acre trends, and property characteristics in 1-2 paragraphs.

7. Strategic Recommendations
Provide 5-7 actionable recommendations as numbered points. (use consistent font size as with others)

8. Key Takeaways
Summarize 3-5 most important insights as numbered points. (use consistent font size as with others)

CRITICAL: Do not use any markdown formatting (no *, **, #, etc.). Write in clean, professional prose with clear section numbers and simple bullet points using dashes with consistent font size throughout the document.
"""

    try:
        response = model.generate_content(prompt)
        report_text = response.text

        # Clean up any remaining markdown artifacts. Removing '*' and '#'
        # also covers the doubled forms '**' and '##'.
        for marker in ('*', '#'):
            report_text = report_text.replace(marker, '')

        print("‚úì AI report generated successfully")
        return report_text

    except Exception as e:
        # Best effort: report the failure and let the caller handle None.
        print(f"‚úó Error generating AI report: {e}")
        return None


# Generate AI report
ai_report = generate_ai_report(metrics, df)

if not ai_report:
    # None (generation failed) or empty text: nothing to preview.
    print("Failed to generate AI report. Check your Gemini API key.")
else:
    # Short preview so the cell output stays readable.
    print(
        "\n" + "=" * 80,
        "AI-GENERATED REPORT PREVIEW (First 500 characters):",
        "=" * 80,
        sep="\n",
    )
    print(f"{ai_report[:500]}...")

In [None]:
def create_pdf_report(metrics, ai_report, chart_buffer, filename="NW_Realite_Valuation_Report.pdf"):
    """Generate the stakeholder PDF: AI narrative first, then a key-metrics table.

    Args:
        metrics: Nested metrics dict; the sections read here are 'portfolio',
            'risk' and 'temporal'.
        ai_report: Plain-text narrative. Each non-blank line becomes either a
            sub-heading or a body paragraph.
        chart_buffer: In-memory chart image. Currently unused because the
            chart section near the end of this function is commented out.
        filename: Path of the PDF to write.

    Raises:
        ValueError: If ai_report is None (for example when report generation
            failed upstream).
    """
    # Local import keeps this function self-contained; escape() is stdlib.
    from xml.sax.saxutils import escape

    if ai_report is None:
        raise ValueError("AI report text is missing; generate the report before building the PDF.")

    doc = SimpleDocTemplate(
        filename,
        pagesize=A4,
        topMargin=0.75*inch,
        bottomMargin=0.75*inch,
        leftMargin=0.75*inch,
        rightMargin=0.75*inch,
    )

    story = []
    styles = getSampleStyleSheet()

    # ===== CUSTOM STYLES =====
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Title'],
        fontSize=24,
        textColor=colors.HexColor("#000000"),
        spaceAfter=12,
        alignment=TA_CENTER,
        fontName='Helvetica-Bold'
    )

    heading_style = ParagraphStyle(
        'CustomHeading',
        parent=styles['Heading1'],
        fontSize=14,
        textColor=colors.HexColor("#000000"),
        spaceAfter=10,
        spaceBefore=15,
        fontName='Helvetica-Bold'
    )

    subheading_style = ParagraphStyle(
        'CustomSubHeading',
        parent=styles['Heading2'],
        fontSize=11,
        textColor=colors.HexColor("#000000"),
        spaceAfter=6,
        spaceBefore=8,
        fontName='Helvetica-Bold'
    )

    body_style = ParagraphStyle(
        'CustomBody',
        parent=styles['BodyText'],
        fontSize=10,
        leading=14,
        alignment=TA_LEFT,
        spaceAfter=8
    )

    # ===== TITLE PAGE =====
    story.append(Paragraph(COMPANY_NAME, title_style))
    story.append(Spacer(1, 0.2*inch))
    story.append(Spacer(1, 0.4*inch))

    # ===== AI NARRATIVE =====
    for raw_line in ai_report.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        # Paragraph parses its text as XML-like markup, so characters such as
        # '&' or '<' in model output must be escaped to render literally.
        safe_line = escape(line)

        # Section headers: lines that start with "N." or are entirely upper case.
        is_header = (line[0].isdigit() and '.' in line[:3]) or line.isupper()
        if is_header:
            story.append(Spacer(1, 0.15*inch))
            story.append(Paragraph(safe_line, subheading_style))
        else:
            story.append(Paragraph(safe_line, body_style))

    story.append(Spacer(1, 0.5*inch))

    # ===== PAGE BREAK BEFORE METRICS =====
    story.append(PageBreak())

    # ===== KEY METRICS TABLE (AFTER AI REPORT) =====
    story.append(Paragraph("Executive Summary - Key Metrics", heading_style))
    story.append(Spacer(1, 0.1*inch))

    key_metrics_data = [
        ['Metric', 'Value'],
        ['Total Properties', f"{metrics['portfolio']['total_properties']:,}"],
        ['Total Portfolio Value', f"KShs {metrics['portfolio']['total_value']:,.0f}"],
        ['Average Property Value', f"KShs {metrics['portfolio']['average_value']:,.0f}"],
        ['Total Land Area', f"{metrics['portfolio']['total_acres']:.2f} acres"],
        ['Properties with Encumbrances', f"{metrics['risk']['pct_encumbered']:.1f}%"],
        ['MoM Growth (Properties)', f"{metrics['temporal']['mom_growth_count']:.1f}%"],
        ['Avg Days to Valuation', f"{metrics['temporal']['avg_days_to_valuation']:.1f} days"],
    ]

    key_metrics_table = Table(key_metrics_data, colWidths=[3.5*inch, 2.5*inch])
    key_metrics_table.setStyle(TableStyle([
        # Header row styling
        ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#000000")),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, 0), 11),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
        ('TOPPADDING', (0, 0), (-1, 0), 12),

        # Data rows styling - WHITE BACKGROUND
        ('BACKGROUND', (0, 1), (-1, -1), colors.white),
        ('TEXTCOLOR', (0, 1), (-1, -1), colors.black),
        ('FONTSIZE', (0, 1), (-1, -1), 10),
        ('TOPPADDING', (0, 1), (-1, -1), 8),
        ('BOTTOMPADDING', (0, 1), (-1, -1), 8),

        # Grid and borders
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('LINEBELOW', (0, 0), (-1, 0), 2, colors.black),
    ]))

    story.append(key_metrics_table)
    story.append(Spacer(1, 0.3*inch))

    # ===== CHART IMAGE (disabled) =====
    # story.append(Paragraph("Portfolio Trend Chart", heading_style))
    # story.append(Spacer(1, 0.1*inch))
    # chart_buffer.seek(0)
    # story.append(Image(chart_buffer, width=6*inch, height=3.5*inch))

    # ===== BUILD PDF =====
    doc.build(story)
    print(f"‚úì PDF report created successfully: {filename}")


# Create the PDF report
# Writes nw_realite_report.pdf from the AI narrative and the metrics computed above.
create_pdf_report(metrics, ai_report, chart_buffer, filename="nw_realite_report.pdf")

In [None]:
def _internal_report_table_style(header_font_size=11, body_font_size=10):
    """Return the black-header / white-body grid style shared by the report tables."""
    return TableStyle([
        # Header row styling
        ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#000000")),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, 0), header_font_size),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
        ('TOPPADDING', (0, 0), (-1, 0), 12),

        # Data rows styling - WHITE BACKGROUND
        ('BACKGROUND', (0, 1), (-1, -1), colors.white),
        ('TEXTCOLOR', (0, 1), (-1, -1), colors.black),
        ('FONTSIZE', (0, 1), (-1, -1), body_font_size),
        ('TOPPADDING', (0, 1), (-1, -1), 8),
        ('BOTTOMPADDING', (0, 1), (-1, -1), 8),

        # Grid and borders
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('LINEBELOW', (0, 0), (-1, 0), 2, colors.black),
    ])


def create_internal_pdf_report(internal_metrics, internal_ai_report, filename="NW_Realite_Internal_Operations_Report.pdf"):
    """Generate the confidential internal operations PDF report.

    Layout: title block with confidentiality notice, the AI narrative, then
    four tables (operations, client retention, data quality, top valuers).

    Args:
        internal_metrics: Nested metrics dict; the sections read here are
            'operations', 'valuers', 'client_retention' and 'data_quality'.
        internal_ai_report: Plain-text narrative. Each non-blank line becomes
            either a sub-heading or a body paragraph.
        filename: Path of the PDF to write.

    Raises:
        ValueError: If internal_ai_report is None (for example when report
            generation failed upstream).
    """
    # Local import keeps this function self-contained; escape() is stdlib.
    from xml.sax.saxutils import escape

    if internal_ai_report is None:
        raise ValueError("Internal AI report text is missing; generate the report before building the PDF.")

    doc = SimpleDocTemplate(
        filename,
        pagesize=A4,
        topMargin=0.75*inch,
        bottomMargin=0.75*inch,
        leftMargin=0.75*inch,
        rightMargin=0.75*inch,
    )

    story = []
    styles = getSampleStyleSheet()

    # ===== CUSTOM STYLES =====
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Title'],
        fontSize=24,
        textColor=colors.HexColor("#000000"),
        spaceAfter=12,
        alignment=TA_CENTER,
        fontName='Helvetica-Bold'
    )

    subtitle_style = ParagraphStyle(
        'CustomSubtitle',
        parent=styles['Normal'],
        fontSize=12,
        textColor=colors.HexColor("#444444"),
        spaceAfter=20,
        alignment=TA_CENTER,
        fontName='Helvetica'
    )

    heading_style = ParagraphStyle(
        'CustomHeading',
        parent=styles['Heading1'],
        fontSize=14,
        textColor=colors.HexColor("#000000"),
        spaceAfter=10,
        spaceBefore=15,
        fontName='Helvetica-Bold'
    )

    subheading_style = ParagraphStyle(
        'CustomSubHeading',
        parent=styles['Heading2'],
        fontSize=11,
        textColor=colors.HexColor("#000000"),
        spaceAfter=6,
        spaceBefore=8,
        fontName='Helvetica-Bold'
    )

    body_style = ParagraphStyle(
        'CustomBody',
        parent=styles['BodyText'],
        fontSize=10,
        leading=14,
        alignment=TA_LEFT,
        spaceAfter=8
    )

    confidential_style = ParagraphStyle(
        'Confidential',
        parent=styles['Normal'],
        fontSize=9,
        textColor=colors.red,
        alignment=TA_CENTER,
        fontName='Helvetica-Bold'
    )

    # ===== TITLE PAGE =====
    story.append(Paragraph(COMPANY_NAME, title_style))
    story.append(Paragraph("Internal Operations Report 2025", subtitle_style))
    story.append(Spacer(1, 0.2*inch))
    story.append(Paragraph("CONFIDENTIAL - FOR INTERNAL USE ONLY", confidential_style))
    story.append(Spacer(1, 0.4*inch))

    # ===== AI NARRATIVE =====
    for raw_line in internal_ai_report.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        # Paragraph parses its text as XML-like markup, so characters such as
        # '&' or '<' in model output must be escaped to render literally.
        safe_line = escape(line)

        # Section headers: lines that start with "N." or are entirely upper case.
        is_header = (line[0].isdigit() and '.' in line[:3]) or line.isupper()
        if is_header:
            story.append(Spacer(1, 0.15*inch))
            story.append(Paragraph(safe_line, subheading_style))
        else:
            story.append(Paragraph(safe_line, body_style))

    story.append(Spacer(1, 0.5*inch))

    # ===== PAGE BREAK BEFORE METRICS =====
    story.append(PageBreak())

    two_column_widths = [3.5*inch, 2.5*inch]
    operations = internal_metrics['operations']
    valuers = internal_metrics['valuers']
    retention = internal_metrics['client_retention']

    # ===== OPERATIONAL METRICS TABLE =====
    story.append(Paragraph("Operational Performance Summary", heading_style))
    story.append(Spacer(1, 0.1*inch))

    operational_data = [
        ['Metric', 'Value'],
        ['Total Reports Completed', f"{operations['total_reports']:,}"],
        ['Average Turnaround Time', f"{operations['avg_turnaround_days']:.1f} days"],
        ['Fast Track Reports', f"{operations['pct_fast_track']:.1f}%"],
        ['Delayed Reports', f"{operations['delayed_count']} ({operations['pct_delayed']:.1f}%)"],
        ['Reports At Risk', f"{operations['reports_at_risk']}"],
        ['Active Valuers', f"{valuers['active_valuers']}"],
        ['Avg Reports per Valuer', f"{valuers['avg_reports_per_valuer']:.1f}"],
    ]

    operational_table = Table(operational_data, colWidths=two_column_widths)
    operational_table.setStyle(_internal_report_table_style())
    story.append(operational_table)
    story.append(Spacer(1, 0.3*inch))

    # ===== CLIENT RETENTION METRICS TABLE =====
    story.append(Paragraph("Client Retention & Engagement", heading_style))
    story.append(Spacer(1, 0.1*inch))

    client_data = [
        ['Metric', 'Value'],
        ['Total Unique Clients', f"{retention['total_unique_clients']:,}"],
        ['Repeat Client Rate', f"{retention['repeat_rate']:.1f}%"],
        ['One-time Client Rate', f"{retention['onetime_rate']:.1f}%"],
        ['Avg Reports per Client', f"{retention['avg_reports_per_client']:.1f}"],
        ['Max Reports per Client', f"{retention['max_reports_per_client']}"],
    ]

    client_table = Table(client_data, colWidths=two_column_widths)
    client_table.setStyle(_internal_report_table_style())
    story.append(client_table)
    story.append(Spacer(1, 0.3*inch))

    # ===== DATA QUALITY METRICS TABLE =====
    # NOTE(review): the internal-metrics summary cell in this notebook lists no
    # 'data_quality' section - confirm calculate_internal_metrics provides it
    # (and per-valuer 'avg_completeness' below); otherwise this raises KeyError.
    story.append(Paragraph("Data Quality & Compliance", heading_style))
    story.append(Spacer(1, 0.1*inch))

    quality = internal_metrics['data_quality']
    quality_data = [
        ['Metric', 'Value'],
        ['Average Completeness Score', f"{quality['avg_completeness_score']:.1f}%"],
        ['Complete Reports', f"{quality['pct_complete']:.1f}%"],
        ['Partial Reports', f"{quality['partial_reports']}"],
        ['Incomplete Reports', f"{quality['incomplete_reports']}"],
        ['Missing Land Reg Numbers', f"{quality['missing_land_reg']}"],
        ['Missing Encumbrance Data', f"{quality['missing_encumbrance']}"],
    ]

    quality_table = Table(quality_data, colWidths=two_column_widths)
    quality_table.setStyle(_internal_report_table_style())
    story.append(quality_table)
    story.append(Spacer(1, 0.3*inch))

    # ===== TOP PERFORMERS TABLE =====
    story.append(Paragraph("Top Performing Valuers", heading_style))
    story.append(Spacer(1, 0.1*inch))

    # Top 5 valuers by report count, highest first.
    top_valuers = sorted(
        valuers['performance_by_valuer'].items(),
        key=lambda item: item[1]['reports_count'],
        reverse=True,
    )[:5]

    valuer_data = [['Valuer Name', 'Reports', 'Avg Turnaround (days)', 'Completeness %']]
    valuer_data.extend(
        [
            valuer_name,
            f"{stats['reports_count']}",
            f"{stats['avg_turnaround']:.1f}",
            f"{stats['avg_completeness']:.1f}%",
        ]
        for valuer_name, stats in top_valuers
    )

    # Four columns need slightly smaller type than the two-column tables.
    valuer_table = Table(valuer_data, colWidths=[2.5*inch, 1*inch, 1.5*inch, 1.5*inch])
    valuer_table.setStyle(_internal_report_table_style(header_font_size=10, body_font_size=9))
    story.append(valuer_table)

    # ===== BUILD PDF =====
    doc.build(story)
    print(f"‚úì Internal PDF report created successfully: {filename}")


# Build the internal operations PDF from the metrics and AI narrative above.
create_internal_pdf_report(
    internal_metrics,
    internal_ai_report,
    filename="nw_realite_internal_operations_report.pdf",
)

# Recap of both generated files, printed one line per entry.
print(
    "\n" + "=" * 80,
    "PDF REPORTS GENERATED",
    "=" * 80,
    "\nüìÑ Two separate PDF reports created:",
    "\n1. Business Performance Report",
    "   File: nw_realite_report.pdf",
    "   Focus: External stakeholders, portfolio performance",
    "\n2. Internal Operations Report",
    "   File: nw_realite_internal_operations_report.pdf",
    "   Focus: Management, team performance, operations",
    "   Note: Marked as CONFIDENTIAL",
    sep="\n",
)

In [None]:
# Pre-flight checks: every input of create_pdf_report must exist in the kernel
# before the stakeholder PDF is rebuilt.
if globals().get('df') is None:
    raise ValueError("Dataframe 'df' is missing.")

if 'metrics' not in globals():
    raise ValueError("Metrics object is missing.")

if globals().get('ai_report') is None:
    raise ValueError("AI report missing. Run generate_ai_report() first.")

if 'chart_buffer' not in globals():
    raise ValueError("Chart buffer missing. Generate chart first.")

output_path = "nw_realite_report.pdf"

print("Generating PDF report...")

# create_pdf_report prints its own success line, so it is not repeated here
# (the message used to appear twice in the cell output).
create_pdf_report(
    metrics=metrics,
    ai_report=ai_report,
    chart_buffer=chart_buffer,
    filename=output_path,
)


In [34]:
# ====================== CELL 1: PRINT ALL EXTERNAL METRICS ======================
# Recomputes the external metrics dict and dumps every section for inspection.

metrics = calculate_all_metrics(df)

print(
    "\n" + "=" * 120,
    "üìä EXTERNAL REPORT ‚Äî FULL METRICS DUMP",
    "=" * 120,
    sep="\n",
)

for section_name, section_values in metrics.items():
    print(f"\nüî∑ {section_name.upper()} METRICS")
    print("-" * 80)

    # Non-dict sections are printed as a single value.
    if not isinstance(section_values, dict):
        print(section_values)
        continue

    for key, value in section_values.items():
        print(f"{key}: {value}")

print("\n" + "=" * 120, "SUMMARY COUNTS", "=" * 120, sep="\n")

# One line per section: how many metrics it holds.
print(
    *(
        f"{label} metrics: {len(metrics[section])}"
        for label, section in (
            ("Portfolio", "portfolio"),
            ("Temporal", "temporal"),
            ("Client", "clients"),
            ("Geographic", "geographic"),
            ("Land Use", "land_use"),
            ("Risk", "risk"),
            ("Tenure", "tenure"),
            ("Value Per Acre", "value_per_acre"),
            ("Characteristics", "characteristics"),
            ("Performance", "performance"),
        )
    ),
    sep="\n",
)

print("\n‚úì External report metrics printed successfully.\n")


‚úì All metrics calculated successfully

üìä EXTERNAL REPORT ‚Äî FULL METRICS DUMP

üî∑ PORTFOLIO METRICS
--------------------------------------------------------------------------------
total_properties: 117
total_value: 12627400000
average_value: 107926495.72649573
median_value: 50000000.0
std_value: 169238050.76402676
total_acres: 691.29354
total_hectares: 276.3411
average_acres: 5.908491794871795
cv: 156.80862203930445

üî∑ TEMPORAL METRICS
--------------------------------------------------------------------------------
mom_growth_count: -50.0
mom_growth_value: 129.00763358778627
ytd_properties: 117
ytd_value: 12627400000
avg_days_to_valuation: 20.153846153846153
median_days_to_valuation: 8.0
current_month_props: 1
last_month_props: 2

üî∑ CLIENTS METRICS
--------------------------------------------------------------------------------
by_type: {'Bank': {'property_id': 80, 'market_value_amount': 8047500000}, 'Corporate': {'property_id': 13, 'market_value_amount': 2612000000}, 'I

In [35]:
# ====================== CELL 2: PRINT ALL INTERNAL METRICS ======================
# Recomputes the internal metrics dict, dumps every section, then prints highlights.

internal_metrics = calculate_internal_metrics(df)

print(
    "\n" + "=" * 120,
    "üìò INTERNAL REPORT ‚Äî FULL METRICS DUMP",
    "=" * 120,
    sep="\n",
)

for section_name, section_values in internal_metrics.items():
    print(f"\nüî∂ {section_name.upper()} METRICS")
    print("-" * 80)

    # Non-dict sections are printed as a single value.
    if not isinstance(section_values, dict):
        print(section_values)
        continue

    for key, value in section_values.items():
        print(f"{key}: {value}")

print("\n" + "=" * 120, "SUMMARY COUNTS", "=" * 120, sep="\n")

# One line per section: how many metrics it holds.
print(
    *(
        f"{label} metrics: {len(internal_metrics[section])}"
        for label, section in (
            ("Operations", "operations"),
            ("Valuer", "valuers"),
            ("Client Retention", "client_retention"),
            ("Workload", "workload"),
            ("Geographic Coverage", "geographic_coverage"),
            ("Complexity", "complexity"),
            ("Capacity", "capacity"),
        )
    ),
    sep="\n",
)

print("\n" + "=" * 120, "KEY INTERNAL HIGHLIGHTS", "=" * 120, sep="\n")

print(
    "\n‚è± Turnaround Performance:",
    f"   - Average turnaround days: {internal_metrics['operations']['avg_turnaround_days']:.1f}",
    f"   - Fast Track %: {internal_metrics['operations']['pct_fast_track']:.1f}%",
    f"   - Delayed %: {internal_metrics['operations']['pct_delayed']:.1f}%",
    sep="\n",
)

print(
    "\nüë• Valuer Performance:",
    f"   - Active valuers: {internal_metrics['valuers']['active_valuers']}",
    f"   - Avg reports/valuer: {internal_metrics['valuers']['avg_reports_per_valuer']:.1f}",
    f"   - Top performer: {internal_metrics['valuers']['top_performer']}",
    sep="\n",
)

print(
    "\nüîÅ Client Retention:",
    f"   - Repeat client rate: {internal_metrics['client_retention']['repeat_rate']:.1f}%",
    f"   - Unique clients: {internal_metrics['client_retention']['total_unique_clients']}",
    sep="\n",
)

print("\n‚úì Internal report metrics printed successfully.\n")


‚úì Internal metrics calculated successfully

üìò INTERNAL REPORT ‚Äî FULL METRICS DUMP

üî∂ OPERATIONS METRICS
--------------------------------------------------------------------------------
total_reports: 117
avg_turnaround_days: 20.153846153846153
median_turnaround_days: 8.0
std_turnaround_days: 23.409258183468037
fast_track_count: 69
standard_count: 20
delayed_count: 28
pct_fast_track: 58.97435897435898
pct_delayed: 23.931623931623932
reports_at_risk: 28

üî∂ VALUERS METRICS
--------------------------------------------------------------------------------
active_valuers: 2
avg_reports_per_valuer: 58.5
performance_by_valuer: {'Danish Onyango Orech': {'reports_count': 74, 'avg_turnaround': 18.364864864864863, 'median_turnaround': 8.0, 'total_value_handled': 7151700000}, 'Simon Oruka Orwa': {'reports_count': 43, 'avg_turnaround': 23.232558139534884, 'median_turnaround': 8.0, 'total_value_handled': 5475700000}}
turnaround_by_valuer: {'Danish Onyango Orech': {'Delayed': 17, 'Fast Tra