In [86]:
"""
NW Realite - Property Valuation Analysis & AI Report Generator
Comprehensive metrics calculation, AI-powered insights, and PDF export
"""

# !pip install pandas numpy matplotlib seaborn supabase python-dotenv google-generativeai reportlab

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import json
import warnings
import os
from supabase import create_client, Client
import google.generativeai as genai
from reportlab.lib.pagesizes import letter, A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, Image
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
from io import BytesIO
import base64

warnings.filterwarnings('ignore')

print("✓ All dependencies imported successfully")


✓ All dependencies imported successfully


In [87]:

# Supabase Configuration
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# Gemini Configuration
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)

# Report Configuration
COMPANY_NAME = "NW Realite - Property Valuation Analysis Report"
REPORT_DATE = datetime.now().strftime("%B %d, %Y")
TABLE_NAME = "valuations_2025_clean_v2"

print(f" Configuration loaded for {COMPANY_NAME}")
print(f" Report date: {REPORT_DATE}")


 Configuration loaded for NW Realite - Property Valuation Analysis Report
 Report date: December 04, 2025


In [88]:
def fetch_valuation_data():
    """Fetch all property valuation data from Supabase"""
    try:
        supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
        response = supabase.table(TABLE_NAME).select("*").execute()
        df = pd.DataFrame(response.data)
        print(f"✓ Successfully fetched {len(df)} records from Supabase")
        return df
    except Exception as e:
        print(f"✗ Error fetching data: {e}")
        return None

# Fetch data
df = fetch_valuation_data()

if df is not None:
    print(f"\nDataset Shape: {df.shape}")
    print(f"Columns: {list(df.columns)}")
else:
    print("Failed to fetch data. Please check your credentials.")


✓ Successfully fetched 117 records from Supabase

Dataset Shape: (117, 23)
Columns: ['property_id', 'report_reference', 'client_name', 'valuer_name', 'inspection_date', 'valuation_date', 'location_county', 'location_description', 'location_coordinates', 'plot_area_hectares', 'plot_area_acres', 'land_use', 'plot_shape', 'soil_type', 'gradient', 'tenure_type', 'registered_proprietor', 'market_value_amount', 'metadata', 'created_at', 'land_reg_number', 'encumbrance_category', 'id']


In [89]:
def prepare_data(df):
    """Clean and prepare data with derived features"""
    
    # Convert dates
    df['inspection_date'] = pd.to_datetime(df['inspection_date'])
    df['valuation_date'] = pd.to_datetime(df['valuation_date'])
    df['created_at'] = pd.to_datetime(df['created_at'])
    
    # Extract time features
    df['inspection_year'] = df['inspection_date'].dt.year
    df['inspection_month'] = df['inspection_date'].dt.month
    df['inspection_month_name'] = df['inspection_date'].dt.strftime('%b %Y')
    df['inspection_quarter'] = df['inspection_date'].dt.quarter
    df['inspection_day_of_week'] = df['inspection_date'].dt.day_name()
    
    # Calculate days between inspection and valuation
    df['days_to_valuation'] = (df['valuation_date'] - df['inspection_date']).dt.days
    
    # Value per acre (handle division by zero)
    df['value_per_acre'] = df['market_value_amount'] / df['plot_area_acres'].replace(0, np.nan)
    
    # Property value categories
    df['value_category'] = pd.cut(
        df['market_value_amount'],
        bins=[0, 10_000_000, 50_000_000, 100_000_000, 500_000_000, float('inf')],
        labels=['Under 10M', '10M-50M', '50M-100M', '100M-500M', 'Above 500M']
    )
    
    # Land size categories
    df['land_size_category'] = pd.cut(
        df['plot_area_acres'],
        bins=[0, 1, 5, 10, 50, float('inf')],
        labels=['<1 Acre', '1-5 Acres', '5-10 Acres', '10-50 Acres', '50+ Acres']
    )
    
    # Client type extraction
    def categorize_client(name):
        name_lower = str(name).lower()
        if 'bank' in name_lower:
            return 'Bank'
        elif 'sacco' in name_lower:
            return 'Sacco'
        elif 'limited' in name_lower or 'ltd' in name_lower:
            return 'Corporate'
        else:
            return 'Individual'
    
    df['client_type'] = df['client_name'].apply(categorize_client)
    
    # Has encumbrance (simplified)
    df['has_encumbrance'] = df['encumbrance_category'].apply(
        lambda x: 'Yes' if x == 'Has Charges' else 'No' if x == 'None' else 'Unknown'
    )
    
    print("✓ Data preparation complete")
    return df

# Prepare data
df = prepare_data(df)

print(f"\nNew derived columns added:")
print(f"- Time features: inspection_year, inspection_month, inspection_quarter")
print(f"- Value metrics: value_per_acre, value_category")
print(f"- Categories: land_size_category, client_type, has_encumbrance")


✓ Data preparation complete

New derived columns added:
- Time features: inspection_year, inspection_month, inspection_quarter
- Value metrics: value_per_acre, value_category
- Categories: land_size_category, client_type, has_encumbrance


In [90]:
def calculate_all_metrics(df):
    """Calculate comprehensive metrics for the report"""
    
    metrics = {}
    
    # ===== 1. PORTFOLIO OVERVIEW =====
    metrics['portfolio'] = {
        'total_properties': len(df),
        'total_value': df['market_value_amount'].sum(),
        'average_value': df['market_value_amount'].mean(),
        'median_value': df['market_value_amount'].median(),
        'std_value': df['market_value_amount'].std(),
        'total_acres': df['plot_area_acres'].sum(),
        'total_hectares': df['plot_area_hectares'].sum(),
        'average_acres': df['plot_area_acres'].mean(),
        'cv': (df['market_value_amount'].std() / df['market_value_amount'].mean()) * 100
    }
    
    # ===== 2. TEMPORAL METRICS =====
    current_month = df['inspection_date'].max().replace(day=1)
    last_month = current_month - timedelta(days=1)
    last_month = last_month.replace(day=1)
    
    current_month_props = len(df[df['inspection_date'] >= current_month])
    last_month_props = len(df[(df['inspection_date'] >= last_month) & 
                               (df['inspection_date'] < current_month)])
    
    current_month_value = df[df['inspection_date'] >= current_month]['market_value_amount'].sum()
    last_month_value = df[(df['inspection_date'] >= last_month) & 
                          (df['inspection_date'] < current_month)]['market_value_amount'].sum()
    
    metrics['temporal'] = {
        'mom_growth_count': ((current_month_props - last_month_props) / last_month_props * 100) if last_month_props > 0 else 0,
        'mom_growth_value': ((current_month_value - last_month_value) / last_month_value * 100) if last_month_value > 0 else 0,
        'ytd_properties': len(df[df['inspection_date'].dt.year == datetime.now().year]),
        'ytd_value': df[df['inspection_date'].dt.year == datetime.now().year]['market_value_amount'].sum(),
        'avg_days_to_valuation': df['days_to_valuation'].mean(),
        'median_days_to_valuation': df['days_to_valuation'].median(),
        'current_month_props': current_month_props,
        'last_month_props': last_month_props
    }
    
    # ===== 3. CLIENT METRICS =====
    client_breakdown = df.groupby('client_type').agg({
        'property_id': 'count',
        'market_value_amount': 'sum'
    }).to_dict('index')
    
    top_clients = df.groupby('client_name').agg({
        'property_id': 'count',
        'market_value_amount': 'sum'
    }).sort_values('market_value_amount', ascending=False).head(5)
    
    total_value = df['market_value_amount'].sum()
    top5_value = top_clients['market_value_amount'].sum()
    
    metrics['clients'] = {
        'by_type': client_breakdown,
        'top_5_clients': top_clients.to_dict('index'),
        'unique_clients': df['client_name'].nunique(),
        'concentration_ratio': (top5_value / total_value * 100) if total_value > 0 else 0,
        'avg_value_per_client': df.groupby('client_name')['market_value_amount'].sum().mean()
    }
    
    # ===== 4. GEOGRAPHIC METRICS =====
    county_analysis = df.groupby('location_county').agg({
        'property_id': 'count',
        'market_value_amount': ['sum', 'mean']
    })
    county_analysis.columns = ['count', 'total_value', 'avg_value']
    county_analysis = county_analysis.sort_values('total_value', ascending=False)
    
    metrics['geographic'] = {
        'num_counties': df['location_county'].nunique(),
        'top_5_counties': county_analysis.head(5).to_dict('index'),
        'concentration': (county_analysis.head(5)['total_value'].sum() / total_value * 100) if total_value > 0 else 0,
        'highest_avg_county': county_analysis['avg_value'].idxmax(),
        'most_active_county': county_analysis['count'].idxmax()
    }
    
    # ===== 5. LAND USE METRICS =====
    land_use_analysis = df.groupby('land_use').agg({
        'property_id': 'count',
        'market_value_amount': ['sum', 'mean']
    })
    land_use_analysis.columns = ['count', 'total_value', 'avg_value']
    land_use_analysis['pct_value'] = (land_use_analysis['total_value'] / total_value * 100)
    
    metrics['land_use'] = {
        'breakdown': land_use_analysis.to_dict('index'),
        'dominant': land_use_analysis['total_value'].idxmax(),
        'highest_avg': land_use_analysis['avg_value'].idxmax()
    }
    
    # ===== 6. VALUE DISTRIBUTION =====
    value_cat_dist = df['value_category'].value_counts().to_dict()
    metrics['value_distribution'] = {
        'by_category': value_cat_dist,
        'top_10_pct_value': df.nlargest(int(len(df) * 0.1), 'market_value_amount')['market_value_amount'].sum() / total_value * 100
    }
    
    # ===== 7. RISK & ENCUMBRANCE =====
    encumbrance_analysis = df.groupby('encumbrance_category').agg({
        'property_id': 'count',
        'market_value_amount': ['sum', 'mean']
    })
    encumbrance_analysis.columns = ['count', 'total_value', 'avg_value']
    
    metrics['risk'] = {
        'encumbrance_breakdown': encumbrance_analysis.to_dict('index'),
        'pct_encumbered': (len(df[df['encumbrance_category'] == 'Has Charges']) / len(df) * 100),
        'value_at_risk': df[df['encumbrance_category'] == 'Has Charges']['market_value_amount'].sum(),
        'pct_value_encumbered': (df[df['encumbrance_category'] == 'Has Charges']['market_value_amount'].sum() / total_value * 100)
    }
    
    # ===== 8. TENURE METRICS =====
    tenure_analysis = df.groupby('tenure_type').agg({
        'property_id': 'count',
        'market_value_amount': ['sum', 'mean']
    })
    tenure_analysis.columns = ['count', 'total_value', 'avg_value']
    
    metrics['tenure'] = tenure_analysis.to_dict('index')
    
    # ===== 9. VALUE PER ACRE =====
    vpa_df = df[df['value_per_acre'].notna()]
    
    vpa_by_county = vpa_df.groupby('location_county')['value_per_acre'].mean().sort_values(ascending=False).head(5)
    vpa_by_landuse = vpa_df.groupby('land_use')['value_per_acre'].mean().sort_values(ascending=False)
    
    metrics['value_per_acre'] = {
        'overall_avg': vpa_df['value_per_acre'].mean(),
        'overall_median': vpa_df['value_per_acre'].median(),
        'by_county_top5': vpa_by_county.to_dict(),
        'by_land_use': vpa_by_landuse.to_dict()
    }
    
    # ===== 10. PROPERTY CHARACTERISTICS =====
    metrics['characteristics'] = {
        'soil_type': df['soil_type'].value_counts().to_dict(),
        'plot_shape': df['plot_shape'].value_counts().to_dict(),
        'gradient': df['gradient'].value_counts().to_dict(),
        'most_common_soil': df['soil_type'].mode()[0] if not df['soil_type'].mode().empty else 'N/A',
        'most_common_shape': df['plot_shape'].mode()[0] if not df['plot_shape'].mode().empty else 'N/A'
    }
    
    # ===== 11. PERFORMANCE METRICS =====
    metrics['performance'] = {
        'num_valuers': df['valuer_name'].nunique(),
        'avg_properties_per_valuer': len(df) / df['valuer_name'].nunique(),
        'most_active_valuer': df['valuer_name'].value_counts().index[0],
        'monthly_avg_inspections': len(df) / df['inspection_month'].nunique()
    }
    
    print("✓ All metrics calculated successfully")
    return metrics

# Calculate metrics
metrics = calculate_all_metrics(df)

print("\n METRICS SUMMARY:")
print(f"Portfolio: {len(metrics['portfolio'])} metrics")
print(f"Temporal: {len(metrics['temporal'])} metrics")
print(f"Clients: {len(metrics['clients'])} metrics")
print(f"Geographic: {len(metrics['geographic'])} metrics")
print(f"Land Use: {len(metrics['land_use'])} metrics")
print(f"Risk: {len(metrics['risk'])} metrics")


✓ All metrics calculated successfully

 METRICS SUMMARY:
Portfolio: 9 metrics
Temporal: 8 metrics
Clients: 5 metrics
Geographic: 5 metrics
Land Use: 3 metrics
Risk: 4 metrics


In [91]:
def create_summary_charts(df, metrics):
    """Create key visualizations for the report"""
    
    # Set style
    plt.style.use('seaborn-v0_8-whitegrid')
    sns.set_palette("Set2")
    
    # Create figure with 6 subplots
    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
    fig.suptitle('NW Realite - Property Valuation Dashboard', fontsize=16, fontweight='bold', y=0.995)
    
    # 1. Top 10 Counties by Value
    county_value = df.groupby('location_county')['market_value_amount'].sum().sort_values(ascending=False).head(10)
    axes[0, 0].barh(range(len(county_value)), county_value.values / 1_000_000, color='steelblue')
    axes[0, 0].set_yticks(range(len(county_value)))
    axes[0, 0].set_yticklabels(county_value.index, fontsize=9)
    axes[0, 0].set_xlabel('Total Value (KShs Millions)', fontsize=9)
    axes[0, 0].set_title('Top 10 Counties by Total Value', fontsize=10, fontweight='bold')
    axes[0, 0].invert_yaxis()
    
    # 2. Land Use Distribution
    land_use_value = df.groupby('land_use')['market_value_amount'].sum().sort_values(ascending=False)
    colors = sns.color_palette('pastel')[0:len(land_use_value)]
    axes[0, 1].pie(land_use_value, labels=land_use_value.index, autopct='%1.1f%%', 
                   colors=colors, startangle=90)
    axes[0, 1].set_title('Portfolio by Land Use', fontsize=10, fontweight='bold')
    
    # 3. Client Type Distribution
    client_type_value = df.groupby('client_type')['market_value_amount'].sum().sort_values(ascending=False)
    axes[0, 2].bar(range(len(client_type_value)), client_type_value.values / 1_000_000, 
                   color='coral', edgecolor='black')
    axes[0, 2].set_xticks(range(len(client_type_value)))
    axes[0, 2].set_xticklabels(client_type_value.index, fontsize=9)
    axes[0, 2].set_ylabel('Total Value (KShs Millions)', fontsize=9)
    axes[0, 2].set_title('Total Value by Client Type', fontsize=10, fontweight='bold')
    axes[0, 2].grid(axis='y', alpha=0.3)
    
    # 4. Monthly Inspection Trend
    monthly_trend = df.groupby(df['inspection_date'].dt.to_period('M')).size()
    monthly_trend.index = monthly_trend.index.to_timestamp()
    axes[1, 0].plot(monthly_trend.index, monthly_trend.values, marker='o', 
                    linewidth=2, markersize=6, color='green')
    axes[1, 0].set_xlabel('Month', fontsize=9)
    axes[1, 0].set_ylabel('Properties Inspected', fontsize=9)
    axes[1, 0].set_title('Monthly Inspection Trend', fontsize=10, fontweight='bold')
    axes[1, 0].tick_params(axis='x', rotation=45)
    axes[1, 0].grid(alpha=0.3)
    
    # 5. Encumbrance Status
    encumbrance_counts = df['encumbrance_category'].value_counts()
    colors_enc = {'None': 'green', 'Has Charges': 'orange', 'Unascertained': 'gray'}
    bar_colors = [colors_enc.get(x, 'blue') for x in encumbrance_counts.index]
    axes[1, 1].bar(range(len(encumbrance_counts)), encumbrance_counts.values, 
                   color=bar_colors, edgecolor='black')
    axes[1, 1].set_xticks(range(len(encumbrance_counts)))
    axes[1, 1].set_xticklabels(encumbrance_counts.index, fontsize=9)
    axes[1, 1].set_ylabel('Count', fontsize=9)
    axes[1, 1].set_title('Encumbrance Status', fontsize=10, fontweight='bold')
    axes[1, 1].grid(axis='y', alpha=0.3)
    
    # 6. Value Distribution
    value_cat_counts = df['value_category'].value_counts().sort_index()
    axes[1, 2].bar(range(len(value_cat_counts)), value_cat_counts.values, 
                   color='mediumpurple', edgecolor='black')
    axes[1, 2].set_xticks(range(len(value_cat_counts)))
    axes[1, 2].set_xticklabels(value_cat_counts.index, fontsize=8, rotation=45, ha='right')
    axes[1, 2].set_ylabel('Count', fontsize=9)
    axes[1, 2].set_title('Property Value Categories', fontsize=10, fontweight='bold')
    axes[1, 2].grid(axis='y', alpha=0.3)
    
    plt.tight_layout()
    
    # Save to BytesIO
    img_buffer = BytesIO()
    plt.savefig(img_buffer, format='png', dpi=150, bbox_inches='tight')
    img_buffer.seek(0)
    plt.close()
    
    print("✓ Summary visualizations created")
    return img_buffer

# Create charts
chart_buffer = create_summary_charts(df, metrics)


✓ Summary visualizations created


In [92]:
def generate_ai_report(metrics, df):
    """Use Gemini 2.5 Flash to generate comprehensive report"""
    
    # Initialize Gemini model
    genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
    model = genai.GenerativeModel("gemini-2.5-flash")
    
    # Prepare metrics summary for AI
    metrics_summary = f"""
# PROPERTY VALUATION DATA SUMMARY

## Portfolio Overview
- Total Properties: {metrics['portfolio']['total_properties']}
- Total Market Value: KShs {metrics['portfolio']['total_value']:,.0f}
- Average Property Value: KShs {metrics['portfolio']['average_value']:,.0f}
- Median Property Value: KShs {metrics['portfolio']['median_value']:,.0f}
- Total Land Area: {metrics['portfolio']['total_acres']:.2f} acres
- Coefficient of Variation: {metrics['portfolio']['cv']:.1f}%

## Growth Metrics
- MoM Growth (Properties): {metrics['temporal']['mom_growth_count']:.1f}%
- MoM Growth (Value): {metrics['temporal']['mom_growth_value']:.1f}%
- YTD Properties: {metrics['temporal']['ytd_properties']}
- YTD Value: KShs {metrics['temporal']['ytd_value']:,.0f}
- Avg Days to Valuation: {metrics['temporal']['avg_days_to_valuation']:.1f} days

## Client Segmentation
{json.dumps(metrics['clients']['by_type'], indent=2)}
- Unique Clients: {metrics['clients']['unique_clients']}
- Top 5 Client Concentration: {metrics['clients']['concentration_ratio']:.1f}%

## Geographic Distribution
- Counties Covered: {metrics['geographic']['num_counties']}
- Geographic Concentration (Top 5): {metrics['geographic']['concentration']:.1f}%
- Most Active County: {metrics['geographic']['most_active_county']}

## Land Use Breakdown
{json.dumps(metrics['land_use']['breakdown'], indent=2)}

## Risk Metrics
- Properties with Encumbrances: {metrics['risk']['pct_encumbered']:.1f}%
- Value at Risk: KShs {metrics['risk']['value_at_risk']:,.0f}
- % Portfolio Value Encumbered: {metrics['risk']['pct_value_encumbered']:.1f}%

## Value per Acre
- Overall Average: KShs {metrics['value_per_acre']['overall_avg']:,.0f}/acre
- Overall Median: KShs {metrics['value_per_acre']['overall_median']:,.0f}/acre

## Performance
- Number of Valuers: {metrics['performance']['num_valuers']}
- Most Active Valuer: {metrics['performance']['most_active_valuer']}
- Monthly Avg Inspections: {metrics['performance']['monthly_avg_inspections']:.1f}
"""
    
    prompt = f"""
You are Samuel Wanyua, a Data Scientist at NW Realite. Based on the comprehensive data summary below, 
create a detailed 2-page executive report for stakeholders. 

IMPORTANT FORMATTING INSTRUCTIONS:
- Do NOT use markdown asterisks (*) for emphasis
- Use clear section headers with numbers
- Write in plain text paragraphs
- For bullet points, use simple dashes (-)
- Keep the tone professional and data-driven
- Date: 2nd December 2025
- Reference that the report is prepared by Samuel Wanyua, Data Scientist

{metrics_summary}

The report should be structured as follows:
Title
date - 4th Dec 2025
then  Prepared by Samuel Wanyua, Data Scientist, NW Realite

EXECUTIVE SUMMARY

1. Portfolio Overview
Provide a high-level summary in 2-3 paragraphs covering portfolio size, total value, and key characteristics.

2. Key Performance Indicators
List the most critical metrics in bullet points (use simple dashes).

3. Growth & Trends Analysis
Analyze growth trends, seasonal patterns, and inspection efficiency in 1-2 paragraphs.

4. Client & Geographic Insights
Discuss client concentration, diversification, and geographic distribution in 1-2 paragraphs.

DETAILED ANALYSIS & RECOMMENDATIONS

5. Risk Assessment
Analyze encumbrance levels, value at risk, and portfolio risk profile in 2 paragraphs.

6. Land Use & Value Analysis
Discuss land use distribution, value per acre trends, and property characteristics in 1-2 paragraphs.

7. Strategic Recommendations
Provide 5-7 actionable recommendations as numbered points. (use consistent font size as with others)

8. Key Takeaways
Summarize 3-5 most important insights as numbered points. (use consistent font size as with others)

CRITICAL: Do not use any markdown formatting (no *, **, #, etc.). Write in clean, professional prose with clear section numbers and simple bullet points using dashes with consistent font size throughout the document.
"""
    
    try:
        response = model.generate_content(prompt)
        report_text = response.text
        
        # Clean up any remaining markdown artifacts
        report_text = report_text.replace('**', '')
        report_text = report_text.replace('*', '')
        report_text = report_text.replace('##', '')
        report_text = report_text.replace('#', '')
        
        print("✓ AI report generated successfully")
        return report_text
        
    except Exception as e:
        print(f"✗ Error generating AI report: {e}")
        return None


# Generate AI report
ai_report = generate_ai_report(metrics, df)

if ai_report:
    print("\n" + "="*80)
    print("AI-GENERATED REPORT PREVIEW (First 500 characters):")
    print("="*80)
    print(ai_report[:500] + "...")
else:
    print("Failed to generate AI report. Check your Gemini API key.")

✓ AI report generated successfully

AI-GENERATED REPORT PREVIEW (First 500 characters):
Executive Report: Property Valuation Portfolio Analysis

2nd December 2025

Prepared by Samuel Wanyua, Data Scientist, NW Realite

EXECUTIVE SUMMARY

This report provides a comprehensive analysis of NW Realite's property valuation portfolio, reflecting significant growth in portfolio value despite a reduction in the number of properties valued month-over-month. The total market value stands at KShs 12.6 billion across 117 properties, demonstrating the company's capability in managing high-value ...


In [93]:
def create_pdf_report(metrics, ai_report, chart_buffer, filename="NW_Realite_Valuation_Report.pdf"):
    """Generate professional PDF report"""
    doc = SimpleDocTemplate(
        filename,
        pagesize=A4,
        topMargin=0.75*inch,
        bottomMargin=0.75*inch,
        leftMargin=0.75*inch,
        rightMargin=0.75*inch,
    )
    
    story = []
    styles = getSampleStyleSheet()
    
    # ===== CUSTOM STYLES =====
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Title'],
        fontSize=24,
        textColor=colors.HexColor("#000000"),
        spaceAfter=12,
        alignment=TA_CENTER,
        fontName='Helvetica-Bold'
    )
    
    heading_style = ParagraphStyle(
        'CustomHeading',
        parent=styles['Heading1'],
        fontSize=14,
        textColor=colors.HexColor("#000000"),
        spaceAfter=10,
        spaceBefore=15,
        fontName='Helvetica-Bold'
    )
    
    subheading_style = ParagraphStyle(
        'CustomSubHeading',
        parent=styles['Heading2'],
        fontSize=11,
        textColor=colors.HexColor("#000000"),
        spaceAfter=6,
        spaceBefore=8,
        fontName='Helvetica-Bold'
    )
    
    body_style = ParagraphStyle(
        'CustomBody',
        parent=styles['BodyText'],
        fontSize=10,
        leading=14,
        alignment=TA_LEFT,
        spaceAfter=8
    )
    
    # ===== TITLE PAGE =====
    story.append(Paragraph(COMPANY_NAME, title_style))
    story.append(Spacer(1, 0.2*inch))
    story.append(Spacer(1, 0.4*inch))
    
    # Process the AI report text
    # Split by paragraphs and detect section headers
    lines = ai_report.split('\n')
    for line in lines:
        line = line.strip()
        if not line:
            continue
        
        # Detect section headers (lines that start with numbers or are in ALL CAPS)
        if (line and (line[0].isdigit() and '.' in line[:3])) or line.isupper():
            # This is a section header
            story.append(Spacer(1, 0.15*inch))
            story.append(Paragraph(line, subheading_style))
        else:
            # Regular paragraph
            story.append(Paragraph(line, body_style))
    
    story.append(Spacer(1, 0.5*inch))
    
    # ===== PAGE BREAK BEFORE METRICS (OPTIONAL) =====
    story.append(PageBreak())  
    
    # ===== KEY METRICS TABLE (MOVED AFTER AI REPORT) =====
    story.append(Paragraph("Executive Summary - Key Metrics", heading_style))
    story.append(Spacer(1, 0.1*inch))
    
    key_metrics_data = [
        ['Metric', 'Value'],
        ['Total Properties', f"{metrics['portfolio']['total_properties']:,}"],
        ['Total Portfolio Value', f"KShs {metrics['portfolio']['total_value']:,.0f}"],
        ['Average Property Value', f"KShs {metrics['portfolio']['average_value']:,.0f}"],
        ['Total Land Area', f"{metrics['portfolio']['total_acres']:.2f} acres"],
        ['Properties with Encumbrances', f"{metrics['risk']['pct_encumbered']:.1f}%"],
        ['MoM Growth (Properties)', f"{metrics['temporal']['mom_growth_count']:.1f}%"],
        ['Avg Days to Valuation', f"{metrics['temporal']['avg_days_to_valuation']:.1f} days"],
    ]
    
    key_metrics_table = Table(key_metrics_data, colWidths=[3.5*inch, 2.5*inch])
    key_metrics_table.setStyle(TableStyle([
        # Header row styling
        ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor("#000000")),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, 0), 11),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
        ('TOPPADDING', (0, 0), (-1, 0), 12),
        
        # Data rows styling - WHITE BACKGROUND
        ('BACKGROUND', (0, 1), (-1, -1), colors.white),
        ('TEXTCOLOR', (0, 1), (-1, -1), colors.black),
        ('FONTSIZE', (0, 1), (-1, -1), 10),
        ('TOPPADDING', (0, 1), (-1, -1), 8),
        ('BOTTOMPADDING', (0, 1), (-1, -1), 8),
        
        # Grid and borders
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('LINEBELOW', (0, 0), (-1, 0), 2, colors.black),
    ]))
    
    story.append(key_metrics_table)
    story.append(Spacer(1, 0.3*inch))
    
    # ===== CHART IMAGE =====
    # story.append(Paragraph("Portfolio Trend Chart", heading_style))
    # story.append(Spacer(1, 0.1*inch))
    # chart_buffer.seek(0)
    # story.append(Image(chart_buffer, width=6*inch, height=3.5*inch))
    
    # ===== BUILD PDF =====
    doc.build(story)
    print(f"✓ PDF report created successfully: {filename}")


# Create the PDF report
create_pdf_report(metrics, ai_report, chart_buffer, filename="nw_realite_report.pdf")

✓ PDF report created successfully: nw_realite_report.pdf


In [58]:
# Ensure df, metrics, ai_report, and chart_buffer exist
if 'df' not in globals() or df is None:
    raise ValueError("Dataframe 'df' is missing.")

if 'metrics' not in globals():
    raise ValueError("Metrics object is missing.")

if 'ai_report' not in globals() or ai_report is None:
    raise ValueError("AI report missing. Run generate_ai_report() first.")

if 'chart_buffer' not in globals():
    raise ValueError("Chart buffer missing. Generate chart first.")

output_path = "nw_realite_report.pdf"

print("Generating PDF report...")

create_pdf_report(
    metrics=metrics,
    ai_report=ai_report,
    chart_buffer=chart_buffer,
    filename=output_path
)

print(f"✓ PDF report created successfully: {output_path}")


Generating PDF report...
✓ PDF report created: nw_realite_report.pdf
✓ PDF report created successfully: nw_realite_report.pdf
