# In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.gridspec import GridSpec
import matplotlib.ticker as mticker
import warnings
import os

# Suppress every library warning so the console progress output stays readable.
warnings.filterwarnings('ignore')

# =============================================================================
# SETUP & CONFIGURATION
# =============================================================================

# Output folder for all rendered charts; created on first run only.
CHARTS_DIR = 'charts'
_charts_dir_missing = not os.path.exists(CHARTS_DIR)
if _charts_dir_missing:
    os.makedirs(CHARTS_DIR)
    print(f"Created directory: {CHARTS_DIR}/\n")

# Dark theme styling: shared shades are named once and reused below.
_CANVAS = '#0D1117'   # figure / axes background
_BORDER = '#30363D'   # spines, grid lines, legend frame
_PANEL = '#161B22'    # legend background
_INK = 'white'        # labels, ticks and text

plt.style.use('dark_background')
_dark_theme = {
    'figure.facecolor': _CANVAS,
    'axes.facecolor': _CANVAS,
    'axes.edgecolor': _BORDER,
    'axes.labelcolor': _INK,
    'text.color': _INK,
    'xtick.color': _INK,
    'ytick.color': _INK,
    'font.size': 12,
    'axes.titlesize': 16,
    'axes.labelsize': 12,
    'grid.color': _BORDER,
    'grid.alpha': 0.3,
    'legend.facecolor': _PANEL,
    'legend.edgecolor': _BORDER,
}
plt.rcParams.update(_dark_theme)

# Color palettes
# One fixed color per borough so it is recognisable across charts.
BOROUGH_COLORS = {
    'Manhattan': '#E63946',
    'Brooklyn': '#457B9D',
    'Queens': '#2A9D8F',
    'Bronx': '#9B59B6',
    'Staten Island': '#F4A261',
}

# Keys must match the labels stored in the 'price_tier' column.
PRICE_TIER_COLORS = {
    'Under $300K': '#2ECC71',
    '$300K-600K': '#3498DB',
    '$600K-900K': '#9B59B6',
    '$900K-1.5M': '#E74C3C',
    '$1.5M-3M': '#E67E22',
    'Over $3M': '#F1C40F',
}

# Keys must match the labels stored in the 'age_category' column.
AGE_COLORS = {
    'New (0-10)': '#00D4AA',
    'Recent (10-25)': '#00B4D8',
    'Modern (25-50)': '#4361EE',
    'Mature (50-75)': '#7209B7',
    'Old (75-100)': '#E63946',
    'Historic (100+)': '#F4A261',
}

# =============================================================================
# LOAD DATA
# =============================================================================

print("="*80)
print("NYC HOUSING MARKET - COMPREHENSIVE CHART ANALYSIS")
print("="*80 + "\n")

# Load main dataset. Nothing can be charted without it, so stop with a non-zero
# exit status. SystemExit is raised directly because the bare `exit()` helper is
# only installed by the `site` module and reports success (status 0).
try:
    df = pd.read_csv('nyc_housing_processed.csv')
except FileNotFoundError:
    print("ERROR: 'nyc_housing_processed.csv' not found!")
    raise SystemExit(1) from None
print(f"✓ Main dataset loaded: {len(df):,} records")

# Load residential dataset, or derive it from the main dataset when the file is absent.
try:
    df_res = pd.read_csv('nyc_housing_residential_units.csv')
except FileNotFoundError:
    print("⚠ Residential file not found, creating from main dataset...")
    # Keep rows with at least one residential unit; without that column use every row.
    if 'residential_units' in df.columns:
        df_res = df[df['residential_units'] > 0].copy()
    else:
        df_res = df.copy()
    # Create price_per_sqft if not exists
    if 'price_per_sqft' not in df_res.columns and 'bldgarea' in df_res.columns:
        df_res['price_per_sqft'] = df_res['sale_price'] / df_res['bldgarea']
        # The range filter also drops the inf/NaN values produced by a zero or
        # missing building area, because those comparisons evaluate to False.
        in_range = (df_res['price_per_sqft'] >= 100) & (df_res['price_per_sqft'] <= 3000)
        df_res = df_res[in_range]
    print(f"✓ Created residential subset: {len(df_res):,} records")
else:
    print(f"✓ Residential dataset loaded: {len(df_res):,} records")

# Data summary
print(f"\n{'─'*50}")
print("DATA SUMMARY:")
print(f"{'─'*50}")
print(f"Total Properties: {len(df):,}")
print(f"Boroughs: {df['borough_name'].nunique()}")
print(f"Price Range: ${df['sale_price'].min():,.0f} - ${df['sale_price'].max():,.0f}")
print(f"Median Price: ${df['sale_price'].median():,.0f}")
if 'building_category' in df.columns:
    print(f"Building Categories: {df['building_category'].nunique()}")
print(f"{'─'*50}\n")

# =============================================================================
# HELPER FUNCTIONS
# =============================================================================

def save_chart(filename, chart_num=None):
    """Save the current matplotlib figure into CHARTS_DIR, then close it.

    Args:
        filename: File name (not a path) of the image to write inside CHARTS_DIR.
        chart_num: Optional sequence number printed zero-padded in the progress
            line. When None, the progress line shows only the file name.
    """
    filepath = os.path.join(CHARTS_DIR, filename)
    plt.tight_layout()
    # Pass the dark background explicitly so the saved image uses it.
    plt.savefig(filepath, dpi=150, facecolor='#0D1117', bbox_inches='tight')
    plt.close()
    # Compare against None (not truthiness) so chart number 0 is still reported.
    if chart_num is not None:
        print(f"  Chart {chart_num:02d}: ✓ {filename}")
    else:
        print(f"  ✓ {filename}")

def format_currency(x, pos):
    """Format a tick value as a compact dollar amount.

    Intended for use with matplotlib's ``FuncFormatter``.

    Args:
        x: Numeric tick value in dollars.
        pos: Tick position supplied by matplotlib; unused.

    Returns:
        ``'$1.5M'`` for magnitudes of one million or more, ``'$750K'`` for one
        thousand or more, otherwise whole dollars such as ``'$500'``. Negative
        values carry a leading minus sign and use the same scaling.
    """
    # Scale on the magnitude so negative ticks do not fall through to raw dollars.
    sign = '-' if x < 0 else ''
    magnitude = abs(x)
    if magnitude >= 1e6:
        return f'{sign}${magnitude/1e6:.1f}M'
    if magnitude >= 1e3:
        return f'{sign}${magnitude/1e3:.0f}K'
    return f'{sign}${magnitude:.0f}'

def add_value_labels(ax, bars, fmt='{:,.0f}', offset=0, fontsize=10, rotation=0):
    """Write each bar's height as a bold label centred above the bar.

    Bars whose height is missing (NaN/None) or not strictly positive get no
    label. ``offset`` is added to the height to place the text, and ``fmt`` is
    a ``str.format`` template applied to the height.
    """
    text_style = dict(ha='center', va='bottom', fontsize=fontsize,
                      fontweight='bold', rotation=rotation)
    for rect in bars:
        value = rect.get_height()
        if pd.isna(value) or value <= 0:
            continue
        x_center = rect.get_x() + rect.get_width()/2
        ax.text(x_center, value + offset, fmt.format(value), **text_style)

def style_axis(ax, title, xlabel=None, ylabel=None, grid=True):
    """Give an axes the shared look: bold title, optional labels, open frame.

    Axis labels are set only when a non-empty value is passed. The top and
    right spines are always hidden. When ``grid`` is true, a faint grid is
    drawn along the y axis.
    """
    ax.set_title(title, fontsize=18, fontweight='bold', pad=20)
    for label_text, set_label in ((xlabel, ax.set_xlabel), (ylabel, ax.set_ylabel)):
        if label_text:
            set_label(label_text, fontsize=14)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    if not grid:
        return
    ax.grid(True, alpha=0.2, axis='y')

# =============================================================================
# CHART COUNTER
# =============================================================================
# Running chart number; each chart section increments it before drawing.
chart_num = 0

print("Creating charts...\n")

# =============================================================================
# SECTION 1: INFOGRAPHIC DASHBOARDS
# =============================================================================

# Separator restored to the box-drawing character that had been mis-decoded.
print("─" * 50)
print("SECTION 1: INFOGRAPHIC DASHBOARDS")
print("─" * 50)

# -----------------------------------------------------------------------------
# CHART 01: MAIN INFOGRAPHIC DASHBOARD
# -----------------------------------------------------------------------------
chart_num += 1

fig = plt.figure(figsize=(24, 16))
fig.suptitle('NYC HOUSING MARKET ANALYSIS DASHBOARD', fontsize=36, fontweight='bold',
             y=0.98, color='white')

# 5 rows x 6 columns: row 0 holds six KPI cards, rows 1-4 hold two half-width charts each.
gs = GridSpec(5, 6, figure=fig, hspace=0.4, wspace=0.3,
              top=0.92, bottom=0.05, left=0.05, right=0.95)

# --- ROW 1: KEY METRICS (6 boxes) ---
# Each entry: (card title, formatted value, subtitle, accent color, icon).
# The $/sqft card shows a median, so it is labelled MEDIAN rather than AVG.
# Icons are restored to the emoji that had been mis-decoded.
metrics = [
    ('TOTAL SALES', f"{len(df):,}", 'Properties', '#E63946', '🏠'),
    ('MEDIAN PRICE', f"${df['sale_price'].median()/1e6:.2f}M", 'All Boroughs', '#2A9D8F', '💰'),
    ('MEAN PRICE', f"${df['sale_price'].mean()/1e6:.2f}M", 'All Boroughs', '#457B9D', '📊'),
    ('TOTAL VALUE', f"${df['sale_price'].sum()/1e9:.1f}B", 'Market Size', '#9B59B6', '💎'),
    ('MEDIAN $/SQFT', f"${df_res['price_per_sqft'].median():.0f}" if 'price_per_sqft' in df_res.columns else 'N/A',
     'Residential', '#F4A261', '📏'),
    ('MEDIAN AGE', f"{df['building_age'].median():.0f} yrs" if 'building_age' in df.columns else 'N/A',
     'Buildings', '#00D4AA', '🏛️')
]

for i, (title, value, subtitle, color, icon) in enumerate(metrics):
    ax = fig.add_subplot(gs[0, i])
    # Unit square with hidden axes: the card is laid out in 0..1 coordinates.
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.axis('off')

    # Background
    rect = mpatches.FancyBboxPatch((0.02, 0.05), 0.96, 0.9,
                                    boxstyle="round,pad=0.02",
                                    facecolor='#161B22',
                                    edgecolor=color, linewidth=3)
    ax.add_patch(rect)

    # Top to bottom: icon, value, title, subtitle.
    ax.text(0.5, 0.78, icon, ha='center', va='center', fontsize=32)
    ax.text(0.5, 0.52, value, ha='center', va='center', fontsize=22,
            fontweight='bold', color=color)
    ax.text(0.5, 0.30, title, ha='center', va='center', fontsize=11,
            fontweight='bold', color='white')
    ax.text(0.5, 0.15, subtitle, ha='center', va='center', fontsize=9, color='#8B949E')

# --- ROW 2: Borough Charts ---
# Property Count
ax1 = fig.add_subplot(gs[1, :3])
borough_counts = df['borough_name'].value_counts()
colors = [BOROUGH_COLORS.get(b, '#888') for b in borough_counts.index]
bars = ax1.bar(borough_counts.index, borough_counts.values, color=colors,
               edgecolor='white', linewidth=1.5)
style_axis(ax1, '📊 Properties by Borough', ylabel='Count')
add_value_labels(ax1, bars, offset=200, fontsize=11)

# Median Price by Borough
ax2 = fig.add_subplot(gs[1, 3:])
median_prices = df.groupby('borough_name')['sale_price'].median().sort_values()
colors = [BOROUGH_COLORS.get(b, '#888') for b in median_prices.index]
bars = ax2.barh(median_prices.index, median_prices.values, color=colors,
                edgecolor='white', linewidth=1.5)
style_axis(ax2, '💰 Median Price by Borough', xlabel='Price ($)')
for bar, price in zip(bars, median_prices.values):
    # Label sits just right of the bar end (offset is in dollars).
    ax2.text(bar.get_width() + 10000, bar.get_y() + bar.get_height()/2,
            f'${price/1e6:.2f}M', ha='left', va='center', fontsize=11, fontweight='bold')

# --- ROW 3: Distributions ---
# Price Distribution (histogram is capped at $5M; the median line uses all sales)
ax3 = fig.add_subplot(gs[2, :3])
price_data = df[df['sale_price'] < 5000000]['sale_price']
ax3.hist(price_data, bins=50, color='#E63946', edgecolor='white', linewidth=0.5, alpha=0.8)
ax3.axvline(df['sale_price'].median(), color='#F4A261', linestyle='--', linewidth=3,
           label=f'Median: ${df["sale_price"].median()/1e6:.2f}M')
style_axis(ax3, '📈 Price Distribution (< $5M)', xlabel='Price ($)', ylabel='Frequency')
ax3.legend(fontsize=10)

# Building Category Pie (eight most frequent categories)
ax4 = fig.add_subplot(gs[2, 3:])
if 'building_category' in df.columns:
    cat_counts = df['building_category'].value_counts().head(8)
    colors_pie = plt.cm.Set2(np.linspace(0, 1, len(cat_counts)))
    wedges, texts, autotexts = ax4.pie(cat_counts.values, labels=None, autopct='%1.1f%%',
                                       colors=colors_pie, pctdistance=0.8)
    style_axis(ax4, '🏢 Building Categories', grid=False)
    for autotext in autotexts:
        autotext.set_fontsize(9)
        autotext.set_color('white')
        autotext.set_fontweight('bold')
    # Category names go in a side legend instead of on the wedges.
    ax4.legend(cat_counts.index, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=8)

# --- ROW 4: Price per SqFt & Age ---
# Price per SqFt
ax5 = fig.add_subplot(gs[3, :3])
if 'price_per_sqft' in df_res.columns:
    pps = df_res['price_per_sqft'].dropna()
    ax5.hist(pps, bins=50, color='#2A9D8F', edgecolor='white', linewidth=0.5, alpha=0.8)
    ax5.axvline(pps.median(), color='#F4A261', linestyle='--', linewidth=3,
               label=f'Median: ${pps.median():.0f}/sqft')
    style_axis(ax5, '📏 Price per SqFt (Residential)', xlabel='$/SqFt', ylabel='Frequency')
    ax5.legend(fontsize=10)

# Age Distribution
ax6 = fig.add_subplot(gs[3, 3:])
if 'age_category' in df.columns:
    age_order = ['New (0-10)', 'Recent (10-25)', 'Modern (25-50)',
                 'Mature (50-75)', 'Old (75-100)', 'Historic (100+)']
    # reindex imposes youngest-to-oldest order; dropna removes categories with no sales.
    age_counts = df['age_category'].value_counts().reindex(age_order).dropna()
    colors_age = [AGE_COLORS.get(a, '#888') for a in age_counts.index]
    bars = ax6.bar(range(len(age_counts)), age_counts.values, color=colors_age,
                   edgecolor='white', linewidth=1)
    ax6.set_xticks(range(len(age_counts)))
    # Tick label is the first word of the category, e.g. 'Historic'.
    ax6.set_xticklabels([a.split()[0] for a in age_counts.index], rotation=45, ha='right')
    style_axis(ax6, '🏛️ Building Age Distribution', ylabel='Count')

# --- ROW 5: Price Tiers & Borough PPS ---
# Price Tiers
ax7 = fig.add_subplot(gs[4, :3])
if 'price_tier' in df.columns:
    tier_order = ['Under $300K', '$300K-600K', '$600K-900K', '$900K-1.5M', '$1.5M-3M', 'Over $3M']
    tier_counts = df['price_tier'].value_counts().reindex(tier_order).dropna()
    colors_tier = [PRICE_TIER_COLORS.get(t, '#888') for t in tier_counts.index]
    bars = ax7.bar(range(len(tier_counts)), tier_counts.values, color=colors_tier,
                   edgecolor='white', linewidth=1)
    ax7.set_xticks(range(len(tier_counts)))
    ax7.set_xticklabels(tier_counts.index, rotation=45, ha='right', fontsize=9)
    style_axis(ax7, '💵 Sales by Price Tier', ylabel='Count')
    add_value_labels(ax7, bars, offset=100, fontsize=9)

# Borough PPS Comparison
ax8 = fig.add_subplot(gs[4, 3:])
if 'price_per_sqft' in df_res.columns:
    borough_pps = df_res.groupby('borough_name')['price_per_sqft'].median().sort_values()
    colors_pps = [BOROUGH_COLORS.get(b, '#888') for b in borough_pps.index]
    bars = ax8.barh(borough_pps.index, borough_pps.values, color=colors_pps,
                    edgecolor='white', linewidth=1.5)
    style_axis(ax8, '💲 Median $/SqFt by Borough', xlabel='$/SqFt')
    for bar, pps_val in zip(bars, borough_pps.values):
        ax8.text(bar.get_width() + 10, bar.get_y() + bar.get_height()/2,
                f'${pps_val:.0f}', ha='left', va='center', fontsize=11, fontweight='bold')

save_chart('chart_01_main_dashboard.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 02: BOROUGH COMPARISON DASHBOARD
# -----------------------------------------------------------------------------
chart_num += 1

fig = plt.figure(figsize=(20, 14))
fig.suptitle('NYC BOROUGH COMPARISON DASHBOARD', fontsize=28, fontweight='bold', y=0.98)

# Three rows (stat cards, price histograms, $/sqft histograms), one column per borough.
gs = GridSpec(3, 5, figure=fig, hspace=0.35, wspace=0.3)

boroughs = ['Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island']

# Text styles shared by every stat card.
_centered = dict(ha='center', va='center')
_caption = dict(_centered, fontsize=9, color='#8B949E')
_res_has_borough = 'borough_name' in df_res.columns

# Top row: Individual borough stats
for col, borough in enumerate(boroughs):
    ax = fig.add_subplot(gs[0, col])
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.axis('off')

    b_data = df[df['borough_name'] == borough]
    if _res_has_borough:
        b_res = df_res[df_res['borough_name'] == borough]
    else:
        b_res = pd.DataFrame()

    color = BOROUGH_COLORS.get(borough, '#888')

    # Rounded card outlined in the borough's color.
    ax.add_patch(mpatches.FancyBboxPatch((0.02, 0.02), 0.96, 0.96,
                                         boxstyle="round,pad=0.02",
                                         facecolor='#161B22',
                                         edgecolor=color, linewidth=3))

    # Card content from top to bottom: name, sales count, median price, $/sqft.
    ax.text(0.5, 0.88, borough.upper(), fontsize=12, fontweight='bold', color=color, **_centered)
    ax.text(0.5, 0.72, f"{len(b_data):,}", fontsize=18, fontweight='bold', color='white', **_centered)
    ax.text(0.5, 0.60, 'Properties', **_caption)
    ax.text(0.5, 0.45, f"${b_data['sale_price'].median()/1e6:.2f}M",
            fontsize=14, fontweight='bold', color='#2A9D8F', **_centered)
    ax.text(0.5, 0.33, 'Median Price', **_caption)
    if len(b_res) > 0 and 'price_per_sqft' in b_res.columns:
        pps = b_res['price_per_sqft'].median()
        ax.text(0.5, 0.18, f"${pps:.0f}/sqft", fontsize=12, fontweight='bold',
                color='#F4A261', **_centered)
    ax.text(0.5, 0.08, '$/SqFt', **_caption)

# Second row: Price distributions by borough (sales under $5M only)
_under_cap = df['sale_price'] < 5000000
for col, borough in enumerate(boroughs):
    ax = fig.add_subplot(gs[1, col])
    b_data = df.loc[(df['borough_name'] == borough) & _under_cap, 'sale_price']
    color = BOROUGH_COLORS.get(borough, '#888')
    ax.hist(b_data, bins=30, color=color, edgecolor='white', linewidth=0.5, alpha=0.8)
    ax.axvline(b_data.median(), color='white', linestyle='--', linewidth=2)
    ax.set_title(f'{borough}\nMedian: ${b_data.median()/1e6:.2f}M', fontsize=11, fontweight='bold')
    ax.set_xlabel('Price', fontsize=9)
    ax.xaxis.set_major_formatter(mticker.FuncFormatter(format_currency))
    ax.tick_params(axis='x', labelsize=8, rotation=45)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

# Third row: Price per SqFt distributions by borough
for col, borough in enumerate(boroughs):
    ax = fig.add_subplot(gs[2, col])
    if 'price_per_sqft' not in df_res.columns:
        # No $/sqft data: the panel stays as an empty default axes.
        continue
    b_data = df_res.loc[df_res['borough_name'] == borough, 'price_per_sqft'].dropna()
    color = BOROUGH_COLORS.get(borough, '#888')
    if len(b_data) > 0:
        ax.hist(b_data, bins=30, color=color, edgecolor='white', linewidth=0.5, alpha=0.8)
        ax.axvline(b_data.median(), color='white', linestyle='--', linewidth=2)
        ax.set_title(f'$/SqFt\nMedian: ${b_data.median():.0f}', fontsize=11, fontweight='bold')
    ax.set_xlabel('$/SqFt', fontsize=9)
    ax.tick_params(axis='x', labelsize=8)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

save_chart('chart_02_borough_dashboard.png', chart_num)

# =============================================================================
# SECTION 2: BOROUGH ANALYSIS
# =============================================================================

# Section banner; separator restored to the box-drawing character that had been mis-decoded.
print("\n" + "─" * 50)
print("SECTION 2: BOROUGH ANALYSIS")
print("─" * 50)

# -----------------------------------------------------------------------------
# CHART 03: Property Count by Borough
# -----------------------------------------------------------------------------
chart_num += 1

fig, ax = plt.subplots(figsize=(14, 9))

# Ascending sort puts the busiest borough at the top of the horizontal chart.
borough_counts = df['borough_name'].value_counts().sort_values(ascending=True)
colors = [BOROUGH_COLORS.get(name, '#888') for name in borough_counts.index]

bars = ax.barh(borough_counts.index, borough_counts.values, color=colors,
               edgecolor='white', linewidth=2, height=0.7)

# Label every bar with its count and its share of all sales.
_n_sales = len(df)
for rect, count in zip(bars, borough_counts.values):
    pct = count / _n_sales * 100
    ax.text(rect.get_width() + 100, rect.get_y() + rect.get_height()/2,
            f'{count:,} ({pct:.1f}%)', ha='left', va='center',
            fontsize=12, fontweight='bold')

style_axis(ax, 'Number of Property Sales by Borough', xlabel='Number of Properties')
# Extra room on the right so the labels are not clipped.
ax.set_xlim(0, borough_counts.max() * 1.25)

# Add total annotation
_note_box = dict(boxstyle='round', facecolor='#161B22', edgecolor='#30363D')
ax.text(0.95, 0.05, f'Total: {len(df):,} properties', transform=ax.transAxes,
        fontsize=14, ha='right', va='bottom', bbox=_note_box)

save_chart('chart_03_property_count_borough.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 04: Median Price by Borough (Horizontal)
# -----------------------------------------------------------------------------
chart_num += 1

fig, ax = plt.subplots(figsize=(14, 9))

# Borough medians, cheapest first (drawn bottom to top).
median_prices = df.groupby('borough_name')['sale_price'].median().sort_values()
colors = [BOROUGH_COLORS.get(name, '#888') for name in median_prices.index]

bars = ax.barh(median_prices.index, median_prices.values, color=colors,
               edgecolor='white', linewidth=2, height=0.7)

# Exact dollar amount printed just past the end of each bar.
for rect, price in zip(bars, median_prices.values):
    y_mid = rect.get_y() + rect.get_height()/2
    ax.text(rect.get_width() + 20000, y_mid, f'${price:,.0f}',
            ha='left', va='center', fontsize=13, fontweight='bold')

style_axis(ax, 'Median Sale Price by Borough', xlabel='Median Price ($)')
ax.xaxis.set_major_formatter(mticker.FuncFormatter(format_currency))

# Add citywide median
citywide = df['sale_price'].median()
ax.axvline(citywide, color='#F4A261', linestyle='--', linewidth=3,
           label=f'Citywide Median: ${citywide:,.0f}')
ax.legend(fontsize=12, loc='lower right')

save_chart('chart_04_median_price_borough.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 05: Mean vs Median Price by Borough
# -----------------------------------------------------------------------------
chart_num += 1

fig, ax = plt.subplots(figsize=(14, 9))

boroughs = ['Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island']
x = np.arange(len(boroughs))
width = 0.35

# Per-borough statistics, in the fixed borough order above.
medians = [df.loc[df['borough_name'] == b, 'sale_price'].median() for b in boroughs]
means = [df.loc[df['borough_name'] == b, 'sale_price'].mean() for b in boroughs]

# Two bars per borough, in millions of dollars: median on the left, mean on the right.
bars1 = ax.bar(x - width/2, np.array(medians)/1e6, width, label='Median',
               color='#2A9D8F', edgecolor='white', linewidth=1.5)
bars2 = ax.bar(x + width/2, np.array(means)/1e6, width, label='Mean',
               color='#E63946', edgecolor='white', linewidth=1.5)

ax.set_xticks(x)
ax.set_xticklabels(boroughs, fontsize=12)
style_axis(ax, 'Mean vs Median Sale Price by Borough\n(Shows price skewness)',
           ylabel='Price (Millions $)')
ax.legend(fontsize=12)

# Add value labels (all median bars first, then all mean bars)
for rect in (*bars1, *bars2):
    top = rect.get_height()
    ax.text(rect.get_x() + rect.get_width()/2, top + 0.05, f'${top:.2f}M',
            ha='center', va='bottom', fontsize=9, fontweight='bold')

save_chart('chart_05_mean_vs_median_borough.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 06: Total Market Value by Borough
# -----------------------------------------------------------------------------
chart_num += 1

fig, ax = plt.subplots(figsize=(14, 9))

# Dollar volume per borough, smallest first.
total_values = df.groupby('borough_name')['sale_price'].sum().sort_values()
colors = [BOROUGH_COLORS.get(name, '#888') for name in total_values.index]

# Bars are plotted in billions of dollars.
bars = ax.barh(total_values.index, total_values.values / 1e9, color=colors,
               edgecolor='white', linewidth=2, height=0.7)

# Label: value in billions plus the borough's share of the citywide total.
_market_total = total_values.sum()
for rect, val in zip(bars, total_values.values):
    pct = val / _market_total * 100
    ax.text(rect.get_width() + 0.2, rect.get_y() + rect.get_height()/2,
            f'${val/1e9:.1f}B ({pct:.1f}%)', ha='left', va='center',
            fontsize=12, fontweight='bold')

style_axis(ax, 'Total Market Value by Borough', xlabel='Total Value (Billions $)')

# Add total annotation
_note_box = dict(boxstyle='round', facecolor='#161B22', edgecolor='#30363D')
ax.text(0.95, 0.05, f'Total NYC Market: ${total_values.sum()/1e9:.1f}B',
        transform=ax.transAxes, fontsize=14, ha='right', va='bottom', bbox=_note_box)

save_chart('chart_06_total_value_borough.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 07: Borough Market Share (Pie Chart)
# -----------------------------------------------------------------------------
chart_num += 1

fig, axes = plt.subplots(1, 2, figsize=(16, 8))
ax1, ax2 = axes[0], axes[1]

# Left pie: share of sales count. Right pie: share of dollar volume.
borough_counts = df['borough_name'].value_counts()
borough_values = df.groupby('borough_name')['sale_price'].sum()

_panels = (
    (ax1, borough_counts, 'Market Share by Property Count'),
    (ax2, borough_values, 'Market Share by Total Value'),
)
for _pie_ax, _shares, _heading in _panels:
    colors = [BOROUGH_COLORS.get(name, '#888') for name in _shares.index]
    # Every wedge is pulled out slightly and drawn with a shadow.
    wedges, texts, autotexts = _pie_ax.pie(
        _shares.values, labels=_shares.index,
        autopct='%1.1f%%', colors=colors, pctdistance=0.75,
        explode=[0.02]*len(_shares), shadow=True)
    _pie_ax.set_title(_heading, fontsize=16, fontweight='bold', pad=20)
    # Percentage labels inside the wedges: bold white.
    for autotext in autotexts:
        autotext.set_fontsize(11)
        autotext.set_color('white')
        autotext.set_fontweight('bold')

save_chart('chart_07_borough_market_share.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 08: Price Distribution Box Plot by Borough
# -----------------------------------------------------------------------------
chart_num += 1

fig, ax = plt.subplots(figsize=(14, 9))

borough_order = ['Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island']
# Drop missing prices first: a single NaN makes the box statistics NaN and
# leaves that borough's box empty.
data_boxes = [df.loc[df['borough_name'] == b, 'sale_price'].dropna().values
              for b in borough_order]

# Tick labels are set explicitly after plotting. The `labels=` keyword of
# boxplot is deprecated since Matplotlib 3.9 (renamed `tick_labels`), while
# set_xticklabels works on old and new versions alike.
bp = ax.boxplot(data_boxes, patch_artist=True, showfliers=False,
               whiskerprops=dict(color='white'), capprops=dict(color='white'),
               medianprops=dict(color='white', linewidth=2))
ax.set_xticklabels(borough_order)

for patch, borough in zip(bp['boxes'], borough_order):
    patch.set_facecolor(BOROUGH_COLORS.get(borough, '#888'))
    patch.set_alpha(0.7)
    patch.set_edgecolor('white')
    patch.set_linewidth(1.5)

# Add median values (boxes sit at x = 1..N); a borough with no sales has a NaN
# median and gets no label.
medians = [df[df['borough_name'] == b]['sale_price'].median() for b in borough_order]
for i, med in enumerate(medians):
    if pd.notna(med):
        ax.text(i + 1, med, f'${med/1e6:.2f}M', ha='center', va='bottom',
               fontsize=10, fontweight='bold', color='white',
               bbox=dict(boxstyle='round,pad=0.2', facecolor='#161B22', edgecolor='#30363D'))

style_axis(ax, 'Sale Price Distribution by Borough\n(Outliers excluded for clarity)',
           xlabel='Borough', ylabel='Sale Price ($)')
# Anchor the price axis at zero and keep the automatic upper limit.
ax.set_ylim(0, ax.get_ylim()[1])
ax.yaxis.set_major_formatter(mticker.FuncFormatter(format_currency))

save_chart('chart_08_price_boxplot_borough.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 09: Price Distribution Violin Plot by Borough
# -----------------------------------------------------------------------------
chart_num += 1

fig, ax = plt.subplots(figsize=(14, 9))

# Filter for reasonable display: only sales under $5M are drawn.
df_filtered = df[df['sale_price'] < 5000000]
borough_order = ['Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island']
data_violin = [df_filtered.loc[df_filtered['borough_name'] == b, 'sale_price'].values
               for b in borough_order]

# One violin per borough at x = 1..5.
_slots = range(1, len(borough_order) + 1)
parts = ax.violinplot(data_violin, positions=_slots, showmeans=True, showmedians=True)

# Violin bodies take the borough color.
for _name, _body in zip(borough_order, parts['bodies']):
    _body.set_facecolor(BOROUGH_COLORS.get(_name, '#888'))
    _body.set_edgecolor('white')
    _body.set_alpha(0.7)

# Mean marker in orange; median, center bar and extremes in white.
_line_colors = (
    ('cmeans', '#F4A261'),
    ('cmedians', 'white'),
    ('cbars', 'white'),
    ('cmins', 'white'),
    ('cmaxes', 'white'),
)
for _key, _shade in _line_colors:
    parts[_key].set_color(_shade)

ax.set_xticks(_slots)
ax.set_xticklabels(borough_order, fontsize=12)
style_axis(ax, 'Sale Price Distribution by Borough (Violin Plot)\n(Under $5M for clarity)',
           xlabel='Borough', ylabel='Sale Price ($)')
ax.yaxis.set_major_formatter(mticker.FuncFormatter(format_currency))

# Add legend: empty plots exist only to create the two legend entries.
for _shade, _caption in (('#F4A261', 'Mean'), ('white', 'Median')):
    ax.plot([], [], 'o', color=_shade, label=_caption)
ax.legend(fontsize=11)

save_chart('chart_09_price_violin_borough.png', chart_num)

# =============================================================================
# SECTION 3: PRICE ANALYSIS
# =============================================================================

# Section banner; separator restored to the box-drawing character that had been mis-decoded.
print("\n" + "─" * 50)
print("SECTION 3: PRICE ANALYSIS")
print("─" * 50)

# -----------------------------------------------------------------------------
# CHART 10: Overall Price Distribution Histogram
# -----------------------------------------------------------------------------
chart_num += 1

fig, ax = plt.subplots(figsize=(14, 9))

# The histogram shows sales under $5M; the statistics below use every sale.
price_data = df[df['sale_price'] < 5000000]['sale_price']
_all_prices = df['sale_price']
_overall_median = _all_prices.median()
_overall_mean = _all_prices.mean()

n, bins, patches = ax.hist(price_data, bins=80, color='#E63946', edgecolor='white',
                           linewidth=0.3, alpha=0.8)

# Color gradient based on price: each bin is recolored by its position in the colormap.
cm = plt.cm.RdYlGn_r
_n_bins = len(patches)
for _idx, _rect in enumerate(patches):
    _rect.set_facecolor(cm(_idx / _n_bins))

# Reference lines for the overall median and mean.
ax.axvline(_overall_median, color='#F4A261', linestyle='--', linewidth=3,
           label=f'Median: ${_overall_median:,.0f}')
ax.axvline(_overall_mean, color='#00D4AA', linestyle='--', linewidth=3,
           label=f'Mean: ${_overall_mean:,.0f}')

style_axis(ax, 'NYC Property Sale Price Distribution (Under $5M)',
           xlabel='Sale Price ($)', ylabel='Number of Properties')
ax.legend(fontsize=12, loc='upper right')
ax.xaxis.set_major_formatter(mticker.FuncFormatter(format_currency))

# Stats box
stats_text = "\n".join([
    f"Total: {len(df):,}",
    f"Shown: {len(price_data):,}",
    f"Median: ${_overall_median:,.0f}",
    f"Mean: ${_overall_mean:,.0f}",
    f"Std Dev: ${_all_prices.std():,.0f}",
])
ax.text(0.97, 0.95, stats_text, transform=ax.transAxes, fontsize=11,
        verticalalignment='top', horizontalalignment='right',
        bbox=dict(boxstyle='round', facecolor='#161B22', edgecolor='#30363D'))

save_chart('chart_10_price_distribution.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 11: Price Distribution by Range (Segmented)
# -----------------------------------------------------------------------------
chart_num += 1

fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# (lower bound inclusive, upper bound exclusive, panel title, bar color)
price_ranges = [
    (0, 500000, 'Under $500K', '#2ECC71'),
    (500000, 1000000, '$500K - $1M', '#3498DB'),
    (1000000, 3000000, '$1M - $3M', '#9B59B6'),
    (3000000, 10000000, '$3M - $10M', '#E74C3C')
]

# One histogram per price band; a band without sales leaves its panel untouched.
for ax, (low, high, title, color) in zip(axes.flat, price_ranges):
    _in_band = (df['sale_price'] >= low) & (df['sale_price'] < high)
    data = df.loc[_in_band, 'sale_price']
    if data.empty:
        continue
    ax.hist(data, bins=40, color=color, edgecolor='white', linewidth=0.5, alpha=0.8)
    ax.axvline(data.median(), color='white', linestyle='--', linewidth=2,
               label=f'Median: ${data.median():,.0f}')
    style_axis(ax, f'{title}\n({len(data):,} properties)', xlabel='Price ($)', ylabel='Count')
    ax.legend(fontsize=10)
    ax.xaxis.set_major_formatter(mticker.FuncFormatter(format_currency))

plt.suptitle('Price Distribution by Range', fontsize=20, fontweight='bold', y=1.02)
save_chart('chart_11_price_distribution_segmented.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 12: Price Tier Distribution
# -----------------------------------------------------------------------------
chart_num += 1

# Drawn only when the dataset carries a 'price_tier' column; the chart number
# is consumed either way so later charts keep their numbering.
if 'price_tier' in df.columns:
    fig, ax = plt.subplots(figsize=(14, 9))

    tier_order = ['Under $300K', '$300K-600K', '$600K-900K', '$900K-1.5M', '$1.5M-3M', 'Over $3M']
    # reindex orders the tiers cheapest first, and dropna removes tiers with no
    # sales. When a tier is absent, reindex turns the counts into floats, so
    # cast back to int; otherwise the labels below would read '1,234.0'.
    tier_counts = df['price_tier'].value_counts().reindex(tier_order).dropna().astype(int)
    colors = [PRICE_TIER_COLORS.get(t, '#888') for t in tier_counts.index]

    bars = ax.bar(range(len(tier_counts)), tier_counts.values, color=colors,
                  edgecolor='white', linewidth=2)

    ax.set_xticks(range(len(tier_counts)))
    ax.set_xticklabels(tier_counts.index, rotation=45, ha='right', fontsize=11)
    style_axis(ax, 'Property Sales by Price Tier', ylabel='Number of Properties')

    # Add value labels with percentages (share of the sales in the plotted tiers)
    total = tier_counts.sum()
    for bar, count in zip(bars, tier_counts.values):
        pct = count / total * 100
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 100,
               f'{count:,}\n({pct:.1f}%)', ha='center', va='bottom',
               fontsize=10, fontweight='bold')

    save_chart('chart_12_price_tier_distribution.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 13: Price Tier Stacked by Borough
# -----------------------------------------------------------------------------
chart_num += 1

# Drawn only when the dataset carries a 'price_tier' column.
if 'price_tier' in df.columns:
    fig, ax = plt.subplots(figsize=(16, 10))

    tier_order = ['Under $300K', '$300K-600K', '$600K-900K', '$900K-1.5M', '$1.5M-3M', 'Over $3M']
    boroughs = ['Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island']

    # Borough x tier table of sale counts.
    tier_borough = df.groupby(['borough_name', 'price_tier']).size().unstack(fill_value=0)
    # Impose row and column order in one step. fill_value=0 on BOTH axes means a
    # borough or tier absent from the data becomes a zero-height segment rather
    # than NaN.
    tier_borough = tier_borough.reindex(index=boroughs, columns=tier_order, fill_value=0)

    # Stacked bar chart: each tier is drawn on top of the running total of the
    # tiers below it. Every tier is a column after the reindex above.
    bottom = np.zeros(len(boroughs))
    for tier in tier_order:
        color = PRICE_TIER_COLORS.get(tier, '#888')
        values = tier_borough[tier].values
        ax.bar(boroughs, values, bottom=bottom, label=tier, color=color,
               edgecolor='white', linewidth=0.5)
        bottom += values

    style_axis(ax, 'Price Tier Distribution by Borough', ylabel='Number of Properties')
    # Legend sits outside the plot area, to the right.
    ax.legend(title='Price Tier', bbox_to_anchor=(1.02, 1), loc='upper left', fontsize=10)

    save_chart('chart_13_price_tier_by_borough.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 14: Cumulative Price Distribution
# -----------------------------------------------------------------------------
chart_num += 1

fig, ax = plt.subplots(figsize=(14, 9))

# Empirical CDF: prices in ascending order against their rank as a percentage.
sorted_prices = df['sale_price'].sort_values()
n_sales = len(sorted_prices)
cumulative = np.arange(1, n_sales + 1) / n_sales * 100

curve_color = '#E63946'
ax.plot(sorted_prices.values, cumulative, color=curve_color, linewidth=3)
ax.fill_between(sorted_prices.values, cumulative, alpha=0.3, color=curve_color)

# Cross-hair, marker and caption at each key percentile.
percentiles = [25, 50, 75, 90, 95]
guide_style = {'color': '#30363D', 'linestyle': ':', 'alpha': 0.5}
for p, val in zip(percentiles, np.percentile(df['sale_price'], percentiles)):
    ax.axhline(p, **guide_style)
    ax.axvline(val, **guide_style)
    ax.plot(val, p, 'o', color='#F4A261', markersize=10)
    ax.text(val, p + 2, f'{p}th: ${val/1e6:.2f}M', fontsize=9, ha='center')

style_axis(ax, 'Cumulative Distribution of Sale Prices',
           xlabel='Sale Price ($)', ylabel='Cumulative Percentage (%)')
ax.xaxis.set_major_formatter(mticker.FuncFormatter(format_currency))
# The x-axis stops at the 99th percentile so the extreme tail does not
# compress the rest of the curve.
ax.set_xlim(0, np.percentile(df['sale_price'], 99))
ax.set_ylim(0, 100)

save_chart('chart_14_cumulative_price_distribution.png', chart_num)

# =============================================================================
# SECTION 4: PRICE PER SQFT ANALYSIS
# =============================================================================

# Console banner for section 4. The rule is U+2500 (box-drawing horizontal);
# the previous literal was that character's UTF-8 bytes mis-decoded into three
# unrelated characters, so each repeat printed garbage.
print("\n" + "─" * 50)
print("SECTION 4: PRICE PER SQFT ANALYSIS")
print("─" * 50)

# -----------------------------------------------------------------------------
# CHART 15: Price per SqFt Distribution (All Residential)
# -----------------------------------------------------------------------------
chart_num += 1

# Histogram of price per square foot across all residential rows.
if 'price_per_sqft' in df_res.columns:
    fig, ax = plt.subplots(figsize=(14, 9))

    pps_data = df_res['price_per_sqft'].dropna()
    median_pps, mean_pps = pps_data.median(), pps_data.mean()

    n, bins, patches = ax.hist(pps_data, bins=60, color='#2A9D8F', edgecolor='white',
                               linewidth=0.3, alpha=0.8)

    # Dashed reference lines: median first, then mean.
    for value, name, line_color in ((median_pps, 'Median', '#F4A261'),
                                    (mean_pps, 'Mean', '#E63946')):
        ax.axvline(value, color=line_color, linestyle='--', linewidth=3,
                   label=f'{name}: ${value:,.0f}/sqft')

    style_axis(ax, 'Price per Square Foot Distribution\n(Residential Properties)',
               xlabel='Price per SqFt ($)', ylabel='Number of Properties')
    ax.legend(fontsize=12, loc='upper right')

    # Summary box, placed below the legend in the upper-right corner.
    stats_text = "\n".join([
        f"n = {len(pps_data):,}",
        f"Median: ${median_pps:,.0f}",
        f"Mean: ${mean_pps:,.0f}",
        f"Std Dev: ${pps_data.std():,.0f}",
        f"Min: ${pps_data.min():,.0f}",
        f"Max: ${pps_data.max():,.0f}",
    ])
    ax.text(0.97, 0.75, stats_text, transform=ax.transAxes, fontsize=11,
            verticalalignment='top', horizontalalignment='right',
            bbox=dict(boxstyle='round', facecolor='#161B22', edgecolor='#30363D'))

    save_chart('chart_15_pps_distribution.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 16: Price per SqFt by Borough (Bar Chart)
# -----------------------------------------------------------------------------
chart_num += 1

# Horizontal bars of median $/sqft per borough, sorted ascending.
if 'price_per_sqft' in df_res.columns:
    fig, ax = plt.subplots(figsize=(14, 9))

    borough_pps = (
        df_res.groupby('borough_name')['price_per_sqft']
        .agg(['median', 'mean', 'count'])
        .sort_values('median')
    )

    colors = [BOROUGH_COLORS.get(name, '#888') for name in borough_pps.index]
    bars = ax.barh(borough_pps.index, borough_pps['median'].values, color=colors,
                   edgecolor='white', linewidth=2, height=0.7)

    # Annotate each bar with its median and sample size, just past the bar end.
    for bar, med, cnt in zip(bars, borough_pps['median'], borough_pps['count']):
        y_mid = bar.get_y() + bar.get_height() / 2
        ax.text(bar.get_width() + 10, y_mid,
                f'${med:,.0f}/sqft (n={int(cnt):,})',
                ha='left', va='center', fontsize=11, fontweight='bold')

    style_axis(ax, 'Median Price per SqFt by Borough\n(Residential Properties)',
               xlabel='Median $/SqFt')

    # Citywide median as a vertical reference line.
    citywide = df_res['price_per_sqft'].median()
    ax.axvline(citywide, color='#F4A261', linestyle='--', linewidth=3,
               label=f'Citywide Median: ${citywide:.0f}/sqft')
    ax.legend(fontsize=11, loc='lower right')

    save_chart('chart_16_pps_by_borough_bar.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 17: Price per SqFt by Borough (Box Plot)
# -----------------------------------------------------------------------------
chart_num += 1

# Box plot of $/sqft per borough (outliers hidden).
if 'price_per_sqft' in df_res.columns:
    fig, ax = plt.subplots(figsize=(14, 9))

    borough_order = ['Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island']
    data_boxes = [
        df_res.loc[df_res['borough_name'] == b, 'price_per_sqft'].dropna().values
        for b in borough_order
    ]

    # Tick labels are set on the axis instead of through boxplot(labels=...):
    # that keyword is deprecated since Matplotlib 3.9 (renamed tick_labels),
    # while set_xticks/set_xticklabels behave the same on old and new releases.
    bp = ax.boxplot(data_boxes, patch_artist=True, showfliers=False)
    ax.set_xticks(range(1, len(borough_order) + 1))
    ax.set_xticklabels(borough_order)

    for patch, borough in zip(bp['boxes'], borough_order):
        patch.set_facecolor(BOROUGH_COLORS.get(borough, '#888'))
        patch.set_alpha(0.7)
        patch.set_edgecolor('white')

    for median in bp['medians']:
        median.set_color('white')
        median.set_linewidth(2)

    # Median labels, taken from the arrays already extracted for the boxes.
    # A borough with no data is skipped so no "$nan" label is drawn at a NaN
    # position.
    for position, values in enumerate(data_boxes, start=1):
        if len(values) == 0:
            continue
        med = np.median(values)
        ax.text(position, med + 20, f'${med:.0f}', ha='center', va='bottom',
                fontsize=11, fontweight='bold')

    style_axis(ax, 'Price per SqFt Distribution by Borough\n(Residential Properties)',
               xlabel='Borough', ylabel='$/SqFt')

    save_chart('chart_17_pps_boxplot_borough.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 18-22: Individual Borough PPS Charts
# -----------------------------------------------------------------------------

# One $/sqft histogram per borough. chart_num advances for every borough even
# when the chart itself is skipped, so later chart numbers stay fixed.
for borough in ('Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island'):
    chart_num += 1

    if 'price_per_sqft' not in df_res.columns:
        continue

    fig, ax = plt.subplots(figsize=(14, 9))

    in_borough = df_res['borough_name'] == borough
    borough_data = df_res[in_borough]['price_per_sqft'].dropna()
    color = BOROUGH_COLORS.get(borough, '#888')

    if not borough_data.empty:
        ax.hist(borough_data, bins=50, color=color, edgecolor='white',
                linewidth=0.5, alpha=0.8)

        median_val = borough_data.median()
        mean_val = borough_data.mean()

        ax.axvline(median_val, color='white', linestyle='--', linewidth=3,
                   label=f'Median: ${median_val:,.0f}')
        ax.axvline(mean_val, color='#F4A261', linestyle='--', linewidth=3,
                   label=f'Mean: ${mean_val:,.0f}')

        style_axis(ax, f'{borough} - Price per Square Foot\n(Residential Properties)',
                   xlabel='Price per SqFt ($)', ylabel='Number of Properties')
        ax.legend(fontsize=12, loc='upper right')

        # Summary box outlined in the borough's own colour.
        stats_text = "\n".join([
            f"n = {len(borough_data):,}",
            f"Median: ${median_val:,.0f}",
            f"Mean: ${mean_val:,.0f}",
            f"Std Dev: ${borough_data.std():,.0f}",
            f"25th %: ${borough_data.quantile(0.25):,.0f}",
            f"75th %: ${borough_data.quantile(0.75):,.0f}",
        ])
        ax.text(0.97, 0.95, stats_text, transform=ax.transAxes, fontsize=11,
                verticalalignment='top', horizontalalignment='right',
                bbox=dict(boxstyle='round', facecolor='#161B22', edgecolor=color, linewidth=2))

    # The figure is saved even when the borough has no data (empty axes).
    slug = borough.lower().replace(" ", "_")
    save_chart(f'chart_{chart_num:02d}_pps_{slug}.png', chart_num)

# =============================================================================
# SECTION 5: BUILDING CATEGORY ANALYSIS
# =============================================================================

# Console banner for section 5. The rule is U+2500 (box-drawing horizontal);
# the previous literal was that character's UTF-8 bytes mis-decoded into three
# unrelated characters, so each repeat printed garbage.
print("\n" + "─" * 50)
print("SECTION 5: BUILDING CATEGORY ANALYSIS")
print("─" * 50)

# -----------------------------------------------------------------------------
# CHART 23: Building Category Count (Horizontal Bar)
# -----------------------------------------------------------------------------
chart_num += 1

# Horizontal bars: number of rows per building category, largest on top.
if 'building_category' in df.columns:
    fig, ax = plt.subplots(figsize=(14, 12))

    cat_counts = df['building_category'].value_counts()
    n_cats = len(cat_counts)
    colors = plt.cm.Set2(np.linspace(0, 1, n_cats))

    bars = ax.barh(range(n_cats), cat_counts.values, color=colors,
                   edgecolor='white', linewidth=0.5)

    ax.set_yticks(range(n_cats))
    ax.set_yticklabels(cat_counts.index, fontsize=10)
    ax.invert_yaxis()  # first (most frequent) category at the top

    # Percentages are relative to every row in df, not only the counted ones.
    n_rows = len(df)
    for bar, count in zip(bars, cat_counts.values):
        pct = count / n_rows * 100
        y_mid = bar.get_y() + bar.get_height() / 2
        ax.text(bar.get_width() + 50, y_mid,
                f'{count:,} ({pct:.1f}%)', ha='left', va='center', fontsize=9, fontweight='bold')

    style_axis(ax, 'Properties by Building Category', xlabel='Number of Properties')

    save_chart('chart_23_building_category_count.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 24: Building Category Pie Chart
# -----------------------------------------------------------------------------
chart_num += 1

# Pie of building categories; categories under 2% of all rows are pooled.
if 'building_category' in df.columns:
    fig, ax = plt.subplots(figsize=(14, 12))

    cat_counts = df['building_category'].value_counts()

    # Split the counts at the 2% threshold and fold the small ones into 'Other'.
    threshold = 0.02 * len(df)
    is_major = cat_counts >= threshold
    main_cats = cat_counts[is_major]
    other_count = cat_counts[~is_major].sum()
    if other_count > 0:
        main_cats['Other'] = other_count

    n_slices = len(main_cats)
    colors = plt.cm.Set3(np.linspace(0, 1, n_slices))

    wedges, texts, autotexts = ax.pie(main_cats.values, labels=main_cats.index,
                                      autopct='%1.1f%%', colors=colors,
                                      pctdistance=0.75, explode=[0.02] * n_slices)

    # Small category labels; bold white percentage labels.
    plt.setp(texts, fontsize=9)
    plt.setp(autotexts, fontsize=9, color='white', fontweight='bold')

    ax.set_title('Building Category Distribution', fontsize=18, fontweight='bold', pad=20)

    save_chart('chart_24_building_category_pie.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 25: Median Price by Building Category
# -----------------------------------------------------------------------------
chart_num += 1

# Median sale price per building category, restricted to categories with at
# least 50 rows and sorted ascending.
if 'building_category' in df.columns:
    fig, ax = plt.subplots(figsize=(14, 12))

    cat_prices = (
        df.groupby('building_category')['sale_price']
        .agg(['median', 'count'])
        .loc[lambda stats: stats['count'] >= 50]
        .sort_values('median')
    )

    n_cats = len(cat_prices)
    colors = plt.cm.RdYlGn(np.linspace(0.2, 0.8, n_cats))

    bars = ax.barh(range(n_cats), cat_prices['median'].values,
                   color=colors, edgecolor='white', linewidth=0.5)

    ax.set_yticks(range(n_cats))
    ax.set_yticklabels(cat_prices.index, fontsize=10)

    # Median and sample size next to each bar.
    for bar, med, cnt in zip(bars, cat_prices['median'], cat_prices['count']):
        y_mid = bar.get_y() + bar.get_height() / 2
        ax.text(bar.get_width() + 10000, y_mid,
                f'${med:,.0f} (n={int(cnt):,})',
                ha='left', va='center', fontsize=9, fontweight='bold')

    style_axis(ax, 'Median Price by Building Category\n(Min 50 properties)',
               xlabel='Median Price ($)')
    ax.xaxis.set_major_formatter(mticker.FuncFormatter(format_currency))

    save_chart('chart_25_median_price_by_category.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 26: Price per SqFt by Building Category
# -----------------------------------------------------------------------------
chart_num += 1

# Median $/sqft per building category (residential rows, at least 30 per
# category), sorted ascending.
if 'price_per_sqft' in df_res.columns and 'building_category' in df_res.columns:
    fig, ax = plt.subplots(figsize=(14, 12))

    cat_pps = (
        df_res.groupby('building_category')['price_per_sqft']
        .agg(['median', 'count'])
        .loc[lambda stats: stats['count'] >= 30]
        .sort_values('median')
    )

    n_cats = len(cat_pps)
    colors = plt.cm.RdYlBu_r(np.linspace(0.2, 0.8, n_cats))

    bars = ax.barh(range(n_cats), cat_pps['median'].values,
                   color=colors, edgecolor='white', linewidth=0.5)

    ax.set_yticks(range(n_cats))
    ax.set_yticklabels(cat_pps.index, fontsize=10)

    # Median and sample size next to each bar.
    for bar, med, cnt in zip(bars, cat_pps['median'], cat_pps['count']):
        y_mid = bar.get_y() + bar.get_height() / 2
        ax.text(bar.get_width() + 5, y_mid,
                f'${med:,.0f}/sqft (n={int(cnt):,})',
                ha='left', va='center', fontsize=9, fontweight='bold')

    style_axis(ax, 'Median $/SqFt by Building Category\n(Residential, min 30 properties)',
               xlabel='Median $/SqFt')

    save_chart('chart_26_pps_by_category.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 27: Building Category by Borough Heatmap
# -----------------------------------------------------------------------------
chart_num += 1

# Heatmap of row counts: ten most frequent building categories x borough.
if 'building_category' in df.columns:
    fig, ax = plt.subplots(figsize=(16, 12))

    top_cats = df['building_category'].value_counts().head(10).index
    boroughs = ['Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island']

    # Count matrix with rows in top_cats order and columns in borough order.
    in_top = df['building_category'].isin(top_cats)
    pivot = (
        df[in_top]
        .groupby(['building_category', 'borough_name'])
        .size()
        .unstack(fill_value=0)
        .reindex(columns=boroughs, fill_value=0)
        .reindex(top_cats)
    )

    im = ax.imshow(pivot.values, cmap='YlOrRd', aspect='auto')

    ax.set_xticks(range(len(boroughs)))
    ax.set_xticklabels(boroughs, fontsize=11)
    ax.set_yticks(range(len(top_cats)))
    ax.set_yticklabels(pivot.index, fontsize=10)

    # Cell annotations: white text on the darker half of the colour range,
    # black on the lighter half.
    cells = pivot.values
    half_max = cells.max() * 0.5 if cells.size else 0
    for (i, j), val in np.ndenumerate(cells):
        color = 'white' if val > half_max else 'black'
        ax.text(j, i, f'{val:,}', ha='center', va='center',
                fontsize=9, color=color, fontweight='bold')

    cbar = plt.colorbar(im, ax=ax, shrink=0.6)
    cbar.set_label('Number of Properties', fontsize=12)

    ax.set_title('Building Category by Borough (Top 10 Categories)',
                 fontsize=18, fontweight='bold', pad=20)

    save_chart('chart_27_category_borough_heatmap.png', chart_num)

# =============================================================================
# SECTION 6: BUILDING AGE ANALYSIS
# =============================================================================

# Console banner for section 6. The rule is U+2500 (box-drawing horizontal);
# the previous literal was that character's UTF-8 bytes mis-decoded into three
# unrelated characters, so each repeat printed garbage.
print("\n" + "─" * 50)
print("SECTION 6: BUILDING AGE ANALYSIS")
print("─" * 50)

# -----------------------------------------------------------------------------
# CHART 28: Building Age Distribution Histogram
# -----------------------------------------------------------------------------
chart_num += 1

# Histogram of building age with a colour gradient across the bins.
if 'building_age' in df.columns:
    fig, ax = plt.subplots(figsize=(14, 9))

    age_data = df['building_age'].dropna()
    age_median = age_data.median()
    age_mean = age_data.mean()

    n, bins, patches = ax.hist(age_data, bins=50, color='#9B59B6', edgecolor='white',
                               linewidth=0.5, alpha=0.8)

    # Recolour each bin along the plasma colormap, left to right.
    cm = plt.cm.plasma
    n_bins = len(patches)
    for i, patch in enumerate(patches):
        patch.set_facecolor(cm(i / n_bins))

    ax.axvline(age_median, color='#F4A261', linestyle='--', linewidth=3,
               label=f'Median: {age_median:.0f} years')
    ax.axvline(age_mean, color='#2A9D8F', linestyle='--', linewidth=3,
               label=f'Mean: {age_mean:.0f} years')

    style_axis(ax, 'Building Age Distribution', xlabel='Building Age (years)',
               ylabel='Number of Properties')
    ax.legend(fontsize=12)

    # Summary box in the upper-right corner.
    stats_text = "\n".join([
        f"n = {len(age_data):,}",
        f"Median: {age_median:.0f} yrs",
        f"Mean: {age_mean:.0f} yrs",
        f"Oldest: {age_data.max():.0f} yrs",
        f"Newest: {age_data.min():.0f} yrs",
    ])
    ax.text(0.97, 0.95, stats_text, transform=ax.transAxes, fontsize=11,
            verticalalignment='top', horizontalalignment='right',
            bbox=dict(boxstyle='round', facecolor='#161B22', edgecolor='#30363D'))

    save_chart('chart_28_building_age_distribution.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 29: Age Category Distribution
# -----------------------------------------------------------------------------
chart_num += 1

# Bar chart: number of rows in each building-age category, youngest first.
if 'age_category' in df.columns:
    fig, ax = plt.subplots(figsize=(14, 9))

    age_order = ['New (0-10)', 'Recent (10-25)', 'Modern (25-50)',
                 'Mature (50-75)', 'Old (75-100)', 'Historic (100+)']
    # reindex() introduces NaN for any category missing from the data, which
    # silently upcasts the counts to float. After dropping those rows the
    # counts are cast back to int so the bar labels read "1,234" rather than
    # "1,234.0".
    age_counts = (
        df['age_category'].value_counts()
        .reindex(age_order)
        .dropna()
        .astype(int)
    )
    colors = [AGE_COLORS.get(a, '#888') for a in age_counts.index]

    bars = ax.bar(range(len(age_counts)), age_counts.values, color=colors,
                  edgecolor='white', linewidth=2)

    ax.set_xticks(range(len(age_counts)))
    ax.set_xticklabels(age_counts.index, rotation=45, ha='right', fontsize=11)
    style_axis(ax, 'Properties by Building Age Category', ylabel='Number of Properties')

    # Value labels: count plus share of the categorised rows.
    total = age_counts.sum()
    for bar, count in zip(bars, age_counts.values):
        pct = count / total * 100
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 100,
                f'{count:,}\n({pct:.1f}%)', ha='center', va='bottom',
                fontsize=10, fontweight='bold')

    save_chart('chart_29_age_category_distribution.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 30: Median Price by Age Category
# -----------------------------------------------------------------------------
chart_num += 1

# Bar chart: median sale price for each building-age category, youngest first.
if 'age_category' in df.columns:
    fig, ax = plt.subplots(figsize=(14, 9))

    age_order = ['New (0-10)', 'Recent (10-25)', 'Modern (25-50)',
                 'Mature (50-75)', 'Old (75-100)', 'Historic (100+)']

    # Categories missing from the data are dropped after the reindex.
    age_prices = (
        df.groupby('age_category')['sale_price']
        .median()
        .reindex(age_order)
        .dropna()
    )
    colors = [AGE_COLORS.get(label, '#888') for label in age_prices.index]

    x_positions = range(len(age_prices))
    bars = ax.bar(x_positions, age_prices.values, color=colors,
                  edgecolor='white', linewidth=2)

    ax.set_xticks(x_positions)
    ax.set_xticklabels(age_prices.index, rotation=45, ha='right', fontsize=11)
    style_axis(ax, 'Median Sale Price by Building Age', ylabel='Median Price ($)')
    ax.yaxis.set_major_formatter(mticker.FuncFormatter(format_currency))

    # Price label (in millions) above each bar.
    for bar, price in zip(bars, age_prices.values):
        x_mid = bar.get_x() + bar.get_width() / 2
        ax.text(x_mid, bar.get_height() + 20000,
                f'${price/1e6:.2f}M', ha='center', va='bottom',
                fontsize=11, fontweight='bold')

    save_chart('chart_30_price_by_age_category.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 31: Age vs Price Scatter
# -----------------------------------------------------------------------------
chart_num += 1

# Scatter of building age against sale price, coloured by borough.
if 'building_age' in df.columns:
    fig, ax = plt.subplots(figsize=(14, 10))

    # Rows with a known age and a price under $5M; capped at 5,000 points
    # with a fixed seed so the plot is reproducible.
    has_age = df['building_age'].notna()
    under_cap = df['sale_price'] < 5000000
    sample = df[has_age & under_cap].copy()
    if len(sample) > 5000:
        sample = sample.sample(5000, random_state=42)

    colors = [BOROUGH_COLORS.get(name, '#888') for name in sample['borough_name']]

    ax.scatter(sample['building_age'], sample['sale_price'], c=colors, alpha=0.5, s=20)

    style_axis(ax, 'Building Age vs Sale Price', xlabel='Building Age (years)',
               ylabel='Sale Price ($)')
    ax.yaxis.set_major_formatter(mticker.FuncFormatter(format_currency))

    # Hand-built legend: one patch per borough colour.
    legend_patches = [
        mpatches.Patch(color=swatch, label=name)
        for name, swatch in BOROUGH_COLORS.items()
    ]
    ax.legend(handles=legend_patches, loc='upper right', fontsize=10)

    save_chart('chart_31_age_vs_price_scatter.png', chart_num)

# =============================================================================
# SECTION 7: BUILDING SIZE ANALYSIS
# =============================================================================

# Console banner for section 7. The rule is U+2500 (box-drawing horizontal);
# the previous literal was that character's UTF-8 bytes mis-decoded into three
# unrelated characters, so each repeat printed garbage.
print("\n" + "─" * 50)
print("SECTION 7: BUILDING SIZE ANALYSIS")
print("─" * 50)

# -----------------------------------------------------------------------------
# CHART 32: Building Size Distribution
# -----------------------------------------------------------------------------
chart_num += 1

# Histogram of building area for buildings between 0 and 10,000 sqft (exclusive).
if 'bldgarea' in df.columns:
    fig, ax = plt.subplots(figsize=(14, 9))

    area = df['bldgarea']
    size_data = area[(area > 0) & (area < 10000)]
    size_median = size_data.median()

    ax.hist(size_data, bins=60, color='#3498DB', edgecolor='white', linewidth=0.5, alpha=0.8)
    ax.axvline(size_median, color='#F4A261', linestyle='--', linewidth=3,
               label=f'Median: {size_median:,.0f} sqft')

    style_axis(ax, 'Building Size Distribution (Under 10K sqft)',
               xlabel='Building Area (sqft)', ylabel='Number of Properties')
    ax.legend(fontsize=12)

    # Summary box in the upper-right corner.
    stats_text = "\n".join([
        f"n = {len(size_data):,}",
        f"Median: {size_median:,.0f} sqft",
        f"Mean: {size_data.mean():,.0f} sqft",
    ])
    ax.text(0.97, 0.95, stats_text, transform=ax.transAxes, fontsize=11,
            verticalalignment='top', horizontalalignment='right',
            bbox=dict(boxstyle='round', facecolor='#161B22', edgecolor='#30363D'))

    save_chart('chart_32_building_size_distribution.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 33: Price vs Size Scatter
# -----------------------------------------------------------------------------
chart_num += 1

# Scatter of building area against sale price for residential rows, coloured
# by borough, with a least-squares trend line.
# The guard checks df_res because that is the frame actually read below.
if 'bldgarea' in df_res.columns:
    fig, ax = plt.subplots(figsize=(14, 10))

    # Buildings under 5,000 sqft sold for under $5M; capped at 5,000 points
    # with a fixed seed so the plot is reproducible.
    scatter_df = df_res[(df_res['bldgarea'] > 0) & (df_res['bldgarea'] < 5000) &
                        (df_res['sale_price'] < 5000000)].copy()

    if len(scatter_df) > 5000:
        scatter_df = scatter_df.sample(5000, random_state=42)

    colors = [BOROUGH_COLORS.get(b, '#888') for b in scatter_df['borough_name']]

    ax.scatter(scatter_df['bldgarea'], scatter_df['sale_price'], c=colors, alpha=0.5, s=20)

    style_axis(ax, 'Sale Price vs Building Size\n(Residential Properties)',
               xlabel='Building Area (sqft)', ylabel='Sale Price ($)')
    ax.yaxis.set_major_formatter(mticker.FuncFormatter(format_currency))

    # Linear trend line. A degree-1 fit needs at least two distinct x values;
    # with fewer, the line is skipped instead of letting polyfit fail or
    # return a meaningless fit.
    trend_handles = []
    if scatter_df['bldgarea'].nunique() >= 2:
        z = np.polyfit(scatter_df['bldgarea'], scatter_df['sale_price'], 1)
        p = np.poly1d(z)
        x_line = np.linspace(scatter_df['bldgarea'].min(), scatter_df['bldgarea'].max(), 100)
        trend_handles = ax.plot(x_line, p(x_line), '--', color='white', linewidth=2,
                                alpha=0.7, label='Trend')

    # The legend is built last and from explicit handles, so the trend line
    # must be passed in here for its 'Trend' label to appear.
    legend_patches = [mpatches.Patch(color=c, label=b) for b, c in BOROUGH_COLORS.items()]
    ax.legend(handles=legend_patches + trend_handles, loc='upper left', fontsize=10)

    save_chart('chart_33_price_vs_size_scatter.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 34: Building Size by Borough
# -----------------------------------------------------------------------------
chart_num += 1

# Box plot of building area per borough (outliers hidden), for buildings
# between 0 and 20,000 sqft (exclusive).
if 'bldgarea' in df.columns:
    fig, ax = plt.subplots(figsize=(14, 9))

    borough_order = ['Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island']
    size_data = df[(df['bldgarea'] > 0) & (df['bldgarea'] < 20000)]
    data_boxes = [size_data[size_data['borough_name'] == b]['bldgarea'].values for b in borough_order]

    # Tick labels are set on the axis instead of through boxplot(labels=...):
    # that keyword is deprecated since Matplotlib 3.9 (renamed tick_labels),
    # while set_xticks/set_xticklabels behave the same on old and new releases.
    bp = ax.boxplot(data_boxes, patch_artist=True, showfliers=False)
    ax.set_xticks(range(1, len(borough_order) + 1))
    ax.set_xticklabels(borough_order)

    for patch, borough in zip(bp['boxes'], borough_order):
        patch.set_facecolor(BOROUGH_COLORS.get(borough, '#888'))
        patch.set_alpha(0.7)
        patch.set_edgecolor('white')

    for median in bp['medians']:
        median.set_color('white')
        median.set_linewidth(2)

    # Median labels, taken from the arrays already extracted for the boxes.
    # A borough with no data is skipped so no "nan" label is drawn at a NaN
    # position.
    for position, values in enumerate(data_boxes, start=1):
        if len(values) == 0:
            continue
        med = np.median(values)
        ax.text(position, med + 100, f'{med:,.0f}', ha='center', va='bottom',
                fontsize=10, fontweight='bold')

    style_axis(ax, 'Building Size Distribution by Borough', xlabel='Borough',
               ylabel='Building Area (sqft)')

    save_chart('chart_34_size_by_borough.png', chart_num)

# =============================================================================
# SECTION 8: USE TYPE ANALYSIS
# =============================================================================

# Console banner for section 8. The rule is U+2500 (box-drawing horizontal);
# the previous literal was that character's UTF-8 bytes mis-decoded into three
# unrelated characters, so each repeat printed garbage.
print("\n" + "─" * 50)
print("SECTION 8: USE TYPE ANALYSIS")
print("─" * 50)

# -----------------------------------------------------------------------------
# CHART 35: Use Type Distribution
# -----------------------------------------------------------------------------
chart_num += 1

# Bar chart: number of rows per use type, most frequent first.
if 'use_type' in df.columns:
    fig, ax = plt.subplots(figsize=(14, 9))

    use_counts = df['use_type'].value_counts()
    n_types = len(use_counts)
    palette = ['#E63946', '#457B9D', '#2A9D8F', '#9B59B6', '#F4A261']
    colors = palette[:n_types]

    bars = ax.bar(range(n_types), use_counts.values, color=colors,
                  edgecolor='white', linewidth=2)

    ax.set_xticks(range(n_types))
    ax.set_xticklabels(use_counts.index, rotation=45, ha='right', fontsize=12)
    style_axis(ax, 'Property Sales by Use Type', ylabel='Number of Properties')

    # Count and share of all rows in df above each bar.
    n_rows = len(df)
    for bar, count in zip(bars, use_counts.values):
        pct = count / n_rows * 100
        x_mid = bar.get_x() + bar.get_width() / 2
        ax.text(x_mid, bar.get_height() + 100,
                f'{count:,}\n({pct:.1f}%)', ha='center', va='bottom',
                fontsize=11, fontweight='bold')

    save_chart('chart_35_use_type_distribution.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 36: Use Type Pie Chart
# -----------------------------------------------------------------------------
chart_num += 1

# Pie chart of use-type shares.
if 'use_type' in df.columns:
    fig, ax = plt.subplots(figsize=(12, 10))

    use_counts = df['use_type'].value_counts()
    n_types = len(use_counts)
    palette = ['#E63946', '#457B9D', '#2A9D8F', '#9B59B6', '#F4A261']
    colors = palette[:n_types]

    wedges, texts, autotexts = ax.pie(use_counts.values, labels=use_counts.index,
                                      autopct='%1.1f%%', colors=colors,
                                      pctdistance=0.75, explode=[0.03] * n_types,
                                      shadow=True)

    # Bold white percentage labels inside the wedges.
    plt.setp(autotexts, fontsize=12, color='white', fontweight='bold')

    ax.set_title('Property Use Type Distribution', fontsize=18, fontweight='bold', pad=20)

    save_chart('chart_36_use_type_pie.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 37: Median Price by Use Type
# -----------------------------------------------------------------------------
chart_num += 1

# Horizontal bars: median sale price per use type, sorted ascending.
if 'use_type' in df.columns:
    fig, ax = plt.subplots(figsize=(14, 9))

    use_prices = (
        df.groupby('use_type')['sale_price']
        .agg(['median', 'count'])
        .sort_values('median')
    )

    palette = ['#E63946', '#457B9D', '#2A9D8F', '#9B59B6', '#F4A261']
    colors = palette[:len(use_prices)]

    bars = ax.barh(use_prices.index, use_prices['median'].values, color=colors,
                   edgecolor='white', linewidth=2, height=0.6)

    # Median and sample size next to each bar.
    for bar, med, cnt in zip(bars, use_prices['median'], use_prices['count']):
        y_mid = bar.get_y() + bar.get_height() / 2
        ax.text(bar.get_width() + 20000, y_mid,
                f'${med:,.0f} (n={int(cnt):,})',
                ha='left', va='center', fontsize=12, fontweight='bold')

    style_axis(ax, 'Median Price by Use Type', xlabel='Median Price ($)')
    ax.xaxis.set_major_formatter(mticker.FuncFormatter(format_currency))

    save_chart('chart_37_price_by_use_type.png', chart_num)

# =============================================================================
# SECTION 9: CORRELATION & RELATIONSHIP ANALYSIS
# =============================================================================

# Console banner for section 9. The rule is U+2500 (box-drawing horizontal);
# the previous literal was that character's UTF-8 bytes mis-decoded into three
# unrelated characters, so each repeat printed garbage.
print("\n" + "─" * 50)
print("SECTION 9: CORRELATION ANALYSIS")
print("─" * 50)

# -----------------------------------------------------------------------------
# CHART 38: Correlation Heatmap
# -----------------------------------------------------------------------------
chart_num += 1

# Candidate numeric columns, narrowed to those present in df.
numeric_cols = ['sale_price', 'bldgarea', 'lotarea', 'residential_units',
                'commercial_units', 'total_units', 'building_age']
numeric_cols = [col for col in numeric_cols if col in df.columns]

# The heatmap is only drawn when at least three of the columns are available.
if len(numeric_cols) >= 3:
    fig, ax = plt.subplots(figsize=(12, 10))

    # Correlations are computed on rows where every selected column is present.
    corr_df = df[numeric_cols].dropna()
    corr_matrix = corr_df.corr()

    im = ax.imshow(corr_matrix.values, cmap='RdBu_r', vmin=-1, vmax=1, aspect='auto')

    n_vars = len(numeric_cols)
    ax.set_xticks(range(n_vars))
    ax.set_xticklabels(numeric_cols, rotation=45, ha='right', fontsize=10)
    ax.set_yticks(range(n_vars))
    ax.set_yticklabels(numeric_cols, fontsize=10)

    # Cell annotations: white text on strong correlations, black otherwise.
    for i in range(n_vars):
        for j in range(n_vars):
            val = corr_matrix.iloc[i, j]
            color = 'white' if abs(val) > 0.5 else 'black'
            ax.text(j, i, f'{val:.2f}', ha='center', va='center',
                    fontsize=10, color=color, fontweight='bold')

    cbar = plt.colorbar(im, ax=ax, shrink=0.8)
    cbar.set_label('Correlation', fontsize=12)

    ax.set_title('Correlation Matrix of Numeric Variables', fontsize=18, fontweight='bold', pad=20)

    save_chart('chart_38_correlation_heatmap.png', chart_num)

# -----------------------------------------------------------------------------
# CHART 39: Price vs Total Units Scatter
# -----------------------------------------------------------------------------
chart_num += 1

# Scatter of unit count against sale price, coloured by borough.
if 'total_units' in df.columns:
    fig, ax = plt.subplots(figsize=(14, 10))

    # 1 to 50 units, price under $10M; capped at 3,000 points with a fixed
    # seed so the plot is reproducible.
    units = df['total_units']
    keep = (units > 0) & (units <= 50) & (df['sale_price'] < 10000000)
    scatter_df = df[keep].copy()

    if len(scatter_df) > 3000:
        scatter_df = scatter_df.sample(3000, random_state=42)

    colors = [BOROUGH_COLORS.get(name, '#888') for name in scatter_df['borough_name']]

    ax.scatter(scatter_df['total_units'], scatter_df['sale_price'], c=colors, alpha=0.5, s=30)

    style_axis(ax, 'Sale Price vs Number of Units', xlabel='Total Units',
               ylabel='Sale Price ($)')
    ax.yaxis.set_major_formatter(mticker.FuncFormatter(format_currency))

    # Hand-built legend: one patch per borough colour.
    legend_patches = [
        mpatches.Patch(color=swatch, label=name)
        for name, swatch in BOROUGH_COLORS.items()
    ]
    ax.legend(handles=legend_patches, loc='upper left', fontsize=10)

    save_chart('chart_39_price_vs_units.png', chart_num)

# =============================================================================
# SECTION 10: SUMMARY STATISTICS CHART
# =============================================================================

# Console banner for section 10. The rule is U+2500 (box-drawing horizontal);
# the previous literal was that character's UTF-8 bytes mis-decoded into three
# unrelated characters, so each repeat printed garbage.
print("\n" + "─" * 50)
print("SECTION 10: SUMMARY STATISTICS")
print("─" * 50)

# -----------------------------------------------------------------------------
# CHART 40: Complete Summary Statistics Dashboard
# -----------------------------------------------------------------------------
chart_num += 1

fig = plt.figure(figsize=(22, 14))
fig.suptitle('üìä NYC HOUSING MARKET - COMPLETE STATISTICAL SUMMARY', 
             fontsize=28, fontweight='bold', y=0.98)

gs = GridSpec(4, 4, figure=fig, hspace=0.4, wspace=0.3, 
              top=0.92, bottom=0.05, left=0.05, right=0.95)

# Overall stats text panel
ax_text = fig.add_subplot(gs[0, :2])
ax_text.axis('off')

overall_stats = f"""
‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
‚ïë             OVERALL MARKET STATISTICS             ‚ïë
‚ï†‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï£
‚ïë  Total Properties:        {len(df):>20,}  ‚ïë
‚ïë  Total Market Value:      ${df['sale_price'].sum()/1e9:>17.2f}B  ‚ïë
‚ïë  Median Sale Price:       ${df['sale_price'].median():>17,.0f}  ‚ïë
‚ïë  Mean Sale Price:         ${df['sale_price'].mean():>17,.0f}  ‚ïë
‚ïë  Price Std Deviation:     ${df['sale_price'].std():>17,.0f}  ‚ïë
‚ïë  Min Price:               ${df['sale_price'].min():>17,.0f}  ‚ïë
‚ïë  Max Price:               ${df['sale_price'].max():>17,.0f}  ‚ïë
‚ïö‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïù
"""
ax_text.text(0.05, 0.95, overall_stats, transform=ax_text.transAxes, fontsize=11,
            verticalalignment='top', family='monospace',
            bbox=dict(boxstyle='round', facecolor='#161B22', edgecolor='#30363D'))

# Borough stats table
ax_table = fig.add_subplot(gs[0, 2:])
ax_table.axis('off')

borough_stats = "BOROUGH BREAKDOWN\n" + "‚îÄ"*50 + "\n"
for borough in ['Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island']:
    b_data = df[df['borough_name'] == borough]
    count = len(b_data)
    median = b_data['sale_price'].median()
    borough_stats += f"{borough:<15} {count:>8,} props   ${median/1e6:>6.2f}M median\n"

ax_table.text(0.05, 0.95, borough_stats, transform=ax_table.transAxes, fontsize=11,
             verticalalignment='top', family='monospace',
             bbox=dict(boxstyle='round', facecolor='#161B22', edgecolor='#30363D'))

# Mini charts
# Price distribution
ax1 = fig.add_subplot(gs[1, :2])
price_data = df[df['sale_price'] < 5000000]['sale_price']
ax1.hist(price_data, bins=50, color='#E63946', edgecolor='white', linewidth=0.3, alpha=0.8)
ax1.axvline(df['sale_price'].median(), color='#F4A261', linestyle='--', linewidth=2)
style_axis(ax1, 'Price Distribution', xlabel='Price', ylabel='Count')
ax1.xaxis.set_major_formatter(mticker.FuncFormatter(format_currency))

# Borough counts
ax2 = fig.add_subplot(gs[1, 2:])
borough_counts = df['borough_name'].value_counts()
colors = [BOROUGH_COLORS.get(b, '#888') for b in borough_counts.index]
ax2.bar(borough_counts.index, borough_counts.values, color=colors, edgecolor='white')
style_axis(ax2, 'Properties by Borough', ylabel='Count')
ax2.tick_params(axis='x', rotation=45)

# Price per sqft
ax3 = fig.add_subplot(gs[2, :2])
if 'price_per_sqft' in df_res.columns:
    pps = df_res['price_per_sqft'].dropna()
    ax3.hist(pps, bins=50, color='#2A9D8F', edgecolor='white', linewidth=0.3, alpha=0.8)
    ax3.axvline(pps.median(), color='#F4A261', linestyle='--', linewidth=2)
style_axis(ax3, 'Price per SqFt (Residential)', xlabel='$/SqFt', ylabel='Count')

# Age distribution
ax4 = fig.add_subplot(gs[2, 2:])
if 'building_age' in df.columns:
    age = df['building_age'].dropna()
    ax4.hist(age, bins=50, color='#9B59B6', edgecolor='white', linewidth=0.3, alpha=0.8)
    ax4.axvline(age.median(), color='#F4A261', linestyle='--', linewidth=2)
style_axis(ax4, 'Building Age Distribution', xlabel='Age (years)', ylabel='Count')

# Price by borough boxplot
ax5 = fig.add_subplot(gs[3, :2])
borough_order = ['Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island']
data_box = [df[df['borough_name'] == b]['sale_price'].values for b in borough_order]
bp = ax5.boxplot(data_box, labels=[b[:4] for b in borough_order], patch_artist=True, showfliers=False)
for patch, borough in zip(bp['boxes'], borough_order):
    patch.set_facecolor(BOROUGH_COLORS.get(borough, '#888'))
    patch.set_alpha(0.7)
style_axis(ax5, 'Price Distribution by Borough', ylabel='Price ($)')
ax5.yaxis.set_major_formatter(mticker.FuncFormatter(format_currency))

# PPS by borough
ax6 = fig.add_subplot(gs[3, 2:])
if 'price_per_sqft' in df_res.columns:
    pps_borough = df_res.groupby('borough_name')['price_per_sqft'].median().reindex(borough_order)
    colors = [BOROUGH_COLORS.get(b, '#888') for b in borough_order]
    ax6.bar(range(len(borough_order)), pps_borough.values, color=colors, edgecolor='white')
    ax6.set_xticks(range(len(borough_order)))
    ax6.set_xticklabels([b[:4] for b in borough_order])
    for i, val in enumerate(pps_borough.values):
        ax6.text(i, val + 10, f'${val:.0f}', ha='center', fontsize=9, fontweight='bold')
style_axis(ax6, 'Median $/SqFt by Borough', ylabel='$/SqFt')

save_chart('chart_40_complete_summary.png', chart_num)

# =============================================================================
# FINAL SUMMARY
# =============================================================================

print("\n" + "="*80)
print("✅ CHART GENERATION COMPLETE!")
print("="*80)

# Count actual files created.
# NOTE: this counts every .png currently in CHARTS_DIR, so files left over
# from an earlier run are included in the total.
chart_files = [f for f in os.listdir(CHARTS_DIR) if f.endswith('.png')]

# Horizontal rule reused under each heading of the report below.
rule = '─' * 50

print(f"""
📊 SUMMARY:
{rule}
   Total Charts Created: {len(chart_files)}
   Output Directory: {CHARTS_DIR}/
   
📁 CHART CATEGORIES:
{rule}
   • Infographic Dashboards (2 charts)
   • Borough Analysis (7 charts)
   • Price Analysis (5 charts)
   • Price per SqFt Analysis (8 charts)
   • Building Category Analysis (5 charts)
   • Building Age Analysis (4 charts)
   • Building Size Analysis (3 charts)
   • Use Type Analysis (3 charts)
   • Correlation Analysis (2 charts)
   • Summary Statistics (1 chart)

📈 KEY INSIGHTS VISUALIZED:
{rule}
   • Property distribution across all 5 boroughs
   • Price distributions with medians and means
   • Price per square foot analysis (residential only)
   • Building age and category breakdowns
   • Market share and total values
   • Size vs price relationships
   • Correlation matrices
   • Complete statistical summaries
""")

print("🎉 All charts saved successfully!")