# Year 10 Geography Data Analysis - Group 3 Site Comparison
## Urban Planning & Land Use Analysis

This notebook analyzes urban geography data from 3 different sites, examining:
- Environmental Quality Standards (EQS)
- Building characteristics
- Noise levels
- Traffic patterns
- Pedestrian activity
- Land use types
- Urban hierarchy (high/low order)

**Data Source**: YR 10 DATA(GROUP 3).csv

## 1. Import Required Libraries

Import pandas, numpy, matplotlib, seaborn, and other necessary libraries for data analysis and visualization.

In [None]:
# Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import re
import warnings

# Plot appearance: matplotlib defaults with seaborn's "husl" colour cycle.
plt.style.use('default')
sns.set_palette("husl")
# Silence every warning for the rest of the session (this also hides
# deprecation and pandas warnings raised by later cells).
warnings.filterwarnings('ignore')

# Show wide frames in full, but cap the text shown per cell at 50 characters
display_options = {
    'display.max_columns': None,
    'display.width': None,
    'display.max_colwidth': 50,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

# Confirm the environment so version-related differences can be traced later
for status_line in (
    "✅ Libraries imported successfully",
    f"📊 Pandas version: {pd.__version__}",
    f"🔢 NumPy version: {np.__version__}",
):
    print(status_line)

## 2. Load and Explore the Dataset

Load the CSV file using pandas and perform initial exploration including shape, columns, data types, and first few rows.

In [None]:
# Load the dataset
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs unchanged on the original author's machine. Point it at your own copy
# of the CSV (ideally via a path relative to the project) before running.
file_path = '/Users/simonwang/Documents/Usage/VibeCoding/DailyAssistant/projects/DonaldGeo/data/YR 10 DATA(GROUP 3).csv'

# Read the CSV file.
# header=None keeps the first line of the file as data instead of treating it
# as column names; the layout is untangled in the cleaning section below.
try:
    df_raw = pd.read_csv(file_path, header=None)
except Exception as e:
    # Report the problem, then stop here: every later cell needs df_raw, so
    # carrying on would only surface as an unrelated NameError further down.
    print(f"❌ Error loading dataset: {e}")
    raise
else:
    # Only reached when the read succeeded, so a display problem is never
    # misreported as a loading error.
    print("✅ Dataset loaded successfully")
    print(f"📏 Dataset shape: {df_raw.shape}")
    print("\n📋 Raw data preview:")
    display(df_raw)

In [None]:
# Take a closer look at how the raw sheet is laid out
row_count, column_count = df_raw.shape

print("🔍 Detailed data exploration:")
print(f"Number of rows: {row_count}")
print(f"Number of columns: {column_count}")
print("\n📊 Data types:")
print(df_raw.dtypes)
print("\n🔎 First few rows with indices:")
# Print each of the first ten rows as a plain Python list next to its row label
for row_label, row_values in df_raw.head(10).iterrows():
    print(f"Row {row_label}: {row_values.tolist()}")

## 3. Data Cleaning and Preprocessing

Restructure the raw sheet into a tidy table (one row per measurement, one column per site), skip empty rows, and standardize the measurement names.

In [None]:
# Rebuild a tidy table from the raw sheet: one row per measurement and one
# column per site.

# Keep only the rows whose first cell has content (blank rows are dropped)
data_rows = [
    raw_row.tolist()
    for _, raw_row in df_raw.iterrows()
    if pd.notna(raw_row[0]) and str(raw_row[0]).strip() != ''
]

print("🧹 Cleaning data structure...")
print(f"Found {len(data_rows)} data rows")

# Layout of the kept rows, as described by the original author:
# Row 0: Group identifier
# Row 1: Site headers
# Rows 2+: Data measurements
# NOTE(review): the loop below has always started at row 3, not row 2, which
# contradicts the layout note above. The start index is left unchanged here —
# confirm against the CSV whether row 2 is a header or the first measurement.

if len(data_rows) >= 3:
    header_row = data_rows[1]

    # Remember WHICH columns carry a site name. Reading values from these exact
    # positions keeps every value under its own site even if the header row
    # contains blank cells between site names.
    site_positions = [
        position
        for position in range(1, len(header_row))
        if pd.notna(header_row[position]) and str(header_row[position]).strip() != ''
    ]
    sites = [header_row[position] for position in site_positions]
    print(f"📍 Sites identified: {sites}")

    # One output row per measurement: stripped name followed by one value per
    # site (missing cells become None)
    clean_data = []
    for row in data_rows[3:]:  # Skip header rows
        measurement = str(row[0]).strip()
        values = [
            row[position] if pd.notna(row[position]) else None
            for position in site_positions
        ]
        clean_data.append([measurement] + values)

    # Create DataFrame
    columns = ['Measurement'] + sites
    df_clean = pd.DataFrame(clean_data, columns=columns)

    print("\n✅ Clean dataset created:")
    display(df_clean)
else:
    print("❌ Unable to parse data structure")

In [None]:
# Clean measurement names and standardize format
print("🧹 Standardizing measurement names...")

# Raw sheet label -> analysis-friendly name. The labels are written the way
# they appear in the sheet (including its spelling and stray spaces).
measurement_mapping = {
    'EQS': 'Environmental_Quality_Score',
    'BULDING HEIGHT ': 'Building_Height_m',
    'DECIBELS': 'Noise_Level_dB',
    'TRAFFIC COUNT': 'Traffic_Count',
    'PEDESTRIAN COUNT ': 'Pedestrian_Count',
    'ALL TRAFFIC OPTIONS ': 'Public_Transport',
    'LANDUSE ': 'Land_Use_Type',
    'HIGH OR LOW ORDER?': 'Urban_Hierarchy'
}

# The measurement names were already stripped when df_clean was built, so keys
# with trailing spaces would never match. Compare on stripped text on both
# sides so surrounding whitespace can never break the lookup.
measurement_lookup = {
    raw_label.strip(): clean_label
    for raw_label, clean_label in measurement_mapping.items()
}


def _clean_measurement_name(raw_name):
    """Return the standard name for a raw label, or the label with spaces replaced by underscores."""
    stripped = str(raw_name).strip()
    return measurement_lookup.get(stripped, stripped.replace(' ', '_'))


# Apply mapping
df_clean['Measurement_Clean'] = df_clean['Measurement'].map(_clean_measurement_name)

print("\n📋 Measurement mapping:")
for original, clean in zip(df_clean['Measurement'], df_clean['Measurement_Clean']):
    print(f"'{original}' → '{clean}'")

# Index the table by the clean name and drop the raw label column
df_clean = df_clean.set_index('Measurement_Clean').drop(columns='Measurement')

print("\n✅ Cleaned dataset:")
display(df_clean)

## 4. Handle Missing Values

Identify missing values, report how many each site has, and visualize where they occur. Missing values are reported here, not removed or imputed.

In [None]:
# Check for missing values
# df_clean has one row per measurement and one column per site, so the
# column-wise counts below are totals PER SITE (how many measurements each
# site is missing).
print("🔍 Missing Value Analysis:")
print("\n📊 Missing values by site:")

missing_summary = df_clean.isnull().sum()
print(missing_summary)

# Share of measurements missing at each site
missing_pct = (missing_summary / len(df_clean)) * 100
print("\n📈 Missing value percentages:")
for col in df_clean.columns:
    print(f"{col}: {missing_pct[col]:.1f}%")

# Visualize missing values: one cell per (measurement, site) pair
plt.figure(figsize=(10, 6))
sns.heatmap(df_clean.isnull(), cmap='viridis', cbar=True, yticklabels=True)
plt.title('Missing Values Heatmap')
plt.xlabel('Sites')
plt.ylabel('Measurements')
plt.tight_layout()
plt.show()

print("\n✅ Missing value analysis complete")
total_missing = missing_summary.sum()
if total_missing == 0:
    print("🎉 No missing values found!")
else:
    print(f"⚠️ Found {total_missing} missing values")

## 5. Data Type Conversions

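Split the cleaned table by data type: numeric measurements are converted to numbers, categorical measurements are kept as recorded, and the free-text traffic counts are parsed into per-vehicle totals.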

In [None]:
# Split the cleaned table into three views, by the kind of value each row holds
print("🔄 Processing data types...")

# Rows holding one number per site. Each site column is converted to numeric;
# anything that cannot be parsed becomes NaN instead of raising.
numeric_measurements = ['Building_Height_m', 'Noise_Level_dB', 'Pedestrian_Count']
df_numeric = df_clean.loc[numeric_measurements].apply(pd.to_numeric, errors='coerce')

print("\n📊 Numeric data:")
display(df_numeric)

# Rows kept exactly as recorded (labels / grades rather than quantities)
categorical_measurements = ['Environmental_Quality_Score', 'Land_Use_Type', 'Urban_Hierarchy']
df_categorical = df_clean.loc[categorical_measurements].copy()

print("\n📋 Categorical data:")
display(df_categorical)

# Rows whose cells need further parsing before they can be analysed
complex_measurements = ['Traffic_Count', 'Public_Transport']
df_complex = df_clean.loc[complex_measurements].copy()

print("\n🚗 Complex data (Traffic & Transport):")
display(df_complex)

In [None]:
# Process traffic count data
print("🚗 Processing traffic count data...")

# Fallback for counts that int() rejects: a whole number, optionally written
# as "12.0", optionally followed by a note starting with a letter or "("
# (e.g. "12 cars", "12 (approx)"). Anything else (e.g. "10:30", "3.5") is
# still rejected.
_ANNOTATED_COUNT = re.compile(r'([+-]?\d+)(?:\.0+)?\s*(?:[A-Za-z(].*)?')


def parse_traffic_data(traffic_str):
    """Parse a multi-line "vehicle: count" cell into a ``{vehicle: count}`` dict.

    Parameters
    ----------
    traffic_str : str or missing value
        Cell text with one "label: count" entry per line. A missing value
        (anything ``pd.isna`` reports as missing) yields an empty dict.

    Returns
    -------
    dict
        Stripped vehicle label -> integer count. Lines without a colon and
        lines whose count cannot be read as a whole number are skipped. If a
        label appears more than once, the last occurrence wins.
    """
    if pd.isna(traffic_str):
        return {}

    traffic_dict = {}
    for line in str(traffic_str).split('\n'):
        if ':' not in line:
            continue

        # Split on the first colon only, so any later colon stays in the count text
        vehicle, count = line.split(':', 1)
        count = count.strip()
        try:
            value = int(count)
        except ValueError:
            # Previously such entries were dropped silently, which undercounts
            # the site total; recover the number when it is unambiguous.
            match = _ANNOTATED_COUNT.fullmatch(count)
            if match is None:
                continue
            value = int(match.group(1))

        traffic_dict[vehicle.strip()] = value

    return traffic_dict

# Turn every site's traffic cell into a {vehicle: count} dict
traffic_data = {
    site_name: parse_traffic_data(df_complex.loc['Traffic_Count', site_name])
    for site_name in df_complex.columns
}

print("\n🚗 Parsed traffic data:")
for site_name, counts in traffic_data.items():
    print(f"\n{site_name}:")
    for vehicle_type, n_seen in counts.items():
        print(f"  {vehicle_type}: {n_seen}")
    print(f"  Total: {sum(counts.values())}")

# Every vehicle label seen at any site, in alphabetical order for the matrix rows
all_vehicles = {
    vehicle_type
    for counts in traffic_data.values()
    for vehicle_type in counts
}
vehicle_order = sorted(all_vehicles)

# Vehicle x site matrix; a vehicle not recorded at a site counts as 0
traffic_matrix = [
    [traffic_data[site_name].get(vehicle_type, 0) for site_name in df_complex.columns]
    for vehicle_type in vehicle_order
]

df_traffic = pd.DataFrame(
    traffic_matrix,
    columns=df_complex.columns,
    index=vehicle_order,
)

print("\n📊 Traffic count matrix:")
display(df_traffic)

# Append a 'Total' row holding each site's column sum (added after the display above)
df_traffic.loc['Total'] = df_traffic.sum()
print("\n✅ Traffic data processing complete")

## 6. Basic Statistical Analysis

Generate descriptive statistics for the numeric measurements, summarize traffic by site and vehicle type, and build a profile of each site. (Correlation analysis follows in Section 8.)

In [None]:
# Statistical analysis of numeric data
print("📊 Statistical Summary - Numeric Data")
print("=" * 50)

# df_numeric is laid out measurement x site, and describe() summarises column
# by column. Describing it directly would average metres, decibels and
# head-counts together for each site, so transpose first to get one summary
# column per measurement (computed across the sites).
stats_summary = df_numeric.T.describe()
display(stats_summary)

# Per-measurement detail, including which site is highest / lowest
print("\n📈 Additional Statistics:")
for measurement in df_numeric.index:
    values = df_numeric.loc[measurement].dropna()
    print(f"\n{measurement}:")

    # idxmax()/idxmin() raise on an empty Series, which happens when every
    # site's value for this measurement failed numeric conversion.
    if values.empty:
        print("  No numeric values available")
        continue

    print(f"  Range: {values.min():.1f} - {values.max():.1f}")
    print(f"  Mean: {values.mean():.1f}")
    print(f"  Median: {values.median():.1f}")
    print(f"  Std Dev: {values.std():.1f}")

    # Site comparison
    print(f"  Highest: {values.idxmax()} ({values.max():.1f})")
    print(f"  Lowest: {values.idxmin()} ({values.min():.1f})")

In [None]:
# Traffic volumes: per site, per vehicle type, and as a percentage mix
print("🚗 Traffic Analysis")
print("=" * 50)

# The 'Total' row holds each site's overall vehicle count
total_traffic = df_traffic.loc['Total']
print("\n📊 Total Traffic by Site:")
for site_name, n_vehicles in total_traffic.items():
    print(f"  {site_name}: {n_vehicles} vehicles")

# Per-vehicle rows only (the 'Total' row must not be counted as a vehicle type)
per_vehicle_counts = df_traffic.drop('Total')

# Vehicle types ranked by their combined count across all sites
vehicle_totals = per_vehicle_counts.sum(axis=1).sort_values(ascending=False)
print("\n🚙 Most Common Vehicle Types:")
for vehicle_type, combined_count in vehicle_totals.items():
    print(f"  {vehicle_type}: {combined_count} total")

# Each vehicle type's share of its site's total, in percent
print("\n📈 Traffic Composition (%)")
traffic_pct = per_vehicle_counts.div(total_traffic, axis=1) * 100
display(traffic_pct.round(1))

In [None]:
# Side-by-side profile of every site
print("🏙️ Site Comparison Analysis")
print("=" * 50)

# One dict per site, combining the recorded labels with the numeric measurements
site_profiles = {
    site_name: {
        'EQS': df_clean.loc['Environmental_Quality_Score', site_name],
        'Building_Height': df_numeric.loc['Building_Height_m', site_name],
        'Noise_Level': df_numeric.loc['Noise_Level_dB', site_name],
        'Total_Traffic': total_traffic[site_name],
        'Pedestrians': df_numeric.loc['Pedestrian_Count', site_name],
        'Land_Use': df_clean.loc['Land_Use_Type', site_name],
        'Urban_Order': df_clean.loc['Urban_Hierarchy', site_name],
    }
    for site_name in df_clean.columns
}

# Display site profiles
for site_name, site_profile in site_profiles.items():
    print(f"\n📍 {site_name} Profile:")
    for field, recorded in site_profile.items():
        print(f"  {field}: {recorded}")


def _site_with_highest(field):
    """Return the name of the site whose profile has the largest value for ``field``."""
    return max(site_profiles, key=lambda name: site_profiles[name][field])


# Identify site characteristics
print("\n🔍 Site Characteristics:")
busiest_site = _site_with_highest('Total_Traffic')
noisiest_site = _site_with_highest('Noise_Level')
tallest_site = _site_with_highest('Building_Height')

print(f"🚗 Busiest traffic: {busiest_site} ({site_profiles[busiest_site]['Total_Traffic']} vehicles)")
print(f"🔊 Noisiest: {noisiest_site} ({site_profiles[noisiest_site]['Noise_Level']} dB)")
print(f"🏢 Tallest buildings: {tallest_site} ({site_profiles[tallest_site]['Building_Height']}m)")

## 7. Create Data Visualizations

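Create a static dashboard of bar charts (building height, noise, pedestrians, total traffic, traffic composition, and normalized metrics for the first site), followed by an interactive Plotly dashboard with scatter plots, a radar chart, and a pie chart.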

In [None]:
# Static 2x3 dashboard comparing the sites
print("📊 Creating Urban Geography Visualizations")
print("=" * 50)

# Styling shared by every panel; `colors` and `sites` are reused by later cells
plt.style.use('seaborn-v0_8')
colors = ['#2E86AB', '#A23B72', '#F18F01']
sites = df_numeric.columns.tolist()


def _labelled_bar_panel(ax, values, title, ylabel, label_offset, format_label):
    """Draw one bar per site on ``ax`` and write each bar's value just above it."""
    bars = ax.bar(sites, values, color=colors)
    ax.set_title(title, fontweight='bold')
    ax.set_ylabel(ylabel)
    ax.tick_params(axis='x', rotation=45)
    for bar, value in zip(bars, values):
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + label_offset,
                format_label(value), ha='center', va='bottom', fontweight='bold')


fig, axes = plt.subplots(2, 3, figsize=(18, 12))
fig.suptitle('Urban Geography Analysis - Site Comparison Dashboard', fontsize=16, fontweight='bold')

# Per-site vehicle totals, in the same order as `sites`
traffic_totals = [total_traffic[site] for site in sites]

# Panels 1-4: one labelled bar chart per headline metric
_labelled_bar_panel(axes[0, 0], df_numeric.loc['Building_Height_m'].values,
                    'Building Heights by Site', 'Height (meters)',
                    0.5, lambda value: f'{value}m')
_labelled_bar_panel(axes[0, 1], df_numeric.loc['Noise_Level_dB'].values,
                    'Noise Levels by Site', 'Decibels (dB)',
                    0.5, lambda value: f'{value}dB')
_labelled_bar_panel(axes[0, 2], df_numeric.loc['Pedestrian_Count'].values,
                    'Pedestrian Activity by Site', 'Number of Pedestrians',
                    2, lambda value: f'{int(value)}')
_labelled_bar_panel(axes[1, 0], traffic_totals,
                    'Total Vehicle Traffic by Site', 'Number of Vehicles',
                    0.5, lambda value: f'{value}')

# Panel 5: stacked bars, one segment per vehicle type
composition_ax = axes[1, 1]
traffic_data_plot = df_traffic.drop('Total')
stack_base = np.zeros(len(sites))

for vehicle in traffic_data_plot.index:
    segment = [traffic_data_plot.loc[vehicle, site] for site in sites]
    composition_ax.bar(sites, segment, bottom=stack_base, label=vehicle, alpha=0.8)
    # The next vehicle type starts where this one ended
    stack_base += segment

composition_ax.set_title('Traffic Composition by Vehicle Type', fontweight='bold')
composition_ax.set_ylabel('Number of Vehicles')
composition_ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
composition_ax.tick_params(axis='x', rotation=45)

# Panel 6: the FIRST site's metrics, each expressed as a percentage of the
# largest value any site reached for that metric (a bar chart, not a radar)
normalized_ax = axes[1, 2]
first_site = sites[0]
metrics = ['Building\nHeight', 'Noise\nLevel', 'Pedestrian\nCount', 'Total\nTraffic']
site1_values = [
    (df_numeric.loc[row_name, first_site] / df_numeric.loc[row_name].max()) * 100
    for row_name in ('Building_Height_m', 'Noise_Level_dB', 'Pedestrian_Count')
]
site1_values.append((total_traffic[first_site] / max(traffic_totals)) * 100)

x_pos = np.arange(len(metrics))
normalized_ax.bar(x_pos, site1_values, color=colors[0], alpha=0.7, label=first_site)
normalized_ax.set_title(f'{first_site} - Normalized Metrics', fontweight='bold')
normalized_ax.set_ylabel('Relative Score (0-100)')
normalized_ax.set_xticks(x_pos)
normalized_ax.set_xticklabels(metrics, rotation=45, ha='right')
normalized_ax.set_ylim(0, 100)

plt.tight_layout()
plt.show()

print("✅ Dashboard created successfully")

In [None]:
# Interactive 2x2 dashboard built with Plotly
print("🎨 Creating Interactive Visualizations with Plotly")

# Top row: two ordinary x/y panels; bottom row: a polar panel and a pie panel
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=('Building Heights vs Noise Levels', 'Traffic vs Pedestrian Activity',
                   'Site Comparison Radar', 'Vehicle Type Distribution'),
    specs=[[{"secondary_y": False}, {"secondary_y": False}],
           [{"type": "scatterpolar"}, {"type": "pie"}]]
)


def _site_scatter(x_values, y_values):
    """Build a scatter trace with one labelled, colour-coded marker per site."""
    return go.Scatter(
        x=x_values,
        y=y_values,
        mode='markers+text',
        text=sites,
        textposition="top center",
        marker=dict(size=15, color=colors),
        name="Sites"
    )


# 1. Building height against noise level
fig.add_trace(
    _site_scatter(df_numeric.loc['Building_Height_m'], df_numeric.loc['Noise_Level_dB']),
    row=1, col=1
)

# 2. Total traffic against pedestrian count
traffic_totals_list = [total_traffic[site] for site in sites]
pedestrian_list = df_numeric.loc['Pedestrian_Count'].tolist()
fig.add_trace(_site_scatter(traffic_totals_list, pedestrian_list), row=1, col=2)

# 3. Radar chart for the first site only, using the raw (un-normalised) values
first_site = sites[0]
# NOTE(review): the EQS grade is reduced to 3 for '3+' and 2 for ANY other
# value — confirm this matches the grading scale used in the fieldwork.
if df_clean.loc['Environmental_Quality_Score', first_site] == '3+':
    eqs_points = 3
else:
    eqs_points = 2

categories = ['Building Height', 'Noise Level', 'Pedestrian Count', 'Traffic Count', 'EQS Score']
site1_radar = [
    df_numeric.loc['Building_Height_m', first_site],
    df_numeric.loc['Noise_Level_dB', first_site],
    df_numeric.loc['Pedestrian_Count', first_site],
    total_traffic[first_site],
    eqs_points
]
radar_trace = go.Scatterpolar(
    r=site1_radar,
    theta=categories,
    fill='toself',
    name=first_site
)
fig.add_trace(radar_trace, row=2, col=1)

# 4. Share of each vehicle type, summed over all sites
vehicle_totals_pie = df_traffic.drop('Total').sum(axis=1)
pie_trace = go.Pie(
    labels=vehicle_totals_pie.index,
    values=vehicle_totals_pie.values,
    name="Vehicle Types"
)
fig.add_trace(pie_trace, row=2, col=2)

# Overall layout
fig.update_layout(
    title="Urban Geography Interactive Dashboard",
    height=800,
    showlegend=True
)

# Axis titles for the two x/y panels: (column, x title, y title)
for panel_col, x_title, y_title in (
    (1, "Building Height (m)", "Noise Level (dB)"),
    (2, "Total Traffic Count", "Pedestrian Count"),
):
    fig.update_xaxes(title_text=x_title, row=1, col=panel_col)
    fig.update_yaxes(title_text=y_title, row=1, col=panel_col)

fig.show()

print("✅ Interactive dashboard created")

## 8. Advanced Visualizations and Insights

Look for deeper patterns with a correlation heatmap, a composite urban intensity score, and a multi-dimensional scatter plot. With only three sites, treat the correlations as descriptive rather than statistically meaningful.

In [None]:
# Correlations between the urban metrics
print("🔍 Advanced Pattern Analysis")
print("=" * 50)

# corr() works column against column, so flip the table to sites-as-rows /
# measurements-as-columns and add each site's total traffic as one more column.
df_for_corr = df_numeric.T.assign(
    Total_Traffic=[total_traffic[site_name] for site_name in df_numeric.columns]
)

print("\n📊 Site-Measurement Matrix:")
display(df_for_corr)

# Pairwise correlation between every two metrics
correlation_matrix = df_for_corr.corr()

# Heatmap of the lower triangle only (the upper triangle and diagonal are masked)
plt.figure(figsize=(10, 8))
mask = np.triu(np.ones_like(correlation_matrix, dtype=bool))
sns.heatmap(
    correlation_matrix,
    mask=mask,
    annot=True,
    cmap='coolwarm',
    center=0,
    square=True,
    fmt='.2f',
    cbar_kws={"shrink": .8},
)
plt.title('Urban Metrics Correlation Matrix', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

# List each metric pair once (upper triangle) when |r| is above 0.5; a NaN
# correlation fails the comparison and is therefore not listed.
print("\n🔗 Strong Correlations (|r| > 0.5):")
metric_names = correlation_matrix.columns
strong_pairs = [
    (metric_names[first], metric_names[second], correlation_matrix.iloc[first, second])
    for first in range(len(metric_names))
    for second in range(first + 1, len(metric_names))
    if abs(correlation_matrix.iloc[first, second]) > 0.5
]
for var1, var2, corr_val in strong_pairs:
    print(f"  {var1} ↔ {var2}: r = {corr_val:.3f}")

In [None]:
# Composite "urban intensity" score for each site
print("🏙️ Site Classification Analysis")
print("=" * 50)


def calculate_urban_intensity_score(site):
    """Combine a site's measurements into one urban intensity score.

    Reads the module-level ``df_numeric``, ``total_traffic`` and ``df_clean``.
    Every component is a simple linear scaling of the measurement; none of
    them is capped, so a component can exceed its nominal weight when the
    measurement exceeds the reference value. Only the final total is floored
    at 0.

    Components:
      - building height: 10 points per 15 m
      - total traffic: 10 points per 30 vehicles
      - pedestrians: 15 points per 100 people
      - noise: 10 points per 15 dB above 60 dB (negative below 60 dB)
      - land use: +5 if the recorded land use contains "commercial"
    """
    component_points = (
        (df_numeric.loc['Building_Height_m', site] / 15) * 10,
        (total_traffic[site] / 30) * 10,
        (df_numeric.loc['Pedestrian_Count', site] / 100) * 15,
        ((df_numeric.loc['Noise_Level_dB', site] - 60) / 15) * 10,
    )
    score = sum(component_points)

    # Flat bonus for commercial land use (case-insensitive substring match)
    if 'commercial' in str(df_clean.loc['Land_Use_Type', site]).lower():
        score += 5

    # A quiet site can go negative through the noise term; never report below 0
    return max(0, score)

# Score every site, then list them from most to least intensive
urban_scores = {site: calculate_urban_intensity_score(site) for site in sites}

print("\n🏙️ Urban Intensity Scores:")
for ranked_site in sorted(urban_scores, key=urban_scores.get, reverse=True):
    print(f"  {ranked_site}: {urban_scores[ranked_site]:.1f} points")

# Two panels: the scores as bars, and a bubble chart of the underlying metrics
fig, (score_ax, bubble_ax) = plt.subplots(1, 2, figsize=(16, 6))

# Left panel: one bar per site with the score written above it
scores = list(urban_scores.values())
score_bars = score_ax.bar(sites, scores, color=colors)
score_ax.set_title('Urban Intensity Score by Site', fontweight='bold', fontsize=14)
score_ax.set_ylabel('Urban Intensity Score')
score_ax.tick_params(axis='x', rotation=45)
for score_bar, site_score in zip(score_bars, scores):
    score_ax.text(score_bar.get_x() + score_bar.get_width()/2, score_bar.get_height() + 1,
                  f'{site_score:.1f}', ha='center', va='bottom', fontweight='bold')


def _per_site(measurement):
    """Return the given numeric measurement for every site, in ``sites`` order."""
    return [df_numeric.loc[measurement, site] for site in sites]


# Right panel — x: building height, y: traffic, bubble size: pedestrians, colour: noise
heights = _per_site('Building_Height_m')
traffic_counts = [total_traffic[site] for site in sites]
pedestrian_counts = _per_site('Pedestrian_Count')
noise_levels = _per_site('Noise_Level_dB')

bubble_sizes = [people * 3 for people in pedestrian_counts]
scatter = bubble_ax.scatter(heights, traffic_counts,
                            s=bubble_sizes,
                            c=noise_levels,
                            cmap='Reds', alpha=0.7)

# Name each bubble, nudged slightly away from its centre
for site, x_value, y_value in zip(sites, heights, traffic_counts):
    bubble_ax.annotate(site, (x_value, y_value),
                       xytext=(5, 5), textcoords='offset points', fontweight='bold')

bubble_ax.set_title('Multi-Dimensional Site Analysis\n(Size=Pedestrians, Color=Noise)', fontweight='bold', fontsize=14)
bubble_ax.set_xlabel('Building Height (m)')
bubble_ax.set_ylabel('Total Traffic Count')

# Colour scale for the noise dimension
cbar = plt.colorbar(scatter, ax=bubble_ax)
cbar.set_label('Noise Level (dB)', rotation=270, labelpad=20)

plt.tight_layout()
plt.show()

print("✅ Advanced analysis complete")

In [None]:
# Final insights and recommendations
print("💡 Key Insights and Urban Planning Recommendations")
print("=" * 60)

# Noise level above which a site is called out in the insights.
# NOTE(review): both the 70 dB figure and its attribution to WHO come from the
# original notebook — confirm the source before citing it in a report.
NOISE_ALERT_DB = 70


def _recorded_characteristics(site):
    """Describe a site using the land use and urban order recorded for it in df_clean."""
    land_use = str(df_clean.loc['Land_Use_Type', site]).strip()
    urban_order = str(df_clean.loc['Urban_Hierarchy', site]).strip()
    return f"{land_use} land use, {urban_order} order"


# Identify site characteristics
max_score_site = max(urban_scores, key=urban_scores.get)
min_score_site = min(urban_scores, key=urban_scores.get)

# Characteristics are taken from the recorded data rather than assumed: the
# highest-scoring site is not necessarily commercial, nor the lowest residential.
print(f"\n🏆 Most Urban Intensive: {max_score_site}")
print(f"   Score: {urban_scores[max_score_site]:.1f}")
print(f"   Characteristics: {_recorded_characteristics(max_score_site)}")

print(f"\n🌱 Least Urban Intensive: {min_score_site}")
print(f"   Score: {urban_scores[min_score_site]:.1f}")
print(f"   Characteristics: {_recorded_characteristics(min_score_site)}")

# Generate specific insights
insights = []

# Traffic insights
busiest_traffic = max(sites, key=lambda x: total_traffic[x])
insights.append(f"🚗 Traffic: {busiest_traffic} has the highest traffic volume ({total_traffic[busiest_traffic]} vehicles)")

# Noise insights (only reported when the loudest site is above the alert level)
noisiest = max(sites, key=lambda x: df_numeric.loc['Noise_Level_dB', x])
noise_level = df_numeric.loc['Noise_Level_dB', noisiest]
if noise_level > NOISE_ALERT_DB:
    insights.append(f"🔊 Noise: {noisiest} exceeds WHO recommended levels ({noise_level} dB > {NOISE_ALERT_DB} dB)")

# Pedestrian insights
most_pedestrians = max(sites, key=lambda x: df_numeric.loc['Pedestrian_Count', x])
insights.append(f"🚶 Pedestrian Activity: {most_pedestrians} shows highest foot traffic ({df_numeric.loc['Pedestrian_Count', most_pedestrians]} people)")

# Building insights
tallest = max(sites, key=lambda x: df_numeric.loc['Building_Height_m', x])
insights.append(f"🏢 Development: {tallest} has tallest buildings ({df_numeric.loc['Building_Height_m', tallest]}m), indicating higher density")

print("\n📊 Key Insights:")
for i, insight in enumerate(insights, 1):
    print(f"   {i}. {insight}")

# Only claim MTR access for every site when each site's recorded transport
# options actually mention it; otherwise fall back to a neutral recommendation.
all_sites_have_mtr = (
    'Public_Transport' in df_clean.index
    and all('mtr' in str(options).lower() for options in df_clean.loc['Public_Transport'])
)
if all_sites_have_mtr:
    transport_recommendation = f"Transport: All sites have MTR access - enhance bus connectivity for {min_score_site}"
else:
    transport_recommendation = f"Transport: Review public transport access and enhance bus connectivity for {min_score_site}"

# Recommendations
recommendations = [
    f"Urban Planning: {max_score_site} requires traffic management and noise reduction strategies",
    transport_recommendation,
    f"Development: {min_score_site} has potential for sustainable low-rise development",
    "Environmental: Implement noise barriers in high-traffic commercial areas",
    "Pedestrian Safety: Improve walkways in high-activity zones"
]

print("\n💡 Planning Recommendations:")
for i, rec in enumerate(recommendations, 1):
    print(f"   {i}. {rec}")

print("\n" + "="*60)
print("📋 Analysis Summary Complete")
print(f"   • {len(sites)} sites analyzed")
print(f"   • {len(df_clean)} measurement types")
print(f"   • {len(vehicle_totals)} vehicle categories")
print(f"   • Comprehensive urban intensity scoring")
print("   • Interactive visualizations created")
print("   • Planning recommendations generated")
print("="*60)