## 1. Importar Bibliotecas

In [1]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
from pathlib import Path

from src.data.preprocessing import DataPreprocessor
from src.features.feature_engineering import FeatureEngineer

## 2. Carregar Dados

In [2]:
# Read the raw CSV into a DataFrame.
# NOTE(review): 'seu_dataset.csv' ("your dataset") looks like a template
# placeholder — point it at the real file before running this cell.
data_path = Path('../data/raw/seu_dataset.csv')
df = pd.read_csv(filepath_or_buffer=data_path)

print(f"Dataset shape: {df.shape}")
# Last expression of the cell: the notebook renders the first five rows.
df.head(5)

FileNotFoundError: [Errno 2] No such file or directory: '../data/raw/seu_dataset.csv'

## 3. Tratamento de Dados

### 3.1 Valores Faltantes

In [None]:
# Fill in missing values through the project's DataPreprocessor, forwarding
# strategy='mean' (presumably mean imputation — confirm in DataPreprocessor).
preprocessor = DataPreprocessor()
df_clean = preprocessor.handle_missing_values(df, strategy='mean')

# Total number of null entries left in the whole frame after the treatment.
remaining_missing = df_clean.isna().sum().sum()
print("Valores faltantes ap√≥s tratamento:")
print(remaining_missing)

### 3.2 Codificação de Variáveis Categóricas

In [None]:
# Columns stored with the generic 'object' dtype are treated as categorical.
object_frame = df_clean.select_dtypes(include=['object'])
categorical_cols = object_frame.columns.tolist()
print(f"Colunas categ√≥ricas: {categorical_cols}")

# Encode only when there is something to encode; otherwise carry on with a
# copy of the cleaned frame.
df_encoded = (
    preprocessor.encode_categorical(df_clean, categorical_cols)
    if categorical_cols
    else df_clean.copy()
)

## 4. Engenharia de Features

### 4.1 Features Polinomiais

In [None]:
# Instantiate the project's FeatureEngineer; the feature-importance cell
# further down reuses this same object.
engineer = FeatureEngineer()

# TODO: select the columns to expand into polynomial features
# poly_cols = ['col1', 'col2']
# df_features = engineer.create_polynomial_features(df_encoded, poly_cols, degree=2)

# While the TODO above stays commented out, df_features is just a copy of the
# encoded frame, so the shape printed below is unchanged by this step.
df_features = df_encoded.copy()
print(f"Shape ap√≥s features polinomiais: {df_features.shape}")

### 4.2 Features de Interação

In [None]:
# TODO: create interaction features
# interaction_pairs = [('col1', 'col2'), ('col3', 'col4')]
# df_features = engineer.create_interaction_features(df_features, interaction_pairs)

# No interaction features are created while the lines above stay commented
# out; this only reports the current shape of df_features.
print(f"Shape ap√≥s features de intera√ß√£o: {df_features.shape}")

## 5. Separar Features e Target

In [None]:
# TODO: set this to the real label column of the dataset.
target_col = 'target'

# Everything except the target column becomes the feature matrix X; the
# target column on its own becomes y.
X = df_features.drop(columns=target_col)
y = df_features[target_col]

for _label, _part in (("X", X), ("y", y)):
    print(f"{_label} shape: {_part.shape}")

## 6. Seleção de Features

In [None]:
# Feature-importance analysis: ask the FeatureEngineer for the importance
# table and show its first 15 rows (assumed to be sorted by importance —
# confirm in FeatureEngineer.get_feature_importance).
importance = engineer.get_feature_importance(X, y)
top_features = importance.head(15)
print("\nTop 15 features mais importantes:", top_features, sep="\n")

In [None]:
# Select the top-K features (optional)
# k = 20
# X_selected = engineer.select_features(X, y, k=k, method='f_classif')
# print(f"Features selecionadas: {engineer.selected_features}")

# With the optional selection above disabled, every column of X is kept.
X_selected = X.copy()

## 7. Normalização dos Dados

In [None]:
# Split into train and test sets, requesting test_size=0.2 from the project's
# preprocessor, then unpack the four parts it returns.
split_parts = preprocessor.split_data(X_selected, y, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts

for _set_name, _features in (("Train", X_train), ("Test", X_test)):
    print(f"{_set_name} set: {_features.shape}")

In [None]:
# Scale the features. The training split is passed with fit=True and the test
# split with fit=False — presumably so the scaler statistics come from the
# training data only (confirm in DataPreprocessor.scale_features).
X_train_scaled = preprocessor.scale_features(X_train, fit=True)
X_test_scaled = preprocessor.scale_features(X_test, fit=False)

print("Dados normalizados com sucesso!")

## 8. Salvar Dados Processados

In [None]:
# Persist the processed arrays and the feature names under ../data/processed.
processed_dir = Path('../data/processed')
# parents=True: with only exist_ok=True, mkdir raised FileNotFoundError on a
# fresh checkout where the parent ../data directory did not exist yet.
processed_dir.mkdir(parents=True, exist_ok=True)

# One .npy file per array, written with np.save.
arrays_to_save = {
    'X_train.npy': X_train_scaled,
    'X_test.npy': X_test_scaled,
    'y_train.npy': y_train,
    'y_test.npy': y_test,
}
for file_name, array in arrays_to_save.items():
    np.save(processed_dir / file_name, array)

# Save the feature names (column order of X_selected), one per line, with no
# index column and no header row.
feature_names = X_selected.columns.tolist()
pd.Series(feature_names).to_csv(processed_dir / 'feature_names.csv', index=False, header=False)

print("Dados processados salvos com sucesso!")

## 9. Análise de Dados Populacionais

Este notebook analisa a integração de dados populacionais do IBGE Censo 2022 no modelo de cobertura de transporte público. As visualizações a seguir permitem aos planejadores urbanos compreender como a densidade populacional influencia as classificações de cobertura.

**Objetivo**: Demonstrar como a feature populacional habilita decisões baseadas em contexto demográfico.

In [None]:
# Importar bibliotecas adicionais para an√°lise populacional
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Global plotting defaults for the figures in this section: seaborn's
# "whitegrid" theme and a 14x8 default figure size.
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (14, 8)})

print("‚úÖ Bibliotecas carregadas com sucesso!")

### 9.1 Carregar Dados com População Integrada

In [None]:
# Load the population-enriched grid features and the coverage labels.
features_path = Path('../data/processed/features/grid_features.parquet')
labels_path = Path('../data/processed/labels/grid_labels.parquet')

# Read with plain pandas (the files carry no geo metadata).
grid_features = pd.read_parquet(features_path)
grid_labels = pd.read_parquet(labels_path)

# Convert to a GeoDataFrame for the spatial plots: rebuild each cell's
# rectangle from its lon/lat bounds. Iterating the four columns with zip
# avoids DataFrame.apply(axis=1), which builds a Series per row and does not
# return a usable column when the frame is empty.
from shapely.geometry import box
cell_polygons = [
    box(lon_min, lat_min, lon_max, lat_max)
    for lon_min, lat_min, lon_max, lat_max in zip(
        grid_features['lon_min'],
        grid_features['lat_min'],
        grid_features['lon_max'],
        grid_features['lat_max'],
    )
]
# Index-aligned GeoSeries so the polygons land on the rows they came from.
grid_features['geometry'] = gpd.GeoSeries(cell_polygons, index=grid_features.index)
grid_features = gpd.GeoDataFrame(grid_features, geometry='geometry', crs='EPSG:4326')

# Left-merge the labels: cells without a label are kept, with NaN in the
# 'label' and 'composite_score' columns.
grid_data = grid_features.merge(
    grid_labels[['cell_id', 'label', 'composite_score']],
    on='cell_id',
    how='left',
)

# Summary figures, computed once and reused in the report below.
population_col = grid_data['population']
total_cells = len(grid_data)
inhabited_cells = (population_col > 0).sum()

print("üìä Dataset carregado:")
print(f"   - Total de c√©lulas: {total_cells:,}")
print(f"   - Features: {grid_data.shape[1]}")
print(f"   - C√©lulas com popula√ß√£o > 0: {inhabited_cells:,} ({inhabited_cells / total_cells * 100:.1f}%)")
print(f"   - Popula√ß√£o total: {population_col.sum():,.0f} habitantes")
print(f"   - Popula√ß√£o m√©dia: {population_col.mean():.1f} habitantes/c√©lula")
print("\nüìç Distribui√ß√£o de labels:")
print(grid_data['label'].value_counts())

### 9.2 Distribuição Populacional nos 200m Cells

Análise da distribuição de população nas células de 200m × 200m.

In [None]:
def _population_histogram(ax, values, color, title):
    """Draw a 50-bin histogram of *values* on *ax* with mean/median markers."""
    ax.hist(values, bins=50, color=color, alpha=0.7, edgecolor='black')
    ax.axvline(values.mean(), color='red', linestyle='--', linewidth=2, label=f'M√©dia: {values.mean():.1f}')
    ax.axvline(values.median(), color='orange', linestyle='--', linewidth=2, label=f'Mediana: {values.median():.1f}')
    ax.set_xlabel('Popula√ß√£o (habitantes)', fontsize=12, fontweight='bold')
    ax.set_ylabel('Frequ√™ncia (n√∫mero de c√©lulas)', fontsize=12, fontweight='bold')
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.legend(fontsize=11)
    ax.grid(axis='y', alpha=0.3)


# Figure with a 2x2 grid of panels.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
population = grid_data['population']

# 1. Population histogram (all cells)
ax1 = axes[0, 0]
_population_histogram(
    ax1, population, '#4A90E2',
    'Distribui√ß√£o de Popula√ß√£o - Todas as C√©lulas (200m √ó 200m)',
)

# 2. Population histogram (only cells with population > 0)
ax2 = axes[0, 1]
pop_nonzero = population[population > 0]
_population_histogram(
    ax2, pop_nonzero, '#34C759',
    'Distribui√ß√£o de Popula√ß√£o - Somente C√©lulas Habitadas (pop > 0)',
)

# 3. Boxplot of population per coverage class
ax3 = axes[1, 0]
# .copy(): a column is added below; writing into a filtered slice of
# grid_data would trigger pandas' SettingWithCopyWarning.
grid_data_labeled = grid_data[grid_data['label'].notna()].copy()
labels_map = {0: 'Mal Atendida', 1: 'Bem Atendida'}
grid_data_labeled['label_name'] = grid_data_labeled['label'].map(labels_map)
population_by_class = [
    grid_data_labeled.loc[grid_data_labeled['label'] == class_id, 'population']
    for class_id in (0, 1)
]
# NOTE(review): Matplotlib 3.9 renamed `labels` to `tick_labels`; switch once
# the project's minimum Matplotlib version allows it.
bp = ax3.boxplot(population_by_class,
                  labels=['Mal Atendida', 'Bem Atendida'],
                  patch_artist=True, showmeans=True)
bp['boxes'][0].set_facecolor('#FF6B6B')
bp['boxes'][1].set_facecolor('#51CF66')
ax3.set_ylabel('Popula√ß√£o (habitantes)', fontsize=12, fontweight='bold')
ax3.set_title('Distribui√ß√£o de Popula√ß√£o por Classifica√ß√£o de Cobertura', fontsize=14, fontweight='bold')
ax3.grid(axis='y', alpha=0.3)

# 4. Descriptive statistics rendered as text in the fourth panel
ax4 = axes[1, 1]
ax4.axis('off')
total_cells = len(grid_data)
inhabited = (population > 0).sum()
uninhabited = (population == 0).sum()
stats_text = f"""
ESTAT√çSTICAS DESCRITIVAS - POPULA√á√ÉO

Total de C√©lulas: {total_cells:,}
C√©lulas Habitadas (pop > 0): {inhabited:,} ({inhabited / total_cells * 100:.1f}%)
C√©lulas Desabitadas (pop = 0): {uninhabited:,} ({uninhabited / total_cells * 100:.1f}%)

Popula√ß√£o Total: {population.sum():,.0f} habitantes
Popula√ß√£o M√©dia: {population.mean():.1f} hab/c√©lula
Desvio Padr√£o: {population.std():.1f}
Mediana: {population.median():.1f}
M√≠nimo: {population.min():.0f}
M√°ximo: {population.max():.0f}

PERCENTIS:
25%: {population.quantile(0.25):.1f}
50%: {population.quantile(0.50):.1f}
75%: {population.quantile(0.75):.1f}
90%: {population.quantile(0.90):.1f}
95%: {population.quantile(0.95):.1f}
99%: {population.quantile(0.99):.1f}
"""
ax4.text(0.1, 0.5, stats_text, fontsize=11, family='monospace', verticalalignment='center')

plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists.
figures_dir = Path('../reports/figures')
figures_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(figures_dir / 'population_distribution_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ An√°lise de distribui√ß√£o populacional completa!")

### 9.3 Mapa Espacial: População e Classificação de Cobertura

Visualização geoespacial sobrepondo densidade populacional com classificações de cobertura de transporte.

In [None]:
# Figure with two maps side by side.
fig, axes = plt.subplots(1, 2, figsize=(20, 10))

# Map 1: population per cell as a choropleth
ax1 = axes[0]
grid_data.plot(column='population',
               cmap='YlOrRd',
               legend=True,
               ax=ax1,
               edgecolor='face',
               linewidth=0.1,
               legend_kwds={'label': 'Popula√ß√£o (habitantes)', 'shrink': 0.8})
ax1.set_title('Densidade Populacional por C√©lula (200m √ó 200m)', fontsize=16, fontweight='bold', pad=20)
ax1.set_xlabel('Longitude', fontsize=12)
ax1.set_ylabel('Latitude', fontsize=12)
ax1.grid(True, alpha=0.3)

# Map 2: coverage classification (labelled cells only)
ax2 = axes[1]
# Categorical colours: 0 (poorly served) is red, 1 (well served) is green.
colors_map = {0: '#FF6B6B', 1: '#51CF66'}
grid_data_labeled = grid_data[grid_data['label'].notna()].copy()
grid_data_labeled['color'] = grid_data_labeled['label'].map(colors_map)

# Every cell is drawn at its own geometry with a uniform alpha; population is
# NOT encoded in this map (neither as size nor as transparency).
grid_data_labeled.plot(color=grid_data_labeled['color'],
                        alpha=0.6,
                        ax=ax2,
                        edgecolor='black',
                        linewidth=0.3)

ax2.set_title('Classifica√ß√£o de Cobertura de Transporte', fontsize=16, fontweight='bold', pad=20)
ax2.set_xlabel('Longitude', fontsize=12)
ax2.set_ylabel('Latitude', fontsize=12)
ax2.grid(True, alpha=0.3)

# Manual legend: colours are passed per cell, so geopandas has no categories
# to build a legend from.
from matplotlib.patches import Patch
legend_elements = [
    Patch(facecolor='#FF6B6B', label='Mal Atendida', alpha=0.6),
    Patch(facecolor='#51CF66', label='Bem Atendida', alpha=0.6)
]
ax2.legend(handles=legend_elements, loc='upper right', fontsize=12, frameon=True)

plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists.
figures_dir = Path('../reports/figures')
figures_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(figures_dir / 'spatial_map_population_coverage.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ Mapas espaciais gerados!")

### 9.4 Casos de Estudo: Como População Influencia Classificação

Exemplos concretos demonstrando como o contexto populacional altera a interpretação de cobertura de transporte.

In [None]:
def _print_case_study(heading, candidates, pop_percentile, interpretation, recommendation):
    """Print the report section for one scenario and return the cell shown.

    Args:
        heading: Title line of the section.
        candidates: Cells matching the scenario, already sorted; the first
            row is the one reported.
        pop_percentile: Percentile rank (0-1) of population, indexed like the
            labelled grid.
        interpretation: Line printed after the cell's figures.
        recommendation: Line printed after the interpretation.

    Returns:
        The reported row, or None when no cell matches the scenario.
    """
    print(heading)
    print("-" * 80)
    if candidates.empty:
        print("   Nenhum caso encontrado nesta categoria")
        return None
    case = candidates.iloc[0]
    print(f"   Cell ID: {case['cell_id']}")
    print(f"   Popula√ß√£o: {case['population']:.0f} habitantes (P{pop_percentile.loc[case.name]*100:.0f})")
    print(f"   Paradas: {case['stop_count']:.0f}")
    print(f"   Linhas: {case['route_count']:.0f}")
    print(f"   Viagens Di√°rias: {case['daily_trips']:.0f}")
    print(f"   Classifica√ß√£o: {'Mal Atendida' if case['label'] == 0 else 'Bem Atendida'}")
    print(interpretation)
    print(recommendation)
    return case


# Only labelled cells take part in the case studies.
grid_labeled = grid_data[grid_data['label'].notna()].copy()

# Quartile thresholds and the percentile rank are computed once and shared by
# the three scenarios.
pop = grid_labeled['population']
trips = grid_labeled['daily_trips']
high_pop = pop > pop.quantile(0.75)
low_pop = (pop < pop.quantile(0.25)) & (pop > 0)  # empty cells are excluded
high_transit = trips > trips.quantile(0.75)
low_transit = trips < trips.quantile(0.25)
pop_percentile = pop.rank(pct=True)

# Case 1: high population + low transit (likely poorly served)
high_pop_low_transit = grid_labeled[high_pop & low_transit].sort_values('population', ascending=False)

# Case 2: low population + low transit (may still be adequately served)
low_pop_low_transit = grid_labeled[low_pop & low_transit].sort_values('population', ascending=True)

# Case 3: high population + high transit (well served)
high_pop_high_transit = grid_labeled[high_pop & high_transit].sort_values('population', ascending=False)

print("="*80)
print("CASOS DE ESTUDO: INFLU√äNCIA DA POPULA√á√ÉO NA CLASSIFICA√á√ÉO")
print("="*80)

# case1/case2/case3 hold the reported row, or None when nothing matched.
case1 = _print_case_study(
    "\nüìç CASO 1: Alta Popula√ß√£o + Baixo Transporte",
    high_pop_low_transit,
    pop_percentile,
    "   ‚ö†Ô∏è INTERPRETA√á√ÉO: √Årea densamente povoada com baixa cobertura de transporte",
    "      ‚Üí ALTA PRIORIDADE para investimento em transporte p√∫blico",
)

case2 = _print_case_study(
    "\nüìç CASO 2: Baixa Popula√ß√£o + Baixo Transporte",
    low_pop_low_transit,
    pop_percentile,
    "   ‚úì INTERPRETA√á√ÉO: √Årea pouco povoada com transporte proporcional √† demanda",
    "      ‚Üí BAIXA PRIORIDADE - cobertura adequada para densidade populacional",
)

case3 = _print_case_study(
    "\nüìç CASO 3: Alta Popula√ß√£o + Alto Transporte",
    high_pop_high_transit,
    pop_percentile,
    "   ‚úì INTERPRETA√á√ÉO: √Årea densamente povoada com excelente cobertura de transporte",
    "      ‚Üí Investimento bem dimensionado - manter e monitorar",
)

print("\n" + "="*80)

### 9.5 Análise Comparativa: População vs Transit Metrics

Relação entre densidade populacional e métricas de transporte público.

In [None]:
from matplotlib.patches import Patch
from scipy.stats import pearsonr


def _scatter_with_trend(ax, data, metric, point_colors, y_label, title):
    """Scatter population against *metric* on *ax*, with a linear trend line.

    The trend line, the Pearson correlation box and the legend are only added
    when *data* has more than one row.
    """
    x = data['population']
    y = data[metric]
    ax.scatter(x, y, c=point_colors, alpha=0.5, s=30, edgecolors='black', linewidths=0.5)
    ax.set_xlabel('Popula√ß√£o (habitantes)', fontsize=12, fontweight='bold')
    ax.set_ylabel(y_label, fontsize=12, fontweight='bold')
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3)

    if len(data) > 1:
        # Degree-1 least-squares fit, evaluated over the observed x range.
        trend = np.poly1d(np.polyfit(x, y, 1))
        x_line = np.linspace(x.min(), x.max(), 100)
        ax.plot(x_line, trend(x_line), "r--", alpha=0.8, linewidth=2, label='Tend√™ncia')
        corr, _ = pearsonr(x, y)
        ax.text(0.05, 0.95, f'Correla√ß√£o: {corr:.3f}', transform=ax.transAxes,
                fontsize=11, verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
        # The legend is created here, not unconditionally: without the trend
        # line there is no labelled artist and Matplotlib would warn about it.
        ax.legend()


# Scatter plots relating population to the transit metrics.
fig, axes = plt.subplots(2, 2, figsize=(16, 14))

# Keep only cells with population > 0 for a clearer picture.
grid_pop = grid_labeled[grid_labeled['population'] > 0].copy()
colors = grid_pop['label'].map({0: '#FF6B6B', 1: '#51CF66'})

# (column, y-axis label, title) for each panel, in row-major panel order.
panels = [
    ('daily_trips', 'Viagens Di√°rias', 'Popula√ß√£o vs Viagens Di√°rias'),
    ('stop_count', 'N√∫mero de Paradas', 'Popula√ß√£o vs N√∫mero de Paradas'),
    ('route_count', 'N√∫mero de Linhas', 'Popula√ß√£o vs N√∫mero de Linhas'),
    ('stop_density', 'Densidade de Paradas (stops/km¬≤)', 'Popula√ß√£o vs Densidade de Paradas'),
]
for panel_ax, (metric, y_label, title) in zip(axes.flat, panels):
    _scatter_with_trend(panel_ax, grid_pop, metric, colors, y_label, title)

# Figure-level legend for the two coverage classes.
legend_elements = [
    Patch(facecolor='#FF6B6B', label='Mal Atendida', alpha=0.5),
    Patch(facecolor='#51CF66', label='Bem Atendida', alpha=0.5)
]
fig.legend(handles=legend_elements, loc='lower center', ncol=2, fontsize=12, frameon=True, bbox_to_anchor=(0.5, -0.02))

plt.tight_layout(rect=[0, 0.02, 1, 1])
# savefig does not create missing directories, so make sure the target exists.
figures_dir = Path('../reports/figures')
figures_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(figures_dir / 'population_vs_transit_metrics.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ An√°lise comparativa popula√ß√£o vs m√©tricas de transporte completa!")

### 9.6 Resumo e Insights para Planejamento Urbano

**Principais Conclusões da Análise Populacional:**

1. **Distribuição Populacional**:
   - 40.2% das células têm população zero (áreas não residenciais, buffer zones)
   - 59.8% das células são habitadas com população variando de 1 a 2.062 habitantes
   - População média de 174.7 habitantes por célula de 200m × 200m

2. **Correlação População-Transporte**:
   - Correlação positiva entre população e métricas de transporte
   - Áreas mais povoadas tendem a ter mais paradas, linhas e viagens
   - Confirma lógica de investimento em transporte baseado em demanda

3. **Valor da Feature Populacional**:
   - **Desambiguação**: Distingue entre áreas genuinamente mal atendidas (alta pop + baixo transporte) e áreas de baixa demanda (baixa pop + baixo transporte)
   - **Priorização**: Permite ranquear investimentos por impacto populacional
   - **Contextualização**: Evita desperdício de recursos em áreas de baixa densidade

4. **Aplicações para Planejamento Urbano**:
   - Identificar "desertos de transporte" em áreas densamente povoadas
   - Validar adequação de cobertura em regiões periféricas
   - Fundamentar políticas públicas com dados demográficos objetivos
   - Monitorar evolução temporal (população Censo 2022 + atualizações futuras)

5. **Limitações a Considerar**:
   - Dados estáticos do Censo 2022 (não refletem dinâmica diária)
   - População residencial (não captura fluxo trabalho/estudo)
   - Grid uniforme (não respeita limites de bairros)

**Recomendação**: A feature populacional é **essencial** para interpretação contextual de classificações de cobertura, mesmo com importância numérica baixa (1.10% RF, 0.08% GB). Seu valor está na **capacidade de distinção qualitativa** entre cenários, não em dominância estatística.