# 🏠 Housing Price Prediction - Demo Notebook

Este notebook demuestra el uso del sistema de predicción de precios de viviendas.

## Contenido
1. Configuración
2. Exploración de datos
3. Uso del API
4. Análisis de métricas del sistema
5. Conclusiones

In [None]:
# Imports
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Global plotting defaults: seaborn grid theme plus a wide default figure size.
sns.set_style("whitegrid")
plt.rc("figure", figsize=(12, 6))

## 1. Configuración

In [None]:
# Base URL of the prediction API; every request in this notebook is built from it.
API_URL = "http://localhost:8000"

# Health check: fail fast with a clear error if the service is down or unhealthy.
# requests applies no timeout by default, so without one this cell could hang
# forever on an unresponsive host.
response = requests.get(f"{API_URL}/health", timeout=5)
# Surface 4xx/5xx responses as an HTTPError instead of trying to parse an error body.
response.raise_for_status()
print("API Status:", response.json())

## 2. Exploración de Datos

In [None]:
# Load the housing dataset (the path is relative to the notebook's working directory).
df = pd.read_csv('data/boston_housing.csv')

# Quick overview: dimensions first, then the column names after a blank line.
print(
    f"Dataset shape: {df.shape}",
    f"\nColumns: {df.columns.tolist()}",
    sep="\n",
)
# Last expression of the cell, so the first rows are rendered as a table.
df.head()

In [None]:
# Descriptive statistics of the dataset. The call is the cell's last expression,
# so the notebook renders the resulting summary table.
df.describe()

In [None]:
# Distribution of the target (price)
# Names under which the target column may appear in the CSV.
TARGET_CANDIDATES = ['MEDV', 'medv', 'price', 'target']
matching_cols = [col for col in df.columns if col in TARGET_CANDIDATES]
if not matching_cols:
    # Fail with an actionable message instead of a bare IndexError.
    raise ValueError(
        f"No target column found: expected one of {TARGET_CANDIDATES}, "
        f"but the dataset has columns {df.columns.tolist()}"
    )
# If several candidates are present, keep the last one in column order.
target_col = matching_cols[-1]

# Drop missing values for plotting only; hist/boxplot cannot handle NaN.
prices = df[target_col].dropna()

fig, (ax_hist, ax_box) = plt.subplots(1, 2, figsize=(12, 4))

ax_hist.hist(prices, bins=30, edgecolor='black', alpha=0.7)
ax_hist.set_xlabel('Price ($1000s)')
ax_hist.set_ylabel('Frequency')
ax_hist.set_title('Distribution of Housing Prices')

ax_box.boxplot(prices)
ax_box.set_ylabel('Price ($1000s)')
ax_box.set_title('Price Box Plot')

fig.tight_layout()
plt.show()

# pandas reductions skip NaN by default, so these use the original column.
print(f"Mean price: ${df[target_col].mean():.2f}k")
print(f"Median price: ${df[target_col].median():.2f}k")
print(f"Std dev: ${df[target_col].std():.2f}k")

In [None]:
# Correlation of each feature with the target
# Restrict to numeric columns first: DataFrame.corr() raises on non-numeric
# columns in pandas >= 2.0, and select_dtypes works on older versions too.
numeric_df = df.select_dtypes(include='number')
correlations = numeric_df.corr()[target_col].sort_values(ascending=False)

# The target's correlation with itself is trivially 1.0; exclude it so the
# rankings below contain features only.
feature_correlations = correlations.drop(target_col)

fig, ax = plt.subplots(figsize=(10, 6))
feature_correlations.plot(kind='barh', ax=ax)
ax.set_xlabel('Correlation with Price')
ax.set_title('Feature Correlations')
fig.tight_layout()
plt.show()

print("\nTop positive correlations:")
print(feature_correlations.head(5))
print("\nTop negative correlations:")
print(feature_correlations.tail(5))

## 3. Uso del API

In [None]:
# Model information
# Bounded wait plus explicit status check so a stalled or failing API produces
# a clear error instead of a hang or a confusing JSON/KeyError.
response = requests.get(f"{API_URL}/model/info", timeout=10)
response.raise_for_status()
model_info = response.json()

print("Model Type:", model_info['model_type'])
print("Features:", model_info['features'])
print("\nPerformance Metrics:")
# NOTE(review): the response schema is not visible here; 'metrics' is treated as
# optional and its values as possibly non-numeric — confirm against the API.
for metric, value in (model_info.get('metrics') or {}).items():
    if isinstance(value, (int, float)):
        print(f"  {metric}: {value:.4f}")
    else:
        # Fall back to the plain representation rather than failing on ':.4f'.
        print(f"  {metric}: {value}")

In [None]:
# Make a single prediction
# Feature payload sent to the /predict endpoint as a JSON object.
sample_input = {
    "CRIM": 0.00632,
    "ZN": 18.0,
    "INDUS": 2.31,
    "CHAS": 0.0,
    "NOX": 0.538,
    "RM": 6.575,
    "AGE": 65.2,
    "DIS": 4.0900,
    "RAD": 1.0,
    "TAX": 296.0,
    "PTRATIO": 15.3,
    "B": 396.90,
    "LSTAT": 4.98
}

# Timeout prevents an indefinite hang; requests has no default timeout.
response = requests.post(
    f"{API_URL}/predict",
    json=sample_input,
    timeout=10,
)
# Raise on 4xx/5xx (e.g. a validation error) before reading the body.
response.raise_for_status()

prediction = response.json()
print(f"Predicted Price: ${prediction['prediction']:.2f}k")
print(f"Inference Time: {prediction['inference_time']:.4f}s")

In [None]:
# Batch predictions on the first n_samples rows of the dataset
# NOTE(review): these rows come from the same CSV loaded above; whether they were
# held out from training is not visible here — confirm before treating the error
# below as a test-set metric.
n_samples = 20
test_samples = df.drop(columns=[target_col]).head(n_samples).to_dict('records')
actual_prices = df[target_col].head(n_samples).values

predicted_values = []
# A Session reuses the underlying connection across the sequential requests.
with requests.Session() as session:
    for sample in test_samples:
        response = session.post(f"{API_URL}/predict", json=sample, timeout=10)
        # Stop at the first failed request instead of hitting a KeyError below.
        response.raise_for_status()
        predicted_values.append(response.json()['prediction'])

# Array form keeps the arithmetic with actual_prices explicit.
predictions = np.asarray(predicted_values, dtype=float)

# Compare predictions against the actual values
results_df = pd.DataFrame({
    'Actual': actual_prices,
    'Predicted': predictions,
    'Error': np.abs(actual_prices - predictions)
})

print(results_df)
print(f"\nMean Absolute Error: ${results_df['Error'].mean():.2f}k")

In [None]:
# Predictions vs actual values: scatter plot (left) and residual histogram (right)
fig, (ax_scatter, ax_resid) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: points on the dashed diagonal would be perfect predictions.
price_range = [actual_prices.min(), actual_prices.max()]
ax_scatter.scatter(actual_prices, predictions, alpha=0.6)
ax_scatter.plot(price_range, price_range, 'r--', lw=2, label='Perfect Prediction')
ax_scatter.set_xlabel('Actual Price ($1000s)')
ax_scatter.set_ylabel('Predicted Price ($1000s)')
ax_scatter.set_title('Predictions vs Actual')
ax_scatter.legend()
ax_scatter.grid(True, alpha=0.3)

# Right panel: distribution of (actual - predicted) with a marker at zero.
residuals = actual_prices - predictions
ax_resid.hist(residuals, bins=15, edgecolor='black', alpha=0.7)
ax_resid.set_xlabel('Residual ($1000s)')
ax_resid.set_ylabel('Frequency')
ax_resid.set_title('Residual Distribution')
ax_resid.axvline(x=0, color='r', linestyle='--', lw=2)
ax_resid.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

## 4. Análisis de Métricas del Sistema

In [None]:
# System metrics
# Bounded wait and explicit status check: a failing endpoint raises a clear
# HTTPError instead of hanging or failing later on a missing key.
response = requests.get(f"{API_URL}/metrics", timeout=10)
response.raise_for_status()
metrics = response.json()

print("System Metrics:")
print(f"  Total Predictions: {metrics['total_predictions']}")
print(f"  Avg Inference Time: {metrics['avg_inference_time']:.4f}s")
# These two fields are read with a default of 0 when absent from the response.
print(f"  P95 Inference Time: {metrics.get('p95_inference_time', 0):.4f}s")
print(f"  Avg Prediction: ${metrics.get('avg_prediction', 0):.2f}k")
print(f"  Uptime: {metrics['uptime_hours']:.2f} hours")

In [None]:
# Simulate load and record the inference time reported for each request
n_requests = 100

# Draw every payload up front: the feature frame is built once instead of on
# each iteration, and the fixed seed makes the run reproducible.
load_samples = (
    df.drop(columns=[target_col])
    .sample(n=n_requests, replace=True, random_state=42)
    .to_dict('records')
)

latencies = []
# A Session reuses the underlying connection across the sequential requests.
with requests.Session() as session:
    for sample in load_samples:
        response = session.post(f"{API_URL}/predict", json=sample, timeout=10)
        response.raise_for_status()
        # NOTE: this is the 'inference_time' field returned by the API, not a
        # client-side round-trip measurement.
        latencies.append(response.json()['inference_time'])

# Computed once and reused by both panels and the printed summary.
mean_latency = np.mean(latencies)
p95_latency = np.percentile(latencies, 95)

fig, (ax_series, ax_dist) = plt.subplots(1, 2, figsize=(12, 4))

ax_series.plot(latencies, alpha=0.6)
ax_series.axhline(y=mean_latency, color='r', linestyle='--', label=f'Mean: {mean_latency:.4f}s')
ax_series.set_xlabel('Request Number')
ax_series.set_ylabel('Latency (seconds)')
ax_series.set_title('Inference Latency Over Time')
ax_series.legend()
ax_series.grid(True, alpha=0.3)

ax_dist.hist(latencies, bins=20, edgecolor='black', alpha=0.7)
ax_dist.set_xlabel('Latency (seconds)')
ax_dist.set_ylabel('Frequency')
ax_dist.set_title('Latency Distribution')
ax_dist.axvline(x=mean_latency, color='r', linestyle='--', label='Mean')
ax_dist.axvline(x=p95_latency, color='orange', linestyle='--', label='P95')
ax_dist.legend()
ax_dist.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

print("\nLatency Statistics:")
print(f"  Mean: {mean_latency:.4f}s")
print(f"  Median: {np.median(latencies):.4f}s")
print(f"  P95: {p95_latency:.4f}s")
print(f"  P99: {np.percentile(latencies, 99):.4f}s")
print(f"  Max: {np.max(latencies):.4f}s")

## 5. Conclusiones

- El modelo está sirviendo predicciones con baja latencia
- Las predicciones tienen un error promedio razonable
- El sistema es estable bajo carga
- El monitoreo está funcionando correctamente