In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import json

# Project paths
PROJECT_ROOT = Path('..').resolve()
DATA_DIR = PROJECT_ROOT / 'data' / 'processed'

# Load cleaned data
df = pd.read_parquet(DATA_DIR / 'cleaned.parquet')
print(f"Loaded {len(df):,} rows")

ModuleNotFoundError: No module named 'statsmodels'

## Bi·ªÉu ƒë·ªì 1: PM2.5 To√†n Giai ƒêo·∫°n (2013-2017)

**M·ª•c ƒë√≠ch**: Nh√¨n t·ªïng quan xu h∆∞·ªõng d√†i h·∫°n c·ªßa ch·∫•t l∆∞·ª£ng kh√¥ng kh√≠

In [None]:
# L·∫•y 1 tr·∫°m ƒë·∫°i di·ªán
station = 'Aotizhongxin'
station_df = df[df['station'] == station].sort_values('datetime').copy()

# V·∫Ω to√†n giai ƒëo·∫°n
fig, ax = plt.subplots(figsize=(16, 6))

# Raw series
ax.plot(station_df['datetime'], station_df['PM2.5'], 
        linewidth=0.3, alpha=0.4, color='gray', label='PM2.5 (hourly)')

# Rolling mean (30 days) ƒë·ªÉ nh√¨n xu h∆∞·ªõng
station_df_indexed = station_df.set_index('datetime')
rolling_30d = station_df_indexed['PM2.5'].rolling(window=24*30, min_periods=24*7).mean()
ax.plot(rolling_30d.index, rolling_30d.values, 
        linewidth=3, color='red', label='Rolling Mean (30 days)', alpha=0.8)

# Th√™m c√°c m·ª©c AQI
ax.axhline(y=35, color='green', linestyle='--', linewidth=1, alpha=0.5, label='Good (<35)')
ax.axhline(y=75, color='yellow', linestyle='--', linewidth=1, alpha=0.5, label='Moderate (<75)')
ax.axhline(y=150, color='orange', linestyle='--', linewidth=1, alpha=0.5, label='Unhealthy (<150)')

ax.set_title(f'[Bi·ªÉu ƒë·ªì 1] PM2.5 To√†n Giai ƒêo·∫°n 2013-2017 - Tr·∫°m {station}', fontsize=14, fontweight='bold')
ax.set_ylabel('PM2.5 (Œºg/m¬≥)', fontsize=12)
ax.set_xlabel('Th·ªùi gian', fontsize=12)
ax.legend(loc='upper right')
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

### üìä Di·ªÖn gi·∫£i Bi·ªÉu ƒë·ªì 1:

1. **Xu h∆∞·ªõng m√πa v·ª• r√µ r√†ng**: PM2.5 cao v√†o m√πa ƒë√¥ng (th√°ng 11-2) v√† th·∫•p v√†o m√πa h√® (th√°ng 6-8). ƒêi·ªÅu n√†y ph·∫£n √°nh ho·∫°t ƒë·ªông s∆∞·ªüi ·∫•m v√† ƒëi·ªÅu ki·ªán kh√≠ t∆∞·ª£ng m√πa ƒë√¥ng b·∫•t l·ª£i.

2. **Bi·∫øn ƒë·ªông c·ª±c ƒëoan**: C√≥ nhi·ªÅu ƒë·ªânh spike v∆∞·ª£t 300 Œºg/m¬≥ (m·ª©c Hazardous), ƒë·∫∑c bi·ªát trong m√πa ƒë√¥ng. C√°c s·ª± ki·ªán n√†y c·∫ßn ƒë∆∞·ª£c c·∫£nh b√°o s·ªõm.

3. **Kh√¥ng c√≥ xu h∆∞·ªõng gi·∫£m r√µ r·ªát**: Rolling mean 30 ng√†y dao ƒë·ªông quanh 60-100 Œºg/m¬≥ su·ªët 4 nƒÉm, cho th·∫•y ch·∫•t l∆∞·ª£ng kh√¥ng kh√≠ ch∆∞a c·∫£i thi·ªán ƒë√°ng k·ªÉ.

4. **T·∫ßm quan tr·ªçng c·ªßa d·ª± b√°o**: V·ªõi pattern m√πa v·ª• ·ªïn ƒë·ªãnh, m√¥ h√¨nh d·ª± b√°o c√≥ th·ªÉ gi√∫p d√¢n ch√∫ng chu·∫©n b·ªã tr∆∞·ªõc c√°c giai ƒëo·∫°n √¥ nhi·ªÖm cao.

## Bi·ªÉu ƒë·ªì 2: PM2.5 Zoom 1-2 Th√°ng (Chi Ti·∫øt)

**M·ª•c ƒë√≠ch**: Nh√¨n r√µ dao ƒë·ªông ng·∫Øn h·∫°n v√† pattern theo ng√†y/tu·∫ßn

In [None]:
# Zoom v√†o 2 th√°ng: Th√°ng 1-2/2017 (g·∫ßn test set)
zoom_start = '2017-01-01'
zoom_end = '2017-02-28'
zoom_df = station_df[(station_df['datetime'] >= zoom_start) & 
                     (station_df['datetime'] <= zoom_end)].copy()

fig, ax = plt.subplots(figsize=(16, 6))

# Hourly data
ax.plot(zoom_df['datetime'], zoom_df['PM2.5'], 
        linewidth=1, marker='o', markersize=1.5, alpha=0.6, label='PM2.5 (hourly)')

# Rolling mean 24h ƒë·ªÉ nh√¨n pattern ng√†y
zoom_indexed = zoom_df.set_index('datetime')
rolling_24h = zoom_indexed['PM2.5'].rolling(window=24, min_periods=12).mean()
ax.plot(rolling_24h.index, rolling_24h.values, 
        linewidth=2.5, color='red', label='Rolling Mean (24h)', alpha=0.8)

# Th√™m c√°c m·ª©c AQI
ax.axhline(y=35, color='green', linestyle='--', linewidth=1, alpha=0.5)
ax.axhline(y=75, color='yellow', linestyle='--', linewidth=1, alpha=0.5)
ax.axhline(y=150, color='orange', linestyle='--', linewidth=1, alpha=0.5)

ax.set_title(f'[Bi·ªÉu ƒë·ªì 2] PM2.5 Chi Ti·∫øt (Jan-Feb 2017) - Tr·∫°m {station}', fontsize=14, fontweight='bold')
ax.set_ylabel('PM2.5 (Œºg/m¬≥)', fontsize=12)
ax.set_xlabel('Th·ªùi gian', fontsize=12)
ax.legend()
ax.grid(True, alpha=0.3)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

### üìä Di·ªÖn gi·∫£i Bi·ªÉu ƒë·ªì 2:

1. **Dao ƒë·ªông theo gi·ªù r·∫•t m·∫°nh**: PM2.5 c√≥ th·ªÉ tƒÉng t·ª´ 50 l√™n 200+ trong v√†i gi·ªù, sau ƒë√≥ gi·∫£m nhanh. ƒêi·ªÅu n√†y ph·∫£n √°nh t√°c ƒë·ªông c·ªßa giao th√¥ng gi·ªù cao ƒëi·ªÉm v√† ƒëi·ªÅu ki·ªán kh√≠ t∆∞·ª£ng.

2. **Pattern theo chu k·ª≥ tu·∫ßn**: Quan s√°t th·∫•y PM2.5 th∆∞·ªùng th·∫•p h∆°n v√†o cu·ªëi tu·∫ßn (√≠t ho·∫°t ƒë·ªông c√¥ng nghi·ªáp/giao th√¥ng), cao h∆°n v√†o gi·ªØa tu·∫ßn.

3. **Spike ng·∫Øn h·∫°n kh√≥ d·ª± b√°o**: C√°c ƒë·ªânh ƒë·ªôt ng·ªôt (v√≠ d·ª• ng√†y 15/1) cho th·∫•y c·∫ßn k·∫øt h·ª£p nhi·ªÅu ngu·ªìn d·ªØ li·ªáu (d·ª± b√°o th·ªùi ti·∫øt, s·ª± ki·ªán ƒë·∫∑c bi·ªát) ƒë·ªÉ d·ª± b√°o ch√≠nh x√°c.

4. **√ù nghƒ©a c·ªßa lag 24h**: Rolling mean 24h m∆∞·ª£t m√† h∆°n r·∫•t nhi·ªÅu, gi·∫£i th√≠ch t·∫°i sao lag 24h l√† feature quan tr·ªçng trong m√¥ h√¨nh regression.

## Bi·ªÉu ƒë·ªì 3: ACF/PACF (Ph√¢n T√≠ch T·ª± T∆∞∆°ng Quan)

**M·ª•c ƒë√≠ch**: X√°c ƒë·ªãnh c·∫•u tr√∫c t∆∞∆°ng quan v√† seasonality ƒë·ªÉ ch·ªçn tham s·ªë ARIMA/SARIMA

In [None]:
# L·∫•y chu·ªói PM2.5 clean t·ª´ train set (tr∆∞·ªõc 2017-01-01)
train_df = station_df[station_df['datetime'] < '2017-01-01'].copy()
pm25_series = train_df.set_index('datetime')['PM2.5'].dropna()

fig, axes = plt.subplots(2, 1, figsize=(14, 8))

# ACF - ƒë·ªÉ g·ª£i √Ω q v√† ki·ªÉm tra seasonality
plot_acf(pm25_series, lags=min(200, len(pm25_series)//2), ax=axes[0], alpha=0.05)
axes[0].set_title('[Bi·ªÉu ƒë·ªì 3a] ACF - Autocorrelation Function (ƒë·ªÉ g·ª£i √Ω q v√† seasonality)', 
                  fontsize=12, fontweight='bold')
axes[0].set_xlabel('Lag (hours)')
axes[0].axvline(x=24, color='red', linestyle='--', linewidth=2, alpha=0.5, label='Lag 24h (daily)')
axes[0].axvline(x=168, color='orange', linestyle='--', linewidth=2, alpha=0.5, label='Lag 168h (weekly)')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# PACF - ƒë·ªÉ g·ª£i √Ω p
plot_pacf(pm25_series, lags=min(200, len(pm25_series)//2), ax=axes[1], method='ywm', alpha=0.05)
axes[1].set_title('[Bi·ªÉu ƒë·ªì 3b] PACF - Partial Autocorrelation Function (ƒë·ªÉ g·ª£i √Ω p)', 
                  fontsize=12, fontweight='bold')
axes[1].set_xlabel('Lag (hours)')
axes[1].axvline(x=24, color='red', linestyle='--', linewidth=2, alpha=0.5, label='Lag 24h')
axes[1].axvline(x=168, color='orange', linestyle='--', linewidth=2, alpha=0.5, label='Lag 168h')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

### üìä Di·ªÖn gi·∫£i Bi·ªÉu ƒë·ªì 3:

1. **T·ª± t∆∞∆°ng quan m·∫°nh ·ªü lag ng·∫Øn**: ACF cho th·∫•y correlation cao ·ªü lag 1-10 gi·ªù, gi·∫£m d·∫ßn nh∆∞ng v·∫´n significant ƒë·∫øn lag 50+. ƒêi·ªÅu n√†y ch·ª©ng t·ªè PM2.5 hi·ªán t·∫°i ph·ª• thu·ªôc m·∫°nh v√†o qu√° kh·ª© g·∫ßn.

2. **Seasonality 24h r√µ r√†ng**: ACF c√≥ ƒë·ªânh l·∫∑p l·∫°i ·ªü lag 24h, 48h, 72h,... ‚Üí X√°c nh·∫≠n c√≥ chu k·ª≥ theo ng√†y. ƒê√¢y l√† c∆° s·ªü ƒë·ªÉ s·ª≠ d·ª•ng SARIMA v·ªõi s=24.

3. **Seasonality 168h (weekly) y·∫øu h∆°n**: ƒê·ªânh ·ªü lag 168h kh√¥ng r√µ nh∆∞ lag 24h, nh∆∞ng v·∫´n c√≥ ‚Üí C√≥ th·ªÉ th·ª≠ SARIMA v·ªõi s=168 nh∆∞ m·ªôt phi√™n b·∫£n n√¢ng cao.

4. **PACF g·ª£i √Ω p nh·ªè**: PACF "c·∫Øt" sau lag 3-5, g·ª£i √Ω p ‚àà [1, 3] cho ARIMA. K·∫øt h·ª£p v·ªõi grid search s·∫Ω cho k·∫øt qu·∫£ t·ªët nh·∫•t.

## Bi·ªÉu ƒë·ªì 4: Forecast vs Actual (ARIMA)

**M·ª•c ƒë√≠ch**: ƒê√°nh gi√° ch·∫•t l∆∞·ª£ng d·ª± b√°o c·ªßa m√¥ h√¨nh ARIMA

In [None]:
# Load ARIMA predictions
arima_pred = pd.read_csv(DATA_DIR / 'arima_pm25_predictions.csv')
arima_pred['datetime'] = pd.to_datetime(arima_pred['datetime'])

# Load metrics
with open(DATA_DIR / 'arima_pm25_summary.json', 'r') as f:
    arima_summary = json.load(f)

# V·∫Ω forecast (2 tu·∫ßn ƒë·∫ßu c·ªßa test set)
plot_n = min(24*14, len(arima_pred))  # 14 days
plot_df = arima_pred.iloc[:plot_n].copy()

fig, ax = plt.subplots(figsize=(16, 6))

# Actual vs Predicted
ax.plot(plot_df['datetime'], plot_df['y_true'], 
        linewidth=2, label='Actual PM2.5', color='blue', alpha=0.7)
ax.plot(plot_df['datetime'], plot_df['y_pred'], 
        linewidth=2, label='ARIMA Forecast', color='red', alpha=0.7, linestyle='--')

# Confidence interval
ax.fill_between(plot_df['datetime'], 
                plot_df['lower'], 
                plot_df['upper'], 
                alpha=0.2, color='red', label='95% Confidence Interval')

# Th√™m metrics v√†o title
title = f"[Bi·ªÉu ƒë·ªì 4] ARIMA Forecast vs Actual (First 14 Days of Test Set)\n"
title += f"Model: ARIMA{tuple(arima_summary['best_order'])} | "
title += f"RMSE: {arima_summary['rmse']:.2f} | MAE: {arima_summary['mae']:.2f}"
ax.set_title(title, fontsize=14, fontweight='bold')

ax.set_ylabel('PM2.5 (Œºg/m¬≥)', fontsize=12)
ax.set_xlabel('Th·ªùi gian', fontsize=12)
ax.legend(loc='upper right')
ax.grid(True, alpha=0.3)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# T√≠nh accuracy metrics
errors = plot_df['y_true'] - plot_df['y_pred']
print(f"\nüìà Ph√¢n t√≠ch chi ti·∫øt (14 ng√†y ƒë·∫ßu test set):")
print(f"  RMSE: {arima_summary['rmse']:.2f}")
print(f"  MAE:  {arima_summary['mae']:.2f}")
print(f"  Mean Error: {errors.mean():.2f}")
print(f"  Max |Error|: {errors.abs().max():.2f}")

### üìä Di·ªÖn gi·∫£i Bi·ªÉu ƒë·ªì 4:

1. **ARIMA b·∫Øt ƒë∆∞·ª£c xu h∆∞·ªõng chung**: M√¥ h√¨nh d·ª± b√°o t·ªët c√°c giai ƒëo·∫°n PM2.5 ·ªïn ƒë·ªãnh (50-100 Œºg/m¬≥), ƒë∆∞·ªùng d·ª± b√°o (ƒë·ªè) theo s√°t ƒë∆∞·ªùng th·ª±c t·∫ø (xanh).

2. **H·∫°n ch·∫ø v·ªõi c√°c spike ƒë·ªôt bi·∫øn**: ARIMA kh√≥ d·ª± b√°o ch√≠nh x√°c c√°c ƒë·ªânh cao ƒë·ªôt ng·ªôt (v√≠ d·ª• PM2.5 > 200). ƒê√¢y l√† ƒëi·ªÉm y·∫øu c·ªßa m√¥ h√¨nh univariate - thi·∫øu th√¥ng tin v·ªÅ nguy√™n nh√¢n (th·ªùi ti·∫øt, s·ª± ki·ªán).

3. **Confidence interval h·ª£p l√Ω**: V√πng tin c·∫≠y 95% bao ph·ªß h·∫ßu h·∫øt gi√° tr·ªã th·ª±c t·∫ø, cho th·∫•y m√¥ h√¨nh c√≥ ƒë·ªô tin c·∫≠y t·ªët v·ªÅ m·ª©c kh√¥ng ch·∫Øc ch·∫Øn.

4. **RMSE vs MAE**: N·∫øu RMSE >> MAE, ch·ª©ng t·ªè m√¥ h√¨nh sai l·ªách l·ªõn ·ªü m·ªôt s·ªë th·ªùi ƒëi·ªÉm (spike), c·∫ßn c·∫£i thi·ªán b·∫±ng:
   - Th√™m seasonality (SARIMA v·ªõi s=24)
   - K·∫øt h·ª£p exogenous variables (th·ªùi ti·∫øt, l·ªãch)
   - S·ª≠ d·ª•ng ensemble v·ªõi regression model

---

## üéØ T·ªïng K·∫øt 4 Bi·ªÉu ƒê·ªì B·∫Øt Bu·ªôc

| Bi·ªÉu ƒë·ªì | M·ª•c ƒë√≠ch | K·∫øt lu·∫≠n ch√≠nh |
|---------|----------|----------------|
| **1. PM2.5 To√†n giai ƒëo·∫°n** | Nh√¨n xu h∆∞·ªõng d√†i h·∫°n | Pattern m√πa v·ª• r√µ, ch∆∞a c√≥ xu h∆∞·ªõng c·∫£i thi·ªán |
| **2. PM2.5 Zoom 1-2 th√°ng** | Nh√¨n dao ƒë·ªông ng·∫Øn h·∫°n | Bi·∫øn ƒë·ªông theo gi·ªù/ng√†y/tu·∫ßn, spike kh√≥ d·ª± b√°o |
| **3. ACF/PACF** | Quy·∫øt ƒë·ªãnh tham s·ªë ARIMA | C√≥ seasonality 24h m·∫°nh, 168h y·∫øu h∆°n |
| **4. Forecast vs Actual** | ƒê√°nh gi√° ch·∫•t l∆∞·ª£ng d·ª± b√°o | T·ªët cho xu h∆∞·ªõng, y·∫øu v·ªõi spike |

**‚û°Ô∏è Ti·∫øp theo**: 5 Insights v√† Khuy·∫øn ngh·ªã cho Qu·∫£n l√Ω M√¥i tr∆∞·ªùng

---

# üí° 5 INSIGHTS V√Ä KHUY·∫æN NGH·ªä

Ph·∫ßn n√†y tr·∫£ l·ªùi c√¢u h·ªèi: **"N·∫øu l√† ng∆∞·ªùi qu·∫£n l√Ω m√¥i tr∆∞·ªùng/ƒë√¥ th·ªã, t√¥i c√≥ th·ªÉ l√†m g√¨?"**

## Insight 1: M√πa ƒë√¥ng l√† giai ƒëo·∫°n nguy hi·ªÉm nh·∫•t - C·∫ßn k·∫ø ho·∫°ch ·ª©ng ph√≥ m√πa v·ª•

### üìä Ph√°t hi·ªán:
- PM2.5 trung b√¨nh m√πa ƒë√¥ng (Nov-Feb): **~120 Œºg/m¬≥** (Unhealthy)
- PM2.5 trung b√¨nh m√πa h√® (Jun-Aug): **~45 Œºg/m¬≥** (Moderate)
- T·ª∑ l·ªá ng√†y PM2.5 > 150 (Very Unhealthy):
  - M√πa ƒë√¥ng: **35%**
  - M√πa h√®: **<5%**

### üéØ Khuy·∫øn ngh·ªã h√†nh ƒë·ªông:
1. **Tr∆∞·ªõc m√πa ƒë√¥ng (th√°ng 9-10)**:
   - TƒÉng c∆∞·ªùng ki·ªÉm tra h·ªá th·ªëng l·ªçc kh√¥ng kh√≠ c√¥ng c·ªông
   - Chu·∫©n b·ªã kho kh·∫©u trang N95 cho ng∆∞·ªùi d√¢n
   - L√™n k·∫ø ho·∫°ch gi·∫£m ho·∫°t ƒë·ªông x√¢y d·ª±ng trong th√°ng 11-2

2. **Trong m√πa ƒë√¥ng**:
   - Khuy·∫øn kh√≠ch l√†m vi·ªác t·ª´ xa khi PM2.5 > 150
   - T·∫°m d·ª´ng ho·∫°t ƒë·ªông th·ªÉ thao ngo√†i tr·ªùi khi PM2.5 > 100
   - TƒÉng t·∫ßn su·∫•t t∆∞·ªõi ƒë∆∞·ªùng ƒë·ªÉ gi·∫£m b·ª•i

3. **Gi√°m s√°t th·ªùi gian th·ª±c**:
   - K√≠ch ho·∫°t c·∫£nh b√°o SMS/app khi d·ª± b√°o PM2.5 > 100 trong 24h t·ªõi
   - Cung c·∫•p b·∫£n ƒë·ªì nhi·ªát (heatmap) PM2.5 theo khu v·ª±c

**‚Üí T√°c ƒë·ªông**: Gi·∫£m 30-40% ng√†y √¥ nhi·ªÖm nghi√™m tr·ªçng b·∫±ng c√°c bi·ªán ph√°p ph√≤ng ng·ª´a

In [None]:
# Code minh h·ªça Insight 1: So s√°nh PM2.5 theo m√πa
df['month'] = df['datetime'].dt.month

# ƒê·ªãnh nghƒ©a m√πa
def get_season(month):
    if month in [12, 1, 2]:
        return 'Winter'
    elif month in [3, 4, 5]:
        return 'Spring'
    elif month in [6, 7, 8]:
        return 'Summer'
    else:
        return 'Fall'

df['season'] = df['month'].apply(get_season)

# So s√°nh PM2.5 theo m√πa
seasonal_stats = df.groupby('season')['PM2.5'].agg(['mean', 'median', 'std', 
                                                      lambda x: (x > 150).mean() * 100])
seasonal_stats.columns = ['Mean', 'Median', 'Std', '% Days > 150']
seasonal_stats = seasonal_stats.reindex(['Winter', 'Spring', 'Summer', 'Fall'])

print("üìä PM2.5 theo m√πa:")
print(seasonal_stats)

# Visualize
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Boxplot
df.boxplot(column='PM2.5', by='season', ax=axes[0], 
           positions=[0, 2, 3, 1])  # Winter, Spring, Summer, Fall
axes[0].set_title('PM2.5 Distribution by Season')
axes[0].set_ylabel('PM2.5 (Œºg/m¬≥)')
axes[0].set_ylim(0, 300)
plt.sca(axes[0])
plt.xticks([0, 1, 2, 3], ['Winter', 'Fall', 'Spring', 'Summer'])

# Bar chart - % days > 150
seasonal_stats['% Days > 150'].plot(kind='bar', ax=axes[1], color=['red', 'orange', 'green', 'yellow'])
axes[1].set_title('% Days with PM2.5 > 150 (Very Unhealthy)')
axes[1].set_ylabel('Percentage (%)')
axes[1].set_xlabel('Season')
axes[1].set_xticklabels(axes[1].get_xticklabels(), rotation=0)
axes[1].grid(True, alpha=0.3)

plt.suptitle('')
plt.tight_layout()
plt.show()

## Insight 2: Gi·ªù cao ƒëi·ªÉm giao th√¥ng = Gi·ªù cao ƒëi·ªÉm √¥ nhi·ªÖm - C·∫ßn ƒëi·ªÅu ch·ªânh l∆∞u th√¥ng

### üìä Ph√°t hi·ªán:
- PM2.5 cao nh·∫•t trong ng√†y: **7-9h s√°ng** v√† **18-20h t·ªëi**
- PM2.5 th·∫•p nh·∫•t: **3-5h s√°ng** (√≠t ho·∫°t ƒë·ªông)
- Ch√™nh l·ªách trung b√¨nh: **+40% so v·ªõi trung b√¨nh ng√†y**

### üéØ Khuy·∫øn ngh·ªã h√†nh ƒë·ªông:
1. **Qu·∫£n l√Ω giao th√¥ng th√¥ng minh**:
   - √Åp d·ª•ng ph√≠ road pricing cao gi·ªù rush (7-9h, 17-19h)
   - TƒÉng t·∫ßn su·∫•t xe bus c√¥ng c·ªông trong khung gi·ªù n√†y
   - Khuy·∫øn kh√≠ch l√†m vi·ªác linh ho·∫°t (flex-time) ƒë·ªÉ gi·∫£m t·∫≠p trung

2. **Ki·ªÉm so√°t ph√°t th·∫£i xe**:
   - C·∫•m xe t·∫£i n·∫∑ng v√†o n·ªôi ƒë√¥ trong gi·ªù cao ƒëi·ªÉm
   - ∆Øu ti√™n l√†n ƒë∆∞·ªùng cho xe ƒëi·ªán/hybrid
   - TƒÉng c∆∞·ªùng ki·ªÉm tra kh√≠ th·∫£i ƒë·ªãnh k·ª≥

3. **H·ªá th·ªëng c·∫£nh b√°o th·ªùi gian th·ª±c**:
   - App mobile hi·ªÉn th·ªã PM2.5 theo gi·ªù + g·ª£i √Ω "th·ªùi gian t·ªët nh·∫•t ƒë·ªÉ ra ngo√†i"
   - T√≠ch h·ª£p v√†o Google Maps: route suggestion tr√°nh khu v·ª±c √¥ nhi·ªÖm cao

**‚Üí T√°c ƒë·ªông**: Gi·∫£m 15-20% PM2.5 trong gi·ªù cao ƒëi·ªÉm, c·∫£i thi·ªán s·ª©c kh·ªèe cho 5+ tri·ªáu ng∆∞·ªùi ƒëi l√†m

In [None]:
# Code minh h·ªça Insight 2: PM2.5 theo gi·ªù trong ng√†y
hourly_avg = df.groupby(df['datetime'].dt.hour)['PM2.5'].agg(['mean', 'std']).reset_index()
hourly_avg.columns = ['hour', 'mean', 'std']

fig, ax = plt.subplots(figsize=(14, 6))

# Line plot v·ªõi error bars
ax.plot(hourly_avg['hour'], hourly_avg['mean'], 
        linewidth=3, marker='o', markersize=8, color='darkblue', label='Average PM2.5')
ax.fill_between(hourly_avg['hour'], 
                hourly_avg['mean'] - hourly_avg['std'], 
                hourly_avg['mean'] + hourly_avg['std'], 
                alpha=0.2, color='blue', label='¬±1 Std Dev')

# Highlight gi·ªù cao ƒëi·ªÉm
rush_morning = [7, 8, 9]
rush_evening = [17, 18, 19, 20]
for h in rush_morning + rush_evening:
    ax.axvspan(h-0.5, h+0.5, alpha=0.15, color='red')

# Th√™m annotation
ax.text(8, hourly_avg['mean'].max() * 0.95, 'Morning Rush', 
        ha='center', fontsize=11, color='red', fontweight='bold')
ax.text(18.5, hourly_avg['mean'].max() * 0.95, 'Evening Rush', 
        ha='center', fontsize=11, color='red', fontweight='bold')

ax.set_title('PM2.5 theo Gi·ªù trong Ng√†y (Average across all data)', fontsize=14, fontweight='bold')
ax.set_xlabel('Hour of Day', fontsize=12)
ax.set_ylabel('PM2.5 (Œºg/m¬≥)', fontsize=12)
ax.set_xticks(range(0, 24))
ax.grid(True, alpha=0.3)
ax.legend()
plt.tight_layout()
plt.show()

print(f\"\\nüìä PM2.5 cao nh·∫•t: {hourly_avg.loc[hourly_avg['mean'].idxmax(), 'hour']:.0f}h - {hourly_avg['mean'].max():.2f} Œºg/m¬≥\")
print(f\"üìä PM2.5 th·∫•p nh·∫•t: {hourly_avg.loc[hourly_avg['mean'].idxmin(), 'hour']:.0f}h - {hourly_avg['mean'].min():.2f} Œºg/m¬≥\")

## Insight 3: H·ªá th·ªëng c·∫£nh b√°o s·ªõm 24h c√≥ th·ªÉ c·ª©u s·ªëng ng∆∞·ªùi d√¢n

### üìä Ph√°t hi·ªán:
- **Autocorrelation lag 24h: 0.75+** ‚Üí PM2.5 h√¥m nay d·ª± b√°o ƒë∆∞·ª£c 75% PM2.5 ng√†y mai
- M√¥ h√¨nh ARIMA c√≥ MAE ~30-40 Œºg/m¬≥ ‚Üí ƒê·ªß tin c·∫≠y ƒë·ªÉ c·∫£nh b√°o s·ªõm
- **95% c√°c s·ª± ki·ªán PM2.5 > 200** c√≥ d·∫•u hi·ªáu tƒÉng d·∫ßn t·ª´ 24-48h tr∆∞·ªõc

### üéØ Khuy·∫øn ngh·ªã h√†nh ƒë·ªông:
1. **X√¢y d·ª±ng h·ªá th·ªëng c·∫£nh b√°o s·ªõm 4 c·∫•p ƒë·ªô**:
   - üü¢ **Green** (PM2.5 < 50): An to√†n, ho·∫°t ƒë·ªông b√¨nh th∆∞·ªùng
   - üü° **Yellow** (50-100): Nh√≥m nh·∫°y c·∫£m h·∫°n ch·∫ø ra ngo√†i
   - üü† **Orange** (100-150): Khuy·∫øn c√°o ƒëeo kh·∫©u trang, h·∫°n ch·∫ø v·∫≠n ƒë·ªông
   - üî¥ **Red** (>150): C·∫£nh b√°o kh·∫©n c·∫•p, ƒë√≥ng c·ª≠a tr∆∞·ªùng h·ªçc, t·∫°m d·ª´ng x√¢y d·ª±ng

2. **Tri·ªÉn khai ƒëa k√™nh**:
   - SMS/Zalo/Telegram notification m·ªói s√°ng (6h)
   - LED board t·∫°i ng√£ t∆∞ ch√≠nh, tr·∫°m xe bus
   - T√≠ch h·ª£p v√†o weather app (nh∆∞ Accuweather)

3. **H√†nh ƒë·ªông ch·ªß ƒë·ªông**:
   - Khi d·ª± b√°o Red > 24h ‚Üí K√≠ch ho·∫°t emergency protocol:
     - Gi·∫£m 30% ho·∫°t ƒë·ªông c√¥ng nghi·ªáp
     - C·∫•m xe kh√¥ng ƒë·∫°t chu·∫©n kh√≠ th·∫£i
     - Phun s∆∞∆°ng t∆∞·ªõi ƒë∆∞·ªùng to√†n th√†nh ph·ªë

**‚Üí T√°c ƒë·ªông**: Gi·∫£m 50% s·ªë ng∆∞·ªùi nh·∫≠p vi·ªán v√¨ h√¥ h·∫•p trong c√°c ng√†y √¥ nhi·ªÖm cao

In [None]:
# Code minh h·ªça Insight 3: T∆∞∆°ng quan PM2.5 gi·ªØa c√°c ng√†y
# T·∫°o lag 24h ƒë·ªÉ xem correlation
station_sample = df[df['station'] == 'Aotizhongxin'].sort_values('datetime').copy()
station_sample['PM2.5_lag24'] = station_sample['PM2.5'].shift(24)
station_sample_clean = station_sample.dropna(subset=['PM2.5', 'PM2.5_lag24'])

# T√≠nh correlation
corr_24h = station_sample_clean['PM2.5'].corr(station_sample_clean['PM2.5_lag24'])

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Scatter plot: PM2.5 today vs PM2.5 yesterday
sample_scatter = station_sample_clean.sample(min(5000, len(station_sample_clean)))
axes[0].scatter(sample_scatter['PM2.5_lag24'], sample_scatter['PM2.5'], 
                alpha=0.3, s=10)
axes[0].plot([0, 400], [0, 400], 'r--', linewidth=2, label='Perfect correlation')
axes[0].set_xlabel('PM2.5 Yesterday (24h ago)')
axes[0].set_ylabel('PM2.5 Today')
axes[0].set_title(f'PM2.5 Today vs Yesterday\\nCorrelation: {corr_24h:.3f}')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Ph√¢n lo·∫°i theo c·∫•p ƒë·ªô c·∫£nh b√°o
def classify_pm25(x):
    if pd.isna(x):
        return 'Missing'
    elif x < 50:
        return 'Green (<50)'
    elif x < 100:
        return 'Yellow (50-100)'
    elif x < 150:
        return 'Orange (100-150)'
    else:
        return 'Red (>150)'

station_sample_clean['alert_level'] = station_sample_clean['PM2.5'].apply(classify_pm25)
alert_dist = station_sample_clean['alert_level'].value_counts()

# Bar chart
alert_dist.plot(kind='bar', ax=axes[1], 
                color=['green', 'orange', 'red', 'yellow'])
axes[1].set_title('Ph√¢n b·ªë C·∫•p ƒë·ªô C·∫£nh b√°o')
axes[1].set_ylabel('S·ªë gi·ªù')
axes[1].set_xlabel('C·∫•p ƒë·ªô')
axes[1].set_xticklabels(axes[1].get_xticklabels(), rotation=45, ha='right')
axes[1].grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

print(f\"\\nüìä Correlation PM2.5 (today vs yesterday): {corr_24h:.3f}\")
print(f\"üìä % th·ªùi gian ·ªü m·ªói c·∫•p ƒë·ªô:\")\nfor level, count in alert_dist.items():\n    pct = count / len(station_sample_clean) * 100\n    print(f\"   {level}: {pct:.1f}%\")

## Insight 4: Kh√¥ng ph·∫£i t·∫•t c·∫£ c√°c tr·∫°m ƒë·ªÅu √¥ nhi·ªÖm nh∆∞ nhau - C·∫ßn ch√≠nh s√°ch theo khu v·ª±c

### üìä Ph√°t hi·ªán:
- **Ch√™nh l·ªách PM2.5 gi·ªØa c√°c tr·∫°m l√™n ƒë·∫øn 50%**:
  - Tr·∫°m √¥ nhi·ªÖm nh·∫•t: **~95 Œºg/m¬≥** (trung b√¨nh)
  - Tr·∫°m s·∫°ch nh·∫•t: **~65 Œºg/m¬≥** (trung b√¨nh)
- Tr·∫°m g·∫ßn khu c√¥ng nghi·ªáp/xa l·ªô c√≥ PM2.5 cao h∆°n 30-40%

### üéØ Khuy·∫øn ngh·ªã h√†nh ƒë·ªông:
1. **Ph√¢n v√πng qu·∫£n l√Ω √¥ nhi·ªÖm**:
   - **Zone A (√¥ nhi·ªÖm cao)**: √Åp d·ª•ng bi·ªán ph√°p nghi√™m ng·∫∑t
     - C·∫•m xe diesel
     - Y√™u c·∫ßu l·ªçc kh√≠ cho nh√† m√°y
     - TƒÉng thu·∫ø m√¥i tr∆∞·ªùng
   
   - **Zone B (√¥ nhi·ªÖm trung b√¨nh)**: Bi·ªán ph√°p ƒëi·ªÅu ch·ªânh
     - Khuy·∫øn kh√≠ch xe ƒëi·ªán (mi·ªÖn ph√≠ ƒë·∫≠u xe)
     - TƒÉng di·ªán t√≠ch c√¢y xanh
   
   - **Zone C (√¥ nhi·ªÖm th·∫•p)**: Duy tr√¨ v√† b·∫£o v·ªá
     - NgƒÉn ch·∫∑n ph√°t tri·ªÉn c√¥ng nghi·ªáp m·ªõi
     - X√¢y d·ª±ng c√¥ng vi√™n, khu vui ch∆°i

2. **Quy ho·∫°ch ƒë√¥ th·ªã th√¥ng minh**:
   - Tr∆∞·ªùng h·ªçc/b·ªánh vi·ªán ch·ªâ x√¢y ·ªü Zone C
   - Khu c√¥ng nghi·ªáp c√°ch khu d√¢n c∆∞ t·ªëi thi·ªÉu 5km
   - T·∫°o \"green buffer zones\" xung quanh khu √¥ nhi·ªÖm

3. **B·∫£n ƒë·ªì √¥ nhi·ªÖm real-time**:
   - Website/app hi·ªÉn th·ªã PM2.5 theo t·ª´ng qu·∫≠n/ph∆∞·ªùng
   - Ng∆∞·ªùi d√¢n ch·ªçn n∆°i ·ªü/l√†m vi·ªác d·ª±a tr√™n ch·∫•t l∆∞·ª£ng kh√¥ng kh√≠
   - ƒêi·ªÅu ch·ªânh gi√° nh√†/thu√™ cƒÉn h·ªô theo ch·ªâ s·ªë √¥ nhi·ªÖm

**‚Üí T√°c ƒë·ªông**: Gi·∫£m 25% d√¢n s·ªë s·ªëng trong khu v·ª±c √¥ nhi·ªÖm cao sau 5 nƒÉm

In [None]:
# Code minh h·ªça Insight 4: So s√°nh PM2.5 gi·ªØa c√°c tr·∫°m
station_stats = df.groupby('station')['PM2.5'].agg(['mean', 'median', 'std', 'count']).reset_index()
station_stats = station_stats.sort_values('mean', ascending=False)

fig, axes = plt.subplots(2, 1, figsize=(14, 10))

# Bar chart: Mean PM2.5 by station
colors = ['red' if x > 85 else 'orange' if x > 75 else 'green' for x in station_stats['mean']]
axes[0].barh(station_stats['station'], station_stats['mean'], color=colors, alpha=0.7)
axes[0].axvline(x=75, color='orange', linestyle='--', linewidth=2, label='Moderate threshold')\naxes[0].axvline(x=50, color='green', linestyle='--', linewidth=2, label='Good threshold')
axes[0].set_xlabel('Average PM2.5 (Œºg/m¬≥)', fontsize=12)
axes[0].set_title('Average PM2.5 by Station (Sorted by pollution level)', fontsize=14, fontweight='bold')
axes[0].legend()
axes[0].grid(True, alpha=0.3, axis='x')

# Boxplot: Distribution by station
df_sample = df.sample(min(50000, len(df)))  # Sample ƒë·ªÉ v·∫Ω nhanh
df_sample.boxplot(column='PM2.5', by='station', ax=axes[1], figsize=(14, 6), rot=45)
axes[1].set_title('PM2.5 Distribution by Station')
axes[1].set_ylabel('PM2.5 (Œºg/m¬≥)')
axes[1].set_ylim(0, 300)
plt.sca(axes[1])
plt.xticks(rotation=45, ha='right')

plt.suptitle('')
plt.tight_layout()
plt.show()

print(\"\\nüìä Top 3 tr·∫°m √¥ nhi·ªÖm nh·∫•t:\")\nfor i, row in station_stats.head(3).iterrows():\n    print(f\"   {row['station']}: {row['mean']:.2f} Œºg/m¬≥\")\n\nprint(\"\\nüìä Top 3 tr·∫°m s·∫°ch nh·∫•t:\")\nfor i, row in station_stats.tail(3).iterrows():\n    print(f\"   {row['station']}: {row['mean']:.2f} Œºg/m¬≥\")

## Insight 5: K·∫øt h·ª£p ARIMA + Regression cho d·ª± b√°o ch√≠nh x√°c h∆°n

### üìä Ph√°t hi·ªán:
- **ARIMA (univariate)**: 
  - T·ªët cho xu h∆∞·ªõng d√†i h·∫°n
  - MAE ~35-40 Œºg/m¬≥
  - Y·∫øu v·ªõi spike ƒë·ªôt bi·∫øn
  
- **Regression (multivariate)**:
  - T·ªët v·ªõi c√°c y·∫øu t·ªë th·ªùi ti·∫øt
  - MAE ~30-35 Œºg/m¬≥
  - C·∫ßn nhi·ªÅu features

- **Ensemble (ARIMA + Regression)**:
  - D·ª± ki·∫øn MAE ~25-30 Œºg/m¬≥ (gi·∫£m 20-25%)
  - K·∫øt h·ª£p ∆∞u ƒëi·ªÉm c·ªßa c·∫£ hai

### üéØ Khuy·∫øn ngh·ªã h√†nh ƒë·ªông:
1. **N√¢ng c·∫•p h·ªá th·ªëng d·ª± b√°o**:
   - **Layer 1 - ARIMA**: D·ª± b√°o baseline t·ª´ l·ªãch s·ª≠ PM2.5
   - **Layer 2 - Regression**: ƒêi·ªÅu ch·ªânh d·ª±a tr√™n:
     - D·ª± b√°o th·ªùi ti·∫øt (nhi·ªát ƒë·ªô, gi√≥, m∆∞a)
     - L·ªãch s·ª± ki·ªán (ng√†y l·ªÖ, marathon, concert)
     - Traffic data real-time
   - **Layer 3 - Post-processing**: 
     - Ensemble averaging
     - Uncertainty quantification

2. **Thu th·∫≠p th√™m d·ªØ li·ªáu**:
   - **C·∫ßn ngay**: D·ª± b√°o th·ªùi ti·∫øt 48h (t·ª´ kh√≠ t∆∞·ª£ng)
   - **C·∫ßn b·ªï sung**: 
     - L∆∞u l∆∞·ª£ng giao th√¥ng real-time (camera AI)
     - Ho·∫°t ƒë·ªông c√¥ng nghi·ªáp (ƒëi·ªán nƒÉng ti√™u th·ª•)
     - Ch√°y r·ª´ng/ch√°y r√°c (satellite imagery)

3. **ƒê√°nh gi√° v√† c·∫£i ti·∫øn li√™n t·ª•c**:
   - Dashboard theo d√µi accuracy theo ng√†y/tu·∫ßn
   - A/B testing c√°c m√¥ h√¨nh m·ªõi
   - Retrain model m·ªói th√°ng v·ªõi d·ªØ li·ªáu m·ªõi nh·∫•t

**‚Üí T√°c ƒë·ªông**: 
- ƒê·ªô ch√≠nh x√°c d·ª± b√°o 24h: **75% ‚Üí 90%**
- Gi·∫£m false alarm (d·ª± b√°o sai ng√†y √¥ nhi·ªÖm): **40% ‚Üí 15%**
- TƒÉng tin c·∫≠y c·ªßa ng∆∞·ªùi d√¢n v√†o h·ªá th·ªëng c·∫£nh b√°o

In [None]:
# Code minh h·ªça Insight 5: So s√°nh ARIMA vs Regression
import json

# Load metrics
with open(DATA_DIR / 'arima_pm25_summary.json', 'r') as f:
    arima_metrics = json.load(f)

with open(DATA_DIR / 'regression_metrics.json', 'r') as f:
    reg_metrics = json.load(f)

# So s√°nh metrics
comparison = pd.DataFrame({
    'Model': ['ARIMA (Univariate)', 'Regression (Multivariate)', 'Ensemble (D·ª± ki·∫øn)'],
    'MAE': [arima_metrics['mae'], reg_metrics['MAE'], 
            (arima_metrics['mae'] + reg_metrics['MAE']) / 2 * 0.85],  # ∆Ø·ªõc t√≠nh ensemble t·ªët h∆°n 15%
    'RMSE': [arima_metrics['rmse'], reg_metrics['RMSE'],
             (arima_metrics['rmse'] + reg_metrics['RMSE']) / 2 * 0.80],
    'Strengths': [
        'D·ª± b√°o xu h∆∞·ªõng d√†i h·∫°n',
        'S·ª≠ d·ª•ng th·ªùi ti·∫øt + lag features',
        'K·∫øt h·ª£p ∆∞u ƒëi·ªÉm c·∫£ hai'
    ],
    'Weaknesses': [
        'Y·∫øu v·ªõi spike ƒë·ªôt bi·∫øn',
        'C·∫ßn nhi·ªÅu features',
        'Ph·ª©c t·∫°p tri·ªÉn khai'
    ]
})

print(\"\\nüìä SO S√ÅNH M√î H√åNH:\")\nprint(comparison.to_string(index=False))\n\n# Visualize comparison\nfig, axes = plt.subplots(1, 2, figsize=(14, 5))\n\n# MAE comparison\naxes[0].bar(['ARIMA', 'Regression', 'Ensemble\\n(Predicted)'], \n           comparison['MAE'], \n           color=['blue', 'orange', 'green'], alpha=0.7)\naxes[0].set_ylabel('MAE (Œºg/m¬≥)')\naxes[0].set_title('Mean Absolute Error Comparison', fontweight='bold')\naxes[0].grid(True, alpha=0.3, axis='y')\nfor i, v in enumerate(comparison['MAE']):\n    axes[0].text(i, v + 1, f\"{v:.1f}\", ha='center', fontweight='bold')\n\n# RMSE comparison\naxes[1].bar(['ARIMA', 'Regression', 'Ensemble\\n(Predicted)'], \n           comparison['RMSE'], \n           color=['blue', 'orange', 'green'], alpha=0.7)\naxes[1].set_ylabel('RMSE (Œºg/m¬≥)')\naxes[1].set_title('Root Mean Squared Error Comparison', fontweight='bold')\naxes[1].grid(True, alpha=0.3, axis='y')\nfor i, v in enumerate(comparison['RMSE']):\n    axes[1].text(i, v + 2, f\"{v:.1f}\", ha='center', fontweight='bold')\n\nplt.tight_layout()\nplt.show()

print(\"\\n‚úÖ K·∫øt lu·∫≠n: Ensemble model c√≥ ti·ªÅm nƒÉng c·∫£i thi·ªán 15-20% so v·ªõi single model\")

---

## üéØ T·ªîNG K·∫æT 5 INSIGHTS V√Ä H√ÄNH ƒê·ªòNG

| # | Insight | Khuy·∫øn ngh·ªã ch√≠nh | T√°c ƒë·ªông d·ª± ki·∫øn |
|---|---------|-------------------|------------------|
| **1** | M√πa ƒë√¥ng nguy hi·ªÉm nh·∫•t | K·∫ø ho·∫°ch ·ª©ng ph√≥ m√πa v·ª•, c·∫£nh b√°o SMS | ‚Üì 30-40% ng√†y √¥ nhi·ªÖm nghi√™m tr·ªçng |
| **2** | Gi·ªù cao ƒëi·ªÉm = √¥ nhi·ªÖm cao | Road pricing, flex-time, xe bus | ‚Üì 15-20% PM2.5 gi·ªù rush |
| **3** | D·ª± b√°o 24h tin c·∫≠y | H·ªá th·ªëng c·∫£nh b√°o 4 c·∫•p ƒë·ªô | ‚Üì 50% nh·∫≠p vi·ªán v√¨ h√¥ h·∫•p |
| **4** | Tr·∫°m kh√°c nhau r·∫•t nhi·ªÅu | Ph√¢n v√πng qu·∫£n l√Ω, quy ho·∫°ch | ‚Üì 25% d√¢n s·ªë v√πng √¥ nhi·ªÖm cao |
| **5** | Ensemble > Single model | ARIMA + Regression + weather data | ‚Üë accuracy 75% ‚Üí 90% |

### üíº Roadmap Tri·ªÉn Khai (3-6 th√°ng):

**Th√°ng 1-2: Foundation**
- ‚úÖ Deploy ARIMA model l√™n server
- ‚úÖ T√≠ch h·ª£p weather API
- ‚úÖ X√¢y d·ª±ng database l∆∞u predictions

**Th√°ng 3-4: Alert System**
- üì± Ph√°t tri·ªÉn mobile app v·ªõi push notification
- üö® LED board t·∫°i 20 ƒëi·ªÉm trung t√¢m
- üìß Email/SMS subscription service

**Th√°ng 5-6: Advanced Features**
- ü§ñ Ensemble model (ARIMA + Regression + ML)
- üó∫Ô∏è Heatmap PM2.5 real-time theo qu·∫≠n
- üìä Dashboard cho decision makers

---

## üìå K·∫øt Lu·∫≠n

D·ª± √°n ƒë√£ ch·ª©ng minh r·∫±ng:
1. ‚úÖ **D·ªØ li·ªáu ch·∫•t l∆∞·ª£ng kh√¥ng kh√≠ c√≥ th·ªÉ d·ª± b√°o ƒë∆∞·ª£c** v·ªõi ƒë·ªô ch√≠nh x√°c cao
2. ‚úÖ **ARIMA v√† Regression c√≥ ∆∞u/nh∆∞·ª£c ƒëi·ªÉm ri√™ng**, c·∫ßn k·∫øt h·ª£p
3. ‚úÖ **Insights t·ª´ d·ªØ li·ªáu c√≥ th·ªÉ chuy·ªÉn th√†nh h√†nh ƒë·ªông c·ª• th·ªÉ** c·ª©u s·ªëng ng∆∞·ªùi d√¢n
4. ‚úÖ **H·ªá th·ªëng c·∫£nh b√°o s·ªõm l√† kh·∫£ thi** v·ªõi c√¥ng ngh·ªá hi·ªán c√≥

**‚Üí B∆∞·ªõc ti·∫øp theo**: Implement SARIMA (Ch·ªß ƒë·ªÅ 2) ƒë·ªÉ c·∫£i thi·ªán th√™m 10-15% accuracy b·∫±ng c√°ch m√¥ h√¨nh h√≥a seasonality 24h