In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import os
import warnings
warnings.filterwarnings('ignore')

# ==============================
# 1. Load & Preprocess Data
# ==============================
df = pd.read_csv('acc_data_final_with_daynight.csv')

# Dates are day-first (DD/MM/YYYY); format='mixed' lets pandas infer the
# format of each element individually.
df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'], dayfirst=True, format='mixed')
df = df.sort_values('datetime').reset_index(drop=True)  # chronological order

# Feature Engineering
df['hour'] = df['datetime'].dt.hour
df['day_of_week'] = df['datetime'].dt.dayofweek
df['month'] = df['datetime'].dt.month
# Day/night flag: daytime -> 1, nighttime -> 0. Any value that is not one of
# the two mapped labels becomes NaN.
df['กลางวันกลางคืน'] = df['กลางวันกลางคืน'].map({'กลางวัน': 1, 'กลางคืน': 0})

# One-hot encode condition
df = pd.get_dummies(df, columns=['condition'], prefix='cond', drop_first=True)

# Select the feature and target columns
feature_cols = [
    'temperature_F', 'humidity_%', 'pressure_in',
    'hour', 'day_of_week', 'month', 'กลางวันกลางคืน'
] + [col for col in df.columns if col.startswith('cond_')]

target_cols = ['เกิดเหตุ', 'รถน้อยกว่า4ล้อacc', 'รถ4ล้อacc', 'รถมากกว่า4ล้อacc']

X = df[feature_cols].values
y = df[target_cols].values

# Normalize features and targets.
# The scalers are fitted only on the chronologically earliest rows so that the
# min/max of the held-out period do not leak into training. The split further
# down keeps the first 80% of the windows for training, so the same fraction
# of rows is used here. All rows are then transformed with those statistics,
# which means later rows may fall slightly outside [0, 1].
TRAIN_FRACTION = 0.8
n_fit_rows = max(int(len(X) * TRAIN_FRACTION), 1)

scaler_X = MinMaxScaler()
scaler_X.fit(X[:n_fit_rows])
X_scaled = scaler_X.transform(X)

scaler_y = MinMaxScaler()
scaler_y.fit(y[:n_fit_rows])
y_scaled = scaler_y.transform(y)

# ==============================
# 2. Create Sequences (Sliding Window)
# ==============================
def create_sequences(X, y, seq_length):
    """Build sliding-window samples for next-step prediction.

    Window ``i`` holds rows ``X[i : i + seq_length]`` and is paired with the
    target row ``y[i + seq_length]``, i.e. the step right after the window.

    Args:
        X: Array-like of shape ``(n_rows, ...)`` with the input features.
        y: Array-like indexed by row; must have at least ``n_rows`` rows.
        seq_length: Number of consecutive rows per window (must be >= 1).

    Returns:
        Tuple ``(X_seq, y_seq)`` of NumPy arrays with shapes
        ``(n_windows, seq_length, ...)`` and ``(n_windows, ...)`` where
        ``n_windows = max(n_rows - seq_length, 0)``. When there are not enough
        rows for a single window, correctly shaped empty arrays are returned
        so downstream shape lookups keep working.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
        IndexError: If ``y`` has fewer rows than the windows require.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")

    X = np.asarray(X)
    y = np.asarray(y)

    n_windows = max(len(X) - seq_length, 0)
    if n_windows == 0:
        # np.array([]) would collapse to shape (0,); keep the trailing dims.
        empty_X = np.empty((0, seq_length) + X.shape[1:], dtype=X.dtype)
        empty_y = np.empty((0,) + y.shape[1:], dtype=y.dtype)
        return empty_X, empty_y

    X_seq = np.stack([X[start:start + seq_length] for start in range(n_windows)])
    y_seq = np.stack([y[start + seq_length] for start in range(n_windows)])
    return X_seq, y_seq

SEQ_LENGTH = 24  # each sample looks back over 24 consecutive time steps

X_seq, y_seq = create_sequences(X_scaled, y_scaled, SEQ_LENGTH)

# Chronological train/test split: the first 80% of the windows are used for
# training, the remaining ones for testing (no shuffling across the boundary).
split_idx = int(len(X_seq) * 0.8)

# Slice and convert to float32 PyTorch tensors in one step
X_train = torch.tensor(X_seq[:split_idx], dtype=torch.float32)
X_test = torch.tensor(X_seq[split_idx:], dtype=torch.float32)
y_train = torch.tensor(y_seq[:split_idx], dtype=torch.float32)
y_test = torch.tensor(y_seq[split_idx:], dtype=torch.float32)

# Both loaders iterate in the original (time) order
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print(f"✅ Train sequences: {X_train.shape}, Test sequences: {X_test.shape}")

# ==============================
# 3. Define LSTM Model
# ==============================
class LSTMModel(nn.Module):
    """Stacked LSTM whose last time-step output feeds a linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values predicted per sample.
        dropout: Dropout probability between stacked LSTM layers. It is
            ignored when ``num_layers`` is 1, because ``nn.LSTM`` applies
            dropout only between layers.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.2):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # With a single layer, a non-zero dropout has no effect and only
        # triggers a warning from nn.LSTM, so pass 0 in that case.
        lstm_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=lstm_dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return predictions for a batch shaped ``(batch, seq_len, input_size)``.

        The initial hidden and cell states are left to ``nn.LSTM``, which
        defaults them to zeros on the input's device and dtype. This avoids
        allocating float32 CPU tensors that would mismatch non-float32 inputs.
        """
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])  # use the output of the last time step
        return out

# Hyperparameters
INPUT_SIZE = X_train.shape[2]    # features per time step, taken from the training tensor
HIDDEN_SIZE = 64
NUM_LAYERS = 2
OUTPUT_SIZE = len(target_cols)   # one output per target column
LEARNING_RATE = 0.001
NUM_EPOCHS = 50

# Use the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = LSTMModel(
    input_size=INPUT_SIZE,
    hidden_size=HIDDEN_SIZE,
    num_layers=NUM_LAYERS,
    output_size=OUTPUT_SIZE,
)
model = model.to(device)

# Mean squared error on the scaled targets, optimized with Adam
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# ==============================
# 4. TensorBoard Setup
# ==============================
# Event files for this run are written under log_dir; the directory is
# created up front if it does not exist yet.
log_dir = "runs/lstm_acc_prediction"
if not os.path.isdir(log_dir):
    os.makedirs(log_dir, exist_ok=True)
writer = SummaryWriter(log_dir=log_dir)

# ==============================
# 5. Training Loop
# ==============================
# Each epoch: one pass over train_loader with gradient updates, then a full
# pass over test_loader to compute the loss (scaled space) and MAE / R2
# (original units, via scaler_y.inverse_transform). Metrics are logged to
# TensorBoard and the weights with the lowest test loss are checkpointed.
#
# NOTE(review): the checkpoint is selected on the same test set that the final
# evaluation reports on, so the final numbers are optimistically biased. A
# separate validation split would be needed for an unbiased estimate.
print("Starting LSTM Training")

best_test_loss = float('inf')

try:
    for epoch in range(NUM_EPOCHS):
        model.train()
        train_loss = 0.0
        train_samples = 0

        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

            # Weight each batch mean by its size so a short last batch does
            # not count as much as a full one.
            batch_size = batch_X.size(0)
            train_loss += loss.item() * batch_size
            train_samples += batch_size

        avg_train_loss = train_loss / train_samples

        # Evaluate on test set
        model.eval()
        test_loss = 0.0
        test_samples = 0
        all_preds = []
        all_targets = []

        with torch.no_grad():
            for batch_X, batch_y in test_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)

                batch_size = batch_X.size(0)
                test_loss += loss.item() * batch_size
                test_samples += batch_size

                all_preds.append(outputs.cpu().numpy())
                all_targets.append(batch_y.cpu().numpy())

        avg_test_loss = test_loss / test_samples

        # Undo the target scaling so MAE/R2 are computed on the real values
        all_preds = np.vstack(all_preds)
        all_targets = np.vstack(all_targets)

        all_preds_original = scaler_y.inverse_transform(all_preds)
        all_targets_original = scaler_y.inverse_transform(all_targets)

        mae = mean_absolute_error(all_targets_original, all_preds_original)
        r2 = r2_score(all_targets_original, all_preds_original, multioutput='uniform_average')

        # Log to TensorBoard
        writer.add_scalar('Loss/Train', avg_train_loss, epoch)
        writer.add_scalar('Loss/Test', avg_test_loss, epoch)
        writer.add_scalar('MAE/Test', mae, epoch)
        writer.add_scalar('R2/Test', r2, epoch)

        # Log per-target MAE
        for i, target_name in enumerate(target_cols):
            mae_target = mean_absolute_error(all_targets_original[:, i], all_preds_original[:, i])
            writer.add_scalar(f'MAE_per_target/{target_name}', mae_target, epoch)

        # Print progress every 10 epochs
        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] "
                  f"Train Loss: {avg_train_loss:.6f}, Test Loss: {avg_test_loss:.6f}, MAE: {mae:.4f}, R2: {r2:.4f}")

        # Save best model (lowest test loss so far)
        if avg_test_loss < best_test_loss:
            best_test_loss = avg_test_loss
            torch.save(model.state_dict(), 'lstm_best_model.pth')
            print(f"✅ Model saved at epoch {epoch+1} with Test Loss: {avg_test_loss:.6f}")
finally:
    # Flush and close the event file even if training is interrupted or fails.
    writer.close()

print("Training Complete!")

# ==============================
# 6. Final Evaluation & Save Info
# ==============================
# Reload the checkpoint written during training and score it on the test set.
best_state = torch.load('lstm_best_model.pth', map_location=device)
model.load_state_dict(best_state)
model.eval()

all_preds = []
all_targets = []

# Collect predictions and targets batch by batch without tracking gradients
with torch.no_grad():
    for batch_X, batch_y in test_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = model(batch_X)
        all_preds.append(outputs.cpu().numpy())
        all_targets.append(batch_y.cpu().numpy())

all_preds = np.concatenate(all_preds, axis=0)
all_targets = np.concatenate(all_targets, axis=0)

# Map scaled values back to the original target units before scoring
all_preds_original = scaler_y.inverse_transform(all_preds)
all_targets_original = scaler_y.inverse_transform(all_targets)

final_mae = mean_absolute_error(all_targets_original, all_preds_original)
final_r2 = r2_score(all_targets_original, all_preds_original, multioutput='uniform_average')

banner = "=" * 50
print("\n" + banner)
print("FINAL LSTM MODEL PERFORMANCE")
print(banner)
print(f"MAE: {final_mae:.4f}")
print(f"R²: {final_r2:.4f}")

# Save to text file: metrics plus the main settings of this run
report_lines = [
    "LSTM Model Performance\n",
    "=" * 30 + "\n",
    f"MAE: {final_mae:.4f}\n",
    f"R²: {final_r2:.4f}\n",
    f"Sequence Length: {SEQ_LENGTH}\n",
    f"Hidden Size: {HIDDEN_SIZE}\n",
    f"Epochs: {NUM_EPOCHS}\n",
]
with open('lstm_model_info.txt', 'w', encoding='utf-8') as f:
    f.writelines(report_lines)

print("✅ Saved model info to 'lstm_model_info.txt'")
print("✅ Model saved as 'lstm_best_model.pth'")
print("✅ TensorBoard logs saved in 'runs/lstm_acc_prediction'")

# ==============================
# 7. View TensorBoard
# ==============================
print("\nTo view training logs in TensorBoard, run:")
print("   tensorboard --logdir=runs/lstm_acc_prediction")
print("   then open http://localhost:6006")

  from .autonotebook import tqdm as notebook_tqdm


✅ Train sequences: torch.Size([14016, 24, 9]), Test sequences: torch.Size([3505, 24, 9])
🚀 Starting LSTM Training...
✅ Model saved at epoch 1 with Test Loss: 0.000611
✅ Model saved at epoch 2 with Test Loss: 0.000611
✅ Model saved at epoch 3 with Test Loss: 0.000607
✅ Model saved at epoch 4 with Test Loss: 0.000605
✅ Model saved at epoch 6 with Test Loss: 0.000605
✅ Model saved at epoch 7 with Test Loss: 0.000605
✅ Model saved at epoch 8 with Test Loss: 0.000605
✅ Model saved at epoch 9 with Test Loss: 0.000604
Epoch [10/100] Train Loss: 0.001569, Test Loss: 0.000604, MAE: 0.0086, R2: -0.0910
✅ Model saved at epoch 10 with Test Loss: 0.000604
✅ Model saved at epoch 11 with Test Loss: 0.000602
✅ Model saved at epoch 14 with Test Loss: 0.000602
✅ Model saved at epoch 15 with Test Loss: 0.000600
Epoch [20/100] Train Loss: 0.001566, Test Loss: 0.000601, MAE: 0.0081, R2: -0.0840
✅ Model saved at epoch 21 with Test Loss: 0.000600
Epoch [30/100] Train Loss: 0.001566, Test Loss: 0.000604, MAE: