## 1. Setup dan Import Library

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import warnings
warnings.filterwarnings('ignore')

# Figure styling shared by all plots below
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (14, 6)})

# Seed the NumPy and TensorFlow random generators with the same fixed value
for seed_fn in (np.random.seed, tf.random.set_seed):
    seed_fn(42)

# Report the TensorFlow version and whether at least one GPU device is visible
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU tersedia: {len(tf.config.list_physical_devices('GPU')) > 0}")

## 2. Load Data yang Sudah Diproses

In [None]:
# Location of the processed daily trends export
data_path = '../data/processed/daily_trends_processed_latest.csv'

# Read the CSV, parse the date column, and order the rows by
# keyword, category and then date.
df = (
    pd.read_csv(data_path)
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values(['keyword', 'category', 'date'])
)

print(f"Shape data: {df.shape}")
print(f"Rentang tanggal: {df['date'].min()} sampai {df['date'].max()}")
print(f"Jumlah keywords: {df['keyword'].nunique()}")

# Last expression of the cell: shows the first rows in the notebook output
df.head()

## 3. Persiapan Sequences untuk LSTM (Prediksi 7 Hari)

In [None]:
def create_sequences_7day(data, lookback=30, forecast_horizon=7):
    """
    Build sliding-window (input, target) pairs for LSTM training.

    Each sample pairs `lookback` consecutive observations with the
    `forecast_horizon` observations that immediately follow them. The
    window advances one step at a time along the first axis of `data`.

    Args:
        data: Time series; anything `np.asarray` accepts. Windows are cut
            along the first axis.
        lookback: Number of past steps in each input window (default 30).
        forecast_horizon: Number of following steps in each target window
            (default 7).

    Returns:
        Tuple `(X, y)` of arrays shaped
        `(n_samples, lookback, *data.shape[1:])` and
        `(n_samples, forecast_horizon, *data.shape[1:])`. When `data` is
        shorter than `lookback + forecast_horizon`, `n_samples` is 0 but
        the remaining dimensions are still present.

    Raises:
        ValueError: If `lookback` or `forecast_horizon` is smaller than 1.
    """
    if lookback < 1 or forecast_horizon < 1:
        raise ValueError("lookback and forecast_horizon must both be >= 1")

    series = np.asarray(data)
    n_samples = max(len(series) - lookback - forecast_horizon + 1, 0)

    if n_samples == 0:
        # Keep the window/feature dimensions so downstream shape handling
        # (reshape, model input) sees a consistent rank even with no samples.
        trailing = series.shape[1:]
        return (
            np.empty((0, lookback) + trailing, dtype=series.dtype),
            np.empty((0, forecast_horizon) + trailing, dtype=series.dtype),
        )

    target_end = lookback + forecast_horizon
    X = np.array([series[i:i + lookback] for i in range(n_samples)])
    # Target: the forecast_horizon steps right after each lookback window
    y = np.array([series[i + lookback:i + target_end] for i in range(n_samples)])

    return X, y

# Sequence parameters
LOOKBACK = 30  # Use 30 days of history as model input
FORECAST_HORIZON = 7  # Predict the next 7 days

# Plain string: no placeholders, so no f-prefix is needed
print("Parameter sequences:")
print(f"  Lookback: {LOOKBACK} hari")
print(f"  Forecast horizon: {FORECAST_HORIZON} hari")

In [None]:
# Prepare the data for one specific keyword/category pair.
# For this demo one model is trained per keyword-category;
# in production a single model could be trained across all of them.

sample_keyword = df['keyword'].iloc[0]
sample_category = df['category'].iloc[0]

# Keep only the rows of the selected pair
keyword_data = df[
    (df['keyword'] == sample_keyword) &
    (df['category'] == sample_category)
].copy()

keyword_data = keyword_data.sort_values('date')

print(f"Training model untuk: {sample_keyword} ({sample_category})")
print(f"Jumlah data points: {len(keyword_data)}")

# Extract interest values as a single-feature column vector
values = keyword_data['interest_value'].values.reshape(-1, 1)

# Work out how many leading observations fall inside the training windows.
# This mirrors the 80/20 split that is applied to the sequences afterwards
# (int(len(X) * 0.8)); keep both in sync if the ratio changes.
n_sequences = len(values) - LOOKBACK - FORECAST_HORIZON + 1
n_train_sequences = int(n_sequences * 0.8)
if n_train_sequences < 1:
    raise ValueError(
        f"Data tidak cukup untuk {sample_keyword} ({sample_category}): "
        f"{len(values)} data points dengan lookback {LOOKBACK} dan "
        f"forecast horizon {FORECAST_HORIZON}"
    )
# The last training window starts at n_train_sequences - 1 and spans
# LOOKBACK + FORECAST_HORIZON observations.
train_end = n_train_sequences + LOOKBACK + FORECAST_HORIZON - 1

# Normalize data. The scaler is fit on the training portion only so that the
# min/max of the held-out period does not leak into training; held-out values
# may therefore fall slightly outside [0, 1] after transformation.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(values[:train_end])
scaled_values = scaler.transform(values)

print("Data dinormalisasi ke range [0, 1] (scaler di-fit pada data train)")

In [None]:
# Turn the scaled series into (input window, target window) pairs
X, y = create_sequences_7day(
    scaled_values,
    lookback=LOOKBACK,
    forecast_horizon=FORECAST_HORIZON,
)

print(f"Shape sequences:")
print(f"  X: {X.shape} (samples, lookback, features)")
print(f"  y: {y.shape} (samples, forecast_horizon, features)")

# 80/20 split without shuffling: the first 80% of the windows are used for
# training, the remaining 20% for testing.
split_idx = int(len(X) * 0.8)

X_train, y_train = X[:split_idx], y[:split_idx]
X_test, y_test = X[split_idx:], y[split_idx:]

print(f"\nTrain set: {X_train.shape[0]} sequences")
print(f"Test set: {X_test.shape[0]} sequences")

## 4. Bangun Arsitektur Model LSTM

In [None]:
def build_lstm_model_7day(lookback, forecast_horizon, n_features=1, dropout_rate=0.2):
    """
    Build and compile the stacked LSTM used for multi-step forecasting.

    The network is two LSTM layers (128 and 64 units) followed by a
    32-unit ReLU dense layer, each followed by dropout, and a linear dense
    output with one unit per forecast step.

    Args:
        lookback: Number of time steps in each input window.
        forecast_horizon: Number of steps predicted at once (output units).
        n_features: Number of features per time step (default 1).
        dropout_rate: Dropout rate applied after each hidden layer
            (default 0.2).

    Returns:
        A compiled Keras `Sequential` model (Adam optimizer, MSE loss,
        MAE metric).
    """
    model = Sequential([
        # Declare the input shape with an explicit Input object instead of
        # passing input_shape= to the first layer, as the Keras Sequential
        # guide recommends.
        keras.Input(shape=(lookback, n_features)),

        # First LSTM layer: returns the full sequence for the next LSTM
        LSTM(128, return_sequences=True),
        Dropout(dropout_rate),

        # Second LSTM layer: returns only the final state
        LSTM(64, return_sequences=False),
        Dropout(dropout_rate),

        # Dense layers
        Dense(32, activation='relu'),
        Dropout(dropout_rate),

        # Output layer: one value per forecast step
        Dense(forecast_horizon)
    ])

    model.compile(
        optimizer='adam',
        loss='mse',
        metrics=['mae']
    )

    return model

# Instantiate the network for the configured window sizes
model = build_lstm_model_7day(
    lookback=LOOKBACK,
    forecast_horizon=FORECAST_HORIZON,
)

# Print the layer-by-layer summary
print("Arsitektur Model:")
model.summary()

## 5. Training Model

In [None]:
# Callbacks: stop when val_loss has not improved for 20 epochs (restoring the
# best weights) and keep only the best model on disk.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True
)

model_checkpoint = ModelCheckpoint(
    '../models/lstm_daily_7days.keras',
    monitor='val_loss',
    save_best_only=True
)

# Flatten each target window to a vector so it matches the Dense output.
# y_test_flat is not used for fitting; it is kept for the evaluation step.
y_train_flat = y_train.reshape(y_train.shape[0], -1)
y_test_flat = y_test.reshape(y_test.shape[0], -1)

# Train model.
# Early stopping and checkpointing select the model on val_loss, so the
# validation data must not be the test set; otherwise the test metrics
# reported afterwards are optimistically biased. validation_split holds out
# the last 10% of the training samples (taken before shuffling).
print("Training model LSTM untuk prediksi 7 hari...")
history = model.fit(
    X_train, y_train_flat,
    validation_split=0.1,
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping, model_checkpoint],
    verbose=1
)

print("\nTraining selesai")

## 6. Visualisasi Training History

In [None]:
# Plot training history: one panel per tracked quantity
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# Each entry describes one panel: history keys, legend labels, axis label, title
history_panels = [
    {
        'train_key': 'loss', 'val_key': 'val_loss',
        'train_label': 'Train Loss', 'val_label': 'Val Loss',
        'ylabel': 'Loss (MSE)', 'title': 'Model Loss',
    },
    {
        'train_key': 'mae', 'val_key': 'val_mae',
        'train_label': 'Train MAE', 'val_label': 'Val MAE',
        'ylabel': 'MAE', 'title': 'Model MAE',
    },
]

for ax, panel in zip(axes, history_panels):
    ax.plot(history.history[panel['train_key']], label=panel['train_label'])
    ax.plot(history.history[panel['val_key']], label=panel['val_label'])
    ax.set_xlabel('Epoch')
    ax.set_ylabel(panel['ylabel'])
    ax.set_title(panel['title'])
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 7. Evaluasi Model

In [None]:
# Predict on both splits (outputs are in the scaled space)
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)


def _to_original_scale(scaled_windows):
    """Map scaled forecast windows back to the original value scale.

    The scaler was fit on a single-feature column, so the windows are
    flattened to one column before `inverse_transform` and reshaped back
    afterwards, rather than relying on broadcasting a one-feature scaler
    across all forecast-step columns.
    """
    scaled_windows = np.asarray(scaled_windows)
    single_column = scaled_windows.reshape(-1, 1)
    return scaler.inverse_transform(single_column).reshape(scaled_windows.shape)


# Inverse transform targets and predictions
y_train_inv = _to_original_scale(y_train_flat)
y_test_inv = _to_original_scale(y_test_flat)
y_pred_train_inv = _to_original_scale(y_pred_train)
y_pred_test_inv = _to_original_scale(y_pred_test)

# Compute metrics
def calculate_metrics(y_true, y_pred, dataset_name):
    """Print and return MAE, RMSE and R2 for one dataset split.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values, same shape as `y_true`.
        dataset_name: Label used in the printed header (e.g. "Train").

    Returns:
        Dict with the keys 'MAE', 'RMSE' and 'R2'.
    """
    scores = {
        'MAE': mean_absolute_error(y_true, y_pred),
        # RMSE is the square root of the mean squared error
        'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)),
        'R2': r2_score(y_true, y_pred),
    }

    print(f"\nMetrik {dataset_name}:")
    print(f"  MAE:  {scores['MAE']:.4f}")
    print(f"  RMSE: {scores['RMSE']:.4f}")
    print(f"  R2:   {scores['R2']:.4f}")

    return scores

# Score both splits on the original (inverse-transformed) scale
train_metrics = calculate_metrics(
    y_true=y_train_inv,
    y_pred=y_pred_train_inv,
    dataset_name="Train",
)
test_metrics = calculate_metrics(
    y_true=y_test_inv,
    y_pred=y_pred_test_inv,
    dataset_name="Test",
)

## 8. Visualisasi Prediksi 7 Hari

In [None]:
# Plot the forecasts for the first (up to) five test samples
n_samples = min(5, len(y_test_inv))

# squeeze=False always yields a 2-D grid, so a single row needs no special case
fig, axes = plt.subplots(n_samples, 1, figsize=(14, 3*n_samples), squeeze=False)
axes = axes[:, 0]

horizon_days = range(1, FORECAST_HORIZON+1)

for sample_no, (ax, actual, predicted) in enumerate(
        zip(axes, y_test_inv, y_pred_test_inv), start=1):
    ax.plot(horizon_days, actual,
            label='Actual', marker='o', linewidth=2)
    ax.plot(horizon_days, predicted,
            label='Predicted', marker='s', linewidth=2, alpha=0.7)
    ax.set_title(f'Test Sample {sample_no} - Prediksi 7 Hari')
    ax.set_xlabel('Hari ke-')
    ax.set_ylabel('Interest Value')
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.set_xticks(horizon_days)

plt.tight_layout()
plt.show()

In [None]:
# Scatter plot: actual vs predicted, one panel per split
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# (title prefix, actual values, predictions, metrics dict, extra scatter kwargs)
scatter_panels = [
    ('Train Set', y_train_inv, y_pred_train_inv, train_metrics, {}),
    ('Test Set', y_test_inv, y_pred_test_inv, test_metrics, {'color': 'orange'}),
]

for ax, (split_name, actual, predicted, scores, extra) in zip(axes, scatter_panels):
    ax.scatter(actual.flatten(), predicted.flatten(), alpha=0.3, s=10, **extra)
    # Diagonal reference line from (0, 0) to (100, 100): perfect predictions lie on it
    ax.plot([0, 100], [0, 100], 'r--', lw=2)
    ax.set_xlabel('Actual Interest Value')
    ax.set_ylabel('Predicted Interest Value')
    ax.set_title(f'{split_name} (R2 = {scores["R2"]:.4f})')
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 9. Identifikasi Hari Terbaik untuk Posting

In [None]:
# Take the last test window as the basis for the recommendation.
# NOTE(review): this window belongs to the test set, so its actual values are
# already known (they are plotted alongside). A forecast for days beyond the
# dataset would need a prediction from the most recent lookback window.
latest_prediction = y_pred_test_inv[-1]
latest_actual = y_test_inv[-1]

# Derive labels and x positions from the prediction length instead of
# hard-coding seven entries, so the cell keeps working if the horizon changes.
horizon = len(latest_prediction)
x = range(1, horizon + 1)
days = [f'Hari {day}' for day in x]

# Build a DataFrame for display
prediction_df = pd.DataFrame({
    'Hari': days,
    'Predicted Interest': latest_prediction,
    'Actual Interest': latest_actual
})

# Identify the best day: the one with the highest predicted interest
best_day_idx = np.argmax(latest_prediction)
best_day = days[best_day_idx]
best_interest = latest_prediction[best_day_idx]

print(f"\nPrediksi {horizon} Hari ke Depan:")
print("="*60)
print(prediction_df.to_string(index=False))
print("\n" + "="*60)
print(f"REKOMENDASI: Posting di {best_day}")
print(f"Predicted Interest: {best_interest:.2f}")
print("="*60)

# Visualization: predicted bars, actual line, and two reference thresholds
plt.figure(figsize=(12, 6))
plt.bar(x, latest_prediction, alpha=0.7, label='Predicted', color='skyblue')
plt.plot(x, latest_actual, marker='o', color='red', linewidth=2, label='Actual', markersize=8)
plt.axhline(y=60, color='green', linestyle='--', alpha=0.5, label='Threshold Bagus (60)')
plt.axhline(y=40, color='orange', linestyle='--', alpha=0.5, label='Threshold Rendah (40)')
plt.xlabel('Hari ke-', fontsize=12)
plt.ylabel('Interest Value', fontsize=12)
plt.title(f'Prediksi {horizon} Hari - {sample_keyword} ({sample_category})', fontsize=14, fontweight='bold')
plt.xticks(x, days, rotation=45)
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 10. Simpan Model dan Artifacts

In [None]:
import json
import os

import joblib

# Make sure the output directory exists before writing the artifacts
os.makedirs('../models', exist_ok=True)

# The model itself was already written by the ModelCheckpoint callback
print("Model disimpan ke: ../models/lstm_daily_7days.keras")

# Save the scaler so predictions can be mapped back to the original scale
joblib.dump(scaler, '../models/lstm_daily_7days_scaler.pkl')
print("Scaler disimpan ke: ../models/lstm_daily_7days_scaler.pkl")

# Save metrics. Scores are cast to plain floats in case any of them is a
# NumPy scalar type that the json module cannot encode.
metrics = {
    'model': 'LSTM',
    'forecast_horizon': f'{FORECAST_HORIZON} days',
    'train': {name: float(score) for name, score in train_metrics.items()},
    'test': {name: float(score) for name, score in test_metrics.items()},
    'lookback': LOOKBACK,
    'keyword': sample_keyword,
    'category': sample_category
}

with open('../models/lstm_daily_7days_metrics.json', 'w', encoding='utf-8') as f:
    json.dump(metrics, f, indent=2)

print("Metrics disimpan ke: ../models/lstm_daily_7days_metrics.json")

## 11. Ringkasan

### Performa Model:
- Algoritma: LSTM Neural Network
- Lookback: 30 hari
- Forecast Horizon: 7 hari ke depan
- Test RMSE: Lihat output di atas
- Test R2: Lihat output di atas

### Output Model:
- Prediksi interest value untuk 7 hari ke depan
- Identifikasi hari terbaik untuk posting
- Dapat digunakan sebagai komponen ensemble bersama LightGBM dan NeuralProphet

### Kelebihan:
- Menangkap long-term dependencies
- Bagus untuk sequential patterns
- Multi-step forecasting

### Keterbatasan:
- Butuh lebih banyak data
- Training lebih lama
- Perlu hyperparameter tuning

### Next Steps:
1. Bandingkan dengan LightGBM dan NeuralProphet (forecast horizon sama: 7 hari)
2. Ensemble 3 model untuk prediksi lebih robust
3. Kombinasikan dengan hourly model
4. Deploy untuk production