## 1. Setup dan Import Library

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from neuralprophet import NeuralProphet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

# Global plotting defaults: seaborn's white-grid theme and a wide default
# figure size for every matplotlib figure created afterwards.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (14, 6)})

print("Library berhasil diimport")

## 2. Load Data yang Sudah Diproses

In [None]:
# Read the processed daily-trends CSV, convert the 'date' column to datetime,
# and order rows by keyword, then category, then date.
data_path = '../data/processed/daily_trends_processed_latest.csv'

df = (
    pd.read_csv(data_path)
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values(by=['keyword', 'category', 'date'])
)

# Quick sanity summary of what was loaded.
first_date, last_date = df['date'].min(), df['date'].max()
keyword_count = df['keyword'].nunique()
print(f"Shape data: {df.shape}")
print(f"Rentang tanggal: {first_date} sampai {last_date}")
print(f"Jumlah keywords: {keyword_count}")

df.head()

## 3. Persiapan Data untuk Neural Prophet

In [None]:
# NeuralProphet expects two columns: 'ds' (date) and 'y' (target).
# The model is trained for a single keyword/category pair; the pair used here
# is whatever appears in the first row of df.
sample_keyword = df['keyword'].iat[0]
sample_category = df['category'].iat[0]

# Rows belonging to the selected keyword/category pair.
is_sample = df['keyword'].eq(sample_keyword) & df['category'].eq(sample_category)
keyword_data = df.loc[is_sample].copy()

# Reshape into the ds/y layout, ordered by date with a fresh 0..n-1 index.
prophet_df = (
    keyword_data[['date', 'interest_value']]
    .rename(columns={'date': 'ds', 'interest_value': 'y'})
    .sort_values('ds')
    .reset_index(drop=True)
)

series_start, series_end = prophet_df['ds'].min(), prophet_df['ds'].max()
print(f"Training model untuk: {sample_keyword} ({sample_category})")
print(f"Jumlah data points: {len(prophet_df)}")
print(f"Rentang tanggal: {series_start} sampai {series_end}")

prophet_df.head()

## 4. Train/Test Split

In [None]:
# Chronological split: the earliest 80% of rows train the model, the most
# recent 20% are held out. No shuffling, so temporal order is preserved.
split_idx = int(len(prophet_df) * 0.8)

train_df = prophet_df.iloc[:split_idx].copy()
test_df = prophet_df.iloc[split_idx:].copy()

train_start, train_end = train_df['ds'].min(), train_df['ds'].max()
test_start, test_end = test_df['ds'].min(), test_df['ds'].max()

print(f"Train set: {len(train_df)} hari")
print(f"Test set: {len(test_df)} hari")
print(f"Periode train: {train_start} sampai {train_end}")
print(f"Periode test: {test_start} sampai {test_end}")

## 5. Inisialisasi dan Konfigurasi Neural Prophet

In [None]:
# Build the NeuralProphet model from a single configuration mapping so that
# every hyperparameter is visible in one place.
model_config = {
    # Trend
    'growth': 'linear',
    'trend_reg': 1,
    # Seasonality
    'yearly_seasonality': True,
    'weekly_seasonality': True,
    'daily_seasonality': False,
    # Autoregression: 30 daily lags as input, 7 daily steps as output
    'n_lags': 30,
    'n_forecasts': 7,
    # Optimisation
    'epochs': 100,
    'batch_size': 32,
    'learning_rate': 0.01,
    'loss_func': 'MSE',
    # Input scaling
    'normalize': 'auto',
}
model = NeuralProphet(**model_config)

print("Neural Prophet model diinisialisasi")
print(f"Autoregressive lags: {model.n_lags} hari")
print(f"Forecast horizon: {model.n_forecasts} hari")

## 6. Training Model

In [None]:
# Fit on the training split at daily frequency; the held-out split is passed
# as validation data so per-epoch validation metrics are recorded as well.
print("Training model Neural Prophet untuk prediksi 7 hari...")
fit_options = {'freq': 'D', 'validation_df': test_df}
metrics = model.fit(train_df, **fit_options)

print("\nTraining selesai")
# Last row of the metrics frame corresponds to the final epoch.
final_train_loss = metrics['Loss'].iloc[-1]
print(f"Final training loss: {final_train_loss:.4f}")

## 7. Visualisasi Training Metrics

In [None]:
# Two side-by-side panels of per-epoch curves: loss on the left, MAE on the
# right. Validation curves are drawn only when the column exists in `metrics`.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# (axis, metric column, whether the training column is plotted unconditionally)
panels = (
    (axes[0], 'Loss', True),
    (axes[1], 'MAE', False),
)

for ax, metric_name, always_plot_train in panels:
    val_column = f'{metric_name}_val'
    if always_plot_train or metric_name in metrics.columns:
        ax.plot(metrics[metric_name], label=f'Training {metric_name}', linewidth=2)
    if val_column in metrics.columns:
        ax.plot(metrics[val_column], label=f'Validation {metric_name}', linewidth=2)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric_name)
    ax.set_title(f'Training dan Validation {metric_name}')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 8. Prediksi

In [None]:
# Predict over the test period.
#
# The model is autoregressive (n_lags > 0): a forecast for a date needs the
# n_lags observations before it. Predicting on test_df alone therefore leaves
# the first n_lags test rows without any forecast, and those rows are later
# dropped from the evaluation. To cover the whole test period, the tail of the
# training data is prepended as lag context and removed again afterwards.
# (n_forecasts - 1 extra rows are included so that the longer-horizon columns
# yhat2..yhatN are also available from the first test date.)
context_len = model.n_lags + model.n_forecasts - 1
predict_input = pd.concat(
    [train_df.tail(context_len), test_df],
    ignore_index=True,
)
forecast = model.predict(predict_input)

# Keep only the rows that belong to the test period.
test_period_start = test_df['ds'].min()
forecast = forecast[forecast['ds'] >= test_period_start].reset_index(drop=True)

print(f"Shape forecast: {forecast.shape}")
print(f"Kolom forecast: {forecast.columns.tolist()}")

forecast.head(10)

## 9. Evaluasi Model

In [None]:
# Evaluate the 1-day-ahead forecast (column 'yhat1') against the actual
# values (column 'y') of the forecast frame.
actual_1d = forecast['y'].to_numpy(dtype=float)
predicted_1d = forecast['yhat1'].to_numpy(dtype=float)

# Keep only rows where both the actual and the predicted value are present.
mask = ~(np.isnan(actual_1d) | np.isnan(predicted_1d))
actual_1d = actual_1d[mask]
predicted_1d = predicted_1d[mask]

if actual_1d.size == 0:
    # Fail with an explicit message instead of an opaque error from sklearn.
    raise ValueError(
        "Tidak ada pasangan nilai aktual/prediksi yang valid untuk dievaluasi"
    )

# Plain Python floats keep the metrics dict trivially JSON-serialisable.
mae = float(mean_absolute_error(actual_1d, predicted_1d))
rmse = float(np.sqrt(mean_squared_error(actual_1d, predicted_1d)))
r2 = float(r2_score(actual_1d, predicted_1d))

# MAPE is undefined where the actual value is 0. Dividing by a tiny epsilon
# instead would let a single zero dominate the mean with an astronomically
# large term, so zero actuals are excluded from the percentage error.
nonzero = actual_1d != 0
if nonzero.any():
    relative_error = (actual_1d[nonzero] - predicted_1d[nonzero]) / actual_1d[nonzero]
    mape = float(np.mean(np.abs(relative_error)) * 100)
else:
    mape = float('nan')

print(f"\nMetrik Test Set (1-day ahead):")
print(f"  MAE:  {mae:.4f}")
print(f"  RMSE: {rmse:.4f}")
print(f"  R2:   {r2:.4f}")
print(f"  MAPE: {mape:.2f}%")

test_metrics = {'MAE': mae, 'RMSE': rmse, 'R2': r2, 'MAPE': mape}

## 10. Visualisasi Prediksi

In [None]:
# Draw the model's forecast plot and title it with the keyword/category pair.
# NOTE(review): plt.title / plt.show act on the current matplotlib figure; if
# the installed NeuralProphet returns a Plotly figure from plot(), the title
# would not be applied to it -- confirm the plotting backend in use.
forecast_title = f'Neural Prophet Forecast: {sample_keyword} ({sample_category})'
fig = model.plot(forecast)
plt.title(forecast_title, fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

In [None]:
# Actual vs. predicted scatter for the 1-day-ahead forecast, with a dashed
# y = x reference line drawn from 0 to 100 marking a perfect prediction.
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
scatter_ax.scatter(actual_1d, predicted_1d, alpha=0.5, s=30)
scatter_ax.plot([0, 100], [0, 100], 'r--', lw=2, label='Perfect Prediction')
scatter_ax.set_xlabel('Actual Interest Value', fontsize=12)
scatter_ax.set_ylabel('Predicted Interest Value', fontsize=12)
scatter_ax.set_title(
    f'Actual vs Predicted (R2 = {r2:.4f})', fontsize=14, fontweight='bold'
)
scatter_ax.legend()
scatter_ax.grid(True, alpha=0.3)
scatter_fig.tight_layout()
plt.show()

## 11. Analisis Komponen

In [None]:
# Plot the model's component breakdown (trend, seasonality) for the forecast
# frame using NeuralProphet's plot_components, then render the figure.
fig_comp = model.plot_components(forecast)
plt.tight_layout()
plt.show()

## 12. Prediksi 7 Hari ke Depan dan Identifikasi Hari Terbaik

In [None]:
# Forecast the days immediately after the last observed date and pick the
# day with the highest predicted interest.
#
# In the frame returned by predict(), every yhat<k> column of a row is a
# forecast for THAT row's date, made k steps earlier. Reading yhat1..yhat7
# from a single row therefore gives seven forecasts of the same day, not a
# seven-day path. The real multi-day path comes from extending the series
# into the future and reading the "diagonal": yhat1 on the first future
# date, yhat2 on the second, and so on.
horizon = model.n_forecasts  # with n_lags > 0, periods must equal n_forecasts
future_df = model.make_future_dataframe(prophet_df, periods=horizon)
future_forecast = model.predict(future_df)

# The last `horizon` rows are the future dates (no actual value available yet).
last_forecast = future_forecast.tail(horizon).reset_index(drop=True)

# Step k of the path is column yhat<k> on the k-th future row.
predictions_7d = [
    float(last_forecast[f'yhat{step}'].iloc[step - 1])
    for step in range(1, horizon + 1)
]

# Tabulate the path; 'Tanggal' holds the calendar date behind each 'Hari n'.
days = [f'Hari {step}' for step in range(1, horizon + 1)]
prediction_df = pd.DataFrame({
    'Hari': days,
    'Tanggal': last_forecast['ds'].dt.date,
    'Predicted Interest': predictions_7d
})

# Best day = highest predicted interest; nanargmax ignores missing forecasts
# so that a NaN can never be selected as the "best" day.
best_day_idx = int(np.nanargmax(predictions_7d))
best_day = days[best_day_idx]
best_interest = predictions_7d[best_day_idx]

print("\nPrediksi 7 Hari ke Depan:")
print("="*60)
print(prediction_df.to_string(index=False))
print("\n" + "="*60)
print(f"REKOMENDASI: Posting di {best_day}")
print(f"Predicted Interest: {best_interest:.2f}")
print("="*60)

# Bar chart of the forecast path with two horizontal reference thresholds.
plt.figure(figsize=(12, 6))
x = range(1, horizon + 1)
plt.bar(x, predictions_7d, alpha=0.7, color='skyblue')
plt.axhline(y=60, color='green', linestyle='--', alpha=0.5, label='Threshold Bagus (60)')
plt.axhline(y=40, color='orange', linestyle='--', alpha=0.5, label='Threshold Rendah (40)')
plt.xlabel('Hari ke-', fontsize=12)
plt.ylabel('Predicted Interest Value', fontsize=12)
plt.title(f'Prediksi 7 Hari - {sample_keyword} ({sample_category})', fontsize=14, fontweight='bold')
plt.xticks(x, days, rotation=45)
plt.legend()
plt.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.show()

## 13. Simpan Model dan Artifacts

In [None]:
# Persist the trained model, the test-period forecast, and the evaluation
# metrics under ../models.
import json
from pathlib import Path

# NeuralProphet models are saved with the module-level neuralprophet.save()
# helper; the NeuralProphet class itself does not provide a .save() method.
from neuralprophet import save as save_model

# Make sure the output directory exists before writing anything into it.
Path('../models').mkdir(parents=True, exist_ok=True)

model_path = '../models/neuralprophet_daily_7days.pt'
forecast_path = '../models/neuralprophet_daily_7days_forecast.csv'
metrics_path = '../models/neuralprophet_daily_7days_metrics.json'

# Model
save_model(model, model_path)
print(f"Model disimpan ke: {model_path}")

# Forecast frame
forecast.to_csv(forecast_path, index=False)
print(f"Forecast disimpan ke: {forecast_path}")

# Metrics plus the configuration needed to interpret them
metrics_dict = {
    'model': 'Neural Prophet',
    'forecast_horizon': '7 days',
    'test': test_metrics,
    'n_lags': model.n_lags,
    'n_forecasts': model.n_forecasts,
    'keyword': sample_keyword,
    'category': sample_category
}

with open(metrics_path, 'w', encoding='utf-8') as f:
    json.dump(metrics_dict, f, indent=2)

print(f"Metrics disimpan ke: {metrics_path}")

## 14. Ringkasan

### Performa Model:
- Algoritma: Neural Prophet
- Autoregressive Lags: 30 hari
- Forecast Horizon: 7 hari ke depan
- Test RMSE: Lihat output di atas
- Test R2: Lihat output di atas

### Output Model:
- Prediksi interest value untuk 7 hari ke depan
- Identifikasi hari terbaik untuk posting
- Decomposition komponen (trend, seasonality)
- Prediksi yang dapat digabungkan (ensemble) dengan model LightGBM dan LSTM

### Kelebihan:
- Deteksi seasonality otomatis
- Handle trends dan changepoints
- Mudah diinterpretasi (decomposition)
- Built-in uncertainty quantification
- Training cepat dengan PyTorch

### Keterbatasan:
- Butuh time series dengan interval teratur (tanpa tanggal yang hilang)
- Perlu tuning untuk pola spesifik
- Kurang fleksibel dibanding pure deep learning

### Next Steps:
1. Bandingkan dengan LightGBM dan LSTM (forecast horizon sama: 7 hari)
2. Ensemble 3 model untuk prediksi lebih robust
3. Kombinasikan dengan hourly model
4. Deploy untuk production