# NeuralProphet Model for Tomatoes Price Forecasting

This notebook implements NeuralProphet, a neural network-based time series forecasting model inspired by Facebook Prophet but implemented with PyTorch.

**Note:** NeuralProphet has dependency conflicts with some packages. This separate notebook allows you to run it independently.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

# Load the dataset
df_clean = pd.read_csv('data/df_clean.csv', index_col=0, parse_dates=True)
forecast_horizon = 7

print(f"Dataset loaded: {df_clean.shape}")
df_clean.head()

Dataset loaded: (5804, 15)


Unnamed: 0_level_0,price_per_kg,day_of_week,month,week_of_year,day_of_month,season,rolling_mean_3,rolling_mean_5,rolling_mean_7,lag_1,lag_3,lag_5,rolling_std_3,rolling_std_5,rolling_std_7
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2010-01-04,2660.0,0.0,1.0,1,4.0,0.0,2686.666667,2674.0,2664.285714,2660.0,2660.0,2660.0,30.550505,29.664794,29.358215
2010-01-05,2720.0,1.0,1.0,1,5.0,0.0,2686.666667,2674.0,2664.285714,2660.0,2660.0,2660.0,30.550505,29.664794,29.358215
2010-01-06,2680.0,2.0,1.0,1,6.0,0.0,2686.666667,2674.0,2664.285714,2720.0,2660.0,2660.0,30.550505,29.664794,29.358215
2010-01-07,2670.0,3.0,1.0,1,7.0,0.0,2690.0,2674.0,2664.285714,2680.0,2660.0,2660.0,26.457513,29.664794,29.358215
2010-01-08,2640.0,4.0,1.0,1,8.0,0.0,2663.333333,2674.0,2664.285714,2670.0,2720.0,2660.0,20.81666,29.664794,29.358215


## Install NeuralProphet

First, we need to install NeuralProphet and its dependencies.

In [2]:


# Suppress NeuralProphet logging
import logging
logging.getLogger('neuralprophet').setLevel(logging.ERROR)

## Prepare Data for NeuralProphet

NeuralProphet requires data in Prophet format with 'ds' (date) and 'y' (target) columns.

In [3]:
# Prepare data for NeuralProphet
prophet_df = pd.DataFrame({
    'ds': df_clean.index,
    'y': df_clean['price_per_kg'].values
})

# Remove any NaN values
prophet_df = prophet_df.dropna()

# Use 80/20 train/test split
prophet_train_size = int(len(prophet_df) * 0.8)
prophet_train = prophet_df.iloc[:prophet_train_size].copy()
prophet_test = prophet_df.iloc[prophet_train_size:].copy()

print(f"Training set: {len(prophet_train)} samples")
print(f"Test set: {len(prophet_test)} samples")
print(f"\nTrain date range: {prophet_train['ds'].iloc[0]} to {prophet_train['ds'].iloc[-1]}")
print(f"Test date range: {prophet_test['ds'].iloc[0]} to {prophet_test['ds'].iloc[-1]}")

Training set: 4643 samples
Test set: 1161 samples

Train date range: 2010-01-04 00:00:00 to 2022-09-20 00:00:00
Test date range: 2022-09-21 00:00:00 to 2025-11-24 00:00:00


## Train NeuralProphet Model

Train a Neural Prophet model with optimized parameters.

In [4]:
from neuralprophet import NeuralProphet

# Initialize NeuralProphet model
model = NeuralProphet(
    epochs=100,
    batch_size=32,
    learning_rate=0.01,
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    n_lags=7,  # Use past 7 days
    n_forecasts=7  # Forecast 7 days ahead
)

print("Training NeuralProphet model...")
print("This may take several minutes...\n")

# Train the model
metrics = model.fit(prophet_train, freq='D')

print("\n✓ NeuralProphet model trained successfully!")

Importing plotly failed. Interactive plots will not work.
Importing plotly failed. Interactive plots will not work.
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.978% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.


Training NeuralProphet model...
This may take several minutes...

Training: |          | 0/? [00:49<?, ?it/s, v_num=0, train_loss=0.00561, reg_loss=0.000, MAE=131.0, RMSE=193.0, Loss=0.0056, RegLoss=0.000] 

✓ NeuralProphet model trained successfully!


## Make Predictions

In [5]:
# Create future dataframe and make predictions
future = model.make_future_dataframe(prophet_train, periods=len(prophet_test))
forecast = model.predict(future)

print(f"Generated {len(forecast)} forecasts")
print(f"\nForecast columns: {forecast.columns.tolist()}")
forecast.tail(10)

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.978% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 92.857% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 92.857% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.data.processing._handle_missing_data) - Dropped 7 rows at the end with NaNs in 'y' column.


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 333.33it/s]


AttributeError: `np.NaN` was removed in the NumPy 2.0 release. Use `np.nan` instead.

## Evaluate Model Performance

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Calculate MAPE metric"""
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Extract test predictions
test_forecast = forecast.tail(len(prophet_test))

# Get actual values
y_true = prophet_test['y'].values
y_pred = test_forecast['yhat1'].values  # Day 1 forecast

# Calculate metrics
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
mape = mean_absolute_percentage_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

print("="*60)
print("NEURALPROPHET - MODEL EVALUATION")
print("="*60)
print(f"MAE:  {mae:.2f} KRW")
print(f"RMSE: {rmse:.2f} KRW")
print(f"MAPE: {mape:.2f}%")
print(f"R²:   {r2:.4f}")
print("="*60)

## Visualize Results

In [None]:
# Plot predictions vs actuals
plt.figure(figsize=(16, 6))
plt.plot(prophet_test['ds'], y_true, label='Actual', linewidth=2, alpha=0.7)
plt.plot(test_forecast['ds'], y_pred, label='Predicted', linewidth=2, alpha=0.7, linestyle='--')
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price per kg (KRW)', fontsize=12)
plt.title(f'NeuralProphet: Predictions vs Actual\nMAE: {mae:.2f}, RMSE: {rmse:.2f}, MAPE: {mape:.2f}%, R²: {r2:.4f}', 
          fontsize=14, fontweight='bold')
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## Save Model and Results

In [None]:
import os
import pickle

# Create directories
os.makedirs('results', exist_ok=True)
os.makedirs('saved_models', exist_ok=True)

# Save results to CSV
results_df = pd.DataFrame({
    'Model': ['NeuralProphet'],
    'MAE': [mae],
    'RMSE': [rmse],
    'MAPE': [mape],
    'R2': [r2]
})
results_df.to_csv('results/neuralprophet_metrics.csv', index=False)
print("✓ Saved: results/neuralprophet_metrics.csv")

# Save predictions
predictions_df = pd.DataFrame({
    'Date': prophet_test['ds'],
    'Actual': y_true,
    'Predicted': y_pred
})
predictions_df.to_csv('results/neuralprophet_predictions.csv', index=False)
print("✓ Saved: results/neuralprophet_predictions.csv")

# Save model
with open('saved_models/neuralprophet_model.pkl', 'wb') as f:
    pickle.dump(model, f)
print("✓ Saved: saved_models/neuralprophet_model.pkl")

print("\n" + "="*60)
print("ALL NEURALPROPHET RESULTS AND MODEL SAVED!")
print("="*60)