In [None]:
# --- Step 1: Load and prepare the data ---
import pandas as pd

# Load dataset
df = pd.read_excel("../datasets/modified_location_0.xlsx")

# Convert 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Drop rows with missing rainfall values
# NOTE(review): dropping rows can leave gaps in the date index, so the
# series may no longer be evenly spaced -- confirm this is acceptable for a
# seasonal model, or reindex/interpolate instead.
df.dropna(subset=['Rainfall (in mm)'], inplace=True)

# Set 'Date' as the index and put the rows in chronological order.
# The date-string slices taken from this series and the ARIMA fit both rely
# on a monotonically increasing index; on an unsorted index the slices can
# return the wrong rows or raise KeyError.
df.set_index('Date', inplace=True)
df.sort_index(inplace=True)

# Select the rainfall series
rainfall_ts = df['Rainfall (in mm)']

# --- Step 2: Split data into train and test ---
# Label-based date slices (both end dates are inclusive):
# 2018-2022 is used to fit the model, 2023-2024 is held out for evaluation.
train = rainfall_ts.loc['2018-01-01':'2022-12-31']
test = rainfall_ts.loc['2023-01-01':'2024-12-31']

# --- Step 3: Fit SARIMA Model (Memory-optimized) ---
from pmdarima import auto_arima
from pmdarima.arima import StepwiseContext

# The previous `n_fits=10` and `max_order=5` arguments were removed: per the
# pmdarima documentation `n_fits` only applies to the random search and
# `max_order` only to non-stepwise selection, and both were set to their
# default values, so they never limited this stepwise search.  The intended
# cap on the number of candidate models is enforced with StepwiseContext.
#
# NOTE(review): m=365 makes every seasonal candidate very expensive in time
# and memory.  If the fit does not complete, consider modelling the yearly
# cycle with exogenous Fourier terms (pmdarima's FourierFeaturizer) or
# aggregating to weekly/monthly data -- confirm with the modelling owner.
with StepwiseContext(max_steps=10):      # stop the stepwise search after 10 steps
    model = auto_arima(train,
                       start_p=0, start_q=0,
                       max_p=2, max_q=2,
                       d=None,           # let it infer
                       seasonal=True,
                       m=365,            # yearly seasonality for daily data
                       start_P=0, start_Q=0,
                       max_P=1, max_Q=1, max_D=1,
                       stepwise=True,
                       error_action='ignore',   # skip candidates that fail to fit
                       suppress_warnings=True,
                       trace=True)              # print each candidate and its AIC

# --- Step 4: Forecast ---
# Forecast exactly one value per held-out observation so the predictions
# can be compared with `test` element by element.
forecast_period = test.shape[0]
forecast_values = model.predict(n_periods=forecast_period)

# --- Step 5: Evaluate Forecast ---
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Error metrics of the forecast against the held-out observations.
mae = mean_absolute_error(test, forecast_values)
mse = mean_squared_error(test, forecast_values)
r2 = r2_score(test, forecast_values)
rmse = np.sqrt(mse)  # root of the MSE, i.e. back in the units of the series

# Report: one metric per line, in the order MAE, MSE, RMSE, R2.
print("\n--- SARIMA Forecast Evaluation ---")
print(
    f"MAE: {mae:.2f}",
    f"MSE: {mse:.2f}",
    f"RMSE: {rmse:.2f}",
    f"R2 Score: {r2:.4f}",
    sep="\n",
)

# --- Step 6: Plot Results ---
import matplotlib.pyplot as plt

# One wide figure with the observed series and the forecast overlaid on the
# same date axis (the forecast is drawn dashed to tell the two apart).
fig, ax = plt.subplots(figsize=(14,6))
ax.plot(test.index, test.values, label='Actual')
ax.plot(test.index, forecast_values, label='Forecast', linestyle='--')

# Titles, axis labels and legend.
ax.set_title("SARIMA Forecast vs Actual Rainfall (2023-2024)")
ax.set_xlabel("Date")
ax.set_ylabel("Rainfall (mm)")
ax.legend()
ax.grid(True)

fig.tight_layout()
plt.show()

Performing stepwise search to minimize aic
 ARIMA(0,0,0)(0,0,0)[365] intercept   : AIC=29708.313, Time=0.10 sec
