In [1]:
import pandas as pd

# Load the raw records; the workbook is expected to contain at least the
# 'Date' and 'Rainfall (in mm)' columns used below.
raw_df = pd.read_excel("..//..//datasets//modified_location_0.xlsx")  # Replace with your actual file path

# Build the cleaned frame in one chain so that re-running this cell always
# starts from `raw_df` and never mutates a previously displayed object.
df = (
    raw_df[['Date', 'Rainfall (in mm)']]  # keep only the columns the analysis needs
    # Parse 'Date' to datetime (adjust `format` if the source uses another layout).
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%Y-%m-%d'))
    # Drop rows missing either the date or the rainfall reading; a NaT date
    # would otherwise end up in the index and break date-based slicing.
    .dropna(subset=['Date', 'Rainfall (in mm)'])
    .set_index('Date')
    # Label slices such as ts['2018-01-01':'2022-12-31'] need a sorted index;
    # a stable sort keeps the original order of any rows sharing a date.
    .sort_index(kind='stable')
)

# Rainfall as a date-indexed Series (the data are assumed to be daily).
rainfall_ts = df['Rainfall (in mm)']

# Display the first few rows to check the data
print(df.head())

            Rainfall (in mm)
Date                        
1901-01-01               3.0
1901-01-02               8.0
1901-01-03               0.0
1901-01-04               0.0
1901-01-05               0.0


In [2]:
# Report the dimensions of the cleaned frame as a (rows, columns) tuple.
n_rows, n_cols = df.shape
print((n_rows, n_cols))

(61600, 1)


In [3]:
from pmdarima import auto_arima

# Training window: 2018-01-01 through 2022-12-31 (label slicing includes both ends).
train_start = '2018-01-01'
train_end = '2022-12-31'
y_train = rainfall_ts.loc[train_start:train_end]

# Keep the search space deliberately small so the stepwise search stays cheap:
# every non-seasonal and seasonal order is capped at 1.
order_bounds = dict(start_p=0, start_q=0, max_p=1, max_q=1, max_d=1)
seasonal_bounds = dict(max_P=1, max_Q=1, max_D=1)

# Stepwise seasonal ARIMA search with a seasonal period of 15 observations,
# ranking candidates by AIC and printing each one as it is fitted.
model = auto_arima(
    y_train,
    seasonal=True,
    m=15,
    stepwise=True,
    trace=True,
    suppress_warnings=True,
    information_criterion='aic',
    **order_bounds,
    **seasonal_bounds,
)

Performing stepwise search to minimize aic
 ARIMA(0,0,0)(1,0,1)[15] intercept   : AIC=29312.965, Time=5.32 sec
 ARIMA(0,0,0)(0,0,0)[15] intercept   : AIC=29708.313, Time=0.08 sec
 ARIMA(1,0,0)(1,0,0)[15] intercept   : AIC=27319.196, Time=2.11 sec
 ARIMA(0,0,1)(0,0,1)[15] intercept   : AIC=26749.619, Time=3.64 sec
 ARIMA(0,0,0)(0,0,0)[15]             : AIC=30756.177, Time=0.12 sec
 ARIMA(0,0,1)(0,0,0)[15] intercept   : AIC=26807.950, Time=0.46 sec
 ARIMA(0,0,1)(1,0,1)[15] intercept   : AIC=26644.361, Time=11.29 sec
 ARIMA(0,0,1)(1,0,0)[15] intercept   : AIC=26736.029, Time=2.77 sec
 ARIMA(1,0,1)(1,0,1)[15] intercept   : AIC=26603.962, Time=12.59 sec
 ARIMA(1,0,1)(0,0,1)[15] intercept   : AIC=26669.958, Time=7.05 sec
 ARIMA(1,0,1)(1,0,0)[15] intercept   : AIC=26662.519, Time=3.83 sec
 ARIMA(1,0,1)(0,0,0)[15] intercept   : AIC=26712.810, Time=1.01 sec
 ARIMA(1,0,0)(1,0,1)[15] intercept   : AIC=27306.515, Time=4.93 sec
 ARIMA(1,0,1)(1,0,1)[15]             : AIC=26714.776, Time=3.20 sec

Be

In [6]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Forecast for 2023
forecast_start = '2023-01-01'
forecast_end = '2023-12-31'
output_path = './/sarima_location_0_(15_5yrs).xlsx'

# Extract actual values for the evaluation window (both bounds inclusive).
in_window = (df.index >= forecast_start) & (df.index <= forecast_end)
y_true_2023 = df.loc[in_window, 'Rainfall (in mm)']

# Fail early with a clear message instead of asking the model for zero steps.
n_periods = len(y_true_2023)
if n_periods == 0:
    raise ValueError(
        f"No observations found between {forecast_start} and {forecast_end}"
    )

# `predict` forecasts the n_periods steps that immediately follow the training
# sample, so it lines up with 2023 only because training ends on 2022-12-31.
y_pred_2023 = model.predict(n_periods=n_periods)

# Check lengths
print("Actual data size:", len(y_true_2023))
print("Predicted data size:", len(y_pred_2023))

# Evaluation metrics
mse_2023 = mean_squared_error(y_true_2023, y_pred_2023)  # MSE instead of RMSE
mae_2023 = mean_absolute_error(y_true_2023, y_pred_2023)
r2_2023 = r2_score(y_true_2023, y_pred_2023)

print(f"MSE for 2023 forecast: {mse_2023}")
print(f"MAE for 2023 forecast: {mae_2023}")
print(f"R² for 2023 forecast: {r2_2023}")

# Assemble the results positionally. The prediction may come back as a Series
# carrying the model's own (integer) index, so strip it to a plain array to
# stop that index from leaking into the frame.
forecast_df_2023 = pd.DataFrame({
    'Date': y_true_2023.index,                               # evaluation dates
    'Actual Rainfall': y_true_2023.to_numpy(),               # observed values
    'Predicted Rainfall': pd.Series(y_pred_2023).to_numpy()  # forecast values
})

# Save to Excel
forecast_df_2023.to_excel(output_path, index=False)

# Report the file that was actually written.
print(f"✅ Forecast saved successfully: {output_path}")


Actual data size: 365
Predicted data size: 365
MSE for 2023 forecast: 212.78902703422656
MAE for 2023 forecast: 9.639173849465609
R² for 2023 forecast: 0.008697317089692458
✅ Forecast saved successfully: 2023_forecast_results_Sarima.xlsx


  return get_prediction_index(
  return get_prediction_index(


In [7]:
# Repeat the same experiment with a seasonal period of m = 30 (the previous run used m = 15)

In [8]:
from pmdarima import auto_arima

# Training window: 2018-01-01 through 2022-12-31 (label slicing includes both ends).
train_start = '2018-01-01'
train_end = '2022-12-31'
y_train = rainfall_ts.loc[train_start:train_end]

# Keep the search space deliberately small so the stepwise search stays cheap:
# every non-seasonal and seasonal order is capped at 1.
order_bounds = dict(start_p=0, start_q=0, max_p=1, max_q=1, max_d=1)
seasonal_bounds = dict(max_P=1, max_Q=1, max_D=1)

# Stepwise seasonal ARIMA search with a seasonal period of 30 observations,
# ranking candidates by AIC and printing each one as it is fitted.
model_30 = auto_arima(
    y_train,
    seasonal=True,
    m=30,
    stepwise=True,
    trace=True,
    suppress_warnings=True,
    information_criterion='aic',
    **order_bounds,
    **seasonal_bounds,
)

Performing stepwise search to minimize aic
 ARIMA(0,0,0)(1,0,1)[30] intercept   : AIC=29458.912, Time=12.09 sec
 ARIMA(0,0,0)(0,0,0)[30] intercept   : AIC=29708.313, Time=0.07 sec
 ARIMA(1,0,0)(1,0,0)[30] intercept   : AIC=27319.056, Time=9.13 sec
 ARIMA(0,0,1)(0,0,1)[30] intercept   : AIC=26754.229, Time=8.59 sec
 ARIMA(0,0,0)(0,0,0)[30]             : AIC=30756.177, Time=0.05 sec
 ARIMA(0,0,1)(0,0,0)[30] intercept   : AIC=26807.950, Time=0.48 sec
 ARIMA(0,0,1)(1,0,1)[30] intercept   : AIC=26725.681, Time=19.98 sec
 ARIMA(0,0,1)(1,0,0)[30] intercept   : AIC=26747.815, Time=8.29 sec
 ARIMA(1,0,1)(1,0,1)[30] intercept   : AIC=26672.769, Time=17.66 sec
 ARIMA(1,0,1)(0,0,1)[30] intercept   : AIC=26685.177, Time=14.66 sec
 ARIMA(1,0,1)(1,0,0)[30] intercept   : AIC=26682.907, Time=14.91 sec
 ARIMA(1,0,1)(0,0,0)[30] intercept   : AIC=26712.810, Time=1.15 sec
 ARIMA(1,0,0)(1,0,1)[30] intercept   : AIC=27319.229, Time=20.04 sec
 ARIMA(1,0,1)(1,0,1)[30]             : AIC=inf, Time=29.96 sec

Bes

In [10]:
# Imported here so this cell does not depend on an earlier cell having run.
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Forecast for 2023
forecast_start = '2023-01-01'
forecast_end = '2023-12-31'
# The closing parenthesis was missing from this name; it now matches the
# '(15_5yrs)' naming used for the m = 15 results file.
output_path = './/sarima_location_0_(30_5yrs).xlsx'

# Extract actual values for the evaluation window (both bounds inclusive).
in_window = (df.index >= forecast_start) & (df.index <= forecast_end)
y_true_2023 = df.loc[in_window, 'Rainfall (in mm)']

# Fail early with a clear message instead of asking the model for zero steps.
n_periods = len(y_true_2023)
if n_periods == 0:
    raise ValueError(
        f"No observations found between {forecast_start} and {forecast_end}"
    )

# `predict` forecasts the n_periods steps that immediately follow the training
# sample, so it lines up with 2023 only because training ends on 2022-12-31.
y_pred_2023 = model_30.predict(n_periods=n_periods)

# Check lengths
print("Actual data size:", len(y_true_2023))
print("Predicted data size:", len(y_pred_2023))

# Evaluation metrics
mse_2023 = mean_squared_error(y_true_2023, y_pred_2023)
mae_2023 = mean_absolute_error(y_true_2023, y_pred_2023)
r2_2023 = r2_score(y_true_2023, y_pred_2023)

print(f"MSE for 2023 forecast: {mse_2023}")
print(f"MAE for 2023 forecast: {mae_2023}")
print(f"R² for 2023 forecast: {r2_2023}")

# Assemble the results positionally. The prediction may come back as a Series
# carrying the model's own (integer) index, so strip it to a plain array to
# stop that index from leaking into the frame.
forecast_df_2023 = pd.DataFrame({
    'Date': y_true_2023.index,                               # evaluation dates
    'Actual Rainfall': y_true_2023.to_numpy(),               # observed values
    'Predicted Rainfall': pd.Series(y_pred_2023).to_numpy()  # forecast values
})

# Save to Excel
forecast_df_2023.to_excel(output_path, index=False)

# Report the file that was actually written.
print(f"✅ Forecast saved successfully: {output_path}")


  return get_prediction_index(
  return get_prediction_index(


Actual data size: 365
Predicted data size: 365
MSE for 2023 forecast: 213.4902756629969
MAE for 2023 forecast: 9.837069824860688
R² for 2023 forecast: 0.0054304679632316555
✅ Forecast saved successfully: 2023_forecast_results_Sarima.xlsx
