In [1]:
import pandas as pd

# Load the dataset
df = pd.read_excel("..//datasets//modified_location_0.xlsx")  # Replace with your actual file path

# Convert 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')  # Adjust format if necessary

# Drop rows with missing values in 'Rainfall' or 'Date'
df.dropna(subset=['Rainfall (in mm)'], inplace=True)

# Select only the 'Date' and 'Rainfall' columns
df = df[['Date', 'Rainfall (in mm)']]

# Set the 'Date' column as the index
df.set_index('Date', inplace=True)

# Convert Rainfall to time series (assuming daily data)
rainfall_ts = df['Rainfall (in mm)']

# Display the first few rows to check the data
print(df.head())

            Rainfall (in mm)
Date                        
1901-01-01               3.0
1901-01-02               8.0
1901-01-03               0.0
1901-01-04               0.0
1901-01-05               0.0


In [2]:
train_start = '2018-01-01'
train_end = '2022-12-31'
test_start = '2023-01-01'
test_end = '2023-12-31' 

# Slice the data for the training and testing sets using loc
y_train = rainfall_ts.loc[train_start:train_end]
y_test = rainfall_ts.loc[test_start:test_end]

In [3]:
import numpy as np
import pandas as pd
from pmdarima import auto_arima
from statsmodels.tsa.stattools import acf
from statsmodels.tsa.tsatools import detrend

# Define Fourier terms function
def fourier_terms(time_series, period, K):
    """Generate Fourier terms for a given seasonal period and number of harmonics K."""
    t = np.arange(len(time_series))
    terms = np.column_stack([np.sin(2 * np.pi * k * t / period) for k in range(1, K + 1)] +
                            [np.cos(2 * np.pi * k * t / period) for k in range(1, K + 1)])
    return terms

# Initialize best fit storage
best_fit = {'aicc': np.inf, 'i': 0, 'j': 0, 'model': None}

# Define seasonal periods
seasonality_1 = 366.66
seasonality_2 = 733.33

# Loop over different Fourier terms to find the best AICc
for i in range(1, 4):  # Vary K1 from 1 to 3
    for j in range(1, 4):  # Vary K2 from 1 to 3
        # Generate Fourier terms
        z1 = fourier_terms(y_train, seasonality_1, i)
        z2 = fourier_terms(y_train, seasonality_2, j)
        xreg = np.hstack([z1, z2])  # Combine Fourier terms

        # Fit ARIMA model with external regressors
        model = auto_arima(y_train, exogenous=xreg, seasonal=False, stepwise=True, suppress_warnings=True)

        # Check if current model has a lower AICc
        if model.aic() < best_fit['aicc']:
            best_fit = {'aicc': model.aic(), 'i': i, 'j': j, 'model': model}

# Print best model details
print("Best AICc:", best_fit['aicc'])
print("Best Fourier Terms:", "K1 =", best_fit['i'], ", K2 =", best_fit['j'])

Best AICc: 26240.916809416107
Best Fourier Terms: K1 = 1 , K2 = 1


In [4]:
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import os

# Step 1: Generate Fourier terms for the test period using the best K1 and K2
K1 = best_fit['i']
K2 = best_fit['j']

z1_test = fourier_terms(y_test, seasonality_1, K1)
z2_test = fourier_terms(y_test, seasonality_2, K2)
xreg_test = np.hstack([z1_test, z2_test])

# Step 2: Forecast
n_periods = len(y_test)
forecast = best_fit['model'].predict(n_periods=n_periods, exogenous=xreg_test)

# Step 3: Evaluation
mae = mean_absolute_error(y_test, forecast)
rmse = np.sqrt(mean_squared_error(y_test, forecast))
r2 = r2_score(y_test, forecast)

print("MAE:", mae)
print("RMSE:", rmse)
print("R²:", r2)

# Step 4: Create a DataFrame for saving
results_df = pd.DataFrame({
    'Date': y_test.index,
    'Actual': y_test.values,
    'Forecast': forecast
})
results_df.set_index('Date', inplace=True)

# Step 5: Save to Excel
location_name = 'location_0'  # Replace with actual location name
output_filename = f'{location_name}_fourier_5yrs.xlsx'
results_df.to_excel(output_filename)

print(f"Forecast saved to {output_filename}")


  return get_prediction_index(
  return get_prediction_index(


MAE: 8.990785614661355
RMSE: 15.078632835666651
R²: -0.05920734904092151
Forecast saved to location_0_fourier_5yrs.xlsx
