In [1]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from statsmodels.tools.eval_measures import rmse
import pmdarima as pm

def forecast_with_arima(df, column, freq, forecast_periods,m):
    # Resample the data
    df_resampled = df.resample(freq).sum(numeric_only=True)
    
    # Split data into training and testing
    train = df_resampled[column][:-forecast_periods]
    test = df_resampled[column][-forecast_periods:]

    # Train the ARIMA model using auto_arima
    model = pm.auto_arima(df_resampled[column],m=m, seasonal=True, start_p=0, start_q=0, max_order=4, test='adf', trace=True,
                          error_action='ignore', suppress_warnings=True, stepwise=True)

    # Fit the model and make predictions
    results = model.fit(train)
    predictions = results.predict(n_periods=forecast_periods)

    # Calculate performance metrics
    mse_error = mean_squared_error(test, predictions)
    rmse_error = rmse(test, predictions)
    mape_error = mean_absolute_percentage_error(test, predictions)
    accuracy = (1 - mape_error) * 100
    
    # Print the results
    print(f'Frequency: {freq}')
    print(f'MSE Error: {mse_error:11.10}')
    print(f'RMSE Error: {rmse_error:11.10}')
    print(f'MAPE Error: {mape_error:11.10}')
    print(f'Accuracy: {accuracy:11.10}')
    
    # Re-fit the model on the full data and forecast future values
    results_full = model.fit(df_resampled[column])
    forecast = results_full.predict(n_periods=forecast_periods)
    
    # Combine actual and predicted data
    DF_actual = pd.DataFrame(df_resampled[column])
    DF_actual['Type'] = 'Actual'
    DF_forecast = pd.DataFrame(forecast, columns=[column])
    DF_forecast['Type'] = 'Predicted'
    
    final_df = pd.concat([DF_actual, DF_forecast])
    final_df = final_df.reset_index()
    final_df = final_df.rename(columns={'index': 'Date'})
    
    return final_df

# Example of usage
df=pd.read_csv("final_data_in_ML.csv",parse_dates=['Standardized_Date'])
df['DATETIME'] = pd.to_datetime(df['Standardized_Date'].astype(str) + ' ' + df['STANDARDIZED_TIME'].astype(str))
df=df[['DATETIME','CLEAR WATER PUMPING FLOW ML']]
df.set_index('DATETIME', inplace=True)

# Forecast for daily, weekly, and monthly
print("Daily_resampled_data")
daily_results = forecast_with_arima(df, 'CLEAR WATER PUMPING FLOW ML', 'D',4,52)
print("Weekly_resampled_data")
weekly_results = forecast_with_arima(df, 'CLEAR WATER PUMPING FLOW ML', 'W',4,7)
print("Monthly_resampled_data")
monthly_results = forecast_with_arima(df, 'CLEAR WATER PUMPING FLOW ML', 'M',4,12)

# Show the results
print("Daily_resampled_data")
print(daily_results.tail(60))
print("Weekly_resampled_data")
print(weekly_results.tail(60))
print("Monthly_resampled_data")
print(monthly_results.tail(60))


Daily_resampled_data
Performing stepwise search to minimize aic
 ARIMA(0,0,0)(1,0,1)[52] intercept   : AIC=inf, Time=21.56 sec
 ARIMA(0,0,0)(0,0,0)[52] intercept   : AIC=7886.594, Time=0.04 sec
 ARIMA(1,0,0)(1,0,0)[52] intercept   : AIC=7383.265, Time=14.88 sec
 ARIMA(0,0,1)(0,0,1)[52] intercept   : AIC=7565.938, Time=7.35 sec
 ARIMA(0,0,0)(0,0,0)[52]             : AIC=11187.043, Time=0.02 sec
 ARIMA(1,0,0)(0,0,0)[52] intercept   : AIC=7381.271, Time=0.18 sec
 ARIMA(1,0,0)(0,0,1)[52] intercept   : AIC=7383.272, Time=5.48 sec
 ARIMA(1,0,0)(1,0,1)[52] intercept   : AIC=inf, Time=20.79 sec
 ARIMA(2,0,0)(0,0,0)[52] intercept   : AIC=7350.052, Time=0.18 sec
 ARIMA(2,0,0)(1,0,0)[52] intercept   : AIC=7351.587, Time=20.66 sec
 ARIMA(2,0,0)(0,0,1)[52] intercept   : AIC=7351.620, Time=9.22 sec
 ARIMA(2,0,0)(1,0,1)[52] intercept   : AIC=inf, Time=27.24 sec
 ARIMA(3,0,0)(0,0,0)[52] intercept   : AIC=7350.259, Time=0.34 sec
 ARIMA(2,0,1)(0,0,0)[52] intercept   : AIC=7335.596, Time=1.35 sec
 ARIMA(