In [31]:
import os
import random
import requests
import warnings
from datetime import datetime

import holidays
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Suppress warnings
warnings.filterwarnings('ignore')

# Statistical and ML libraries
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import SelectKBest, f_classif

# Time series specific libraries
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
from scipy import stats 

# Deep learning libraries
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [70]:

# Define the file paths
path1 = r'C:\Users\LENOVO\MachineLearningProhects\TimeSeriesForecasting_End_To_End\data\timeseries_model_data.csv'
path2 = r'C:\Users\TPWODL\New folder_Content\TimeSeriesForecasting_End_To_End\data\raw\timeseries_model_data.csv'

# Check which path exists and read CSV
if os.path.exists(path1):
    new_df = pd.read_csv(path1) 
elif os.path.exists(path2):
    new_df = pd.read_csv(path2)   
else:
    # Raise a FileNotFoundError if neither path is valid
    raise FileNotFoundError("‚ùå No valid data file found in either specified path.")

In [71]:
new_df.head(2)

Unnamed: 0,time,temp_max_C,precip_mm,day_of_week,month,wind_speed_max_m_s,Complaint_Count,year,relative_humidity_2m_mean,weather_label,Complaint_Count_diff1
0,2022-06-10,41.0,0.1,4,6,16.6,10,2022,47,1,
1,2022-06-11,41.0,0.2,5,6,25.3,12,2022,42,1,2.0


In [72]:
df = new_df.drop(columns=['relative_humidity_2m_mean', 'Complaint_Count_diff1'])

In [73]:
df.set_index('time', inplace=True)

In [74]:
df.head(2)

Unnamed: 0_level_0,temp_max_C,precip_mm,day_of_week,month,wind_speed_max_m_s,Complaint_Count,year,weather_label
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-06-10,41.0,0.1,4,6,16.6,10,2022,1
2022-06-11,41.0,0.2,5,6,25.3,12,2022,1


In [75]:
df.shape

(1183, 8)

In [76]:
# Separate features (X) and target (y)
X = df.drop(columns=['Complaint_Count'])
y = df['Complaint_Count']

In [78]:
# Split data into train and test sets (80-20 split)
train_size = int(len(df) * 0.8)
y_train, y_test = y[:train_size], y[train_size:]
X_train, X_test = X[:train_size], X[train_size:]
# Print shapes
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (946, 7)
X_test shape: (237, 7)
y_train shape: (946,)
y_test shape: (237,)


In [79]:
print(f"Training set size: {len(y_train)}")
print(f"Test set size: {len(y_test)}")
print(f"Date range - Train: {df.index[0]} to {df.index[train_size-1]}")
print(f"Date range - Test: {df.index[train_size]} to {df.index[-1]}")


Training set size: 946
Test set size: 237
Date range - Train: 2022-06-10 to 2025-01-30
Date range - Test: 2025-01-31 to 2025-10-19


### MODEL TRAINING

In [81]:
# Define SARIMAX parameters
order = (1, 1, 1)  # (p, d, q)
seasonal_order = (1, 1, 1, 24)  # (P, D, Q, s)

# Train SARIMAX model
model = SARIMAX(
    endog=y_train,
    exog=X_train,
    order=order,
    seasonal_order=seasonal_order,
    enforce_stationarity=False,
    enforce_invertibility=False
)


# Fit the model
results = model.fit(disp=False, maxiter=200)


In [82]:
print("\nModel trained successfully!")
print("\n" + results.summary().tables[0].as_text())


Model trained successfully!

                                     SARIMAX Results                                      
Dep. Variable:                    Complaint_Count   No. Observations:                  946
Model:             SARIMAX(1, 1, 1)x(1, 1, 1, 24)   Log Likelihood               -3296.430
Date:                            Wed, 29 Oct 2025   AIC                           6616.860
Time:                                    21:05:58   BIC                           6674.422
Sample:                                         0   HQIC                          6638.855
                                            - 946                                         
Covariance Type:                              opg                                         


In [83]:
# MODEL EVALUATION ON TEST SET

# Make predictions on test set
y_pred_test = results.forecast(steps=len(y_test), exog=X_test)
