In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from sklearn.metrics import mean_squared_error

In [3]:
# Absolute location of the raw CSV export (the path looks like a Colab Google Drive mount).
file_name = "/content/drive/Othercomputers/My Laptop/Water Usage Pattern/Dataset/water-consumption2025.csv"

# Load the dataset; header=1 takes the column names from the file's second line,
# so the line above it is skipped.
df = pd.read_csv(filepath_or_buffer=file_name, header=1)

In [4]:
# Show the column labels pandas parsed from the header row (sanity check of the load).
df.columns

Index(['Water Consumption (kL)', 'Q1 12/13', 'Q2 12/13', 'Q3 12/13',
       'Q4 12/13', 'Q1 13/14', 'Q2 13/14', 'Q3 13/14', 'Q4 13/14', 'Q1 14/15',
       'Q2 14/15', 'Q3 14/15', 'Q4 14/15', 'Q1 15/16', 'Q2 15/16', 'Q3 15/16',
       'Q4 15/16', 'Q1 16/17', 'Q2 16/17', 'Q3 16/17', 'Q4 16/17', 'Q1 17/18',
       'Q2 17/18', 'Q3 17/18', 'Q4 17/18', 'Q1 18/19', 'Q2 18/19', 'Q3 18/19',
       'Q4 18/19', 'Q1 19/20', 'Q2 19/20', 'Q3 19/20', 'Q4 19/20', 'Q1 20/21',
       'Q2 20/21', 'Q3 20/21', 'Q4 20/21', 'Q1 21/22', 'Q2 21/22', 'Q3 21/22',
       'Q4 21/22', 'Q1 22/23', 'Q2 22/23', 'Q3 22/23', 'Q4 22/23', 'Q1 23/24',
       'Q2 23/24', 'Q3 23/24', 'Q4 23/24', 'Q1 24/25', 'Q2 24/25'],
      dtype='object')

In [None]:
# Set style for plots.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*' and
# newer releases no longer accept the bare 'seaborn' name, so use whichever
# name this installation provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
sns.set_palette("husl")

# Clean the data
# First row is total consumption, second row is connections (starts later)
#
# Column labels look like 'Q1 12/13' (quarter + two-digit financial-year pair).
# pandas cannot parse those as dates, and the leading 'Water Consumption (kL)'
# label column is not a quarter at all, so the quarter columns are selected and
# decoded explicitly instead of passing every label to pd.to_datetime.
quarter_labels = (
    df.columns.to_series()
    .astype(str)
    .str.extract(r'^\s*Q(?P<quarter>[1-4])\s+\d{2}/(?P<fy_end>\d{2})\s*$')
    .dropna()
)
if quarter_labels.empty:
    raise ValueError("No quarter columns of the form 'Q1 12/13' were found in df.columns")

# Numeric totals from the first row. Quarters without a usable value are
# dropped, mirroring the original dropna on that row.
total_row = df.iloc[0][quarter_labels.index]
totals = pd.to_numeric(
    total_row.astype(str).str.replace(',', '', regex=False).str.strip(),  # tolerate "1,234"-style text
    errors='coerce',
).dropna()
quarter_labels = quarter_labels.loc[totals.index]

# NOTE(review): assumes a July-June financial year (so 'Q1 12/13' is Jul-Sep 2012)
# and that every year is in the 2000s -- confirm against the data source.
# With freq='Q-JUN' the fiscal year is labelled by the calendar year it ends in,
# which is the second half of the 'YY/YY' pair.
quarter_periods = pd.PeriodIndex(
    [
        pd.Period(year=2000 + int(fy_end), quarter=int(qtr), freq='Q-JUN')
        for qtr, fy_end in zip(quarter_labels['quarter'], quarter_labels['fy_end'])
    ],
    freq='Q-JUN',
)

# Index each total by the last calendar day of its quarter (midnight timestamps).
consumption = pd.DataFrame(
    {'Total_Consumption': totals.to_numpy(dtype=float)},
    index=quarter_periods.to_timestamp(how='end').normalize(),
).sort_index()

# Convert to quarterly frequency.
# A QuarterEnd offset object is used instead of the 'Q' alias, which recent
# pandas versions deprecate; any missing quarter becomes an explicit NaN row.
consumption = consumption.asfreq(pd.offsets.QuarterEnd())

# Plot the raw time series
fig, ax = plt.subplots(figsize=(14, 6))
consumption['Total_Consumption'].plot(ax=ax, title='Total Water Consumption Over Time', linewidth=2)
ax.set_ylabel('Consumption (kL)')
ax.set_xlabel('Quarter')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
## 1. Time Series Decomposition
# Additive decomposition with a cycle length of 4 observations (one year of quarters).
decomposition = seasonal_decompose(consumption['Total_Consumption'], model='additive', period=4)

# One stacked panel per component, drawn top to bottom.
fig, axes = plt.subplots(4, 1, figsize=(14, 10))
panels = [
    ('Observed', decomposition.observed),
    ('Trend', decomposition.trend),
    ('Seasonal', decomposition.seasonal),
    ('Residual', decomposition.resid),
]
for panel_ax, (panel_title, component) in zip(axes, panels):
    component.plot(title=panel_title, ax=panel_ax)
fig.tight_layout()
plt.show()

In [None]:
## 2. Stationarity Check
def test_stationarity(timeseries):
    """Run the augmented Dickey-Fuller test on `timeseries` and print the result.

    The printed summary lists the test statistic, p-value, number of lags used,
    number of observations used, and one line per critical value returned by
    `adfuller`. Nothing is returned.
    """
    print('Results of Dickey-Fuller Test:')
    adf_result = adfuller(timeseries, autolag='AIC')

    # First four entries of the result tuple are the headline figures.
    headline = pd.Series(
        adf_result[0:4],
        index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'],
    )
    # Fifth entry maps each significance level to its critical value.
    critical_values = pd.Series(
        {'Critical Value (%s)' % level: threshold for level, threshold in adf_result[4].items()}
    )
    print(pd.concat([headline, critical_values]))


test_stationarity(consumption['Total_Consumption'].dropna())

In [None]:
## 3. Differencing to achieve stationarity
total_series = consumption['Total_Consumption']
# Change versus the previous row, and versus the row four positions (quarters) earlier.
consumption['First_Difference'] = total_series.diff(periods=1)
consumption['Seasonal_Difference'] = total_series.diff(periods=4)

fig, ax = plt.subplots(figsize=(14, 6))
consumption['First_Difference'].plot(ax=ax, title='First Order Differencing', linewidth=2)
ax.grid(True)
plt.show()

# Re-run the Dickey-Fuller test on the differenced series (leading NaN removed).
test_stationarity(consumption['First_Difference'].dropna())

In [None]:
## 4. ACF and PACF plots
# Differenced series with its leading NaN removed; computed once and shared by both plots.
first_diff = consumption['First_Difference'].dropna()

# statsmodels only computes partial autocorrelations for lags below half the
# sample size and raises otherwise, so cap the requested 20 lags to what this
# series supports. With enough observations this is still exactly 20.
max_lags = max(1, min(20, len(first_diff) // 2 - 1))

fig, ax = plt.subplots(figsize=(14, 6))
plot_acf(first_diff, lags=max_lags, ax=ax)
ax.set_title('Autocorrelation Function')
plt.show()

fig, ax = plt.subplots(figsize=(14, 6))
plot_pacf(first_diff, lags=max_lags, ax=ax)
ax.set_title('Partial Autocorrelation Function')
plt.show()

In [None]:
## 5. ARIMA Modeling
# Split into train and test sets: first 80% of the rows for fitting,
# remaining rows held out. Positional slicing keeps the split chronological.
train_size = int(len(consumption) * 0.8)
total_series = consumption['Total_Consumption']
train, test = total_series.iloc[:train_size], total_series.iloc[train_size:]

# Fit ARIMA model
model = ARIMA(train, order=(1,1,1))
model_fit = model.fit()
print(model_fit.summary())

# Forecast one step per held-out observation.
# The values are attached to the test index positionally. Wrapping the returned
# Series in pd.Series(..., index=test.index) would re-align by label and produce
# NaN wherever the forecast's own index differs from the test index.
forecast = pd.Series(np.asarray(model_fit.forecast(steps=len(test))), index=test.index)

# Plot forecasts against actual values
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(train, label='Training')
ax.plot(test, label='Actual')
ax.plot(forecast, label='Forecast')
ax.set_title('ARIMA Forecast vs Actuals')
ax.legend()
ax.grid(True)
plt.show()

# Calculate RMSE between the held-out values and the forecast
rmse = np.sqrt(mean_squared_error(test, forecast))
print(f'RMSE: {rmse}')

In [None]:
## 6. Prophet Model (Facebook's forecasting tool)
# Build the two-column frame passed to Prophet: the former index as 'ds' and
# the consumption values as 'y'.
prophet_df = (
    consumption
    .reset_index()
    .rename(columns={'index': 'ds', 'Total_Consumption': 'y'})
    [['ds', 'y']]
)

# Create and fit model (rows with missing values are excluded from fitting)
model = Prophet(seasonality_mode='multiplicative', yearly_seasonality=True)
model.fit(prophet_df.dropna())

# Extend the timeline by 8 quarterly steps and predict over history + future
future = model.make_future_dataframe(periods=8, freq='Q')
forecast = model.predict(future)

# Plot the forecast and label the axes of the figure Prophet returns
fig = model.plot(forecast)
forecast_ax = fig.gca()
forecast_ax.set_title('Prophet Forecast')
forecast_ax.set_xlabel('Date')
forecast_ax.set_ylabel('Water Consumption (kL)')
plt.show()

# Plot components
fig = model.plot_components(forecast)
plt.show()

In [None]:
## 7. Advanced Visualization - Seasonal Patterns
# Derive calendar year and quarter number from the datetime index for grouping
consumption['Year'] = consumption.index.year
consumption['Quarter'] = consumption.index.quarter

# Distribution of consumption within each quarter number, across all years
fig, ax = plt.subplots(figsize=(14, 6))
sns.boxplot(data=consumption, x='Quarter', y='Total_Consumption', ax=ax)
ax.set_title('Quarterly Distribution of Water Consumption')
ax.set_ylabel('Consumption (kL)')
ax.set_xlabel('Quarter')
plt.show()

In [None]:
# Yearly comparison: one line per year, restricted to years after 2018
recent_years = consumption.loc[consumption['Year'] > 2018]

fig, ax = plt.subplots(figsize=(14, 6))
sns.lineplot(
    data=recent_years,
    x='Quarter',
    y='Total_Consumption',
    hue='Year',
    palette='viridis',
    ax=ax,
)
ax.set_title('Year-over-Year Quarterly Comparison')
ax.set_ylabel('Consumption (kL)')
ax.set_xlabel('Quarter')
# Anchor the legend just outside the right edge of the axes
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()