In [18]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [9]:
df = pd.read_csv("powerconsumption.csv")
df.head()

Unnamed: 0,Datetime,Temperature,Humidity,WindSpeed,GeneralDiffuseFlows,DiffuseFlows,PowerConsumption_Zone1,PowerConsumption_Zone2,PowerConsumption_Zone3
0,1/1/2017 0:00,6.559,73.8,0.083,0.051,0.119,34055.6962,16128.87538,20240.96386
1,1/1/2017 0:10,6.414,74.5,0.083,0.07,0.085,29814.68354,19375.07599,20131.08434
2,1/1/2017 0:20,6.313,74.5,0.08,0.062,0.1,29128.10127,19006.68693,19668.43373
3,1/1/2017 0:30,6.121,75.0,0.083,0.091,0.096,28228.86076,18361.09422,18899.27711
4,1/1/2017 0:40,5.921,75.7,0.081,0.048,0.085,27335.6962,17872.34043,18442.40964


In [14]:
df['Datetime'] = pd.to_datetime(df['Datetime'])
df_hourly = df.set_index('Datetime').resample('h').mean().reset_index()
df_hourly.head(25)

Unnamed: 0,Datetime,Temperature,Humidity,WindSpeed,GeneralDiffuseFlows,DiffuseFlows,PowerConsumption_Zone1,PowerConsumption_Zone2,PowerConsumption_Zone3
0,2017-01-01 00:00:00,6.196833,75.066667,0.081833,0.0635,0.098833,29197.974683,18026.74772,19252.048193
1,2017-01-01 01:00:00,5.548833,77.583333,0.082,0.056833,0.1125,24657.21519,16078.419453,17042.891567
2,2017-01-01 02:00:00,5.054333,78.933333,0.082333,0.063,0.129167,22083.037973,14330.699088,15676.144578
3,2017-01-01 03:00:00,5.004333,77.083333,0.082833,0.059833,0.141,20811.13924,13219.452887,14883.855422
4,2017-01-01 04:00:00,5.097667,74.05,0.082333,0.058,0.122833,20475.949367,12921.580547,14317.108433
5,2017-01-01 05:00:00,4.741667,75.233333,0.081,0.065833,0.118833,20807.088607,13069.908815,14395.180722
6,2017-01-01 06:00:00,4.571667,75.75,0.082167,0.061667,0.1255,21648.607595,13596.352585,14513.734938
7,2017-01-01 07:00:00,4.453833,73.4,0.082667,0.0655,0.1195,18540.759495,11449.848023,11552.771085
8,2017-01-01 08:00:00,4.655167,73.6,0.082667,13.763833,6.308667,19605.06329,11738.601822,11190.361447
9,2017-01-01 09:00:00,4.609167,73.983333,0.082167,144.8,25.215,22905.316455,13814.589667,13026.506023


In [15]:
df_hourly = df_hourly.drop(columns=["Temperature", "Humidity",	"WindSpeed", "GeneralDiffuseFlows", "DiffuseFlows"])

In [21]:
df_hourly.to_csv("powerconsumption_hourly.csv", index=False)

In [16]:
import plotly.express as px
fig = px.line(df, x=df.Datetime, y=df.PowerConsumption_Zone1, title='Load Data Over Time')
fig.show()

In [11]:
import plotly.express as px
fig = px.line(df, x=df.Datetime, y=df.PowerConsumption_Zone2, title='Load Data Over Time')
fig.show()

#### Time Series Model

In [None]:
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

zones = ['PowerConsumption_Zone1', 'PowerConsumption_Zone2', 'PowerConsumption_Zone3']
arima_results = {}

# Split train (first 11 months) and test (12th month)
train_df = df_hourly[df_hourly['Datetime'] < '2017-12-01']
test_df = df_hourly[(df_hourly['Datetime'] >= '2017-12-01') & (df_hourly['Datetime'] < '2018-01-01')]

for zone in zones:
    y_train = train_df[zone]
    y_test = test_df[zone]
    
    # Fit ARIMA model (order can be tuned, here (1,1,1) as example)
    model = ARIMA(y_train, order=(1,1,1))
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=len(y_test))
    
    mse = mean_squared_error(y_test, forecast)
    arima_results[zone] = {'ARIMA_MSE': mse, 'model_summary': model_fit.summary()}

arima_results


In [20]:
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

zones = ['PowerConsumption_Zone1', 'PowerConsumption_Zone2', 'PowerConsumption_Zone3']
arima_results = {}

# Split train (first 11 months) and test (12th month)
train_df = df_hourly[df_hourly['Datetime'] < '2017-12-01']
test_df = df_hourly[(df_hourly['Datetime'] >= '2017-12-01') & (df_hourly['Datetime'] < '2018-01-01')]

for zone in zones:
    y_train = train_df[zone]
    y_test = test_df[zone]
    
    # Fit ARIMA model (order can be tuned, here (1,1,1) as example)
    model = ARIMA(y_train, order=(1,1,1))
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=len(y_test))
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=test_df['Datetime'], y=y_test, mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=test_df['Datetime'], y=forecast, mode='lines', name='Forecast'))
    fig.show()
    mse = mean_squared_error(y_test, forecast)
    arima_results[zone] = {'ARIMA_MSE': mse, 'model_summary': model_fit.summary()}

arima_results

{'PowerConsumption_Zone1': {'ARIMA_MSE': 46714342.76280084,
  'model_summary': <class 'statsmodels.iolib.summary.Summary'>
  """
                                   SARIMAX Results                                  
  Dep. Variable:     PowerConsumption_Zone1   No. Observations:                 8016
  Model:                     ARIMA(1, 1, 1)   Log Likelihood              -72961.122
  Date:                    Sat, 28 Jun 2025   AIC                         145928.244
  Time:                            15:45:16   BIC                         145949.211
  Sample:                                 0   HQIC                        145935.420
                                     - 8016                                         
  Covariance Type:                      opg                                         
                   coef    std err          z      P>|z|      [0.025      0.975]
  ------------------------------------------------------------------------------
  ar.L1          0.4341      

In [23]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from datetime import timedelta
import pickle
import warnings
warnings.filterwarnings("ignore")

# Loading and parsing the CSV data
data = pd.read_csv('powerconsumption_hourly.csv')
data['Datetime'] = pd.to_datetime(data['Datetime'])
data.set_index('Datetime', inplace=True)

# Defining zones for analysis
zones = ['PowerConsumption_Zone1', 'PowerConsumption_Zone2', 'PowerConsumption_Zone3']

# Initializing dictionaries to store models and forecasts
models = {}
forecasts = {}
forecast_steps = 24  # Forecasting next 24 hours

# Fitting SARIMA model for each zone and exporting to .pkl
for zone in zones:
    # Preparing time series data for the zone
    series = data[zone].dropna()
    
    # Fitting SARIMA model (p,d,q) = (1,1,1), seasonal (P,D,Q,s) = (1,1,1,24)
    model = SARIMAX(series, 
                    order=(1, 1, 1), 
                    seasonal_order=(1, 1, 1, 24),
                    enforce_stationarity=False,
                    enforce_invertibility=False)
    model_fit = model.fit(disp=False)
    models[zone] = model_fit
    
    # Exporting the model to a .pkl file
    pickle_filename = f'{zone}_model.pkl'
    with open(pickle_filename, 'wb') as file:
        pickle.dump(model_fit, file)
    
    # Forecasting next 24 hours
    forecast = model_fit.forecast(steps=forecast_steps)
    forecasts[zone] = forecast

# Creating forecast datetime index
last_date = data.index[-1]
forecast_dates = pd.date_range(start=last_date + timedelta(hours=1), periods=forecast_steps, freq='H')

# Plotting historical data and forecasts
plt.figure(figsize=(15, 10))
for i, zone in enumerate(zones, 1):
    plt.subplot(3, 1, i)
    # Plotting last 168 hours (1 week) of historical data
    plt.plot(data.index[-168:], data[zone][-168:], label='Historical', color='blue')
    # Plotting forecast
    plt.plot(forecast_dates, forecasts[zone], label='Forecast', color='red', linestyle='--')
    plt.title(f'{zone} - Power Consumption Forecast')
    plt.xlabel('Date')
    plt.ylabel('Power Consumption')
    plt.legend()
    plt.grid(True)
plt.tight_layout()

# Saving the plot
plt.savefig('power_consumption_forecast.png')
plt.close()

# Saving forecast results to CSV
forecast_df = pd.DataFrame({
    'Datetime': forecast_dates,
    'Forecast_Zone1': forecasts['PowerConsumption_Zone1'],
    'Forecast_Zone2': forecasts['PowerConsumption_Zone2'],
    'Forecast_Zone3': forecasts['PowerConsumption_Zone3']
})
forecast_df.to_csv('power_consumption_forecast.csv', index=False)

# Printing forecast summary and confirmation of model export
print("Forecast for the next 24 hours saved to 'power_consumption_forecast.csv'")
print("SARIMA models exported as .pkl files:")
for zone in zones:
    print(f"- {zone}_model.pkl")
print("\nForecast Summary:")
print(forecast_df)

Forecast for the next 24 hours saved to 'power_consumption_forecast.csv'
SARIMA models exported as .pkl files:
- PowerConsumption_Zone1_model.pkl
- PowerConsumption_Zone2_model.pkl
- PowerConsumption_Zone3_model.pkl

Forecast Summary:
                               Datetime  Forecast_Zone1  Forecast_Zone2  \
2017-12-31 00:00:00 2017-12-31 00:00:00    26068.322765    22286.899649   
2017-12-31 01:00:00 2017-12-31 01:00:00    23439.358832    19505.453302   
2017-12-31 02:00:00 2017-12-31 02:00:00    21966.494608    17918.935397   
2017-12-31 03:00:00 2017-12-31 03:00:00    21154.503142    17191.216665   
2017-12-31 04:00:00 2017-12-31 04:00:00    20928.675290    17068.533151   
2017-12-31 05:00:00 2017-12-31 05:00:00    21468.738949    17694.462689   
2017-12-31 06:00:00 2017-12-31 06:00:00    22982.552990    19032.452458   
2017-12-31 07:00:00 2017-12-31 07:00:00    21941.799950    18237.412061   
2017-12-31 08:00:00 2017-12-31 08:00:00    23340.437728    19225.864855   
2017-12-31 09:0

In [27]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from datetime import timedelta
import pickle
import warnings
warnings.filterwarnings("ignore")

# Loading and parsing the CSV data
data = pd.read_csv('load_data/powerconsumption_hourly.csv')
data['Datetime'] = pd.to_datetime(data['Datetime'])
data.set_index('Datetime', inplace=True)

# Defining zones for analysis
zones = ['PowerConsumption_Zone1', 'PowerConsumption_Zone2', 'PowerConsumption_Zone3']

# Initializing dictionaries to store models and forecasts
models = {}
forecasts = {}
forecast_steps = 24  # Forecasting next 24 hours

# Fitting Holt-Winters model for each zone and exporting to .pkl
for zone in zones:
    # Preparing time series data for the zone
    series = data[zone].dropna()
    
    # Fitting Holt-Winters model with additive trend and seasonality (period=24 for daily cycle)
    model = ExponentialSmoothing(
        series,
        trend='add',
        seasonal='add',
        seasonal_periods=24
    )
    model_fit = model.fit()
    models[zone] = model_fit
    
    # Exporting the model to a .pkl file
    pickle_filename = f'{zone}_model.pkl'
    with open(pickle_filename, 'wb') as file:
        pickle.dump(model_fit, file)
    
    # Forecasting next 24 hours
    forecast = model_fit.forecast(steps=forecast_steps)
    forecasts[zone] = forecast

# Creating forecast datetime index
last_date = data.index[-1]
forecast_dates = pd.date_range(start=last_date + timedelta(hours=1), periods=forecast_steps, freq='H')

# Plotting historical data and forecasts
plt.figure(figsize=(15, 10))
for i, zone in enumerate(zones, 1):
    plt.subplot(3, 1, i)
    # Plotting last 168 hours (1 week) of historical data
    plt.plot(data.index[-168:], data[zone][-168:], label='Historical', color='blue')
    # Plotting forecast
    plt.plot(forecast_dates, forecasts[zone], label='Forecast', color='red', linestyle='--')
    plt.title(f'{zone} - Power Consumption Forecast (Holt-Winters)')
    plt.xlabel('Date')
    plt.ylabel('Power Consumption')
    plt.legend()
    plt.grid(True)
plt.tight_layout()

# Saving the plot
plt.savefig('power_consumption_forecast.png')
plt.close()

# Saving forecast results to CSV
forecast_df = pd.DataFrame({
    'Datetime': forecast_dates,
    'Forecast_Zone1': forecasts['PowerConsumption_Zone1'],
    'Forecast_Zone2': forecasts['PowerConsumption_Zone2'],
    'Forecast_Zone3': forecasts['PowerConsumption_Zone3']
})
forecast_df.to_csv('power_consumption_forecast.csv', index=False)

# Printing forecast summary and confirmation of model export
print("Forecast for the next 24 hours saved to 'power_consumption_forecast.csv'")
print("Holt-Winters models exported as .pkl files:")
for zone in zones:
    print(f"- {zone}_model.pkl")
print("\nForecast Summary:")
print(forecast_df)

Forecast for the next 24 hours saved to 'power_consumption_forecast.csv'
Holt-Winters models exported as .pkl files:
- PowerConsumption_Zone1_model.pkl
- PowerConsumption_Zone2_model.pkl
- PowerConsumption_Zone3_model.pkl

Forecast Summary:
                               Datetime  Forecast_Zone1  Forecast_Zone2  \
2017-12-31 00:00:00 2017-12-31 00:00:00    25606.559583    21256.862902   
2017-12-31 01:00:00 2017-12-31 01:00:00    23099.683668    18317.719011   
2017-12-31 02:00:00 2017-12-31 02:00:00    21721.842151    16649.354264   
2017-12-31 03:00:00 2017-12-31 03:00:00    20960.402307    15854.346395   
2017-12-31 04:00:00 2017-12-31 04:00:00    20870.239712    15719.388357   
2017-12-31 05:00:00 2017-12-31 05:00:00    21569.022668    16354.416164   
2017-12-31 06:00:00 2017-12-31 06:00:00    23285.960774    17827.166118   
2017-12-31 07:00:00 2017-12-31 07:00:00    22370.260611    17111.613303   
2017-12-31 08:00:00 2017-12-31 08:00:00    24019.866365    18286.555672   
2017-12-3