#  Electricity Demand Analysis and Forecasting 


**Notebook-wide Libraries & Settings**

In [None]:
import warnings
# Suppress every warning (all categories, all modules) for the rest of the kernel session.
# NOTE(review): this also hides deprecation warnings raised by libraries used below;
# consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as  np
import datetime 

# path setup
# Append the parent of the current working directory to the import path so that
# the `src` package imported below can be resolved.
import sys
import os
sys.path.append(os.path.abspath('../'))

# project utils imports
import plotly.express as px 
import plotly.io as pio
from src.visualisation.plot_utils import plotly_user_standard_settings
# Project helper that receives the plotly io and express modules.
# Presumably it applies shared figure defaults — confirm in src/visualisation/plot_utils.
plotly_user_standard_settings(pio,px)


## Part 1: Data Processing


In [None]:
from src.data.make_dataset import read_and_proc_csvdata

**Load Raw Data**

In [None]:
# Read the CSV untouched so the raw columns can be inspected before any processing.
raw_csv_path = "../data/raw/uk_electricity_consumption_historic_demand_2009_2024.csv"
df = pd.read_csv(raw_csv_path)
df.head()

**Data Description:**<br>
See the README.md for full details on the dataset. 
The important columns considered in the project include:
- settlement_date and settlement_period
- nd and tsd
- england_wales_demand
- embedded_wind and solar generations
- embedded_wind and solar capacities
- is_holiday

**Data Processing**

In [None]:
# Path to the raw historic-demand CSV, handed to the project loader from src.data.make_dataset.
# Later cells read the 'date', 'period', 'nd' and 'tsd' columns from the returned frame.
file_path = "../data/raw/uk_electricity_consumption_historic_demand_2009_2024.csv"
proc_df = read_and_proc_csvdata(file_path)

---------------------------------------------------------------------

## Part 2: Exploratory Data Analysis 

This involves an analysis to obtain insights from historical electricity demand and generation based on the following questions:
1. What is the statistical outlook of the historical data?
2. What is the historical electricity demand (tsd) trend? 
3. What is the percentage of demand that is met by the total generation?
4. What is the solar-wind mix from generation over time?
5. What is the generation utilisation from each source?

In [None]:
# 1. Plot raw demand 
# 1. Raw demand: ND and TSD exactly as loaded, before any filtering.
raw_demand_plot = px.line(
    proc_df,
    x='date',
    y=['nd', 'tsd'],
    title='National vs Transmission Systems Electricity Demand over Time',
)
raw_demand_plot.update_layout(
    xaxis_title='DateTime',
    yaxis_title='Demand (MW)',
    legend_title='Demand Type',
)
raw_demand_plot.show()
# To export (saving_path is assigned in a later cell):
#raw_demand_plot.write_html(saving_path + "ND_and_TSD.html")


In [None]:
# Optional schema/dtype check:
#proc_df.info()

# Sanity check: with 2 datapoints per hour a day has at most 48 settlement periods,
# so list the dates of any rows whose period exceeds 48.
#print(proc_df[proc_df['period']>48]['date'])

# Summary statistics of the processed frame; kept in a variable because the
# next cell reads the 'tsd' column from it.
proc_df_stats = proc_df.describe()  
proc_df_stats   


In [None]:
# Look the statistics up by row label rather than by position. The row order of
# DataFrame.describe() is not fixed: when the frame contains a datetime column the
# 'std' row is placed last, otherwise it is third. Positional lookups such as [1] and [7]
# can therefore silently return a different statistic (e.g. 'max' instead of 'std').
tsd_stats = proc_df_stats['tsd']
print(f"Average Transmission Demand (TSD) is {round(tsd_stats.loc['mean'],1)}MW")
print(f"Variation in Transmission Demand (TSD) is {round(tsd_stats.loc['std'],1)}MW")

**Statistical Outlook:**<br>
1. There are outliers in TSD before January 2013
2. More than 2 datapoints per hour (settlement periods above 48) are recorded in October of every year
3. TSD exhibits a downward trend over time, indicating reducing electricity demand

**Action:**<br>
- Remove outliers and periods with more than 2 datapoints.

In [None]:
# Keep only TSD readings above 10000, then discard rows whose settlement
# period is beyond 48, and renumber the rows.
df_flt = proc_df.query('tsd>10000').copy()
excess_period_rows = df_flt.index[df_flt["period"] > 48]
df_flt = df_flt.drop(index=excess_period_rows).reset_index(drop=True)

# Settlement period -> offset from midnight as text (1 -> '0:00:00', 2 -> '0:30:00', ...).
hours_from_midnight = (df_flt["period"] - 1) * 0.5
df_flt["period_hour"] = hours_from_midnight.map(
    lambda hrs: str(datetime.timedelta(hours=hrs))
)

# Fold the offset into the calendar date so 'date' becomes a full timestamp.
settlement_day = pd.to_datetime(df_flt['date'])
df_flt['date'] = settlement_day + pd.to_timedelta(df_flt['period_hour'])
df_flt.info()

In [None]:
# One week of TSD (after 1 January 2016 00:00 and before 8 January 2016 00:00).
# The bounds are written as ISO dates: a string such as "01-08-2016" is parsed month-first
# by pandas (8 January) but is easily misread as 1 August.
df_01_08_2016 = df_flt.loc[(df_flt['date'] > "2016-01-01") & (df_flt['date'] < "2016-01-08")]
px.line(df_01_08_2016, x='date', y='tsd', title='TSD, 1-8 January 2016')

**Distribution of TSD by Hour**

In [None]:
# Hour of day is the leading field of the 'H:MM:SS' offset text.
hour_field = df_flt['period_hour'].str.split(":").str[0]
df_flt['hour'] = hour_field.astype(int)

# Box plot of TSD for each hour, with the hours forced into 0..23 order.
fig = px.box(
    df_flt,
    x='hour',
    y='tsd',
    title="Distribution of TSD by Hour",
    labels={'hour': 'Hour', 'tsd':'TSD'},
    category_orders={'hour': list(range(24))},
)
fig.show()

**Notes:**<br>
Hourly TSD distribution shows that:
1. There is higher demand from 7:00 to 21:00, which is when most daily activity occurs. This period also has larger demand ranges. 
2. The periods outside 7:00 to 21:00 also exhibited more outliers, indicating the sparse days in which there was higher than usual demand. 


In [None]:
#---------------------------------------------------------------------------------------
# Output folder for exported figures (used by the commented-out write_html calls).
# Every other path in this notebook ('../data/...', sys.path '../') treats '../' as the
# project root, so the figures folder is resolved from there too instead of two levels up.
# NOTE(review): assumes the folder is <project root>/reports/figures — confirm.
saving_path = "../reports/figures/"


In [None]:

# 2. Plot demand excluding outliers
# The title carries an "(outliers removed)" suffix so this figure can be told apart
# from the raw-demand figure, which otherwise has the same title and axes.
demand_flt_plot = px.line(
    df_flt,
    x='date',
    y=['nd', 'tsd'],
    title='National vs Transmission Systems Electricity Demand over Time (outliers removed)',
)
demand_flt_plot.update_layout(
    xaxis_title='DateTime',
    yaxis_title='Demand (MW)',
    legend_title='Demand Type',
)
demand_flt_plot.show()
#demand_flt_plot.write_html(saving_path + "ND_and_TSD_without_outliers.html")


In [None]:

# 3. Total generation (solar + wind) against TSD
df_flt['total_generation'] = df_flt['solar_generation'].add(df_flt['wind_generation'])

demand_vs_gen_plot = px.line(
    df_flt,
    x='date',
    y=['tsd', 'total_generation'],
    title='Transmission System Demand vs Total Generation over time',
)
demand_vs_gen_plot.update_layout(
    xaxis_title='DateTime',
    yaxis_title='Megawatts',
    legend_title='Type',
)
demand_vs_gen_plot.show()
#demand_vs_gen_plot.write_html(saving_path + "TSD_vs_generation.html")

1. Over time, as TSD reduces, total generation from solar and wind increases. <br>
2. From the earlier image, ND and TSD exhibit downward trends over time. In addition, the tsd peak to peak also reduces over time, indicating reducing electricity demands. <br>

In [None]:
# 3.1 Total generation expressed as a percentage of TSD
generation_share = df_flt['total_generation'].div(df_flt['tsd'])
df_flt['gen_to_demand_ratio'] = generation_share.mul(100)

gen_to_demand_ratio = px.line(
    df_flt,
    x='date',
    y=['gen_to_demand_ratio'],
    title='Total Generation to Transmission System Demand Ratio',
)
gen_to_demand_ratio.update_layout(
    xaxis_title='DateTime',
    yaxis_title='Generation to Demand (%)',
)
gen_to_demand_ratio.show()
#gen_to_demand_ratio.write_html(saving_path + "gen_to_demand_ratio.html")

1. Solar and wind generation combined contributed over 20% of TSD from
2015 onwards, indicating the beginning of considerable contributions to UK electricity demand. 
2. In 2024, combined solar and wind contributions peaked at 71% of TSD. 

In [None]:
# 4. Yearly share of wind vs solar in their combined generation
df_flt['year'] = df_flt['date'].dt.year

# Sum each source per calendar year; 'year' comes back as an ordinary column.
source_columns = ['wind_generation', 'solar_generation']
yearly_df_flt = df_flt.groupby('year')[source_columns].sum().reset_index()

yearly_total = yearly_df_flt['wind_generation'] + yearly_df_flt['solar_generation']
yearly_df_flt['total_generation'] = yearly_total
yearly_df_flt['wind_contribution'] = (
    yearly_df_flt['wind_generation'] / yearly_df_flt['total_generation']
) * 100
# Only two sources are considered, so solar is the complement of wind.
yearly_df_flt['solar_contribution'] = 100 - yearly_df_flt['wind_contribution']

gen_frac = px.scatter(
    yearly_df_flt,
    x='year',
    y=['wind_contribution', 'solar_contribution'],
    title='Annual Contribution of Wind and Solar Generation (%)',
)
gen_frac.update_layout(
    xaxis_title='Year',
    yaxis_title='Energy Source Contribution (%)',
    legend_title='Type',
)
gen_frac.show()
#gen_frac.write_html(saving_path + "generation_source_contribution.html")


Note:<br>
1. Wind provided significant contributions from 2009 (100%) to 2015 (58.2%) 
compared to solar. 
2. Wind and solar had contributions within 40% to 60% from 2015 onwards with both 
contributing almost the same in 2024. 

In [None]:
# 5. Plot of generator utilisation (generation as a percentage of installed capacity)

df_flt['solar_utilisation'] = (df_flt['solar_generation']/df_flt['solar_capacity'])*100
df_flt['wind_utilisation'] = (df_flt['wind_generation']/df_flt['wind_capacity'])*100

util_plot = px.line(df_flt, x='date', y=['wind_utilisation', 'solar_utilisation'],
                    title='Wind and Solar Utilisation'
                    ).update_layout(
                        xaxis_title='Year',
                        yaxis_title='Utilisation (%)',
                        legend_title='Type'
                    )
util_plot.show()
# Export under its own file name so it does not overwrite the contribution figure:
#util_plot.write_html(saving_path + "generation_utilisation.html")

# Note:
# There are instances in which wind capacity is lower than generation.
# (Kept as a comment: a bare string literal at the end of a cell is echoed as cell output.)



In [None]:


# ---------------------------------------------------------------------------
# Narrow selection holding only the timestamp, demand, generation and capacity columns.
core_columns = [
    'date',
    'tsd',
    'solar_generation',
    'wind_generation',
    'solar_capacity',
    'wind_capacity',
]
trimmed_df_flt = df_flt[core_columns]

# Running total of utilisation within each calendar year, one column per source.
for source in ('wind', 'solar'):
    df_flt[f'cumulative_{source}_util'] = (
        df_flt.groupby('year')[f'{source}_utilisation'].cumsum()
    )

### Save Cleaned Dataset

In [None]:
# EDA is complete: persist the filtered frame, including the columns added by the
# cells above, as the input for feature engineering.
interim_pickle_path = "../data/interim/uk_data_processed_postEDA.pkl"
df_flt.to_pickle(interim_pickle_path)

In [None]:
# Display the column labels of the frame that was written to the interim dataset.
df_flt.columns

----------------------------------------------------------------------
