# Import Required Libraries
Import the necessary libraries, including pandas, numpy, and any other required libraries.

In [1]:
# Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose

# Set plot style for better visualization
sns.set(style="whitegrid")

# Load Imputed Databases
Load the imputed databases from their respective sources.

In [7]:
# Load Imputed Databases

# Load the imputed databases from their respective sources
# Assuming the databases are in CSV format and stored in the same directory

# Load the first imputed database
imputed_db1 = pd.read_csv('../data/balancing_data_imputed.csv')

# Load the second imputed database
imputed_db2 = pd.read_csv('../data/demand_load_data_imputed.csv')

# Load the third imputed database
imputed_db3 = pd.read_csv('../data/price_data_imputed.csv')

# Display the first few rows of each database to verify loading
print("First imputed database:")
print(imputed_db1.head())

print("\nSecond imputed database:")
print(imputed_db2.head())

print("\nThird imputed database:")
print(imputed_db3.head())

First imputed database:
              GMT Time  System Price (ESO Outturn) - GB (£/MWh)  \
0  2018-01-01 00:00:00                                    55.94   
1  2018-01-01 00:30:00                                    55.94   
2  2018-01-01 01:00:00                                    62.94   
3  2018-01-01 01:30:00                                    31.00   
4  2018-01-01 02:00:00                                    60.81   

   NIV Outturn (+ve long) - GB (MW)  BM Bid Acceptances (total) - GB (MW)  \
0                            -77.05                              -1833.86   
1                           -334.76                              -1443.78   
2                           -219.78                              -1580.12   
3                            286.63                              -1699.05   
4                           -141.41                              -1413.27   

   BM Offer Acceptances (total) - GB (MW)  \
0                                 1910.98   
1                   

# Merge Databases
Merge the loaded databases into a single final database.

In [8]:
# Merge Databases

# Merge the loaded databases into a single final database
final_db = pd.concat([imputed_db1, imputed_db2, imputed_db3], ignore_index=True)

# Verify the final database has all data with no loss
print("Final merged database:")
print(final_db.info())

# Display the first few rows of the final merged database
print("\nFirst few rows of the final merged database:")
print(final_db.head())

Final merged database:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 354960 entries, 0 to 354959
Data columns (total 14 columns):
 #   Column                                                   Non-Null Count   Dtype  
---  ------                                                   --------------   -----  
 0   GMT Time                                                 354960 non-null  object 
 1   System Price (ESO Outturn) - GB (£/MWh)                  118320 non-null  float64
 2   NIV Outturn (+ve long) - GB (MW)                         118320 non-null  float64
 3   BM Bid Acceptances (total) - GB (MW)                     118320 non-null  float64
 4   BM Offer Acceptances (total) - GB (MW)                   118320 non-null  float64
 5   Total BSAD Volume - Turn Up - GB (MW)                    118320 non-null  float64
 6   Total BSAD Volume - Turn Down - GB (MW)                  118320 non-null  float64
 7   Total BSAD Volume - Total - GB (MW)                      118320 non-null  floa

# Verify Final Database
Verify that the final database has all data with no loss and is in good format.

In [9]:
# Verify Final Database

# Check for any missing values in the final database
missing_values = final_db.isnull().sum()
print("\nMissing values in the final merged database:")
print(missing_values)

# Ensure the final database is in good format by checking data types
print("\nData types in the final merged database:")
print(final_db.dtypes)

# Handle any encoding issues (assuming UTF-8 encoding)
try:
    final_db.to_csv('final_database.csv', encoding='utf-8', index=False)
    print("\nFinal database saved successfully with UTF-8 encoding.")
except Exception as e:
    print(f"\nError saving final database with UTF-8 encoding: {e}")

# Reload the final database to verify encoding
try:
    final_db_reloaded = pd.read_csv('final_database.csv', encoding='utf-8')
    print("\nFinal database reloaded successfully with UTF-8 encoding.")
    print(final_db_reloaded.head())
except Exception as e:
    print(f"\nError reloading final database with UTF-8 encoding: {e}")


Missing values in the final merged database:
GMT Time                                                        0
System Price (ESO Outturn) - GB (£/MWh)                    236640
NIV Outturn (+ve long) - GB (MW)                           236640
BM Bid Acceptances (total) - GB (MW)                       236640
BM Offer Acceptances (total) - GB (MW)                     236640
Total BSAD Volume - Turn Up - GB (MW)                      236640
Total BSAD Volume - Turn Down - GB (MW)                    236640
Total BSAD Volume - Total - GB (MW)                        236640
Intraday Volume (EPEX Outturn, APX, MID) - GB (MWh)        236640
Loss of Load Probability - Latest - GB ()                  236640
Actual Total Load - GB (MW)                                236640
Demand Outturn (ITSDO) - GB (MW)                           236640
Day Ahead Price (EPEX half-hourly, local) - GB (LC/MWh)    236640
Intraday Price (EPEX Outturn, APX, MID) - GB (£/MWh)       236640
dtype: int64

Data types in th

# Handle Encoding Issues
Check and handle any encoding issues in the final database.

In [None]:
# Handle Encoding Issues

# Handle any encoding issues (assuming UTF-8 encoding)
try:
    final_db.to_csv('final_database.csv', encoding='utf-8', index=False)
    print("\nFinal database saved successfully with UTF-8 encoding.")
except Exception as e:
    print(f"\nError saving final database with UTF-8 encoding: {e}")

# Reload the final database to verify encoding
try:
    final_db_reloaded = pd.read_csv('final_database.csv', encoding='utf-8')
    print("\nFinal database reloaded successfully with UTF-8 encoding.")
    print(final_db_reloaded.head())
except Exception as e:
    print(f"\nError reloading final database with UTF-8 encoding: {e}")

# Test for Seasonality
Test the final database for seasonality using appropriate statistical methods.

In [None]:
# Test for Seasonality

# Assuming the final database has a datetime column named 'date' and a target column named 'value'
# Convert the 'date' column to datetime format
final_db['date'] = pd.to_datetime(final_db['date'])

# Set the 'date' column as the index
final_db.set_index('date', inplace=True)

# Plot the time series data to visualize any apparent seasonality
plt.figure(figsize=(12, 6))
plt.plot(final_db['value'])
plt.title('Time Series Data')
plt.xlabel('Date')
plt.ylabel('Value')
plt.show()

# Perform seasonal decomposition using statsmodels
result = seasonal_decompose(final_db['value'], model='additive', period=12)

# Plot the seasonal decomposition components
result.plot()
plt.show()

# Check for seasonality by examining the seasonal component
seasonal_component = result.seasonal
print("Seasonal Component:")
print(seasonal_component.head())

# If seasonality is detected, discuss its impact on model training
seasonality_detected = seasonal_component.abs().sum() > 0
if seasonality_detected:
    print("\nSeasonality detected in the data. This will affect model training as follows:")
    print("1. Seasonal patterns need to be accounted for in the model.")
    print("2. Consider using models that can handle seasonality, such as SARIMA or Prophet.")
else:
    print("\nNo significant seasonality detected in the data. Proceed with model training without special adjustments for seasonality.")

# Impact of Seasonality on Model Training
Discuss how seasonality might affect proceeding with model training and the steps to address it.

In [None]:
# Impact of Seasonality on Model Training

# Discuss how seasonality might affect proceeding with model training and the steps to address it

# If seasonality is detected, discuss its impact on model training
seasonality_detected = seasonal_component.abs().sum() > 0
if seasonality_detected:
    print("\nSeasonality detected in the data. This will affect model training as follows:")
    print("1. Seasonal patterns need to be accounted for in the model.")
    print("2. Consider using models that can handle seasonality, such as SARIMA or Prophet.")
    print("3. Ensure that the training and testing datasets are split in a way that preserves the seasonal patterns.")
    print("4. Perform additional feature engineering to capture seasonal effects, such as adding seasonal dummy variables or Fourier terms.")
else:
    print("\nNo significant seasonality detected in the data. Proceed with model training without special adjustments for seasonality.")