In [1]:
import pandas as pd
import numpy as np
import glob
import datetime

In [2]:
# Locate the raw input files. Forward slashes are used because they resolve on
# Windows as well as POSIX systems, and they avoid the invalid escape sequences
# ('\\e', '\\E', '\\S') that backslash-separated, non-raw literals would contain.
# glob.glob returns a list of matching paths (empty when nothing matches); the
# loading code further down reads element [0] of each list.
st_eu = glob.glob('../raw_data/eua/EU_StorageData_GIE_2023-01-01_2024-12-17.csv')
st_spain = glob.glob('../raw_data/eua/SPAIN_StorageData_GIE_2024-11-18_2024-12-17.csv')
eua = glob.glob('../raw_data/eua/European Union Allowance (EUA) Yearly Futures Historical Data.csv')

def read_data(file, sep=';'):
    """Load the first CSV file from a list of matched paths.

    Parameters
    ----------
    file : list of str, or str
        Paths as returned by ``glob.glob``; only the first entry is read.
        A single path string is also accepted.
    sep : str, default ';'
        Field delimiter forwarded to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the first path.

    Raises
    ------
    FileNotFoundError
        If ``file`` is empty, i.e. the glob pattern matched nothing.
    """
    # Indexing a bare string would yield its first character, not a path.
    if isinstance(file, str):
        file = [file]
    # Fail with an explicit message instead of an opaque IndexError on file[0].
    if len(file) == 0:
        raise FileNotFoundError(
            "read_data received no paths: the glob pattern matched no files"
        )
    return pd.read_csv(file[0], sep=sep)


# Load each source, give the storage frames the same 'Date' key as the EUA frame,
# name the storage series per region, and parse 'Date' to datetime.
# The storage files go through read_data (';'-delimited); the EUA file is read
# with pandas' default delimiter.
st_eu = (
    read_data(st_eu)
    .rename(columns={'Gas Day Start': 'Date', 'Gas in storage (TWh)': 'gas_storage_eu'})
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)

st_spain = (
    read_data(st_spain)
    .rename(columns={'Gas in storage (TWh)': 'gas_storage_spain', 'Gas Day Start': 'Date'})
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)

eua = pd.read_csv(eua[0]).assign(Date=lambda frame: pd.to_datetime(frame['Date']))

# Left joins keep every EU storage row; dates missing from the EUA or Spain
# frames end up with NaN in the columns those frames contribute.
# NOTE(review): st_eu and st_spain look like the same export layout, so the second
# merge presumably suffixes their shared column names with _x/_y — confirm.
first_merge = st_eu.merge(eua, how='left', on='Date')
merged_eua = first_merge.merge(st_spain, how='left', on='Date')

In [3]:
# Inspect the EUA futures frame after its Date column was parsed to datetime.
eua

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2024-12-17,64.26,65.00,65.85,64.05,26.90K,-1.55%
1,2024-12-16,65.27,64.05,64.31,62.73,6.17K,1.30%
2,2024-12-13,64.43,65.81,66.61,64.15,20.48K,-2.53%
3,2024-12-12,66.10,68.53,68.84,65.54,47.62K,-3.69%
4,2024-12-11,68.63,68.30,69.58,67.91,43.39K,0.66%
...,...,...,...,...,...,...,...
500,2023-01-06,77.62,78.61,79.64,77.08,9.40K,-1.16%
501,2023-01-05,78.53,77.52,79.75,77.00,15.42K,1.33%
502,2023-01-04,77.50,82.71,84.12,77.25,18.11K,-6.56%
503,2023-01-03,82.94,85.12,86.17,81.87,11.74K,-2.79%


In [4]:
# Inspect the EU-storage/EUA left merge; dates with no EUA row
# (e.g. 2024-12-14 and 2024-12-15 in the output) show NaN in the price columns.
first_merge

Unnamed: 0,Status,Date,gas_storage_eu,Full (%),Trend (%),Injection (GWh/d),Withdrawal (GWh/d),Technical Capacity (TWh),Injection capacity (GWh/d),Withdrawal capacity (GWh/d),Price,Open,High,Low,Vol.,Change %
0,E,2024-12-16,889.5508,77.50,-0.41,326.28,5219.7,1147.8346,11727.31,19876.61,65.27,64.05,64.31,62.73,6.17K,1.30%
1,E,2024-12-15,894.2829,77.91,-0.36,431.56,4572.9,1147.8346,11726.92,19877.63,,,,,,
2,E,2024-12-14,898.4366,78.27,-0.49,246.54,5835.9,1147.8346,11726.36,19879.12,,,,,,
3,E,2024-12-13,904.0280,78.76,-0.68,105.91,8036.5,1147.8346,11725.50,19881.39,64.43,65.81,66.61,64.15,20.48K,-2.53%
4,E,2024-12-12,911.8127,79.44,-0.72,75.27,8341.4,1147.8346,11724.74,19883.40,66.10,68.53,68.84,65.54,47.62K,-3.69%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
711,E,2023-01-05,931.8386,83.24,-0.13,818.52,2141.7,1119.4312,11782.60,20067.76,78.53,77.52,79.75,77.00,15.42K,1.33%
712,E,2023-01-04,933.2785,83.37,-0.06,997.29,1675.3,1119.4212,11773.57,20035.70,77.50,82.71,84.12,77.25,18.11K,-6.56%
713,E,2023-01-03,933.9947,83.44,-0.08,1264.17,2198.3,1119.4212,11782.56,20067.86,82.94,85.12,86.17,81.87,11.74K,-2.79%
714,E,2023-01-02,934.9074,83.52,0.00,1521.14,1436.8,1119.4212,11782.60,20067.76,85.32,82.45,86.87,82.40,2.19K,3.72%


In [5]:
# Keep only the join key, the EUA price and the two storage series.
# .copy() yields an independent frame, so columns added to it later
# do not touch merged_eua.
selected_columns = ['Date', 'Price', 'gas_storage_eu', 'gas_storage_spain']
needed = merged_eua.loc[:, selected_columns].copy()

In [6]:
# Load the existing formatted dataset.
# Forward slashes keep this relative path valid on POSIX systems as well as Windows.
formatted_data = pd.read_csv('../formatted_data/formatted_data.csv')

In [7]:
# Inspect the reduced frame; gas_storage_spain is empty for the early-2023 rows
# in the output, consistent with a Spain file name that starts at 2024-11-18.
needed

Unnamed: 0,Date,Price,gas_storage_eu,gas_storage_spain
0,2024-12-16,65.27,889.5508,31.6485
1,2024-12-15,,894.2829,31.7794
2,2024-12-14,,898.4366,31.8909
3,2024-12-13,64.43,904.0280,32.0031
4,2024-12-12,66.10,911.8127,32.1161
...,...,...,...,...
711,2023-01-05,78.53,931.8386,
712,2023-01-04,77.50,933.2785,
713,2023-01-03,82.94,933.9947,
714,2023-01-02,85.32,934.9074,


In [8]:
# Split the Date column of `needed` into calendar parts so it can be joined
# on the Year/Month/Day keys.
date_parts = needed['Date'].dt
needed = needed.assign(
    Year=date_parts.year,
    Month=date_parts.month,
    Day=date_parts.day,
)

# Left join: every formatted_data row is kept and receives the values from
# `needed` whose Year/Month/Day match.
new_data = formatted_data.merge(needed, how='left', on=['Year', 'Month', 'Day'])

In [9]:
# Forward-fill missing values down the rows: each gap takes the last
# non-missing value above it in the same column.
# NOTE(review): this applies to every column of new_data, not only the newly
# joined ones — confirm that is intended.
new_data = new_data.ffill()

In [11]:
# Remove the joined timestamp column; Year/Month/Day remain in the frame.
new_data = new_data.drop(columns='Date')

In [12]:
# Write the enriched dataset to a new CSV; index=False leaves out the row index.
# Forward slashes keep this relative path valid on POSIX systems as well as Windows.
new_data.to_csv('../formatted_data/formatted_data_new.csv', index=False)