In [10]:
import pandas as pd

# Read the CSV file
df = pd.read_csv('raw DAM IDM price.csv')

# Remove square brackets from date column and convert to datetime
df['Date (WET)'] = df['Date (WET)'].str.strip('[]')
df['Date (WET)'] = pd.to_datetime(df['Date (WET)'], format='%d/%m/%Y %H:%M')

# Localize to WET, convert to UK time, then remove timezone info
df['Date (UK)'] = df['Date (WET)'].dt.tz_localize('WET', ambiguous='NaT').dt.tz_convert('Europe/London').dt.tz_localize(None)

# Keep only the columns we need: Date, DAM, IDM
df = df[['Date (UK)', 'DAM price', 'IDM price']]

# Display the result
print(df.head())

df.to_csv('UK_time_DAM_IDM_data.csv', index=False)

            Date (UK)  DAM price  IDM price
0 2015-01-01 00:00:00      39.60      37.76
1 2015-01-01 01:00:00      37.18      32.60
2 2015-01-01 02:00:00      32.40      30.28
3 2015-01-01 03:00:00      29.29      25.99
4 2015-01-01 04:00:00      27.24      25.54


In [11]:
# Check for missing values in the cleaned data
df_cleaned = pd.read_csv('UK_time_DAM_IDM_data.csv')

print("Missing values check:")
print(df_cleaned.isnull().sum())
print(f"\nTotal rows: {len(df_cleaned)}")
print(f"Complete rows (no missing values): {df_cleaned.dropna().shape[0]}")

# Fill missing values with column means
df_cleaned.fillna(df_cleaned.mean(numeric_only=True), inplace=True)
df_cleaned.to_csv('UK_time_DAM_IDM_data.csv', index=False)

# Remove rows with empty UK date
df_cleaned = df_cleaned.dropna(subset=['Date (UK)'])
print(f"Rows after removing empty dates: {len(df_cleaned)}")

df_cleaned.to_csv('cleaned_DAM_IDM_data.csv', index=False)

Missing values check:
Date (UK)     20
DAM price      0
IDM price    220
dtype: int64

Total rows: 87672
Complete rows (no missing values): 87432
Rows after removing empty dates: 87652
