In [1]:
# packages
import os
import numpy as np
import pandas as pd

In [2]:
# Mount Google Drive into the Colab runtime at /content/drive.
# The CSV paths used by the other cells all live under
# '/content/drive/My Drive/...', so this cell must run before any data is read or written.
# NOTE(review): google.colab is presumably only importable inside Colab — confirm before running elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**Reading the data sets and merging them together**

In [3]:
def _read_time_indexed_csv(csv_path):
    """Read a semicolon-separated CSV and parse its first column into a datetime index."""
    frame = pd.read_csv(csv_path, sep=';', index_col=0)
    frame.index = pd.to_datetime(frame.index)
    return frame


# Weather data is comma-separated; its timestamps are stored in the 'date' column,
# which is parsed, promoted to the index under the name 'timestamp', and then removed.
df_weather = (
    pd.read_csv('/content/drive/My Drive/ms_wind_curtailment_prediction/weather_data.csv', sep=',', index_col=0)
    .assign(timestamp=lambda frame: pd.to_datetime(frame['date']))
    .set_index('timestamp')
    .drop(columns='date')
)

# Redispatch, solar feed-in and demand files share one layout:
# semicolon-separated with the timestamp in the first column.
df_redispatch = _read_time_indexed_csv('/content/drive/My Drive/ms_wind_curtailment_prediction/wind_redispatch_2020_24.csv')
df_solar = _read_time_indexed_csv('/content/drive/My Drive/ms_wind_curtailment_prediction/solar_feedin_2020_24.csv')
df_demand = _read_time_indexed_csv('/content/drive/My Drive/ms_wind_curtailment_prediction/electricity_demand_2020_24.csv')

In [4]:
# Only the 'redispatch' and 'level' columns of the redispatch data are carried forward.
df_redispatch_subset = df_redispatch[['redispatch', 'level']]

# Outer join on the timestamp index: timestamps that appear in only one of the
# two frames are kept, with NaN in the columns of the other frame.
df = df_redispatch_subset.merge(df_weather, how='outer', left_index=True, right_index=True)

# Put the combined frame on a regular 15-minute grid, keeping the first available
# value of every column within each bin. '15min' is the current spelling of the
# offset alias; the former '15T' is deprecated in recent pandas releases.
df = df.resample('15min').first()

# Solar feed-in and electricity demand are joined afterwards, again as outer joins on the index.
df = df.merge(df_solar, how='outer', left_index=True, right_index=True)
df = df.merge(df_demand, how='outer', left_index=True, right_index=True)

In [5]:
# Restrict the frame to the period 2020-01-01 .. 2023-12-30 (both days inclusive).
# The comparison is made on calendar dates, so every timestamp of the last day is kept.
period_start = pd.to_datetime('2020-01-01').date()
period_end = pd.to_datetime('2023-12-30').date()
index_days = df.index.date
df = df[(index_days >= period_start) & (index_days <= period_end)]

**Impute missing values**

In [6]:
# Impute missing solar forecasts with the actual solar feed-in of the same row.
# A vectorized fillna replaces the former iterrows()/df.at loop: it avoids a Python-level
# pass over every row, and it fills strictly row by row, so repeated timestamps in the
# index can no longer overwrite each other's values.
df['forecast_solar_MW'] = df['forecast_solar_MW'].fillna(df['actual_solar_MW'])

# Impute the remaining gaps in the feature columns by linear interpolation.
# limit_direction='both' also fills gaps at the very start and end of each column.
columns_to_interpolate = [
    "wind_speed_m/s",
    "wind_direction_degrees",
    "humidity_percent",
    "radiation_global_J/m2",
    "air_temperature_K",
    "wind_gust_max_m/s",
    "wind_direction_gust_max_degrees",
    "forecast_solar_MW",
    "total_grid_load_MWh",
    "residual_load_MWh",
    "pumped_storage_MWh",
]
df[columns_to_interpolate] = df[columns_to_interpolate].interpolate(method='linear', limit_direction='both')

# actual_solar_MW has served its purpose for the imputation above and is removed
# from the frame (reassignment instead of inplace=True).
df = df.drop(columns="actual_solar_MW")

In [7]:
# Write the merged and imputed frame to Drive as a semicolon-separated CSV;
# the index is exported as a column labelled 'timestamp'.
df.to_csv(
    '/content/drive/My Drive/ms_wind_curtailment_prediction/curtailment_target_features.csv',
    sep=';',
    index=True,
    index_label='timestamp',
)