# Dataset Preparation - Wind Power Forecast

Source: https://www.kaggle.com/datasets/theforcecoder/wind-power-forecasting/data

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [62]:
# Read the raw turbine CSV, discard exact duplicate rows, and keep only the
# readings from 2018 onwards. The "Unnamed: 0" column holds the timestamp as
# text, so the ">=" below is a lexicographic string comparison.
data = (
    pd.read_csv('weather/Turbine_Data.csv', sep=',')
    .drop_duplicates()
    .loc[lambda frame: frame["Unnamed: 0"] >= "2018"]
)
data.head()

Unnamed: 0.1,Unnamed: 0,ActivePower,AmbientTemperatue,BearingShaftTemperature,Blade1PitchAngle,Blade2PitchAngle,Blade3PitchAngle,ControlBoxTemperature,GearboxBearingTemperature,GearboxOilTemperature,...,GeneratorWinding2Temperature,HubTemperature,MainBoxTemperature,NacellePosition,ReactivePower,RotorRPM,TurbineStatus,WTG,WindDirection,WindSpeed
144,2018-01-01 00:00:00+00:00,-5.357727,23.148729,,,,,,,,...,,,,8.0,-9.96083,,,G01,8.0,2.279088
145,2018-01-01 00:10:00+00:00,-5.82236,23.039754,,,,,,,,...,,,,300.428571,-9.628441,,,G01,300.428571,2.339343
146,2018-01-01 00:20:00+00:00,-5.279409,22.948703,,,,,,,,...,,,,340.0,-9.491235,,,G01,340.0,2.45561
147,2018-01-01 00:30:00+00:00,-4.648054,22.966851,,,,,,,,...,,,,345.0,-9.856136,,,G01,345.0,2.026754
148,2018-01-01 00:40:00+00:00,-4.684632,22.93652,,,,,,,,...,,,,345.0,-9.745593,,,G01,345.0,1.83142


I will follow the same training setup as in https://www.kaggle.com/code/brunoricardobs13/wind-power-forecasting-predictions-using-xgboost, selecting only a subset of columns for training:

In [63]:
# Columns used as model inputs, and the column to predict.
features = ['WindSpeed', 'RotorRPM', 'GeneratorRPM', 'GearboxOilTemperature', 'GearboxBearingTemperature']
label = "ActivePower"

# Keep only the feature and label columns, then drop every row that is missing
# the label or any of the features (dropna defaults to how="any").
data = data[features + [label]].dropna()

# Normalize each column to zero mean and unit standard deviation
# (pandas' std is the sample standard deviation, ddof=1).
# NOTE(review): the statistics are computed over all remaining rows, including
# the rows that are later written to the valid/test splits. Consider fitting
# them on the training portion only if leakage matters for the evaluation.
feature_mean = data.mean()
feature_std = data.std()
print(feature_mean)
print(feature_std)
data = (data - feature_mean) / feature_std

# Downstream cells expect a plain 2-D array: feature columns first, label last.
data = data.to_numpy()

WindSpeed                       5.935267
RotorRPM                        9.894092
GeneratorRPM                 1103.305483
GearboxOilTemperature          57.624848
GearboxBearingTemperature      64.502379
ActivePower                   630.689148
dtype: float64
WindSpeed                      2.563903
RotorRPM                       4.702846
GeneratorRPM                 524.089411
GearboxOilTemperature          6.195655
GearboxBearingTemperature      9.735098
ActivePower                  608.064994
dtype: float64


In [61]:
# Number of rows from the end of the preceding split that are prepended to the
# validation and test splits (presumably the model's history/context window,
# so the first target of each split has a full context; TODO confirm).
overlap = 16

# Chronological 80% / 10% / 10% split boundaries (row indices into `data`).
n_rows = len(data)
train_split = n_rows * 8 // 10
valid_split = n_rows * 9 // 10

# Clamp at 0: for very small inputs a negative start index would wrap around
# and silently slice from the end of the array instead.
valid_start = max(train_split - overlap, 0)
test_start = max(valid_split - overlap, 0)

# Save the data for different splits
np.save('weather/train.npy', data[:train_split])
np.save('weather/valid.npy', data[valid_start:valid_split])
np.save('weather/test.npy', data[test_start:])