Pip install the following packages
1. wetterdienst
2. shapely
3. utm

In [119]:
import os
import pandas as pd
import polars as pl
import pathlib
import datetime as dt
import utm
from scipy.interpolate import LinearNDInterpolator
from shapely.geometry import Point, Polygon
from wetterdienst import Settings, Resolution, Period, Parameter
from wetterdienst.provider.dwd.observation import DwdObservationRequest, DwdObservationDataset, DwdObservationPeriod, DwdObservationResolution

In [120]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
settings = Settings(
    ts_shape="long",
    ts_humanize=True,
    ts_si_units=True
)

All the observations we would need are as follows
1. wind_speed
2. wind_direction
3. radiation_global
4. air_temperature
5. humidity
6. wind_gust_max
7. extreme_wind
8. wind_direction_gust_max

There are 298 weather stations in germany

**INTERPOLATION**

The function leverages the four closest stations to your specified latitude and longitude and employs the bilinear interpolation method provided by the scipy package (interp2d) to interpolate the given parameter values. Currently, this interpolation feature is exclusive to DWDObservationRequest and parameters temperature_air_mean_200, wind_speed, precipitation_height. As it is in its early stages, we welcome feedback to enhance and refine its functionality. Interpolation by nearby stations is limited to a distance of 40 km by default (20.0 km for precipitation). You can change this by setting the ts_interpolation_station_distance setting.


In [3]:
langenhorn = (54.679, 8.908)

Station ID - 02907
Latitude - 54.7903
Longitude - 8.9514
Name - "Leck"
State - Schleswig Holstein
Distance from langenhorn - 12.685km

In [107]:
def get_weather_data(param):
    request = DwdObservationRequest(
        parameter=[
             param
        ],
        resolution="10_minutes",
        start_date="2019-12-31 22:50:00",
        end_date="2023-12-31 00:00:00",
        settings=settings
    )
    stations = request.filter_by_rank(latlon=langenhorn, rank=1)
    df = stations.values.all().df.drop_nulls()
    return df

**Weather attributes**

In [108]:
df_wind_speed = get_weather_data("wind_speed") # mean of wind speed during the last 10 minutes in m/s
df_wind_speed = df_wind_speed.drop(["dataset", "parameter", "quality"])
df_wind_speed = df_wind_speed.rename({"value": "wind_speed"})

df_wind_speed.head(10)

station_id,date,wind_speed
str,"datetime[μs, UTC]",f64
"""02907""",2019-12-31 22:50:00 UTC,2.0
"""02907""",2019-12-31 23:00:00 UTC,2.1
"""02907""",2019-12-31 23:10:00 UTC,2.9
"""02907""",2019-12-31 23:20:00 UTC,2.5
"""02907""",2019-12-31 23:30:00 UTC,2.4
"""02907""",2019-12-31 23:40:00 UTC,2.6
"""02907""",2019-12-31 23:50:00 UTC,2.8
"""02907""",2020-01-01 00:00:00 UTC,2.7
"""02907""",2020-01-01 00:10:00 UTC,3.1
"""02907""",2020-01-01 00:20:00 UTC,2.7


In [89]:
df_wind_direction = get_weather_data("wind_direction") # mean of wind direction during the last 10 minutes in degrees
df_wind_direction = df_wind_direction.drop(["dataset", "parameter", "quality"])
df_wind_direction = df_wind_direction.rename({"value": "wind_direction"})

In [90]:
df_radiation_global = get_weather_data("radiation_global") # 10min-sum of solar incoming radiation in J/m2
df_radiation_global = df_radiation_global.drop(["dataset", "parameter", "quality"])
df_radiation_global = df_radiation_global.rename({"value": "radiation_global"})

In [91]:
df_air_temperature = get_weather_data("air_temperature") # # Historical 10-minute station observations of pressure, air temperature (at 5cm and 2m height), humidity and dew point for Germany
df_air_temperature = df_air_temperature.drop(["dataset", "parameter", "quality"])
df_air_temperature = df_air_temperature.rename({"value": "air_temperature"})

In [92]:
df_humidity = get_weather_data("humidity") # relative humidity at 2m height in %
df_humidity = df_humidity.drop(["dataset", "parameter", "quality"])
df_humidity = df_humidity.rename({"value": "humidity"})

In [93]:
df_wind_gust_max = get_weather_data("wind_gust_max")# # maximum wind gust of the last 10 minutes, the instrument samples the instantaneous wind velocity every 0.25 seconds, and writes out the max value of a 3 second period, the highest occuring within the 10min interval is given here as the maximum wind gust.
df_wind_gust_max = df_wind_gust_max.drop(["dataset", "parameter", "quality"])
df_wind_gust_max = df_wind_gust_max.rename({"value": "wind_gust_max"})

In [94]:
df_wind_direction_gust_max = get_weather_data("wind_direction_gust_max") # wind direction of highest wind gust
df_wind_direction_gust_max = df_wind_direction_gust_max.drop(["dataset", "parameter", "quality"])
df_wind_direction_gust_max = df_wind_direction_gust_max.rename({"value": "wind_direction_gust_max"})

**Joining all dataframes into one master df**

In [95]:
new_df = df_wind_speed.join(df_wind_direction, on=["station_id","date"], how = "inner")

In [96]:
new_df = new_df.join(df_radiation_global, on=["station_id","date"], how = "inner")

In [97]:
new_df = new_df.join(df_air_temperature, on=["station_id","date"], how = "inner")

In [98]:
new_df = new_df.join(df_humidity, on=["station_id","date"], how = "inner")

In [99]:
new_df = new_df.join(df_wind_gust_max, on=["station_id","date"], how = "inner")

In [100]:
new_df = new_df.join(df_wind_direction_gust_max, on=["station_id","date"], how = "inner")

new_df.head()

station_id,date,wind_speed,wind_direction,radiation_global,air_temperature,humidity,wind_gust_max,wind_direction_gust_max
str,"datetime[μs, UTC]",f64,f64,f64,f64,f64,f64,f64
"""02907""",2019-12-31 22:50:00 UTC,2.0,260.0,0.0,81.9,81.9,2.9,250.0
"""02907""",2019-12-31 23:00:00 UTC,2.1,250.0,0.0,80.7,80.7,3.3,250.0
"""02907""",2019-12-31 23:10:00 UTC,2.9,270.0,0.0,79.2,79.2,4.9,270.0
"""02907""",2019-12-31 23:20:00 UTC,2.5,260.0,0.0,80.7,80.7,3.6,260.0
"""02907""",2019-12-31 23:30:00 UTC,2.4,240.0,0.0,80.4,80.4,3.5,250.0


In [101]:
new_df["station_id"].unique()

station_id
str
"""02907"""


**Removing Station_id column since all the stations are the same - 02907**

In [102]:
new_df = new_df.drop(["station_id"])
new_df.head()

date,wind_speed,wind_direction,radiation_global,air_temperature,humidity,wind_gust_max,wind_direction_gust_max
"datetime[μs, UTC]",f64,f64,f64,f64,f64,f64,f64
2019-12-31 22:50:00 UTC,2.0,260.0,0.0,81.9,81.9,2.9,250.0
2019-12-31 23:00:00 UTC,2.1,250.0,0.0,80.7,80.7,3.3,250.0
2019-12-31 23:10:00 UTC,2.9,270.0,0.0,79.2,79.2,4.9,270.0
2019-12-31 23:20:00 UTC,2.5,260.0,0.0,80.7,80.7,3.6,260.0
2019-12-31 23:30:00 UTC,2.4,240.0,0.0,80.4,80.4,3.5,250.0


**Converting UTC to UTC+1 or UTC+2**


Standard time:	UTC +1	Central European Time (CET)
Daylight saving time:	UTC +2	Central European Summertime (CEST)
![Screenshot 2024-02-19 at 10.19.15 AM.png](attachment:828188b3-b04a-4619-8b20-7a4f1014e0d3.png)


In [103]:
def add_timedelta_based_on_month(dt):
    if dt.month in [4, 5, 6, 7, 8, 9, 10]:  # Summer months: April, May, June, July, August, September, October
        return dt + pd.Timedelta(hours=2)
    else:  # Winter months: November, December, January, February, March
        return dt + pd.Timedelta(hours=1)

In [109]:
weather_df = new_df.clone()

weather_df = weather_df.with_columns(
    pl.col("date").apply(add_timedelta_based_on_month)
)

weather_df = weather_df.slice(1,None) #removing the first row
weather_df.head()

  pl.col("date").apply(add_timedelta_based_on_month)


date,wind_speed,wind_direction,radiation_global,air_temperature,humidity,wind_gust_max,wind_direction_gust_max
datetime[μs],f64,f64,f64,f64,f64,f64,f64
2020-01-01 00:00:00,2.1,250.0,0.0,80.7,80.7,3.3,250.0
2020-01-01 00:10:00,2.9,270.0,0.0,79.2,79.2,4.9,270.0
2020-01-01 00:20:00,2.5,260.0,0.0,80.7,80.7,3.6,260.0
2020-01-01 00:30:00,2.4,240.0,0.0,80.4,80.4,3.5,250.0
2020-01-01 00:40:00,2.6,250.0,0.0,80.6,80.6,3.4,250.0


**Saving weather_df as a csv file**

In [122]:
path: pathlib.Path = "/content/drive/MyDrive/ms_wind_curtailment_prediction/weather_data.csv"
weather_df.write_csv(path, separator=",")