In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import math

In [None]:
# Collect the names of all CSV files in the data folder.
# os.listdir returns entries in arbitrary order, but later cells select files by
# position (files[0], files[2]); sorting makes those positions reproducible.
# NOTE(review): confirm that the sorted positions 0 and 2 are the intended files.
files = sorted(name for name in os.listdir('data') if name.endswith('.csv'))
print(files)

In [None]:
import geopandas as gpd
from shapely.geometry import Point
# Load the Germany shapefile into a GeoDataFrame; germany_gdf is reused as the
# base map in the plotting cell below.
path_to_germany = "./data/vg2500_geo84/vg2500_bld.shp"
germany_gdf = gpd.read_file(path_to_germany)
# Draw the shapes once as a quick visual check that the file loaded.
germany_gdf.plot()

## Plot Germany with grid

In [None]:
# Load the first CSV from the listing; later cells read its longitude/latitude
# and ssr columns.
df = pd.read_csv(f"data/{files[0]}")

In [None]:
# One shapely Point per row, built from (longitude, latitude) pairs, then
# attached to the frame as its geometry so the rows can be drawn on a map.
geometry = list(map(Point, zip(df.longitude, df.latitude)))
geo_df = gpd.GeoDataFrame(df, geometry=geometry)

In [None]:
# Single-axes figure: Germany outline as the grey background layer,
# the data points drawn on top as small red markers.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

germany_gdf.plot(color='lightgrey', ax=ax)
geo_df.plot(color='red', marker='o', markersize=5, ax=ax)

plt.show()

### Separate the map of Germany into a grid

In [None]:
# Mean latitude / longitude of all rows; these two values split the map into
# four quadrants.
mid_latitude, mid_longitude = df[['latitude', 'longitude']].mean()

def categorize_location(row, mid_lat=None, mid_lon=None):
    """Return the quadrant label of a point relative to a midpoint.

    Parameters
    ----------
    row : mapping-like
        Must provide 'latitude' and 'longitude' entries (e.g. a DataFrame row
        when used with ``df.apply(..., axis=1)``).
    mid_lat, mid_lon : float, optional
        Midpoint to split on. When omitted, the notebook-level
        ``mid_latitude`` / ``mid_longitude`` are used, so existing calls that
        pass only ``row`` behave exactly as before.

    Returns
    -------
    str
        One of 'top_left', 'top_right', 'bottom_left', 'bottom_right'.
        A latitude equal to the midpoint counts as top; a longitude equal to
        the midpoint counts as left. If a comparison cannot succeed (e.g. a
        NaN coordinate) the final branch applies and 'bottom_right' is returned.
    """
    # Fall back to the notebook globals only when no explicit midpoint is given.
    if mid_lat is None:
        mid_lat = mid_latitude
    if mid_lon is None:
        mid_lon = mid_longitude

    lat = row['latitude']
    lon = row['longitude']
    if lat >= mid_lat and lon <= mid_lon:
        return 'top_left'
    elif lat >= mid_lat and lon > mid_lon:
        return 'top_right'
    elif lat < mid_lat and lon <= mid_lon:
        return 'bottom_left'
    else:
        return 'bottom_right'

# Apply the function row by row (axis=1) to create the new 'location' column
df['location'] = df.apply(categorize_location, axis=1)
# Display the resulting labels as the cell output.
df['location']




In [None]:
# Plot each location separately: one scatter series (and legend entry) per
# quadrant label, in order of first appearance.
fig, ax = plt.subplots()
for quadrant in df['location'].unique():
    in_quadrant = df['location'] == quadrant
    ax.scatter(
        df.loc[in_quadrant, 'longitude'],
        df.loc[in_quadrant, 'latitude'],
        label=quadrant,
    )
ax.legend()
plt.show()

## Clean dataframe with only important columns

In [None]:
# List the available columns before deciding which ones to drop.
df.columns

In [None]:
# Remove the columns that are not used in the rest of the notebook,
# then show what is left.
df = df.drop(
    ["blh", "tcc", "tsr", "sund", "tp", "fsr", "cdir", "z", "msl"],
    axis="columns",
)
df.columns

## Read measurements

In [None]:
# Load the measured supply data (third CSV in the listing, semicolon-separated)
# and show its column names.
df_realized_supply = pd.read_csv(f"data/{files[2]}", sep=';')
df_realized_supply.columns

### Again, drop unnecessary columns

In [None]:
# Keep only the two date columns and the photovoltaic / wind columns.
# .copy() detaches the selection from the full frame, so the column assignments
# in the following cells do not trigger pandas' SettingWithCopyWarning.
# The trailing space in "Wind Offshore [MW] " is intentional: the same key is
# used in the cells below.
df_realized_supply = df_realized_supply[['Date from', 'Date to', "Photovoltaic [MW]", "Wind Offshore [MW] ", "Wind Onshore [MW]"]].copy()

In [None]:
# Combine offshore and onshore wind into a single column.
# NOTE(review): a later cell runs preprocess_ssr (which calls str.split) on this
# column, which suggests the source columns are still strings at this point. If
# so, `+` concatenates the two texts instead of adding the numbers — confirm the
# dtypes and convert both columns to numbers before summing.
df_realized_supply["wind_on_offshore"] = df_realized_supply["Wind Offshore [MW] "] + df_realized_supply["Wind Onshore [MW]"]

In [None]:
# The two source wind columns are no longer needed once the combined column exists.
df_realized_supply = df_realized_supply.drop(columns=["Wind Offshore [MW] ", "Wind Onshore [MW]"])

In [None]:
# Copy the photovoltaic column under the shorter name "photo"
# (the original column is dropped in the next cell).
df_realized_supply["photo"] = df_realized_supply["Photovoltaic [MW]"]

In [None]:
# Drop the long-named original now that "photo" holds the same values.
df_realized_supply = df_realized_supply.drop(columns=["Photovoltaic [MW]"])

In [None]:
# Display the reduced frame as the cell output.
df_realized_supply

## Plot measurements

In [None]:
import plotly.graph_objects as go
import plotly.express as px

In [None]:
# Line plot of the photovoltaic column against the "Date from" column.
# px.line builds the figure itself, so no empty go.Figure() is created first.
# NOTE(review): "photo" is only converted to floats in a later cell
# (preprocess_ssr); if it still holds strings here, plotly will treat the
# y-values as categories — confirm, or move this plot after the conversion.
x_axis = df_realized_supply["Date from"]

fig = px.line(x=x_axis, y=df_realized_supply.photo)
fig.show()



In [None]:
# Line plot of the combined wind column against the "Date from" column.
# px.line builds the figure itself, so no empty go.Figure() is created first.
# NOTE(review): "wind_on_offshore" is only converted to floats in a later cell
# (preprocess_ssr); if it still holds strings here, plotly will treat the
# y-values as categories — confirm, or move this plot after the conversion.
x_axis = df_realized_supply["Date from"]

fig = px.line(x=x_axis, y=df_realized_supply.wind_on_offshore)
fig.show()



## Get year and month from date

In [None]:
# Derive three columns from "Date from":
#   timestamps - parsed datetimes
#   month_year - "YYYY-MM" string, used for monthly grouping
#   day        - two-digit day-of-month string
# NOTE(review): no explicit format/dayfirst is passed to to_datetime; if the
# source dates are written day-first, confirm they are parsed as intended.
df_realized_supply = df_realized_supply.assign(
    timestamps=lambda frame: pd.to_datetime(frame["Date from"]),
    month_year=lambda frame: frame["timestamps"].dt.strftime('%Y-%m'),
    day=lambda frame: frame["timestamps"].dt.strftime('%d'),
)

In [None]:
# Display the frame with the newly added date columns.
df_realized_supply

In [None]:
def preprocess_ssr(value):
    """Convert a formatted number string to a float, dropping the decimals.

    The text is cut at the first comma (everything after it is discarded) and
    all periods are removed before conversion, so "1.234,56" becomes 1234.0.

    Values that are not strings (already-converted floats, ints, NaN) are
    passed through ``float()`` unchanged, which makes the cell that applies
    this function safe to run more than once.

    Parameters
    ----------
    value : str or number
        Raw cell value.

    Returns
    -------
    float

    Raises
    ------
    ValueError
        If the remaining text is not a valid number.
    """
    # Already numeric: nothing to strip.
    if not isinstance(value, str):
        return float(value)
    # Remove everything after the comma
    integer_part = value.split(',')[0]
    # Remove any periods that are used as thousand separators
    integer_part = integer_part.replace('.', '')
    return float(integer_part)
# Convert both supply columns from text to floats, element by element.
for supply_col in ("photo", "wind_on_offshore"):
    df_realized_supply[supply_col] = df_realized_supply[supply_col].apply(preprocess_ssr)

In [None]:
# Mean photovoltaic supply per "YYYY-MM" bucket.
df_agg = df_realized_supply.groupby('month_year')["photo"].mean().reset_index()
x_axis = df_agg["month_year"]

# px.line creates the figure; no empty go.Figure() is needed beforehand.
fig = px.line(x=x_axis, y=df_agg.photo)
# title
fig.update_layout(
    title="Average photovoltaic power supply per month",
    xaxis_title="Month",
    yaxis_title="Power supply [MW]",
)
fig.show()



## Take weekly average

In [None]:
# Down-sample the photovoltaic supply in three steps: hourly -> daily -> weekly mean.
# resample needs a datetime axis; the frame still has its default integer index,
# so the "timestamps" column is named explicitly via on= (as in the later steps).
df_hourly = df_realized_supply.resample('H', on="timestamps").photo.mean().reset_index()

df_daily = df_hourly.resample('D', on="timestamps").photo.mean().reset_index()

df_weekly = df_daily.resample('W', on='timestamps').photo.mean().reset_index()






In [None]:
# Order the weekly averages chronologically and display them.
df_weekly = df_weekly.sort_values(by="timestamps")
df_weekly

In [None]:
# Plot the weekly mean photovoltaic supply.
df_agg = df_weekly
x_axis = df_agg["timestamps"]

# px.line creates the figure; no empty go.Figure() is needed beforehand.
fig = px.line(x=x_axis, y=df_agg.photo)
# title
fig.update_layout(
    title="Average photovoltaic power supply per week",
    xaxis_title="week",
    yaxis_title="Power supply [MW]",
)
fig.show()



In [None]:
# Plot the hourly mean photovoltaic supply.
df_agg = df_hourly
x_axis = df_agg["timestamps"]

# px.line creates the figure; no empty go.Figure() is needed beforehand.
fig = px.line(x=x_axis, y=df_agg.photo)
# title (x-axis label now matches the hourly resolution named in the title)
fig.update_layout(
    title="Average photovoltaic power supply per hour",
    xaxis_title="hour",
    yaxis_title="Power supply [MW]",
)
fig.show()



## Combine SSR with the photovoltaic supply

In [None]:
# Limit DataFrame output to 10 rows so displayed frames stay short.
pd.set_option('display.max_rows', 10)


In [None]:
# Drop the coordinate columns from df.
# Note: df is overwritten, so this cell raises KeyError if run a second time
# (the columns are already gone).
df = df.drop(columns=["longitude", "latitude"])

In [None]:
# Remove rows that are exact duplicates across all remaining columns,
# then display the result.
df = df.drop_duplicates()
df

In [None]:
# Parse the "time" column into datetimes; the resampling below uses this column.
df["timestamps"] = pd.to_datetime(df["time"])

In [None]:
# Same hourly -> daily -> weekly averaging as for the photovoltaic supply,
# now for the "ssr" column.
# Note: df_hourly and df_daily are reassigned here and no longer hold the
# photovoltaic frames afterwards.
df_hourly = df.resample('H', on="timestamps").ssr.mean().reset_index()
df_daily = df_hourly.resample('D', on="timestamps").ssr.mean().reset_index()
df_weekly_ssr = df_daily.resample('W', on="timestamps").ssr.mean().reset_index()
df_weekly_ssr


## Join dataframes

In [None]:
# Keep only the weeks present in both frames (inner join on the week timestamp).
df_joined = df_weekly.merge(df_weekly_ssr, how="inner", on="timestamps")
df_joined

In [None]:
import numpy as np
import pandas as pd
from darts import TimeSeries
from darts.models import KalmanForecaster
from darts.datasets import AirPassengersDataset
import matplotlib.pyplot as plt
from darts.utils.timeseries_generation import datetime_attribute_timeseries

# Univariate series: weekly photovoltaic supply indexed by 'timestamps'.
series = TimeSeries.from_dataframe(df_joined, time_col='timestamps', value_cols=['photo'])

# Kalman forecaster with a 34-dimensional state vector.
model = KalmanForecaster(dim_x=34)
model.fit(series)

# Forecast the next 60 time steps
forecast = model.predict(n=60)

# Plot the original series and the forecast on one figure
plt.figure(figsize=(10, 6))
series.plot(label='Actual')
forecast.plot(label='Forecast')
plt.title('Kalman Filter Forecast using N4SID')
plt.legend()
plt.show()

In [None]:
# Check which columns the joined frame provides.
df_joined.columns

In [None]:
import numpy as np
import pandas as pd
from darts import TimeSeries
from darts.models import KalmanForecaster
from darts.datasets import AirPassengersDataset
import matplotlib.pyplot as plt
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from sklearn.preprocessing import StandardScaler

# Two-component series: photovoltaic supply and ssr, indexed by 'timestamps'.
series = TimeSeries.from_dataframe(df_joined, time_col='timestamps', value_cols=['photo', "ssr"])

# Kalman forecaster with a 34-dimensional state vector.
model = KalmanForecaster(dim_x=34)
model.fit(series)

# Forecast the next 60 time steps
forecast = model.predict(n=60)

# Plot the original series and the forecast on one figure
plt.figure(figsize=(10, 6))
series.plot(label='Actual')
forecast.plot(label='Forecast')
plt.title('Kalman Filter Forecast using N4SID')
plt.legend()
plt.show()

In [None]:
import numpy as np
import pandas as pd
from darts import TimeSeries
from darts.models import KalmanForecaster
from darts.datasets import AirPassengersDataset
import matplotlib.pyplot as plt
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from sklearn.preprocessing import StandardScaler
# Standardise both columns on a copy. Writing the scaled values back into
# df_joined would leave every later cell (and any re-run of this one) working
# on already-scaled data.
scaler = StandardScaler()
df_scaled = df_joined.copy()
df_scaled[['photo', 'ssr']] = scaler.fit_transform(df_joined[['photo', 'ssr']])
series = TimeSeries.from_dataframe(df_scaled, 'timestamps', ['photo', "ssr"])

model = KalmanForecaster(dim_x=34)  # Specify the number of components (states)
model.fit(series)

# Forecast the next 60 time steps
forecast = model.predict(60)

# Plot the (scaled) series and the (scaled) forecast
plt.figure(figsize=(10, 6))
series.plot(label='Actual')
forecast.plot(label='Forecast')
plt.legend()
plt.title('Kalman Filter Forecast using N4SID')
plt.show()

In [None]:
import numpy as np
import pandas as pd
from darts import TimeSeries
from darts.models import KalmanForecaster
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Load your data


# Fit the scaler on the values currently in df_joined and keep the scaled
# values in a separate frame. df_joined itself is left untouched so that the
# inverse transform below really maps back to the values df_joined holds.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df_joined[['photo', 'ssr']])
df_scaled = df_joined.copy()
df_scaled[['photo', 'ssr']] = scaled_values

# Create a TimeSeries object with both 'photo' and 'ssr' columns (scaled)
series = TimeSeries.from_dataframe(df_scaled, 'timestamps', ['photo', 'ssr'])

# Initialize the KalmanForecaster with the appropriate dimension
model = KalmanForecaster(dim_x=34)  # Adjust dim_x based on the complexity needed
model.fit(series)

# Forecast the next 60 time steps
forecast = model.predict(60)

# Inverse transform the forecast data back to the scale of df_joined
forecast_df = pd.DataFrame(forecast.pd_dataframe(), columns=['photo', 'ssr'])
forecast_inverse = scaler.inverse_transform(forecast_df)

# Reconstruct the TimeSeries object from the inverse transformed data
forecast_series = TimeSeries.from_dataframe(
    pd.DataFrame(forecast_inverse, index=forecast.time_index, columns=['photo', 'ssr']),
    time_col=None
)

# Unscaled actuals, so both plotted curves share the same units
actual_series = TimeSeries.from_dataframe(df_joined, 'timestamps', ['photo', 'ssr'])

# Plot the original series and the forecast
plt.figure(figsize=(10, 6))
actual_series.plot(label='Actual')
forecast_series.plot(label='Forecast')
plt.legend()
# TODO: placeholder title — replace with a descriptive one
plt.title('lol')
plt.show()


In [None]:
import numpy as np
import pandas as pd
from darts import TimeSeries
from darts.models import KalmanForecaster
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Load your data


# Fit the scaler on the 'photo' values currently in df_joined and keep the
# scaled values in a separate frame; df_joined is not modified, so re-running
# this cell does not scale the data a second time.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df_joined[['photo']])
df_scaled = df_joined.copy()
df_scaled[['photo']] = scaled_values

# Create a TimeSeries object with only the (scaled) 'photo' column
series = TimeSeries.from_dataframe(df_scaled, 'timestamps', ['photo'])

# Initialize the KalmanForecaster with the appropriate dimension
model = KalmanForecaster(dim_x=34)  # Adjust dim_x based on the complexity needed
model.fit(series)

# Forecast the next 60 time steps
forecast = model.predict(60)

# Inverse transform the forecast data back to the scale of df_joined
forecast_df = pd.DataFrame(forecast.pd_dataframe(), columns=['photo'])
forecast_inverse = scaler.inverse_transform(forecast_df)

# Reconstruct the TimeSeries object from the inverse transformed data
forecast_series = TimeSeries.from_dataframe(
    pd.DataFrame(forecast_inverse, index=forecast.time_index, columns=['photo']),
    time_col=None
)

# Unscaled actuals, so both plotted curves share the same units
actual_series = TimeSeries.from_dataframe(df_joined, 'timestamps', ['photo'])

# Plot the original series and the inverse-transformed forecast
plt.figure(figsize=(10, 6))
actual_series.plot(label='Actual')
forecast_series.plot(label='Forecast')
plt.legend()
plt.title('Forecast with only photovoltaic power supply')
plt.show()


In [None]:
# Show the repr of the most recently fitted forecaster.
model

# Was wurde in der Masterarbeit gemacht?
- Exponential Smoothing um Model zu erstellen
- Trend, seasonality und residual
- Dabei wurden in jedem Update-Schritt des Kalman-Filters auch die Parameter des Modells geändert
- Parameter wurden mit Maximum Likelihood geschätzt
- Das Model als State Transition Model
- Die Messungen als Observation Model
- Das hat nur gut geklappt, weil das Herausfinden des zugrundeliegenden Modells durch die Saisonalität und die Muster möglich war
- Es wurde auch Autokorrelation genutzt, um das Fenster für die Tage zu finden – clever

## Problem für mich
- Exponential smoothing aufwändig
- Updaten von 2 Modellen so gesehen
- Auch rechenaufwändig (wie in der Masterarbeit beschrieben)
- Aufwand einer Masterarbeit vs. Seminararbeit mit 3 ECTS
- Bedarf kompletter Eigenimplementierung ohne Bibliothek

## Lösung
- Kalman verstanden
- Problemstellung verstanden
- Warum die Kombi nicht so gut ist in diesem Fall
- wann sie gut wäre (und was man machen müsste damit es hier gut ist)
- Nutze dennoch darts und erkläre N4SID
- Damit hätten wir:
    - State Space models
    - Kalman Filter
    - Usecases wo und wann er gut ist, was die einzelnen Komponenten sind
    - Vorgehen
    - Bezug auf unser Projekt, inwiefern das hier anwendbar ist
    - Lösung: N4SID und Kalman mittels Darts Implementierung
    - Fazit


## Fragen
- Macht es einen großen Unterschied, ob SSR und Photo oder nur Photo verwendet wird?
- Multivariat vs. univariat?
- Die Darts-Implementierung ist etwas schwammig: Die Hidden States sind nicht einsehbar, ebenso wenig die Kovarianzen – schlimm?
Immerhin beschreibe ich ja, was sie jeweils machen und wie sie zusammenhängen.