In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import math

# Mostly playing around with the data

In [None]:
# Names of all CSV files that sit directly inside the 'data' directory.
# NOTE(review): os.listdir returns entries in arbitrary order, while later cells pick
# files by position (files[0], files[5], files[2]) — confirm the order is stable.
files = [entry for entry in os.listdir('data') if entry.endswith('.csv')]
print(files)

In [None]:
import geopandas as gpd
from shapely.geometry import Point
path_to_germany = "./data/vg2500_geo84/vg2500_bld.shp"
germany_gdf = gpd.read_file(path_to_germany)
germany_gdf.plot()

In [None]:
df = pd.read_csv('data/' + files[0])

In [None]:
df

In [None]:
df_2022 = pd.read_csv('data/' + files[5])
df_2022

In [None]:
df = pd.concat([df,df_2022])
df

In [None]:
# Build the point geometries from the longitude/latitude columns in a single
# vectorized call instead of creating one shapely Point per row in Python.
geometry = gpd.points_from_xy(df.longitude, df.latitude)
# NOTE(review): no CRS is attached to geo_df; it is plotted on top of germany_gdf
# later, which presumably expects matching coordinates — confirm.
geo_df = gpd.GeoDataFrame(df, geometry=geometry)

In [None]:
fig, ax = plt.subplots()
germany_gdf.plot(ax=ax, color='lightgrey')

geo_df.plot(ax=ax, marker='o', color='red', markersize=5)

plt.show()

In [None]:

mid_latitude = df['latitude'].mean()
mid_longitude = df['longitude'].mean()

def categorize_location(row, mid_lat=None, mid_lon=None):
    """Classify a row into one of four quadrants around a split point.

    Parameters
    ----------
    row : mapping
        Anything indexable by 'latitude' and 'longitude' (e.g. a DataFrame row).
    mid_lat, mid_lon : number, optional
        Split point. When omitted, the module-level ``mid_latitude`` /
        ``mid_longitude`` are used, which keeps ``df.apply(categorize_location,
        axis=1)`` working exactly as before.

    Returns
    -------
    str
        'top_left', 'top_right', 'bottom_left' or 'bottom_right'. Points lying
        exactly on the split latitude count as 'top', points exactly on the split
        longitude count as 'left'. If either comparison cannot succeed (e.g. NaN
        coordinates) the result is 'bottom_right', as in the original chain.
    """
    if mid_lat is None:
        mid_lat = mid_latitude
    if mid_lon is None:
        mid_lon = mid_longitude

    lat = row['latitude']
    lon = row['longitude']

    if lat >= mid_lat and lon <= mid_lon:
        return 'top_left'
    elif lat >= mid_lat and lon > mid_lon:
        return 'top_right'
    elif lat < mid_lat and lon <= mid_lon:
        return 'bottom_left'
    else:
        return 'bottom_right'


df['location'] = df.apply(categorize_location, axis=1)
df['location']




In [None]:
df

In [None]:

fig, ax = plt.subplots()
for i in df['location'].unique():
    temp_df = df[df['location'] == i]
    ax.scatter(temp_df['longitude'], temp_df['latitude'], label=i)
ax.legend()
plt.show()

In [None]:
df.columns

In [None]:
df = df.drop(columns=["blh","tcc", "tsr", "sund", "tp", "fsr", "cdir", "z", "msl"])
df.columns

In [None]:
df_realized_supply = pd.read_csv('data/' + files[2], sep=';')
df_realized_supply.columns

In [None]:
df_realized_supply = df_realized_supply[['Date from', 'Date to', "Photovoltaic [MW]", "Wind Offshore [MW] ", "Wind Onshore [MW]"]]

In [None]:
def _mw_to_float(value):
    """Turn one raw MW cell into a float.

    Strings are reduced to the part before the first ',' with every '.' removed
    (the same rule this notebook applies to these columns further down); any
    other value is passed to float() unchanged.
    """
    if isinstance(value, str):
        value = value.split(',')[0].replace('.', '')
    return float(value)


# Convert both wind columns to numbers BEFORE adding them. The parsing step further
# down treats these values as strings; with string cells a plain `+` concatenates the
# two texts instead of summing the two power values.
df_realized_supply["wind_on_offshore"] = (
    df_realized_supply["Wind Offshore [MW] "].apply(_mw_to_float)
    + df_realized_supply["Wind Onshore [MW]"].apply(_mw_to_float)
)

In [None]:
df_realized_supply = df_realized_supply.drop(columns=["Wind Offshore [MW] ", "Wind Onshore [MW]"])

In [None]:
df_realized_supply["photo"] = df_realized_supply["Photovoltaic [MW]"]

In [None]:
df_realized_supply = df_realized_supply.drop(columns=["Photovoltaic [MW]"])

In [None]:
df_realized_supply

In [None]:
import plotly.graph_objects as go
import plotly.express as px

In [None]:
fig = go.Figure()
x_axis = df_realized_supply["Date from"]

fig  = px.line(x=x_axis, y=df_realized_supply.photo,
                    )
fig.show()



In [None]:
fig = go.Figure()
x_axis = df_realized_supply["Date from"]

fig  = px.line(x=x_axis, y=df_realized_supply.wind_on_offshore,
                    )
fig.show()



In [None]:
df_realized_supply["timestamps"] = pd.to_datetime(df_realized_supply["Date from"])
df_realized_supply['month_year'] = df_realized_supply['timestamps'].dt.strftime('%Y-%m')
df_realized_supply['day'] = df_realized_supply['timestamps'].dt.strftime('%d')
df_realized_supply["fullhour"] = df_realized_supply['timestamps'].dt.strftime('%H:%M')

In [None]:
df_realized_supply

In [None]:

# Keep every 4th row (presumably quarter-hourly records, i.e. one row per hour — confirm).
# .copy() makes this an independent frame: columns are assigned to it later, which on a
# bare slice triggers SettingWithCopyWarning and may not stick.
df_full_hour = df_realized_supply.iloc[::4, :].copy()
df_full_hour

In [None]:
# Keep every 16th row (presumably one row per four hours of quarter-hourly data — confirm).
# .copy() makes this an independent frame: columns are assigned to it later, which on a
# bare slice triggers SettingWithCopyWarning and may not stick.
df_full_4_hours = df_realized_supply.iloc[::16, :].copy()
df_full_4_hours

In [None]:
df_full_6_hours = df_realized_supply.iloc[::24, :]
df_full_6_hours

In [None]:
def preprocess_ssr(value):
    """Convert one raw cell value to ``float``.

    Parameters
    ----------
    value : str or number
        A string is parsed by dropping everything from the first ',' onwards and
        removing every '.', so "1.234,56" becomes 1234.0 (the part after the comma
        is discarded, not rounded). Any non-string value is handed to ``float()``
        as is, so ints, floats, float subclasses and NaN pass straight through.

    Returns
    -------
    float

    Raises
    ------
    ValueError
        If the remaining text is not a valid float literal.
    """
    # Test for "is a string" rather than "is not exactly float": the latter sent
    # ints and float subclasses into .split(), which raised AttributeError.
    if isinstance(value, str):
        value = value.split(',')[0].replace('.', '')

    return float(value)
df_realized_supply["photo"] = df_realized_supply["photo"].apply(preprocess_ssr)
df_realized_supply["wind_on_offshore"] = df_realized_supply["wind_on_offshore"].apply(preprocess_ssr)

df_full_hour["photo"] = df_realized_supply["photo"].apply(preprocess_ssr)
df_full_hour["wind_on_offshore"] = df_realized_supply["wind_on_offshore"].apply(preprocess_ssr)




In [None]:

df_full_4_hours["photo"] = df_realized_supply["photo"].apply(preprocess_ssr)
df_full_4_hours["wind_on_offshore"] = df_realized_supply["wind_on_offshore"].apply(preprocess_ssr)


In [None]:
df_agg = df_realized_supply.groupby('month_year')["photo"].mean().reset_index()
fig = go.Figure()
x_axis = df_agg["month_year"]

fig  = px.line(x=x_axis, y=df_agg.photo,
                    )

fig.update_layout(
    title="Average photovoltaic power supply per month",
    xaxis_title="Month",
    yaxis_title="Power supply [MW]",
)
fig.show()



In [None]:
df_hourly = df_realized_supply.resample('H', on="timestamps").photo.mean().reset_index()

df_daily = df_hourly.resample('D', on="timestamps").photo.mean().reset_index()

df_weekly = df_daily.resample('W', on='timestamps').photo.mean().reset_index()


In [None]:
df_hourly.sort_values(by="timestamps", inplace=True)
df_agg = df_hourly
fig = go.Figure()
x_axis = df_agg["timestamps"]

fig  = px.line(x=x_axis, y=df_agg.photo,
                    )

fig.update_layout(
    title="Average photovoltaic power supply per hour",
    xaxis_title="hour",
    yaxis_title="Power supply [MW]",
)
fig.show()

In [None]:
# Line chart of the weekly photovoltaic averages in chronological order.
df_weekly.sort_values(by="timestamps", inplace=True)
df_agg = df_weekly
x_axis = df_agg["timestamps"]

# px.line creates the figure itself; no empty go.Figure() is needed beforehand.
fig  = px.line(x=x_axis, y=df_agg.photo,
                    )

fig.update_layout(
    title="Average photovoltaic power supply per week",
    xaxis_title="week",
    yaxis_title="Power supply [MW]",
)
fig.show()

In [None]:
# NOTE(review): df_train receives its first assignment only in the following cell, so
# on a freshly started kernel this cell raises NameError — confirm the intended order.
df_train


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX

df_final = df_weekly
df_train = df_final[df_final["timestamps"] < "2022-01-01"]
df_test = df_final[df_final["timestamps"]>="2022-01-01"]

values = df_train['photo']
values_test = df_test['photo']



fit = model.fit(disp=False)


filtered_state_means = fit.filter_results.filtered_state[0]


filtered_series = pd.Series(filtered_state_means, df_train["timestamps"])
state_transition_matrix = fit.filter_results.transition
observation_matrix = fit.filter_results.design
process_covariance_matrix = fit.filter_results.state_cov
measurement_covariance_matrix = fit.filter_results.obs_cov
initial_state_mean = fit.filter_results.initial_state
initial_state_covariance = fit.filter_results.initial_state_cov


print("State Transition Matrix (F):")
print(state_transition_matrix)
print("\nObservation Matrix (H):")
print(observation_matrix)
print("\nProcess Covariance Matrix (Q):")
print(process_covariance_matrix)
print("\nMeasurement Covariance Matrix (R):")
print(measurement_covariance_matrix)
print("\nInitial State Mean:")
print(initial_state_mean)
print("\nInitial State Covariance:")
print(initial_state_covariance)



plt.figure(figsize=(15, 5))
plt.plot(df_train["timestamps"], values, label='Original')
plt.plot(filtered_series.index, filtered_series, label='Filtered')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Kalman Filtered Time Series')
plt.show()


predictions_test = fit.get_forecast(steps=len(df_test))
predicted_mean_test = predictions_test.predicted_mean

weekly_pred_df = pd.DataFrame({'predicted_mean': predicted_mean_test})
weekly_pred_df.index = df_test["timestamps"]

hourly_predictions = weekly_pred_df.resample('H').interpolate()



plt.figure(figsize=(15, 5))
plt.plot(df_train["timestamps"], values, label='Train')
plt.plot(df_test["timestamps"], values_test, label='Test')
plt.plot(hourly_predictions.index, hourly_predictions, label='hourly Predictions')
plt.plot(df_test["timestamps"], predicted_mean_test, label='Predicted')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Original vs Predicted Time Series')
plt.show()


In [None]:
plt.plot(hourly_predictions.index, hourly_predictions, label='Daily Predictions')

In [None]:
hourly_predictions

In [None]:
weekly_pred_df = pd.DataFrame({'predicted_mean': predicted_mean_test})
weekly_pred_df.index = df_test["timestamps"]

daily_predictions = weekly_pred_df.resample('D').interpolate()

In [None]:
daily_predictions

In [None]:
plt.plot(daily_predictions.index, daily_predictions, label='Daily Predictions')

In [None]:
df_hourly

In [None]:
df_hourly_certain_time = df_hourly[df_hourly["timestamps"]>="2022-01-02"]

In [None]:
plt.plot(df_hourly_certain_time["timestamps"], df_hourly_certain_time["photo"], label='Hourly vals')
plt.plot(hourly_predictions.index, hourly_predictions, label='hourly Predictions')

In [None]:
df_hourly_certain_time

In [None]:
hourly_predictions

In [None]:
hourly_index_2 = pd.date_range(start=df_weekly["timestamps"].min(), end=df_weekly["timestamps"].max(), freq='H')
hourly_index_2

In [None]:

# df_weekly keeps its dates in the "timestamps" column; its index is the plain integer
# index left by reset_index, so min()/max() of the index are row numbers, not dates.
# Build the hourly range from the column instead.
hourly_index = pd.date_range(start=df_weekly["timestamps"].min(), end=df_weekly["timestamps"].max(), freq='H')

# Re-align the hourly predictions to the full weekly span and interpolate along it.
hourly_predictions_2 = hourly_predictions.reindex(hourly_index_2).interpolate()
hourly_predictions_2



In [None]:

hourly_predictions_3 = hourly_predictions_2.loc[hourly_predictions.index.min():hourly_predictions.index.max()]

In [None]:

df_hourly['timestamps'] = pd.to_datetime(df_hourly['timestamps'])
df_hourly.set_index('timestamps', inplace=True)

In [None]:
hourly_predictions_3 = hourly_predictions_3[hourly_predictions_3.index.isin(df_hourly.index)] 

In [None]:
hourly_predictions_3

In [None]:






residuals_train = df_hourly.loc[hourly_predictions_3.index, 'photo'] - hourly_predictions_3.loc[hourly_predictions_3.index, 'predicted_mean']


plt.figure(figsize=(15, 5))
plt.plot(residuals_train.index, residuals_train, label='Residuals')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Residuals')
plt.title('Residuals from SARIMAX Model')
plt.show()


In [None]:
residuals_train.fillna(0, inplace=True)

In [None]:

from pykalman import KalmanFilter
kf = KalmanFilter(
    transition_matrices=[1],
    observation_matrices=[1],
    initial_state_mean=residuals_train.mean(),
    initial_state_covariance=np.var(residuals_train),
    observation_covariance=np.var(residuals_train),
    transition_covariance=np.eye(1) * 0.01
)


kf_state_means, kf_state_covariances = kf.smooth(residuals_train.values)


kalman_filtered_residuals = pd.Series(kf_state_means.flatten(), index=residuals_train.index)


In [None]:
kf_state_means

In [None]:

combined_predictions = hourly_predictions_3['predicted_mean'] + kalman_filtered_residuals.reindex(hourly_predictions_3.index, method='nearest')





plt.figure(figsize=(15, 5))
plt.plot(df_hourly.index, df_hourly['photo'], label='Original')
plt.plot(hourly_predictions_3.index, hourly_predictions_3, label='Hourly Predictions')
plt.plot(combined_predictions.index, combined_predictions, label='Combined Predictions')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Original vs Hourly vs Combined Predictions')
plt.show()


In [None]:
combined_predictions

In [None]:
# Line chart of the rows kept in df_full_hour, in chronological order.
# Re-bind instead of sorting in place: df_full_hour was derived by slicing another frame.
df_full_hour = df_full_hour.sort_values(by="timestamps")
df_agg = df_full_hour
x_axis = df_agg["timestamps"]

# px.line creates the figure itself; no empty go.Figure() is needed beforehand.
fig  = px.line(x=x_axis, y=df_agg.photo,
                    )

# NOTE(review): df_full_hour holds every 4th raw row rather than an hourly mean, so
# "Average" in the title may be misleading — confirm.
fig.update_layout(
    title="Average photovoltaic power supply per hour",
    xaxis_title="hour",
    yaxis_title="Power supply [MW]",
)
fig.show()

In [None]:
df_train = df_weekly[df_weekly["timestamps"] < "2022-01-01"]
df_test = df_weekly[df_weekly["timestamps"] >= "2022-01-01"]

values_train = df_train['photo']
values_test = df_test['photo']
values_train.fillna(0, inplace=True)
kf = KalmanFilter(


    em_vars=['transition_matrices', 'observation_matrices', 'transition_covariance', 'observation_covariance', 'initial_state_mean', 'initial_state_covariance']
)


kf = kf.em(values_train, n_iter=150)


In [None]:
df_train["timstamp"] = pd.to_datetime(df_train["timestamps"])
df_train.set_index("timstamp", inplace=True)


In [None]:

(smoothed_state_means_train, smoothed_state_covariances_train) = kf.smooth(values_train)


In [None]:
smoothed_state_means_train.shape

In [None]:




# First state component of the smoothed means, aligned with the training index.
# Without this line `smoothed_series` is not defined before this cell runs.
smoothed_series = pd.Series(smoothed_state_means_train[:, 0], index=df_train.index)

plt.figure(figsize=(15, 5))
plt.plot(df_train.index, values_train, label='Original')
plt.plot(df_train.index, smoothed_series, label='Smoothed')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Original vs Smoothed Time Series')
plt.show()


In [None]:

n_test_steps = len(values_test)
predicted_means = []


current_state_mean = smoothed_state_means_train[-1]
current_state_covariance = smoothed_state_covariances_train[-1]

for t in range(n_test_steps):

    current_state_mean, current_state_covariance = kf.filter_update(
        current_state_mean, current_state_covariance, observation=None, transition_matrix=kf.transition_matrices, observation_matrix=kf.observation_matrices, transition_covariance=kf.transition_covariance,
    )
    predicted_means.append(current_state_mean[0])

predicted_means = np.array(predicted_means)


In [None]:
# Parameters of the Kalman filter after EM fitting.
print("Transition matrix:\n", kf.transition_matrices)
print("Observation matrix:\n", kf.observation_matrices)
print("Transition covariance:\n", kf.transition_covariance)
print("Observation covariance:\n", kf.observation_covariance)
print("Initial state mean:\n", kf.initial_state_mean)
print("Initial state covariance:\n", kf.initial_state_covariance)


# The forecast loop is seeded with the last smoothed training state, so report that.
# The names `initial_state_mean` / `initial_state_covariance` still hold values taken
# from the earlier SARIMAX fit at this point and would describe a different model.
print("Initial state mean for predictions:\n", smoothed_state_means_train[-1])
print("Initial state covariance for predictions:\n", smoothed_state_covariances_train[-1])



In [None]:

values_test.fillna(0, inplace=True)
np.isnan(predicted_means).any()

In [None]:
from sklearn.metrics import r2_score, mean_squared_error


r2 = r2_score(values_test, predicted_means)
mse = mean_squared_error(values_test, predicted_means)


print(f'R^2: {r2:.4f}')
print(f'MSE: {mse:.4f}')


predicted_series = pd.Series(predicted_means, index=df_test["timestamps"])


plt.figure(figsize=(15, 5))


plt.plot(predicted_series.index, predicted_series, label='Predicted')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Original vs Predicted Time Series')
plt.show()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pykalman import KalmanFilter
from sklearn.metrics import r2_score, mean_squared_error





date_range = pd.date_range(start='2020-01-01', end='2020-12-31 23:00:00', freq='H')





synthetic_data = sin_pattern + trend + noise


# Keep the synthetic series under its own name: `df` holds the weather data loaded at
# the top of the notebook, and later cells still drop "longitude"/"latitude" from it
# and read its "time" and "ssr" columns. Re-using `df` here would break those cells.
synthetic_df = pd.DataFrame({'timestamps': date_range, 'photo': synthetic_data})
synthetic_df = synthetic_df.set_index('timestamps')


# Chronological split: rows before split_date are used for fitting, the rest for testing.
split_date = '2020-10-01'
df_train = synthetic_df[synthetic_df.index < split_date]
df_test = synthetic_df[synthetic_df.index >= split_date]

values_train = df_train['photo'].values
values_test = df_test['photo'].values


kf = KalmanFilter(


    em_vars=['transition_matrices', 'observation_matrices', 'transition_covariance', 'observation_covariance', 'initial_state_mean', 'initial_state_covariance']
)

kf = kf.em(values_train, n_iter=20)


(smoothed_state_means_train, smoothed_state_covariances_train) = kf.smooth(values_train)


smoothed_series = pd.Series(smoothed_state_means_train[:, 0], index=df_train.index)

plt.figure(figsize=(15, 5))
plt.plot(df_train.index, values_train, label='Original')
plt.plot(smoothed_series.index, smoothed_series, label='Smoothed')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Original vs Smoothed Time Series')
plt.show()


def predict_kalman_filter(kf, initial_state_mean, initial_state_covariance, n_steps):
    """Propagate a filter state ``n_steps`` times with no observation.

    Starting from the given mean/covariance, ``kf.filter_update`` is called once
    per step with ``observation=None``; each call's outputs become the inputs of
    the next call.

    Returns
    -------
    numpy.ndarray
        The first component of the state mean after each step, in step order.
    """
    mean, covariance = initial_state_mean, initial_state_covariance
    first_components = []

    for _step in range(n_steps):
        mean, covariance = kf.filter_update(mean, covariance, observation=None)
        first_components.append(mean[0])

    return np.array(first_components)

initial_state_mean = smoothed_state_means_train[-1]
initial_state_covariance = smoothed_state_covariances_train[-1]
n_test_steps = len(values_test)

predicted_means = predict_kalman_filter(kf, initial_state_mean, initial_state_covariance, n_test_steps)


r2 = r2_score(values_test, predicted_means)
mse = mean_squared_error(values_test, predicted_means)

print(f'R^2: {r2:.4f}')
print(f'MSE: {mse:.4f}')

predicted_series = pd.Series(predicted_means, index=df_test.index)

plt.figure(figsize=(15, 5))
plt.plot(df_train.index, df_train['photo'], label='Train')
plt.plot(df_test.index, df_test['photo'], label='Test')
plt.plot(predicted_series.index, predicted_series, label='Predicted')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Original vs Predicted Time Series')
plt.show()


print("Transition matrix:\n", kf.transition_matrices)
print("Observation matrix:\n", kf.observation_matrices)
print("Transition covariance:\n", kf.transition_covariance)
print("Observation covariance:\n", kf.observation_covariance)
print("Initial state mean:\n", kf.initial_state_mean)
print("Initial state covariance:\n", kf.initial_state_covariance)
print("Initial state mean for predictions:\n", initial_state_mean)
print("Initial state covariance for predictions:\n", initial_state_covariance)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pykalman import KalmanFilter


np.random.seed(0)
time = np.linspace(0, 10, 500)
values = 100 + 10 * np.sin(time) + np.random.normal(size=time.shape) * 2


data = pd.DataFrame({'date': pd.date_range(start='2020-01-01', periods=len(time), freq='D'), 'value': values})


values = data['value'].values


initial_state_mean = values[0]


observation_matrix = np.array([[1]])


transition_matrix = np.array([[1]])






kf = KalmanFilter(


    em_vars=['transition_matrices', 'observation_matrices', 'transition_covariance', 'observation_covariance', 'initial_state_mean', 'initial_state_covariance']
)





filtered_state_means, filtered_state_covariances = kf.filter(values)
n_forecast = 50
last_filtered_state_mean = filtered_state_means[-1]
forecasted_state_means = last_filtered_state_mean
# One entry per forecast step. The list starts empty so that its length equals
# n_forecast, matching the n_forecast dates the forecast is plotted against.
forecasted_values = []

for _ in range(n_forecast):
    # Advance the state one step with the transition matrix and record its first component.
    forecasted_state_means = np.dot(transition_matrix, forecasted_state_means)
    forecasted_values.append(forecasted_state_means[0])

forecasted_values = np.array(forecasted_values)


plt.figure(figsize=(14, 7))
plt.plot(data['date'], values, label='Original')
plt.plot(data['date'], filtered_state_means, label='Filtered')
plt.plot(pd.date_range(start=data['date'].iloc[-1], periods=n_forecast + 1, freq='D')[1:], forecasted_values, label='Forecasted')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Kalman Filter - Original, Filtered and Forecasted Time Series')
plt.show()


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pykalman import KalmanFilter


np.random.seed(0)
time = np.linspace(0, 10, 500)
values = 100 + 10 * np.sin(time) + np.random.normal(size=time.shape) * 2


data = pd.DataFrame({'date': pd.date_range(start='2020-01-01', periods=len(time), freq='D'), 'value': values})


values = data['value'].values


kf = KalmanFilter(


    em_vars=['transition_matrices', 'observation_matrices', 'transition_covariance', 'observation_covariance', 'initial_state_mean', 'initial_state_covariance']
)


initial_state_mean = [values[0], 0]
initial_state_covariance = np.eye(2)





filtered_state_means, filtered_state_covariances = kf.filter(values.reshape(-1, 1))


n_forecast = 50
last_filtered_state_mean = filtered_state_means[-1]
forecasted_state_means = last_filtered_state_mean
forecasted_values = []
print(kf.transition_matrices)

for _ in range(n_forecast):
    forecasted_state_means = kf.transition_matrices @ forecasted_state_means
    forecasted_values.append(forecasted_state_means[0])

forecasted_values = np.array(forecasted_values)


plt.figure(figsize=(14, 7))


plt.plot(pd.date_range(start=data['date'].iloc[-1], periods=n_forecast + 1, freq='D')[1:], forecasted_values, label='Forecasted')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Kalman Filter - Original, Filtered and Forecasted Time Series')
plt.show()


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pykalman import KalmanFilter


np.random.seed(0)
time = np.linspace(0, 10, 500)
values = 100 + 10 * np.sin(time) + np.random.normal(size=time.shape) * 2


data = pd.DataFrame({'date': pd.date_range(start='2020-01-01', periods=len(time), freq='D'), 'value': values})


values = data['value'].values






transition_matrix = np.array([
    [1, 1, 0, 0],
    [0, 1, -omega, 0],
    [0, 0, np.cos(omega), -np.sin(omega)],
    [0, 0, np.sin(omega), np.cos(omega)]
])


observation_matrix = np.array([[1, 0, 1, 0]])










kf = KalmanFilter(
    transition_matrices=transition_matrix,
    observation_matrices=observation_matrix,
    initial_state_mean=initial_state_mean,
    initial_state_covariance=initial_state_covariance,
    observation_covariance=observation_covariance,
    transition_covariance=transition_covariance
)





filtered_state_means, filtered_state_covariances = kf.filter(values.reshape(-1, 1))


n_forecast = 50
last_filtered_state_mean = filtered_state_means[-1]
forecasted_state_means = last_filtered_state_mean
forecasted_values = []

for _ in range(n_forecast):
    forecasted_state_means = np.dot(kf.transition_matrices, forecasted_state_means)
    forecasted_values.append(forecasted_state_means[0])

forecasted_values = np.array(forecasted_values)


plt.figure(figsize=(14, 7))
plt.plot(data['date'], values, label='Original')
plt.plot(data['date'], filtered_state_means[:, 0], label='Filtered')
plt.plot(pd.date_range(start=data['date'].iloc[-1], periods=n_forecast + 1, freq='D')[1:], forecasted_values, label='Forecasted')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Kalman Filter - Original, Filtered and Forecasted Time Series')
plt.show()


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pykalman import KalmanFilter


np.random.seed(0)
time = np.linspace(0, 10, 500)
values = 100 + 10 * np.sin(time) + np.random.normal(size=time.shape) * 2


data = pd.DataFrame({'date': pd.date_range(start='2020-01-01', periods=len(time), freq='D'), 'value': values})


values = data['value'].values






transition_matrix = np.array([
    [1, dt, 0.5 * dt**2],
    [0, 1, dt],
    [0, 0, 1]
])












kf = KalmanFilter(
    transition_matrices=transition_matrix,
    observation_matrices=observation_matrix,
    initial_state_mean=initial_state_mean,
    initial_state_covariance=initial_state_covariance,
    observation_covariance=observation_covariance,
    transition_covariance=transition_covariance
)





filtered_state_means, filtered_state_covariances = kf.filter(values.reshape(-1, 1))


n_forecast = 50
last_state_mean = filtered_state_means[-1]
last_state_covariance = filtered_state_covariances[-1]
forecasted_values = []

for _ in range(n_forecast):
    last_state_mean, last_state_covariance = kf.filter_update(
        last_state_mean, last_state_covariance
    )
    forecasted_values.append(last_state_mean[0])

forecasted_values = np.array(forecasted_values)


plt.figure(figsize=(14, 7))
plt.plot(data['date'], values, label='Original')
plt.plot(data['date'], filtered_state_means[:, 0], label='Filtered')
plt.plot(pd.date_range(start=data['date'].iloc[-1], periods=n_forecast + 1, freq='D')[1:], forecasted_values, label='Forecasted')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Kalman Filter - Original, Filtered and Forecasted Time Series')
plt.show()


In [None]:
# Set missing "photo" values to 0. A single .loc call writes into df_hourly itself;
# the chained form df[mask]["photo"] = 0 assigns to a temporary copy and changes nothing.
df_hourly.loc[df_hourly["photo"].isnull(), "photo"] = 0


In [None]:
df_hourly.fillna(0, inplace=True)

In [None]:
import pandas as pd
from darts import TimeSeries
from darts.models import KalmanForecaster
from darts.metrics import mape
from darts.metrics.metrics import rmse, mae, r2_score

In [None]:
# An earlier cell moves "timestamps" into df_hourly's index in place. Bring it back as
# a column when needed, because the split below and TimeSeries.from_dataframe both
# address it by column name.
df_final = df_hourly if "timestamps" in df_hourly.columns else df_hourly.reset_index()
df_train = df_final[df_final["timestamps"] < "2022-01-01"]
df_test = df_final[df_final["timestamps"]>="2022-01-01"]


series = TimeSeries.from_dataframe(df_train, "timestamps", ["photo"])
series_test = TimeSeries.from_dataframe(df_test, "timestamps", ["photo"])

model = KalmanForecaster(dim_x=80)
model.fit(series)

forecast = model.predict(len(series_test))

pred_val = forecast
val_error = mape(forecast, series_test)
print(f'MAPE on validation set: {val_error:.2f}%')

eval = rmse(series_test, forecast)
eval_mae = mae(series_test, forecast)
r2 = r2_score(series_test, forecast)




print(eval)
print(eval_mae)
print(r2)



plt.figure(figsize=(10, 6))
series.plot(label='train')
series_test.plot(label='test_vals')
forecast.plot(label='Forecast')

plt.legend()
plt.title('Kalman Filter Forecast using N4SID')
plt.show()


In [None]:
import pandas as pd
import numpy as np







values = seasonal_pattern + noise
data = pd.DataFrame({'date': dates, 'value': values})
data.to_csv('sample_data.csv', index=False)


print(data.head())

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX







values = seasonal_pattern + noise
data = pd.DataFrame({'date': dates, 'value': values})
data.to_csv('sample_data.csv', index=False)


data = pd.read_csv('sample_data.csv', parse_dates=['date'], index_col='date')


values = data['value']



fit = model.fit(disp=False)


filtered_state_means = fit.filter_results.filtered_state[0]


filtered_series = pd.Series(filtered_state_means, index=data.index)


plt.figure(figsize=(15, 5))
plt.plot(data.index, values, label='Original')
plt.plot(filtered_series.index, filtered_series, label='Filtered')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Kalman Filtered Time Series')
plt.show()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX

# An earlier cell moves "timestamps" into df_hourly's index in place. Bring it back as
# a column when needed, because the split below addresses it by column name.
df_final = df_hourly if "timestamps" in df_hourly.columns else df_hourly.reset_index()
df_train = df_final[df_final["timestamps"] < "2022-01-01"]
df_test = df_final[df_final["timestamps"]>="2022-01-01"]

values = df_train['photo']



fit = model.fit(disp=True, maxiter=10)


filtered_state_means = fit.filter_results.filtered_state[0]


# Index the filtered state by the training timestamps. `values` comes from df_train in
# this cell, whereas `data` is the sample-data frame of an earlier cell and has a
# different length.
filtered_series = pd.Series(filtered_state_means, index=df_train["timestamps"])


plt.figure(figsize=(15, 5))
plt.plot(df_train["timestamps"], values, label='Original')
plt.plot(filtered_series.index, filtered_series, label='Filtered')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Kalman Filtered Time Series')
plt.show()


In [None]:
# Line chart of the daily photovoltaic averages.
df_agg = df_daily
x_axis = df_agg["timestamps"]

# px.line creates the figure itself; no empty go.Figure() is needed beforehand.
fig  = px.line(x=x_axis, y=df_agg.photo,
                    )

fig.update_layout(
    title="Average photovoltaic power supply per day (actual)",
    xaxis_title="day",
    yaxis_title="Power supply [MW]",
)
fig.show()



In [None]:
df_weekly.sort_values(by="timestamps", inplace=True)
df_agg = df_weekly
fig = go.Figure()
x_axis = df_agg["timestamps"]

fig  = px.line(x=x_axis, y=df_agg.photo,
                    )

fig.update_layout(
    title="Average photovoltaic power supply per week (actual)",
    xaxis_title="week",
    yaxis_title="Power supply [MW]",
)
fig.show()



In [None]:
pd.set_option('display.max_rows', 10)


In [None]:
df = df.drop(columns=["longitude", "latitude"])

In [None]:
df = df.drop_duplicates()
df

In [None]:
df["timestamps"] = pd.to_datetime(df["time"])

In [None]:
df_hourly_ssr = df.resample('H', on="timestamps")["ssr"].mean().reset_index()
df_daily_ssr = df_hourly_ssr.resample('D', on="timestamps")["ssr"].mean().reset_index()
df_weekly_ssr =df_daily_ssr.resample('W', on="timestamps")["ssr"].mean().reset_index()
df_weekly_ssr


In [None]:

# NOTE(review): df_joined is only a second name for df_daily — nothing is merged here,
# so it has no "ssr" column although later cells select ['photo', 'ssr'] from it.
# Presumably df_daily_ssr was meant to be merged in on "timestamps" — confirm.
# Because both names refer to the same object, in-place changes to df_joined also
# change df_daily.
df_joined = df_daily
df_joined

In [None]:
import numpy as np
import pandas as pd
from darts import TimeSeries
from darts.models import KalmanForecaster
from darts.datasets import AirPassengersDataset
import matplotlib.pyplot as plt
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.metrics.metrics import rmse, mae, r2_score


In [None]:
df_final = df_joined
df_train = df_final[df_final["timestamps"] < "2021-01-01"]
df_test = df_final[df_final["timestamps"]>="2021-01-01"]

series = TimeSeries.from_dataframe(df_train, "timestamps", ["photo"])
series_actual = TimeSeries.from_dataframe(df_test, "timestamps", ["photo"])

model = KalmanForecaster(dim_x=100
                         )
model.fit(series)

forecast = model.predict(365)

eval = rmse(series_actual, forecast)
eval_mae = mae(series_actual, forecast)
r2 = r2_score(series_actual, forecast)


print(eval)
print(eval_mae)
print(r2)
plt.figure(figsize=(10, 6))
series.plot(label='train')
series_actual.plot(label='test_vals')
forecast.plot(label='Forecast')

plt.legend()
plt.title('Kalman Filter Forecast using N4SID')
plt.show()

In [None]:
df_final = df_joined
df_train = df_final[df_final["timestamps"] < "2021-01-01"]
df_test = df_final[df_final["timestamps"]>="2021-01-01"]

series = TimeSeries.from_dataframe(df_train, "timestamps", ["photo"])
series_actual = TimeSeries.from_dataframe(df_test, "timestamps", ["photo"])

model = KalmanForecaster(dim_x=200
                         )
model.fit(series)

forecast = model.predict(365)

eval = rmse(series_actual, forecast)
eval_mae = mae(series_actual, forecast)
r2 = r2_score(series_actual, forecast)


print(eval)
print(eval_mae)
print(r2)
plt.figure(figsize=(10, 6))
series.plot(label='train')
series_actual.plot(label='test_vals')
forecast.plot(label='Forecast')

plt.legend()
plt.title('Kalman Filter Forecast using N4SID')
plt.show()

In [None]:
df_final = df_joined
df_train = df_final[df_final["timestamps"] < "2021-01-01"]
df_test = df_final[df_final["timestamps"]>="2021-01-01"]

series = TimeSeries.from_dataframe(df_train, "timestamps", ["photo"])
series_actual = TimeSeries.from_dataframe(df_test, "timestamps", ["photo"])

model = KalmanForecaster(dim_x=146
                         
                         )
model.fit(series)

forecast = model.predict(365)

eval = rmse(series_actual, forecast)
eval_mae = mae(series_actual, forecast)
r2 = r2_score(series_actual, forecast)


print(eval)
print(eval_mae)
print(r2)
plt.figure(figsize=(10, 6))
series.plot(label='train')
series_actual.plot(label='test_vals')
forecast.plot(label='Forecast')

plt.legend()
plt.title('Kalman Filter Forecast using N4SID')
plt.show()

In [None]:
df_final = df_joined
df_train = df_final[df_final["timestamps"] < "2021-01-01"]
df_test = df_final[df_final["timestamps"]>="2021-01-01"]

series = TimeSeries.from_dataframe(df_train, "timestamps", ["photo"])
series_actual = TimeSeries.from_dataframe(df_test, "timestamps", ["photo"])

model = KalmanForecaster(dim_x=220
                         
                         )
model.fit(series)

forecast = model.predict(730)

eval = rmse(series_actual, forecast)
eval_mae = mae(series_actual, forecast)
r2 = r2_score(series_actual, forecast)


print(eval)
print(eval_mae)
print(r2)
plt.figure(figsize=(10, 6))
series.plot(label='train')
series_actual.plot(label='test_vals')
forecast.plot(label='Forecast')

plt.legend()
plt.title('Kalman Filter Forecast using N4SID')
plt.show()

In [None]:
import time

In [None]:
df_joined

In [None]:
df_final = df_joined
df_train = df_final[df_final["timestamps"] < "2022-01-01"]
df_test = df_final[df_final["timestamps"]>="2022-01-01"]


In [None]:
df_train

In [None]:
# Sweep over the Kalman state dimension and keep the two candidates with the highest R2.
# NOTE(review): candidates are ranked on df_test itself, so the reported scores are
# optimistic — consider a separate validation split.
print("start training..")
time_start = time.time()
series = TimeSeries.from_dataframe(df_train, "timestamps", ["photo"])
series_actual = TimeSeries.from_dataframe(df_test, "timestamps", ["photo"])

# One record per candidate. Ranking after the sweep (rather than tracking a running
# best that starts at R2 = 0) still works when every R2 is negative and also finds
# the true runner-up, even when it was fitted after the best candidate.
candidates = []

for i in range(220,285):

    model = KalmanForecaster(dim_x=i)
    model.fit(series)

    forecast = model.predict(365)

    eval = rmse(series_actual, forecast)
    eval_mae = mae(series_actual, forecast)
    r2 = r2_score(series_actual, forecast)
    if i%10==0:
        print(f"Step: {i}")

    candidates.append({"rmse": eval, "mae": eval_mae, "r2": r2, "dim_x": i, "forecast": forecast})
print("training finished")
print(f"duration:  {time.time()- time_start}")


def _r2_rank_key(candidate):
    """Sort key: the candidate's R2, with NaN (x != x) ranked below every real score."""
    score = candidate["r2"]
    return score if score == score else float("-inf")


ranked = sorted(candidates, key=_r2_rank_key, reverse=True)
best, second = ranked[0], ranked[1]

best_eval = best["rmse"]
best_mae = best["mae"]
best_r2 = best["r2"]
best_number_states = best["dim_x"]
best_series = best["forecast"]

second_best_eval = second["rmse"]
second_best_mae = second["mae"]
second_best_r2 = second["r2"]
second_best_number_states = second["dim_x"]

print(f"RMSE: {best_eval}, MAE: {best_mae}, R2: {best_r2}, best_number_states: {best_number_states} \n")
print(f"Second: RMSE: {second_best_eval}, MAE: {second_best_mae}, R2: {second_best_r2}, best_number_states: {second_best_number_states}")
plt.figure(figsize=(10, 6))
series.plot(label='train')
series_actual.plot(label='test_vals')
best_series.plot(label='Forecast')

plt.legend()
plt.title('Kalman Filter Forecast using N4SID')
plt.show()

In [None]:



model.fit(series)


forecast = model.predict(60)


plt.figure(figsize=(10, 6))
series.plot(label='Actual')
forecast.plot(label='Forecast')
plt.legend()
plt.title('Kalman Filter Forecast using N4SID')
plt.show()

In [None]:
df_joined.columns

In [None]:
import numpy as np
import pandas as pd
from darts import TimeSeries
from darts.models import KalmanForecaster
from darts.datasets import AirPassengersDataset
import matplotlib.pyplot as plt
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from sklearn.preprocessing import StandardScaler

# Two-component series (photo + ssr) built from the joined frame; the model is
# refit on it and a 60-step forecast is plotted.
# NOTE(review): `model` is not created in this cell; it is reused from the kernel.
series = TimeSeries.from_dataframe(
    df_joined,
    time_col='timestamps',
    value_cols=['photo', "ssr"],
)

model.fit(series)
forecast = model.predict(60)

plt.figure(figsize=(10, 6))
for curve, curve_label in ((series, 'Actual'), (forecast, 'Forecast')):
    curve.plot(label=curve_label)
plt.title('Kalman Filter Forecast using N4SID')
plt.legend()
plt.show()

In [None]:
import numpy as np
import pandas as pd
from darts import TimeSeries
from darts.models import KalmanForecaster
from darts.datasets import AirPassengersDataset
import matplotlib.pyplot as plt
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from sklearn.preprocessing import StandardScaler
# Standardise photo and ssr before fitting.
# The scaled values are written to a copy: overwriting df_joined would make every
# later cell that reads df_joined (splits, error metrics) run on standardised
# values instead of the original ones.
scaler = StandardScaler()
df_scaled = df_joined.copy()
df_scaled[['photo', 'ssr']] = scaler.fit_transform(df_joined[['photo', 'ssr']])
series = TimeSeries.from_dataframe(df_scaled, 'timestamps', ['photo', "ssr"])


# NOTE(review): `model` is not created in this cell; it is reused from the kernel.
model.fit(series)


forecast = model.predict(60)


# Both curves are in standardised units here.
plt.figure(figsize=(10, 6))
series.plot(label='Actual')
forecast.plot(label='Forecast')
plt.legend()
plt.title('Kalman Filter Forecast using N4SID')
plt.show()

In [None]:
import numpy as np
import pandas as pd
from darts import TimeSeries
from darts.models import KalmanForecaster
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler




# Fit on standardised photo/ssr, then map the forecast back with the same scaler
# so it can be compared with the data in df_joined's units.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df_joined[['photo', 'ssr']])
# Write the scaled values to a copy; overwriting df_joined would leak the
# standardised columns into every later cell that reads df_joined.
df_scaled = df_joined.copy()
df_scaled[['photo', 'ssr']] = scaled_values


# `series` (scaled) is what the model is trained on; `series_unscaled` is only
# used for plotting next to the inverse-transformed forecast.
series = TimeSeries.from_dataframe(df_scaled, 'timestamps', ['photo', 'ssr'])
series_unscaled = TimeSeries.from_dataframe(df_joined, 'timestamps', ['photo', 'ssr'])


# NOTE(review): `model` is not created in this cell; it is reused from the kernel.
model.fit(series)


forecast = model.predict(60)


# Undo the standardisation on the forecast values.
forecast_df = pd.DataFrame(forecast.pd_dataframe(), columns=['photo', 'ssr'])
forecast_inverse = scaler.inverse_transform(forecast_df)


# Rebuild a TimeSeries on the forecast's own time index (time_col=None -> use the index).
forecast_series = TimeSeries.from_dataframe(
    pd.DataFrame(forecast_inverse, index=forecast.time_index, columns=['photo', 'ssr']),
    time_col=None
)


# Plot actual and forecast in the same units; previously the scaled series was
# drawn against the inverse-transformed forecast.
plt.figure(figsize=(10, 6))
series_unscaled.plot(label='Actual')
forecast_series.plot(label='Forecast')
plt.legend()
plt.title('lol')
plt.show()


In [None]:
import numpy as np
import pandas as pd
from darts import TimeSeries
from darts.models import KalmanForecaster
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler




# Univariate variant: fit on standardised photo only, then map the forecast back
# with the same scaler.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df_joined[['photo']])
# Write the scaled values to a copy; overwriting df_joined would leak the
# standardised column into every later cell that reads df_joined.
df_scaled = df_joined.copy()
df_scaled[['photo']] = scaled_values


# `series` (scaled) is what the model is trained on; `series_unscaled` is only
# used for plotting next to the inverse-transformed forecast.
series = TimeSeries.from_dataframe(df_scaled, 'timestamps', ['photo'])
series_unscaled = TimeSeries.from_dataframe(df_joined, 'timestamps', ['photo'])


# NOTE(review): `model` is not created in this cell; it is reused from the kernel.
model.fit(series)


forecast = model.predict(60)


# Undo the standardisation on the forecast values.
forecast_df = pd.DataFrame(forecast.pd_dataframe(), columns=['photo'])
forecast_inverse = scaler.inverse_transform(forecast_df)


# Rebuild a TimeSeries on the forecast's own time index (time_col=None -> use the index).
forecast_series = TimeSeries.from_dataframe(
    pd.DataFrame(forecast_inverse, index=forecast.time_index, columns=['photo']),
    time_col=None
)


# Plot the inverse-transformed forecast (it was computed but never shown before)
# against the data in the same units.
plt.figure(figsize=(10, 6))
series_unscaled.plot(label='Actual')
forecast_series.plot(label='Forecast')
plt.legend()
plt.title('Forecast with only photovoltaic power supply')
plt.show()


In [None]:
# Show the repr of the current model object.
model


- Exponential Smoothing um Model zu erstellen
- Trend, seasonality und residual
- Dabei wurden in jedem Update-Schritt des Kalman-Filters auch die Parameter des Modells geändert
- Parameter wurden mit Maximum Likelihood geschätzt
- Das Model als State Transition Model
- Die Messungen als Observation Model
- Das hat nur gut geklappt, weil das Herausfinden des zugrundeliegenden Modells durch die Saisonalität und die Muster möglich war
- Hat auch die Autokorrelation genutzt, um das Fenster (Window) für die Tage zu finden – clever


- Exponential smoothing aufwändig
- Updaten von 2 Modellen so gesehen
- Auch rechenaufwändig (wie in der Masterarbeit beschrieben)
- Masterarbeitaufwand vs Seminararbeit 3 ects
- Bedarf kompletter Eigenimplementierung ohne Bibliothek


- Kalman verstanden
- Problemstellung verstanden
- Warum die Kombi nicht so gut ist in diesem Fall
- wann sie gut wäre (und was man machen müsste damit es hier gut ist)
- Nutze dennoch darts und erkläre N4SID
- Damit hätten wir:
    - State Space models
    - Kalman Filter
    - Usecases wo und wann er gut ist, was die einzelnen Komponenten sind
    - Vorgehen
    - Bezug auf unser Projekt, inwiefern das hier anwendbar ist
    - Lösung: N4SID und Kalman mittels Darts Implementierung
    - Fazit



- Multivariat vs. univariat?
- Darts Implementierung etwas schwammig, hidden states nicht einsehbar, genauso wie die Kovarianzen - schlimm ?


In [None]:
import pandas as pd
from darts import TimeSeries
from darts.models import KalmanForecaster
from darts.metrics import mape

# Three-way chronological split of the joined frame:
#   train: before 2021-06-01 | validation: 2021-06-01 .. 2021-12-31 | test: from 2022-01-01
df_final = df_joined
df_train = df_final[df_final["timestamps"] < "2021-06-01"]
df_val = df_final[(df_final["timestamps"] >= "2021-06-01") & (df_final["timestamps"] < "2022-01-01")]
df_test = df_final[df_final["timestamps"] >= "2022-01-01"]

series = TimeSeries.from_dataframe(df_train, "timestamps", ["photo"])
series_val = TimeSeries.from_dataframe(df_val, "timestamps", ["photo"])


series_test = TimeSeries.from_dataframe(df_test, "timestamps", ["photo"])

# Stage 1: fit on the training part and score a forecast spanning the validation part.
model = KalmanForecaster(dim_x=280)
model.fit(series)

forecast = model.predict(len(series_val))

pred_val = forecast
val_error = mape(series_val, pred_val)
print(f'MAPE on validation set: {val_error:.2f}%')

eval = rmse(series_val, forecast)
eval_mae = mae(series_val, forecast)
r2 = r2_score(series_val, forecast)

print(eval)
print(eval_mae)
print(r2)

# Stage 2: refit on train + validation and score a forecast spanning the test part.
print("training again..")
combined_train_val = series.append(series_val)
model.fit(combined_train_val)

pred_test = model.predict(len(series_test))
test_error = mape(series_test, pred_test)
print(f'MAPE on test set: {test_error:.2f}%')
# Metrics take (actual, predicted). The arguments were swapped here before, which
# changes the R² value because R² is not symmetric in its two arguments.
eval = rmse(series_test, pred_test)
eval_mae = mae(series_test, pred_test)
r2 = r2_score(series_test, pred_test)

print(eval)
print(eval_mae)
print(r2)


plt.figure(figsize=(10, 6))
combined_train_val.plot(label='train')
series_test.plot(label='test_vals')
pred_test.plot(label='Forecast')

plt.legend()
plt.title('Kalman Filter Forecast using N4SID')
plt.show()





In [None]:
# Two-way chronological split: train before 2022-01-01, test from 2022-01-01 on.
df_final = df_joined
df_train = df_final[df_final["timestamps"] < "2022-01-01"]
df_test = df_final[df_final["timestamps"] >= "2022-01-01"]


series = TimeSeries.from_dataframe(df_train, "timestamps", ["photo"])
series_test = TimeSeries.from_dataframe(df_test, "timestamps", ["photo"])

model = KalmanForecaster(dim_x=280)
model.fit(series)

# Forecast as many steps as the test series has points.
forecast = model.predict(len(series_test))

pred_val = forecast
# Metrics take (actual, predicted). MAPE is relative to the actual values, so the
# previously swapped call divided by the forecast instead.
val_error = mape(series_test, forecast)
# This score is computed on the test split, so the label says so.
print(f'MAPE on test set: {val_error:.2f}%')

eval = rmse(series_test, forecast)
eval_mae = mae(series_test, forecast)
r2 = r2_score(series_test, forecast)

print(eval)
print(eval_mae)
print(r2)


plt.figure(figsize=(10, 6))
series.plot(label='train')
series_test.plot(label='test_vals')
forecast.plot(label='Forecast')

plt.legend()
plt.title('Kalman Filter Forecast using N4SID')
plt.show()


In [None]:
import numpy as np
import pylab as pl
from pykalman import KalmanFilter



def _standardize(values):
    """Return `values` shifted to zero mean and divided by their standard deviation."""
    return (values - np.mean(values)) / np.std(values)


n_timesteps = 100

# NOTE(review): solar_radiation, pv_output and every matrix/offset/covariance
# passed to KalmanFilter below are not defined in this cell; they must already
# exist in the kernel.
solar_radiation = _standardize(solar_radiation)
pv_output = _standardize(pv_output)

# Parameters the EM step is allowed to re-estimate.
em_parameters = [
    'transition_matrices', 'observation_matrices',
    'transition_covariance', 'observation_covariance',
    'observation_offsets', 'initial_state_mean',
    'initial_state_covariance'
]

kf = KalmanFilter(
    transition_matrices=transition_matrix,
    observation_matrices=observation_matrix,
    transition_covariance=transition_covariance,
    observation_covariance=observation_covariance,
    transition_offsets=transition_offsets,
    observation_offsets=observation_offset,
    initial_state_mean=initial_state_mean,
    initial_state_covariance=initial_state_covariance,
    em_vars=em_parameters
)

# One observation per row, single column.
observations = solar_radiation.reshape(-1, 1)

# Run EM one iteration at a time so the log-likelihood can be recorded after each step.
loglikelihoods = np.zeros(10)
for i in range(loglikelihoods.shape[0]):
    kf = kf.em(X=observations, n_iter=1)
    loglikelihoods[i] = kf.loglikelihood(observations)

# First element of the filter result is kept as the state estimates.
filtered_state_estimates = kf.filter(observations)[0]

# Observations against the filtered estimates.
pl.figure(figsize=(16, 6))
lines_obs = pl.plot(observations, linestyle='-', color='b', label='Solar Radiation (observations)')
lines_filt = pl.plot(filtered_state_estimates, linestyle='--', color='g', label='Filtered PV Output (state estimate)')
pl.xlabel('Time')
pl.ylabel('Normalized Value')
pl.legend()
pl.show()

# Log-likelihood recorded after each EM iteration.
pl.figure()
pl.plot(loglikelihoods)
pl.xlabel('EM iteration number')
pl.ylabel('Log likelihood')
pl.show()


In [None]:
import numpy as np
import pylab as pl
from pykalman import KalmanFilter



n_timesteps = 100

# Standardise both signals (zero mean, unit standard deviation).
# NOTE(review): solar_radiation, pv_output, x and z are not defined in this cell;
# they must already exist in the kernel. x is used as the state sequence and z as
# the observation sequence below — presumably one sample per row; confirm.
solar_radiation = (solar_radiation - np.mean(solar_radiation)) / np.std(solar_radiation)
pv_output = (pv_output - np.mean(pv_output)) / np.std(pv_output)

# Number of samples used to normalise the covariance estimates. Taken from the
# data instead of the hard-coded n_timesteps, so W and Q stay correctly scaled
# when x does not have exactly 100 rows.
M = len(x)

# Closed-form least-squares estimates of the model parameters:
#   A: regression of x[t+1] on x[t]         W: residual covariance of that regression
#   H: regression of z[t] on x[t]           Q: residual covariance of that regression
A = np.dot(x[1:].T, x[:-1]) @ np.linalg.inv(np.dot(x[:-1].T, x[:-1]))
W = (np.dot(x[1:].T, x[1:]) - np.dot(A, np.dot(x[:-1].T, x[1:]))) / (M - 1)
H = np.dot(z.T, x) @ np.linalg.inv(np.dot(x.T, x))
Q = (np.dot(z.T, z) - np.dot(H, np.dot(x.T, z))) / M

# Force every parameter into a 1x1 matrix (one state, one observation).
A = A.reshape(1, 1)
W = W.reshape(1, 1)
H = H.reshape(1, 1)
Q = Q.reshape(1, 1)


kf = KalmanFilter(
    transition_matrices=A,
    observation_matrices=H,
    transition_covariance=W,
    observation_covariance=Q,
    initial_state_mean=x[0],
    initial_state_covariance=np.eye(1)
)

# First element of the filter result is kept as the state estimates.
filtered_state_estimates = kf.filter(z)[0]


pl.figure(figsize=(16, 6))
lines_true = pl.plot(x, linestyle='-', color='b', label='True PV Output (hidden state)')
lines_obs = pl.plot(z, linestyle=':', color='m', label='Solar Radiation (observation)')
lines_filt = pl.plot(filtered_state_estimates, linestyle='--', color='g', label='Filtered PV Output (state estimate)')
pl.legend()
pl.xlabel('Time')
pl.ylabel('Normalized Value')
pl.show()


In [None]:
import numpy as np
import pylab as pl
from pykalman import KalmanFilter



n_timesteps = 100

# Standardise both signals (zero mean, unit standard deviation).
# NOTE(review): solar_radiation, pv_output, x and z are not defined in this cell;
# they must already exist in the kernel. x is used as the state sequence and z as
# the observation sequence below — presumably one sample per row; confirm.
solar_radiation = (solar_radiation - np.mean(solar_radiation)) / np.std(solar_radiation)
pv_output = (pv_output - np.mean(pv_output)) / np.std(pv_output)

# Number of samples used to normalise the covariance estimates. Taken from the
# data instead of the hard-coded n_timesteps, so W and Q stay correctly scaled
# when x does not have exactly 100 rows.
M = len(x)

# Closed-form least-squares estimates of the model parameters:
#   A: regression of x[t+1] on x[t]         W: residual covariance of that regression
#   H: regression of z[t] on x[t]           Q: residual covariance of that regression
A = np.dot(x[1:].T, x[:-1]) @ np.linalg.inv(np.dot(x[:-1].T, x[:-1]))
W = (np.dot(x[1:].T, x[1:]) - np.dot(A, np.dot(x[:-1].T, x[1:]))) / (M - 1)
H = np.dot(z.T, x) @ np.linalg.inv(np.dot(x.T, x))
Q = (np.dot(z.T, z) - np.dot(H, np.dot(x.T, z))) / M

# Force every parameter into a 1x1 matrix (one state, one observation).
A = A.reshape(1, 1)
W = W.reshape(1, 1)
H = H.reshape(1, 1)
Q = Q.reshape(1, 1)


kf = KalmanFilter(
    transition_matrices=A,
    observation_matrices=H,
    transition_covariance=W,
    observation_covariance=Q,
    initial_state_mean=x[0],
    initial_state_covariance=np.eye(1)
)

# First element of the filter result is kept as the state estimates.
filtered_state_estimates = kf.filter(z)[0]


pl.figure(figsize=(16, 6))
lines_true = pl.plot(x, linestyle='-', color='b', label='True PV Output (hidden state)')
lines_obs = pl.plot(z, linestyle=':', color='m', label='Solar Radiation (observation)')
lines_filt = pl.plot(filtered_state_estimates, linestyle='--', color='g', label='Filtered PV Output (state estimate)')
pl.legend()
pl.xlabel('Time')
pl.ylabel('Normalized Value')
pl.show()
