In [None]:
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import pylab as pl
from pykalman import KalmanFilter
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Read files

In [None]:
# Collect the names of all CSV files in the local 'data' directory.
# NOTE: os.listdir returns entries in arbitrary order, and the cells below
# pick files by position (files[0], files[2], files[5]).
files = [file for file in os.listdir('data') if file.endswith('.csv')]
print(files)

In [None]:
# Load the first listed CSV as the base feature frame.
df = pd.read_csv(f"data/{files[0]}")
df

In [None]:
# Load the sixth listed CSV; the variable name indicates it holds the 2022 data.
df_2022 = pd.read_csv(f"data/{files[5]}")
df_2022

In [None]:
# Stack the two feature frames row-wise.
# NOTE: this rebinds `df`, so re-running this cell alone appends df_2022 again.
df = pd.concat([df,df_2022])
df

# Select important features

In [None]:
# Average each selected feature over all rows that share the same "time" value.
feature_columns = ["ssr", "u100", "v100", "t2m", "tsr"]
df_new = (
    df.groupby("time")
    .agg({column: "mean" for column in feature_columns})
    .reset_index()
)

In [None]:
# Convert t2m to Celsius by removing the 273.15 offset.
df_new["celsius"] = df_new["t2m"].sub(273.15)

In [None]:
# Wind speed is the magnitude of the (u100, v100) component vector.
u_squared = df_new['u100'].pow(2)
v_squared = df_new['v100'].pow(2)
df_new["windspeed"] = np.sqrt(u_squared + v_squared)
df_new

In [None]:
# Parse the "time" column into datetimes, stored alongside it as "timestamps".
df_new = df_new.assign(timestamps=pd.to_datetime(df_new["time"]))

In [None]:
# Daily means of the selected features.
# The columns are selected with an explicit list: tuple-style selection
# (resampler["a", "b"]) is deprecated for groupby-like objects in pandas.
daily_feature_columns = ["ssr", "windspeed", "celsius", "t2m"]
df_new_daily = (
    df_new.resample('D', on="timestamps")[daily_feature_columns]
    .mean()
    .reset_index()
)
df_new_daily

## Look at ssr and celsius: a bell-shaped curve is clearly visible

In [None]:
# Overlay the daily-mean features on one figure, one line per column.
# The legend entry is the column name (fixes the former 'celcius' typo).
fig = go.Figure()
for column in ("ssr", "windspeed", "celsius"):
    fig.add_trace(go.Scatter(
        x=df_new_daily["timestamps"],
        y=df_new_daily[column],
        mode='lines',
        name=column,
    ))
fig.update_layout(
    title="Daily mean ssr, wind speed and temperature",
    xaxis_title="day",
)
fig.show()


In [None]:
# Load the realized-supply CSV (third listed file); it is semicolon-separated.
df_realized_supply = pd.read_csv(f"data/{files[2]}", sep=';')
df_realized_supply.columns

In [None]:
# Inspect the realized-supply table as loaded.
df_realized_supply

In [None]:
# Keep only the interval bounds and the PV / wind supply columns.
# The trailing space in "Wind Offshore [MW] " is part of the column name.
supply_columns = [
    'Date from',
    'Date to',
    "Photovoltaic [MW]",
    "Wind Offshore [MW] ",
    "Wind Onshore [MW]",
]
df_realized_supply = df_realized_supply[supply_columns]
df_realized_supply.columns

In [None]:
# Work on an independent copy so the columns added below do not touch
# the column-subset frame created above.
df_copy = df_realized_supply.copy()

In [None]:

# Convert the supply columns from strings that use '.' as thousands separator
# and ',' as decimal separator into floats.
# regex=False makes both replacements literal on every pandas version; with
# the regex-based default of older releases, '.' is a wildcard pattern.
numeric_source_columns = {
    "wind_off": "Wind Offshore [MW] ",
    "wind_on": "Wind Onshore [MW]",
    "photo": "Photovoltaic [MW]",
}
for target_column, source_column in numeric_source_columns.items():
    df_copy[target_column] = (
        df_copy[source_column]
        .str.replace('.', '', regex=False)
        .str.replace(',', '.', regex=False)
        .astype(float)
    )





In [None]:
# Inspect the frame with the parsed numeric columns added.
df_copy

In [None]:
# For comparison: the source frame, which still holds only the string columns.
df_realized_supply

In [None]:
# Total wind supply is offshore plus onshore.
df_copy["wind_on_offshore"] = df_copy["wind_off"].add(df_copy["wind_on"])
df_copy


In [None]:
# Remove the original string columns now that numeric versions exist.
raw_string_columns = ["Wind Offshore [MW] ", "Wind Onshore [MW]", "Photovoltaic [MW]"]
df_copy = df_copy.drop(columns=raw_string_columns)

In [None]:
# Inspect the cleaned frame after dropping the string columns.
df_copy

In [None]:
# Rebind the name to the cleaned frame. This is an alias, not a copy:
# both names refer to the same DataFrame object from here on.
df_realized_supply = df_copy

In [None]:
# Parse the interval start and derive string-formatted calendar columns from it.
supply_times = pd.to_datetime(df_realized_supply["Date from"])
df_realized_supply["timestamps"] = supply_times
df_realized_supply['month_year'] = supply_times.dt.strftime('%Y-%m')
df_realized_supply['day'] = supply_times.dt.strftime('%d')
df_realized_supply["fullhour"] = supply_times.dt.strftime('%H:%M')

In [None]:
# Aggregate realized supply to hourly means, then to daily means of the
# hourly values. Columns are selected with an explicit list: tuple-style
# selection (resampler["a", "b"]) is deprecated for groupby-like objects.
supply_value_columns = ["photo", "wind_on_offshore"]

df_hourly = (
    df_realized_supply.resample('H', on="timestamps")[supply_value_columns]
    .mean()
    .reset_index()
)

df_daily = (
    df_hourly.resample('D', on="timestamps")[supply_value_columns]
    .mean()
    .reset_index()
)



In [None]:
# Inspect the hourly mean supply.
df_hourly

In [None]:
# Hourly mean wind supply (onshore + offshore).
# sort_values returns a sorted copy, so the shared df_hourly frame is not
# mutated by this plotting cell; the unused empty go.Figure() was dropped.
df_agg = df_hourly.sort_values(by="timestamps")
x_axis = df_agg["timestamps"]

fig = px.line(x=x_axis, y=df_agg["wind_on_offshore"])
fig.update_layout(
    title="Average Wind power supply per hour",
    xaxis_title="hour",
    yaxis_title="Wind onshore and offshore power supply [MW]",
)
fig.show()

In [None]:
# Daily mean wind supply (onshore + offshore).
# sort_values returns a sorted copy, so the shared df_daily frame is not
# mutated by this plotting cell; the unused empty go.Figure() was dropped.
df_agg = df_daily.sort_values(by="timestamps")
x_axis = df_agg["timestamps"]

fig = px.line(x=x_axis, y=df_agg["wind_on_offshore"])
fig.update_layout(
    title="Average Wind power supply per day",
    xaxis_title="day",  # daily data: was mislabelled "hour"
    yaxis_title="Wind onshore and offshore power supply [MW]",
)
fig.show()

In [None]:
# Daily mean photovoltaic supply.
# sort_values returns a sorted copy, so the shared df_daily frame is not
# mutated by this plotting cell; the unused empty go.Figure() was dropped.
df_agg = df_daily.sort_values(by="timestamps")
x_axis = df_agg["timestamps"]

fig = px.line(x=x_axis, y=df_agg["photo"])
fig.update_layout(
    title="Average Photovoltaic power supply per day",
    xaxis_title="day",  # daily data: was mislabelled "hour"
    yaxis_title="Photovoltaic power supply [MW]",  # was the wind label
)
fig.show()

## Merge dataframe with features and energy output

In [None]:
# Join daily features with daily supply; only days present in both are kept.
df_merged_daily = df_new_daily.merge(df_daily, how="inner", on="timestamps")
df_merged_daily

In [None]:
# Join the per-timestamp features with hourly supply; only timestamps
# present in both frames are kept.
df_hourly_merged = df_new.merge(df_hourly, how="inner", on="timestamps")
df_hourly_merged

## Handle missing values

In [None]:
# Count missing values per column.
df_hourly_merged.isna().sum()

In [None]:

# Show every row that has at least one missing value.
df_hourly_merged[df_hourly_merged.isna().any(axis=1)]

In [None]:
# Replace every missing value in the merged hourly frame with 0.
df_hourly_merged = df_hourly_merged.fillna(0)

## Include shifted feature

In [None]:
# Lag feature: the PV value from 24 rows earlier. The first 24 rows have no
# predecessor and are filled with 0, along with any other remaining NaN.
pv_lagged = df_hourly_merged["photo"].shift(24)
df_hourly_merged = df_hourly_merged.assign(pv_lag=pv_lagged).fillna(0)

## PV Daily

In [None]:
# Daily PV model: a one-dimensional Kalman filter whose hidden state is the
# daily PV supply ('photo') and whose observation is 'ssr'. All four model
# parameters are estimated in closed form from the full daily dataset.
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from pykalman import KalmanFilter
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Alias (not a copy): the assignment below writes into df_merged_daily.
data = df_merged_daily

data['timestamps'] = pd.to_datetime(data['timestamps'])


timestamps_all = data['timestamps']
ssr_all = data['ssr'].values
photo_all = data['photo'].values


# Subset used only for plotting.
data_2022 = data[data['timestamps'].dt.year == 2022]


timestamps_2022 = data_2022['timestamps']
ssr_2022 = data_2022['ssr'].values  # not used further in this cell
photo_2022 = data_2022['photo'].values

# Prepare matrices for the entire dataset
x_all = photo_all.reshape(-1, 1)  # Hidden states (photo)
z_all = ssr_all.reshape(-1, 1)  # Observations (ssr)

M_all = len(photo_all)


# A: least-squares coefficient of x[k] regressed on x[k-1] (state transition).
A = np.dot(x_all[1:].T, x_all[:-1]
           ) @ np.linalg.inv(np.dot(x_all[:-1].T, x_all[:-1]))
# W: mean squared residual of that transition fit (process noise).
W = (np.dot(x_all[1:].T, x_all[1:]) -
     np.dot(A, np.dot(x_all[:-1].T, x_all[1:]))) / (M_all - 1)
# H: least-squares coefficient of z[k] regressed on x[k] (observation model).
H = np.dot(z_all.T, x_all) @ np.linalg.inv(np.dot(x_all.T, x_all))
# Q: mean squared residual of the observation fit (observation noise).
Q = (np.dot(z_all.T, z_all) - np.dot(H, np.dot(x_all.T, z_all))) / M_all


# Force explicit 1x1 matrix shapes for pykalman.
A = A.reshape(1, 1)
W = W.reshape(1, 1)
H = H.reshape(1, 1)
Q = Q.reshape(1, 1)


kf = KalmanFilter(
    transition_matrices=A,
    observation_matrices=H,
    transition_covariance=W,
    observation_covariance=Q,
    initial_state_mean=x_all[0],
    initial_state_covariance=np.eye(1)
)


# filter() returns (state means, state covariances); only the means are kept.
filtered_state_estimates_all = kf.filter(z_all)[0]


# Separate filter run on the 2022 observations only. It starts again from
# initial_state_mean = x_all[0], i.e. the first sample of the whole dataset.
filtered_state_estimates_2022 = kf.filter(
    z_all[timestamps_all.dt.year == 2022])[0]


# Metrics compare the filtered estimates with the true PV supply on the same
# data that was used to estimate the parameters (in-sample).
mae = mean_absolute_error(photo_all, filtered_state_estimates_all)
mse = mean_squared_error(photo_all, filtered_state_estimates_all)
rmse = np.sqrt(mse)
r_squared = r2_score(photo_all, filtered_state_estimates_all)


print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R-squared: {r_squared}")


fig = go.Figure()


# True daily PV supply for 2022.
fig.add_trace(go.Scatter(
    x=timestamps_2022, y=photo_2022.flatten(),
    mode='lines', name='True PV Output (PV)',
    line=dict(color='blue')
))


# Filtered state estimate from the 2022-only filter run.
fig.add_trace(go.Scatter(
    x=timestamps_2022, y=filtered_state_estimates_2022.flatten(),
    mode='lines+markers', name='Filtered PV Output (state estimate)',
    line=dict(color='green', dash='dash', width=2)
))


fig.update_layout(
    title='PV Output (Daily) for 2022',
    xaxis_title='Time',
    yaxis_title='PV Output',
    legend=dict(x=0.01, y=0.99),
    template='plotly_white',
    xaxis=dict(tickformat='%Y-%m-%d'),
    width=800,
    height=400
)

fig.show()

# Wind Daily

In [None]:
# Daily wind model: a one-dimensional Kalman filter whose hidden state is the
# daily on+offshore wind supply and whose observation is the mean wind speed.
# KalmanFilter and the sklearn metrics come from the notebook's import cell.

# Alias (not a copy): the assignment below writes into df_merged_daily.
data = df_merged_daily

data['timestamps'] = pd.to_datetime(data['timestamps'])

timestamps_all = data['timestamps']
windspeed = data['windspeed'].values
wind_energy = data['wind_on_offshore'].values

# Subset used only for plotting.
data_2022 = data[data['timestamps'].dt.year == 2022]

timestamps_2022 = data_2022['timestamps']
wind_energy_2022 = data_2022['wind_on_offshore'].values

x_all = wind_energy.reshape(-1, 1)  # Hidden states (wind power supply)
z_all = windspeed.reshape(-1, 1)  # Observations (wind speed)

M_all = len(wind_energy)

# Closed-form parameter estimates from the full dataset:
# A: least-squares coefficient of x[k] on x[k-1]; W: residual variance of it.
A = np.dot(x_all[1:].T, x_all[:-1]
           ) @ np.linalg.inv(np.dot(x_all[:-1].T, x_all[:-1]))
W = (np.dot(x_all[1:].T, x_all[1:]) -
     np.dot(A, np.dot(x_all[:-1].T, x_all[1:]))) / (M_all - 1)
# H: least-squares coefficient of z[k] on x[k]; Q: residual variance of it.
H = np.dot(z_all.T, x_all) @ np.linalg.inv(np.dot(x_all.T, x_all))
Q = (np.dot(z_all.T, z_all) - np.dot(H, np.dot(x_all.T, z_all))) / M_all

# Force explicit 1x1 matrix shapes for pykalman.
A = A.reshape(1, 1)
W = W.reshape(1, 1)
H = H.reshape(1, 1)
Q = Q.reshape(1, 1)

kf = KalmanFilter(
    transition_matrices=A,
    observation_matrices=H,
    transition_covariance=W,
    observation_covariance=Q,
    initial_state_mean=x_all[0],
    initial_state_covariance=np.eye(1)
)

# filter() returns (state means, state covariances); only the means are kept.
filtered_state_estimates_all = kf.filter(z_all)[0]

# Separate filter run on the 2022 observations only. It starts again from
# initial_state_mean = x_all[0], i.e. the first sample of the whole dataset.
filtered_state_estimates_2022 = kf.filter(
    z_all[timestamps_all.dt.year == 2022])[0]

# In-sample metrics: filtered estimates vs. true wind supply.
mae = mean_absolute_error(wind_energy, filtered_state_estimates_all)
mse = mean_squared_error(wind_energy, filtered_state_estimates_all)
rmse = np.sqrt(mse)
r_squared = r2_score(wind_energy, filtered_state_estimates_all)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R-squared: {r_squared}")

fig = go.Figure()

# True daily wind supply for 2022 (legend no longer says "(photo)").
fig.add_trace(go.Scatter(
    x=timestamps_2022, y=wind_energy_2022.flatten(),
    mode='lines', name='True Wind Output (On+Offshore)',
    line=dict(color='blue')
))

# Filtered state estimate from the 2022-only filter run.
fig.add_trace(go.Scatter(
    x=timestamps_2022, y=filtered_state_estimates_2022.flatten(),
    mode='lines+markers', name='Filtered Wind Output (state estimate)',
    line=dict(color='green', dash='dash')
))

fig.update_layout(
    title='Wind Output (Daily) for 2022',
    xaxis_title='Time',
    yaxis_title='On+Offshore in MW',
    legend=dict(x=0.01, y=0.99),
    template='plotly_white',
    xaxis=dict(tickformat='%Y-%m-%d'),
    width=800,
    height=400
)

fig.show()

# PV hourly 
## Without additional features the result is really bad; a constant line at the mean value would be better than this

In [None]:
# Hourly PV model: same one-dimensional Kalman filter as the daily PV cell
# (state = 'photo', observation = 'ssr'), fitted on the merged hourly frame.
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from pykalman import KalmanFilter
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


# Alias (not a copy): the assignment below writes into df_hourly_merged.
data = df_hourly_merged  # Replace with your actual DataFrame

data['timestamps'] = pd.to_datetime(data['timestamps'])


timestamps_all = data['timestamps']
ssr_all = data['ssr'].values
photo_all = data['photo'].values


data_2022 = data[data['timestamps'].dt.year == 2022]

# The *_2022 values are extracted here but not used further in this cell.
timestamps_2022 = data_2022['timestamps']
ssr_2022 = data_2022['ssr'].values
photo_2022 = data_2022['photo'].values

x_all = photo_all.reshape(-1, 1)  # hidden states (PV supply)
z_all = ssr_all.reshape(-1, 1)  # observations (ssr)

M_all = len(photo_all)


# Closed-form estimates: A/W from regressing x[k] on x[k-1],
# H/Q from regressing z[k] on x[k].
A = np.dot(x_all[1:].T, x_all[:-1]) @ np.linalg.inv(np.dot(x_all[:-1].T, x_all[:-1]))
W = (np.dot(x_all[1:].T, x_all[1:]) - np.dot(A, np.dot(x_all[:-1].T, x_all[1:]))) / (M_all - 1)
H = np.dot(z_all.T, x_all) @ np.linalg.inv(np.dot(x_all.T, x_all))
Q = (np.dot(z_all.T, z_all) - np.dot(H, np.dot(x_all.T, z_all))) / M_all

# Convert to correct shapes
A = A.reshape(1, 1)
W = W.reshape(1, 1)
H = H.reshape(1, 1)
Q = Q.reshape(1, 1)

kf = KalmanFilter(
    transition_matrices=A,
    observation_matrices=H,
    transition_covariance=W,
    observation_covariance=Q,
    initial_state_mean=x_all[0],
    initial_state_covariance=np.eye(1)
)

# filter() returns (state means, state covariances); only the means are kept.
filtered_state_estimates_all = kf.filter(z_all)[0]

# In-sample metrics: filtered estimates vs. true PV supply.
mae = mean_absolute_error(photo_all, filtered_state_estimates_all)
mse = mean_squared_error(photo_all, filtered_state_estimates_all)
rmse = np.sqrt(mse)
r_squared = r2_score(photo_all, filtered_state_estimates_all)

# Print performance metrics
print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R-squared: {r_squared}")

# Plot window. Despite the "_24h" suffix on the names below, the window
# spans 300 hours starting 2022-03-01 08:00.
start_time = pd.Timestamp('2022-03-01 08:00:00')
end_time = start_time + pd.Timedelta(hours=300)

data_24h = data[(data['timestamps'] >= start_time) & (data['timestamps'] < end_time)]


timestamps_24h = data_24h['timestamps']
ssr_24h = data_24h['ssr'].values  # not used further in this cell
photo_24h = data_24h['photo'].values

# NOTE(review): the filtered curve is plotted 8 hours EARLIER than the
# timestamps it was computed for, while the true curve is not shifted.
# Confirm this offset is intended before reading alignment off the plot.
adjustment = pd.Timedelta(hours=8)
timestamps_24h_adjusted = timestamps_24h - adjustment

# Separate filter run on the window only; it starts again from x_all[0].
filtered_state_estimates_24h = kf.filter(z_all[(timestamps_all >= start_time) & (timestamps_all < end_time)])[0]


fig = go.Figure()


# True PV supply in the window (unshifted timestamps).
fig.add_trace(go.Scatter(
    x=timestamps_24h, y=photo_24h.flatten(),
    mode='lines', name='True PV Output (photo)',
    line=dict(color='blue')
))


# Filtered estimate in the window (timestamps shifted by `adjustment`).
fig.add_trace(go.Scatter(
    x=timestamps_24h_adjusted, y=filtered_state_estimates_24h.flatten(),
    mode='lines', name='Filtered PV Output (state estimate)',
    line=dict(color='green', dash='dash')
))

fig.update_layout(
    title='PV Output hourly prediction Period',
    xaxis_title='Time',
    yaxis_title='PV Output',
    legend=dict(x=0.01, y=0.99),
    template='plotly_white',
    xaxis=dict(tickformat='%Y-%m-%d %H:%M')
)
fig.show()


## Include lag feature as additional input

In [None]:
# Hourly PV model with a three-dimensional observation (ssr, celsius, pv_lag)
# and a one-dimensional hidden state (PV supply).
# KalmanFilter and the sklearn metrics come from the notebook's import cell.

data = df_hourly_merged.copy()

data['timestamps'] = pd.to_datetime(data['timestamps'])

ssr_all = data['ssr'].values
photo_all = data['photo'].values
pv_lag = data['pv_lag'].values
celsius_all = data['celsius'].values

data_2022 = data[data['timestamps'].dt.year == 2022]

ssr_2022 = data_2022['ssr'].values
photo_2022 = data_2022['photo'].values
pv_lag_2022 = data_2022['pv_lag'].values
celsius_2022 = data_2022['celsius'].values

# The state and observation matrices are built AFTER the columns are
# extracted in this cell. Previously x_all was built from whatever
# `photo_all` an earlier cell had left in the kernel, and an intermediate
# one-column z_all was created only to be overwritten.
x_all = photo_all.reshape(-1, 1)  # hidden states (PV supply), shape (M, 1)
z_all = np.column_stack((ssr_all, celsius_all, pv_lag))  # shape (M, 3)

z_2022 = np.column_stack((ssr_2022, celsius_2022, pv_lag_2022))

M_all = len(photo_all)

# Closed-form estimates: A/W from regressing x[k] on x[k-1],
# H/Q from regressing the observation vector z[k] on x[k].
A = np.dot(x_all[1:].T, x_all[:-1]
           ) @ np.linalg.inv(np.dot(x_all[:-1].T, x_all[:-1]))
W = (np.dot(x_all[1:].T, x_all[1:]) -
     np.dot(A, np.dot(x_all[:-1].T, x_all[1:]))) / (M_all - 1)
H = np.dot(z_all.T, x_all) @ np.linalg.inv(np.dot(x_all.T, x_all))
Q = (np.dot(z_all.T, z_all) - np.dot(H, np.dot(x_all.T, z_all))) / M_all

# One state dimension, three observation dimensions.
A = A.reshape(1, 1)
W = W.reshape(1, 1)
H = H.reshape(3, 1)
Q = Q.reshape(3, 3)

kf = KalmanFilter(
    transition_matrices=A,
    observation_matrices=H,
    transition_covariance=W,
    observation_covariance=Q,
    initial_state_mean=x_all[0],
    initial_state_covariance=np.eye(1)
)

# filter() returns (state means, state covariances); only the means are kept.
filtered_state_estimates_all = kf.filter(z_all)[0]

# Separate filter run on the 2022 observations; it starts again from x_all[0].
filtered_state_estimates_2022 = kf.filter(z_2022)[0]

# In-sample metrics: filtered estimates vs. true PV supply.
mae = mean_absolute_error(photo_all, filtered_state_estimates_all)
mse = mean_squared_error(photo_all, filtered_state_estimates_all)
rmse = np.sqrt(mse)
r_squared = r2_score(photo_all, filtered_state_estimates_all)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R-squared: {r_squared}")

fig = go.Figure()

# Only positions 390..499 of the 2022 rows are plotted.
fig.add_trace(go.Scatter(
    x=data_2022['timestamps'][390:500], y=photo_2022.flatten()[390:500],
    mode='lines', name='True PV Output (PV)',
    line=dict(color='blue')
))

fig.add_trace(go.Scatter(
    x=data_2022['timestamps'][390:500], y=filtered_state_estimates_2022.flatten()[
        390:500],
    mode='lines+markers', name='Filtered PV Output (state estimate)',
    line=dict(color='green', dash='dash', width=2)
))

fig.update_layout(
    title='PV Output (Hourly) for 2022',
    xaxis_title='Time',
    yaxis_title='PV Output',
    legend=dict(x=0.01, y=0.99),
    template='plotly_white',
    xaxis=dict(tickformat='%Y-%m-%d'),

    height=400
)

fig.show()

# Wind hourly

In [None]:
# Hourly wind model: one-dimensional Kalman filter with the on+offshore wind
# supply as hidden state and the wind speed as observation.
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from pykalman import KalmanFilter
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Use the merged hourly frame (alias, not a copy)
data = df_hourly_merged  # Replace with your actual DataFrame

# Convert 'timestamps' to datetime if not already
data['timestamps'] = pd.to_datetime(data['timestamps'])

# Extract the relevant columns for the entire dataset
timestamps_all = data['timestamps']
windspeed = data['windspeed'].values
wind_energy = data['wind_on_offshore'].values

# Filter data for the year 2022
data_2022 = data[data['timestamps'].dt.year == 2022]

# Extract the relevant columns for 2022 (not used further in this cell)
timestamps_2022 = data_2022['timestamps']
wind_energy_2022 = data_2022['wind_on_offshore'].values

# Prepare matrices for the entire dataset
x_all = wind_energy.reshape(-1, 1)  # Hidden states (wind power supply)
z_all = windspeed.reshape(-1, 1)  # Observations (wind speed)

M_all = len(wind_energy)

# Estimate A, W, H, Q using the closed-form solutions for the entire dataset
A = np.dot(x_all[1:].T, x_all[:-1]) @ np.linalg.inv(np.dot(x_all[:-1].T, x_all[:-1]))
W = (np.dot(x_all[1:].T, x_all[1:]) - np.dot(A, np.dot(x_all[:-1].T, x_all[1:]))) / (M_all - 1)
H = np.dot(z_all.T, x_all) @ np.linalg.inv(np.dot(x_all.T, x_all))
Q = (np.dot(z_all.T, z_all) - np.dot(H, np.dot(x_all.T, z_all))) / M_all

# Convert to correct shapes
A = A.reshape(1, 1)
W = W.reshape(1, 1)
H = H.reshape(1, 1)
Q = Q.reshape(1, 1)

# Initialize Kalman Filter with estimated parameters
kf = KalmanFilter(
    transition_matrices=A,
    observation_matrices=H,
    transition_covariance=W,
    observation_covariance=Q,
    initial_state_mean=x_all[0],
    initial_state_covariance=np.eye(1)
)

# Filter the observations for the entire dataset to estimate the states
filtered_state_estimates_all = kf.filter(z_all)[0]

# Calculate performance metrics for the entire dataset (in-sample)
mae = mean_absolute_error(wind_energy, filtered_state_estimates_all)
mse = mean_squared_error(wind_energy, filtered_state_estimates_all)
rmse = np.sqrt(mse)
r_squared = r2_score(wind_energy, filtered_state_estimates_all)

# Print performance metrics
print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R-squared: {r_squared}")

# Plot window: 300 hours starting 2022-03-01 08:00 (the "_24h" suffix on the
# names below is historical; the window is not 24 hours long)
start_time = pd.Timestamp('2022-03-01 08:00:00')
end_time = start_time + pd.Timedelta(hours=300)

# Filter data for the plot window
data_24h = data[(data['timestamps'] >= start_time) & (data['timestamps'] < end_time)]

# Extract the relevant columns for the plot window
timestamps_24h = data_24h['timestamps']

wind_energy_24 = data_24h['wind_on_offshore'].values

# Zero offset: the filtered curve is plotted at its own timestamps
adjustment = pd.Timedelta(hours=0)
timestamps_24h_adjusted = timestamps_24h - adjustment

# Run the filter on the window's observations only (starts again from x_all[0])
filtered_state_estimates_24h = kf.filter(z_all[(timestamps_all >= start_time) & (timestamps_all < end_time)])[0]

# Plot results using Plotly for the plot window
fig = go.Figure()

# Add the true wind supply for the plot window
fig.add_trace(go.Scatter(
    x=timestamps_24h, y=wind_energy_24.flatten(),
    mode='lines', name='True Output ',
    line=dict(color='blue')
))

# Add the filtered state estimates for the plot window
fig.add_trace(go.Scatter(
    x=timestamps_24h_adjusted, y=filtered_state_estimates_24h.flatten(),
    mode='lines', name='Filtered Output (state estimate)',
    line=dict(color='green', dash='dash')
))

# Customize the layout
fig.update_layout(
    title='Wind Output hourly prediction Period',
    xaxis_title='Time',
    yaxis_title='On+Offshore in MW',
    legend=dict(x=0.01, y=0.99),
    template='plotly_white',
    xaxis=dict(tickformat='%Y-%m-%d')
)
fig.show()


------------
# Exploration 
Here, a lot of testing and exploration is done to see whether slightly better results can be obtained.
Nothing from this section made it into the paper, although some results were slightly better (for instance, when normalizing).

In [None]:
# Exploration: hourly wind model fitted on the 2022 rows only.
# KalmanFilter comes from the notebook's import cell.

# Make the input explicit instead of relying on whatever `data` the
# previous cell left behind (that was df_hourly_merged).
data = df_hourly_merged

data['timestamps'] = pd.to_datetime(data['timestamps'])

data_2022 = data[data['timestamps'].dt.year == 2022]

timestamps = data_2022['timestamps']
windspeed_2022 = data_2022['windspeed'].values
wind_energy_2022 = data_2022['wind_on_offshore'].values

# `x` and `z` were used below but never defined anywhere in the notebook
# (NameError on a fresh kernel). They follow the same layout as the other
# Kalman cells: column vectors of state and observation.
x = wind_energy_2022.reshape(-1, 1)  # hidden states (wind power supply)
z = windspeed_2022.reshape(-1, 1)  # observations (wind speed)

M = len(wind_energy_2022)

# Closed-form estimates: A/W from regressing x[k] on x[k-1],
# H/Q from regressing z[k] on x[k].
A = np.dot(x[1:].T, x[:-1]) @ np.linalg.inv(np.dot(x[:-1].T, x[:-1]))
W = (np.dot(x[1:].T, x[1:]) - np.dot(A, np.dot(x[:-1].T, x[1:]))) / (M - 1)
H = np.dot(z.T, x) @ np.linalg.inv(np.dot(x.T, x))
Q = (np.dot(z.T, z) - np.dot(H, np.dot(x.T, z))) / M

A = A.reshape(1, 1)
W = W.reshape(1, 1)
H = H.reshape(1, 1)
Q = Q.reshape(1, 1)

kf = KalmanFilter(
    transition_matrices=A,
    observation_matrices=H,
    transition_covariance=W,
    observation_covariance=Q,
    initial_state_mean=x[0],
    initial_state_covariance=np.eye(1)
)

# filter() returns (state means, state covariances); only the means are kept.
filtered_state_estimates = kf.filter(z)[0]

fig = go.Figure()

# Labels now describe the plotted wind data (they used to say "PV" and
# "Normalized Value" although the values are raw wind supply).
fig.add_trace(go.Scatter(
    x=timestamps, y=wind_energy_2022.flatten(),
    mode='lines', name='True Wind Output',
    line=dict(color='blue')
))

fig.add_trace(go.Scatter(
    x=timestamps, y=filtered_state_estimates.flatten(),
    mode='lines', name='Filtered Wind Output (state estimate)',
    line=dict(color='green', dash='dash')
))

fig.update_layout(
    title='Wind Output and Filtered State Estimates for 2022',
    xaxis_title='Time',
    yaxis_title='On+Offshore in MW',
    legend=dict(x=0.01, y=0.99),
    template='plotly_white',
    xaxis=dict(tickformat='%Y-%m-%d')
)

fig.show()

In [None]:
# Zero-fill the merged hourly frame again (the same fill is applied earlier
# in the notebook, after the merge and after adding pv_lag).
df_hourly_merged.fillna(0, inplace=True)

In [None]:
# Exploration: daily PV model on z-score normalized data, followed by a
# multi-step forecast that repeatedly applies the transition matrix.
# KalmanFilter, pylab (pl) and the sklearn metrics come from the import cell.

data = df_merged_daily

timestamps = data['timestamps']
ssr = data['ssr'].values
photo = data['photo'].values

# z-score normalization of both series.
ssr = (ssr - np.mean(ssr)) / np.std(ssr)
photo = (photo - np.mean(photo)) / np.std(photo)

# `x` and `z` were used below but never defined anywhere in the notebook
# (NameError on a fresh kernel). They follow the same layout as the other
# Kalman cells: column vectors of state and observation.
x = photo.reshape(-1, 1)  # hidden states (normalized PV supply)
z = ssr.reshape(-1, 1)  # observations (normalized ssr)

# `n_forecasts` was also undefined. 30 steps is a placeholder horizon
# chosen during review, not a value from the original analysis.
n_forecasts = 30

M = len(photo)

# Closed-form estimates: A/W from regressing x[k] on x[k-1],
# H/Q from regressing z[k] on x[k].
A = np.dot(x[1:].T, x[:-1]) @ np.linalg.inv(np.dot(x[:-1].T, x[:-1]))
W = (np.dot(x[1:].T, x[1:]) - np.dot(A, np.dot(x[:-1].T, x[1:]))) / (M - 1)
H = np.dot(z.T, x) @ np.linalg.inv(np.dot(x.T, x))
Q = (np.dot(z.T, z) - np.dot(H, np.dot(x.T, z))) / M

A = A.reshape(1, 1)
W = W.reshape(1, 1)
H = H.reshape(1, 1)
Q = Q.reshape(1, 1)

kf = KalmanFilter(
    transition_matrices=A,
    observation_matrices=H,
    transition_covariance=W,
    observation_covariance=Q,
    initial_state_mean=x[0],
    initial_state_covariance=np.eye(1)
)

# filter() returns (state means, state covariances); only the means are kept.
filtered_state_estimates = kf.filter(z)[0]

# In-sample metrics on the normalized scale.
mae = mean_absolute_error(x, filtered_state_estimates)
mse = mean_squared_error(x, filtered_state_estimates)
rmse = np.sqrt(mse)
r2 = r2_score(x, filtered_state_estimates)

print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R-squared: {r2:.4f}")

# Forecast without new observations: propagate the last filtered state
# through the transition model, x[k+1] = A x[k].
last_state = filtered_state_estimates[-1]
forecasts = []

for _ in range(n_forecasts):
    last_state = A @ last_state
    forecasts.append(last_state.item())

pl.figure(figsize=(16, 6))
lines_true = pl.plot(x, linestyle='-', color='b',
                     label='True PV Output (photo)')

lines_filt = pl.plot(filtered_state_estimates, linestyle='-',
                     color='g', label='Filtered PV Output (state estimate)')
pl.legend()
pl.xlabel('Time')
pl.ylabel('Normalized Value')

# Forecast points are placed on the sample-index axis right after the data.
future_timestamps = range(len(photo), len(photo) + n_forecasts)
pl.plot(future_timestamps, forecasts, linestyle='-',
        color='r', label='Forecasted PV Output')
pl.legend()
pl.show()

## Normalizing helps
### The filter method is also implemented by literally using the formula instead of the library; this can be seen in the for loop

In [None]:
# Exploration: daily wind model on z-score normalized data, followed by a
# multi-step forecast that repeatedly applies the transition matrix.
# KalmanFilter, pylab (pl) and the sklearn metrics come from the import cell.

# Make the input explicit instead of relying on whatever `data` the
# previous cell left behind (that was df_merged_daily).
data = df_merged_daily

timestamps = data['timestamps']
windspeed = data['windspeed'].values
wind_on_offshore = data['wind_on_offshore'].values

# z-score normalization of both series.
windspeed = (windspeed - np.mean(windspeed)) / np.std(windspeed)
wind_on_offshore = (wind_on_offshore -
                    np.mean(wind_on_offshore)) / np.std(wind_on_offshore)

# `x` and `z` were used below but never defined anywhere in the notebook
# (NameError on a fresh kernel). They follow the same layout as the other
# Kalman cells: column vectors of state and observation.
x = wind_on_offshore.reshape(-1, 1)  # hidden states (normalized wind supply)
z = windspeed.reshape(-1, 1)  # observations (normalized wind speed)

# `n_forecasts` was also undefined. 30 steps is a placeholder horizon
# chosen during review, not a value from the original analysis.
n_forecasts = 30

M = len(wind_on_offshore)

# Closed-form estimates: A/W from regressing x[k] on x[k-1],
# H/Q from regressing z[k] on x[k].
A = np.dot(x[1:].T, x[:-1]) @ np.linalg.inv(np.dot(x[:-1].T, x[:-1]))
W = (np.dot(x[1:].T, x[1:]) - np.dot(A, np.dot(x[:-1].T, x[1:]))) / (M - 1)
H = np.dot(z.T, x) @ np.linalg.inv(np.dot(x.T, x))
Q = (np.dot(z.T, z) - np.dot(H, np.dot(x.T, z))) / M

A = A.reshape(1, 1)
W = W.reshape(1, 1)
H = H.reshape(1, 1)
Q = Q.reshape(1, 1)

kf = KalmanFilter(
    transition_matrices=A,
    observation_matrices=H,
    transition_covariance=W,
    observation_covariance=Q,
    initial_state_mean=x[0],
    initial_state_covariance=np.eye(1)
)

# filter() returns (state means, state covariances); only the means are kept.
filtered_state_estimates = kf.filter(z)[0]

# In-sample metrics on the normalized scale.
mae = mean_absolute_error(x, filtered_state_estimates)
mse = mean_squared_error(x, filtered_state_estimates)
rmse = np.sqrt(mse)
r2 = r2_score(x, filtered_state_estimates)

print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R-squared: {r2:.4f}")

# Forecast without new observations: x[k+1] = A x[k].
last_state = filtered_state_estimates[-1]
forecasts = []

for _ in range(n_forecasts):
    last_state = A @ last_state
    forecasts.append(last_state.item())

pl.figure(figsize=(16, 6))
lines_true = pl.plot(x, linestyle='-', color='b',
                     label='True Wind On/Offshore (wind_on_offshore)')

lines_filt = pl.plot(filtered_state_estimates, linestyle='-',
                     color='g', label='Filtered Wind On/Offshore (state estimate)')
pl.legend()
pl.xlabel('Time')
pl.ylabel('Normalized Value')

# Forecast points are placed on the sample-index axis right after the data.
future_timestamps = range(len(wind_on_offshore),
                          len(wind_on_offshore) + n_forecasts)
pl.plot(future_timestamps, forecasts, linestyle='-',
        color='r', label='Forecasted Wind On/Offshore')
pl.legend()
pl.show()

In [None]:
# Exploration: the same daily wind model as the normalized variant, but on
# the raw (un-normalized) values, for comparison.
# KalmanFilter, pylab (pl) and the sklearn metrics come from the import cell.

# Make the input explicit instead of relying on whatever `data` an
# earlier cell left behind (that was df_merged_daily).
data = df_merged_daily

timestamps = data['timestamps']
windspeed = data['windspeed'].values
wind_on_offshore = data['wind_on_offshore'].values

# `x` and `z` were used below but never defined anywhere in the notebook
# (NameError on a fresh kernel). They follow the same layout as the other
# Kalman cells: column vectors of state and observation.
x = wind_on_offshore.reshape(-1, 1)  # hidden states (wind power supply)
z = windspeed.reshape(-1, 1)  # observations (wind speed)

# `n_forecasts` was also undefined. 30 steps is a placeholder horizon
# chosen during review, not a value from the original analysis.
n_forecasts = 30

M = len(wind_on_offshore)

# Closed-form estimates: A/W from regressing x[k] on x[k-1],
# H/Q from regressing z[k] on x[k].
A = np.dot(x[1:].T, x[:-1]) @ np.linalg.inv(np.dot(x[:-1].T, x[:-1]))
W = (np.dot(x[1:].T, x[1:]) - np.dot(A, np.dot(x[:-1].T, x[1:]))) / (M - 1)
H = np.dot(z.T, x) @ np.linalg.inv(np.dot(x.T, x))
Q = (np.dot(z.T, z) - np.dot(H, np.dot(x.T, z))) / M

A = A.reshape(1, 1)
W = W.reshape(1, 1)
H = H.reshape(1, 1)
Q = Q.reshape(1, 1)

kf = KalmanFilter(
    transition_matrices=A,
    observation_matrices=H,
    transition_covariance=W,
    observation_covariance=Q,
    initial_state_mean=x[0],
    initial_state_covariance=np.eye(1)
)

# filter() returns (state means, state covariances); only the means are kept.
filtered_state_estimates = kf.filter(z)[0]

# In-sample metrics on the raw scale.
mae = mean_absolute_error(x, filtered_state_estimates)
mse = mean_squared_error(x, filtered_state_estimates)
rmse = np.sqrt(mse)
r2 = r2_score(x, filtered_state_estimates)

print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R-squared: {r2:.4f}")

# Forecast without new observations: x[k+1] = A x[k].
last_state = filtered_state_estimates[-1]
forecasts = []

for _ in range(n_forecasts):
    last_state = A @ last_state
    forecasts.append(last_state.item())

pl.figure(figsize=(16, 6))
lines_true = pl.plot(x, linestyle='-', color='b',
                     label='True Wind On/Offshore (wind_on_offshore)')

lines_filt = pl.plot(filtered_state_estimates, linestyle='-',
                     color='g', label='Filtered Wind On/Offshore (state estimate)')
pl.legend()
pl.xlabel('Time')
# The values in this cell are not normalized, so the axis shows the raw unit.
pl.ylabel('On+Offshore in MW')

# Forecast points are placed on the sample-index axis right after the data.
future_timestamps = range(len(wind_on_offshore),
                          len(wind_on_offshore) + n_forecasts)
pl.plot(future_timestamps, forecasts, linestyle='-',
        color='r', label='Forecasted Wind On/Offshore')
pl.legend()
pl.show()

In [None]:
# Exploration: daily wind model with a 75 % / 25 % chronological split.
# Parameters are estimated on the training part; the test part is filtered
# one observation at a time with filter_update.
# KalmanFilter, pylab (pl) and the sklearn metrics come from the import cell.

# Make the input explicit instead of relying on whatever `data` an
# earlier cell left behind (that was df_merged_daily).
data = df_merged_daily

timestamps = data['timestamps']
windspeed = data['windspeed'].values
wind_on_offshore = data['wind_on_offshore'].values

# NOTE(review): mean and std are computed over the full series, so the
# normalization of the training part uses statistics of the test part too.
windspeed = (windspeed - np.mean(windspeed)) / np.std(windspeed)
wind_on_offshore = (wind_on_offshore -
                    np.mean(wind_on_offshore)) / np.std(wind_on_offshore)

train_size = int(len(windspeed) * 0.75)
train_windspeed = windspeed[:train_size]
train_wind_on_offshore = wind_on_offshore[:train_size]
test_windspeed = windspeed[train_size:]
test_wind_on_offshore = wind_on_offshore[train_size:]

# `x_train` and `z_train` were used below but never defined anywhere in the
# notebook (NameError on a fresh kernel). They follow the same layout as
# the other Kalman cells: column vectors of state and observation.
x_train = train_wind_on_offshore.reshape(-1, 1)  # hidden states (train)
z_train = train_windspeed.reshape(-1, 1)  # observations (train)

M = len(x_train)

# Closed-form estimates from the training part only.
A = np.dot(x_train[1:].T, x_train[:-1]
           ) @ np.linalg.inv(np.dot(x_train[:-1].T, x_train[:-1]))
W = (np.dot(x_train[1:].T, x_train[1:]) -
     np.dot(A, np.dot(x_train[:-1].T, x_train[1:]))) / (M - 1)
H = np.dot(z_train.T, x_train) @ np.linalg.inv(np.dot(x_train.T, x_train))
Q = (np.dot(z_train.T, z_train) - np.dot(H, np.dot(x_train.T, z_train))) / M

A = A.reshape(1, 1)
W = W.reshape(1, 1)
H = H.reshape(1, 1)
Q = Q.reshape(1, 1)

kf = KalmanFilter(
    transition_matrices=A,
    observation_matrices=H,
    transition_covariance=W,
    observation_covariance=Q,
    initial_state_mean=x_train[0],
    initial_state_covariance=np.eye(1)
)

# Despite its name, `filtered_state_means` collects the TRUE test values;
# `predicted_state_means` collects the filter's estimates.
filtered_state_means = []
predicted_state_means = []

# Start the test run from the last true training state.
current_state_mean = x_train[-1]
current_state_covariance = np.eye(1)

for t in range(len(test_windspeed)):

    # One filter step using the next test observation.
    current_state_mean, current_state_covariance = kf.filter_update(
        current_state_mean,
        current_state_covariance,
        observation=test_windspeed[t].reshape(1, 1)
    )
    predicted_state_means.append(current_state_mean[0, 0])

    filtered_state_means.append(test_wind_on_offshore[t])

# Out-of-sample metrics: true test values vs. filter estimates.
mae = mean_absolute_error(filtered_state_means, predicted_state_means)
mse = mean_squared_error(filtered_state_means, predicted_state_means)
rmse = np.sqrt(mse)
r2 = r2_score(filtered_state_means, predicted_state_means)

print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R-squared: {r2:.4f}")

pl.figure(figsize=(16, 6))
pl.plot(range(train_size, train_size + len(test_windspeed)), filtered_state_means,
        linestyle='-', color='b', label='True Wind On/Offshore (wind_on_offshore)')
pl.plot(range(train_size, train_size + len(test_windspeed)), predicted_state_means,
        linestyle='--', color='g', label='Predicted Wind On/Offshore')
pl.legend()
pl.xlabel('Time')
pl.ylabel('Normalized Value')
pl.show()