In [30]:
import sqlite3
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, RepeatVector, TimeDistributed
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder


In [31]:
conn = sqlite3.connect(r'C:\Users\opper\Master-Arbeit\data\preprocessed\pv_forecast.db')

query = f"""
SELECT "Date time", Location, shortwave_radiation_tilted, Temperature, "kW/kWp"
FROM pv_weather_data
ORDER BY Location, "Date time" ASC
"""
# Daten abrufen und in ein DataFrame laden
df = pd.read_sql(query, conn)
conn.close()

# Ausgabe des DataFrames zum Überprüfen
print(df)

                        Date time   Location  shortwave_radiation_tilted  \
0       2023-12-31 23:00:00+00:00  Bielefeld                         0.0   
1       2023-12-31 23:15:00+00:00  Bielefeld                         0.0   
2       2023-12-31 23:30:00+00:00  Bielefeld                         0.0   
3       2023-12-31 23:45:00+00:00  Bielefeld                         0.0   
4       2024-01-01 00:00:00+00:00  Bielefeld                         0.0   
...                           ...        ...                         ...   
390299  2022-03-23 12:45:00+00:00   Victoria                         0.0   
390300  2022-03-23 13:00:00+00:00   Victoria                         0.0   
390301  2022-03-23 13:00:00+00:00   Victoria                         0.0   
390302  2022-03-23 13:00:00+00:00   Victoria                         0.0   
390303  2022-03-23 13:00:00+00:00   Victoria                         0.0   

        Temperature  kW/kWp  
0               7.3     0.0  
1               7.5     0.0

In [32]:
def get_sliding_window(df, datetime_col, feature_cols, target_col, window_size=672, forecast_horizon=96, step_size=1):
    df[datetime_col] = pd.to_datetime(df[datetime_col])
    df = df.sort_values(by=datetime_col).reset_index(drop=True)

    feature_data = df[feature_cols].values
    target_data = df[target_col].values

    X, y = [], []

    for i in range(0, len(df) - window_size - forecast_horizon, step_size):
        X.append(feature_data[i : i + window_size])  # Input-Sequenz
        y.append(target_data[i + window_size : i + window_size + forecast_horizon])  # Zielwerte

    return np.array(X), np.array(y)





In [33]:
required_columns = ["Date time", "Location", "shortwave_radiation_tilted", "Temperature", "kW/kWp"]
missing_cols = [col for col in required_columns if col not in df.columns]
if missing_cols:
    raise ValueError(f"Fehlende Spalten in der Datenbank: {missing_cols}")

df = df.sort_values(by=["Location", "Date time"]).reset_index(drop=True)

In [None]:
feature_cols = ["shortwave_radiation_tilted", "Temperature", "kW/kWp"]
target_col = "kW/kWp"

X_list, y_list = [], []

for location in df["Location"].unique():
    print(f"Erstelle Sliding Windows für {location}...")

    df_location = df[df["Location"] == location].copy()

    X, y = get_sliding_window(df_location, datetime_col="Date time", feature_cols=feature_cols, target_col=target_col)

    X_list.append(X)
    y_list.append(y)

X_final = np.concatenate(X_list, axis=0)  # Alle Input-Sequenzen
y_final = np.concatenate(y_list, axis=0)  # Alle Zielwerte

np.save("X_tensor.npy", X_final)
np.save("y_tensor.npy", y_final)

print(f"Finaler Tensor X Shape: {X_final.shape}")  # (Samples, Timesteps=672, Features=3)
print(f"Finaler Tensor y Shape: {y_final.shape}")  # (Samples, Forecast_Horizon=96)


Erstelle Sliding Windows für Bielefeld...
Erstelle Sliding Windows für Gaithersburg...
Erstelle Sliding Windows für Hongkong...
Erstelle Sliding Windows für Istanbul...
Erstelle Sliding Windows für Victoria...
Finaler Tensor X Shape: (386464, 672, 2)
Finaler Tensor y Shape: (386464, 96)
