<a href="https://colab.research.google.com/github/rjanow/Masterarbeit/blob/main/Modeling_and_Prediction_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip -q install wandb --upgrade

In [2]:
import os
import datetime as dt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import layers
from sklearn.preprocessing import MinMaxScaler
import joblib
import wandb
from wandb.integration.keras import WandbCallback

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Reproduzierbarkeit
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [5]:
name_Messwerte   = 'Messdaten_CAMS_GHI.csv'
name_Vorhersage  = 'Vorhersagedaten_CAMS_VarIdx.csv'
folder_import    = '/content/drive/My Drive/Colab_Notebooks/Clean_Data/'

model_path       = '/content/drive/My Drive/Colab_Notebooks/LSTM_Model/full_model.keras'
weights_path     = '/content/drive/My Drive/Colab_Notebooks/LSTM_Model/model_weights.weights.h5'

testdata_path_X  = '/content/drive/My Drive/Colab_Notebooks/LSTM_Model/model_testdata_X.csv'
testdata_path_Y  = '/content/drive/My Drive/Colab_Notebooks/LSTM_Model/model_testdata_Y.csv'

text_file_path   = "/content/drive/MyDrive/Colab_Notebooks/LSTM_Model/model_results.txt"

# Optional: Scaler speichern
scaler_x_path    = '/content/drive/My Drive/Colab_Notebooks/LSTM_Model/scaler_X.pkl'
scaler_y_path    = '/content/drive/My Drive/Colab_Notebooks/LSTM_Model/scaler_y.pkl'

In [6]:
model_config = {
    "units_1": 64,
    "units_2": 32,
    "dropout_rate": 0.1,
    "final_activation": "linear"
}

In [7]:
training_config = {
    "loss": "mean_squared_error",
    "optimizer": "adam",
    "metrics": [
        "mse",
        "mae",
        "mape",
        keras.metrics.RootMeanSquaredError(name="rmse")
    ]
}

In [8]:
fit_config = {
    "epochs": 10,
    "batch_size": 32,
    "sequence_length": 16
}

In [9]:
path_mess   = os.path.join(folder_import, name_Messwerte)
path_vorher = os.path.join(folder_import, name_Vorhersage)

df_mess   = pd.read_csv(path_mess)
df_vorher = pd.read_csv(path_vorher)

In [10]:
df_mess.set_index('Datetime', inplace=True)
df_mess.index = pd.to_datetime(df_mess.index)

In [11]:
df_vorher.set_index('Datetime', inplace=True)
df_vorher.index = pd.to_datetime(df_vorher.index)

In [12]:

def merge_high_low_freq(
    df_high,                # z.B. df_mess (2-Min-Index)
    df_low,                 # z.B. df_vorher (1h-Index)
    direction="backward",   # "backward" | "forward" | "nearest"
    tolerance="31min",      # max. Abstand, sonst NaN
    suffix_low="_f"
):
    # Sicherstellen: DatetimeIndex, sortiert
    df_high = df_high.sort_index()
    df_low  = df_low.sort_index()

    # Index -> Spalte für merge_asof
    hi = df_high.rename_axis("ts").reset_index()
    lo = df_low.rename_axis("ts").reset_index()

    merged = pd.merge_asof(
        hi, lo, on="ts",
        direction=direction,
        tolerance=pd.Timedelta(tolerance),
        suffixes=("", suffix_low)
    )

    merged = merged.set_index("ts").sort_index()
    return merged

In [13]:
df = merge_high_low_freq(df_mess, df_vorher, direction="backward", tolerance="40min")

In [14]:
print(df)

                      Observation_period  Clear_sky_GHI  Clear_sky_BHI  \
ts                                                                       
2022-06-15 07:20:00  2022-06-15 07:20:00        566.244        463.071   
2022-06-15 07:22:00  2022-06-15 07:22:00        571.440        467.799   
2022-06-15 07:24:00  2022-06-15 07:24:00        576.618        472.512   
2022-06-15 07:26:00  2022-06-15 07:26:00        581.772        477.204   
2022-06-15 07:28:00  2022-06-15 07:28:00        586.905        481.881   
...                                  ...            ...            ...   
2023-08-19 05:48:00  2023-08-19 05:48:00        148.533         82.110   
2023-08-19 05:50:00  2023-08-19 05:50:00        153.717         85.974   
2023-08-19 05:52:00  2023-08-19 05:52:00        158.928         89.892   
2023-08-19 05:54:00  2023-08-19 05:54:00        164.172         93.858   
2023-08-19 05:56:00  2023-08-19 05:56:00        169.440         97.872   

                         GHI       BH

In [15]:
candidate_X = [c for c in ['ghi','SZA','time_sin','time_cos','date_sin','date_cos','Temp']
               if c in df.columns]
columns_X = candidate_X  # <- hier bei Bedarf erweitern/ändern
columns_y = ["UVI"] if "UVI" in df.columns else ["UV"]  # wähle 'UVI' oder ersatzweise 'UV'

print("Features (X):", columns_X)
print("Target (y):", columns_y)

Features (X): ['ghi', 'SZA', 'time_sin', 'time_cos', 'date_sin', 'date_cos', 'Temp']
Target (y): ['UVI']


In [16]:
total_length = len(df)
train_size   = round(total_length * 0.80)
val_size     = round(total_length * 0.10)
test_size    = total_length - train_size - val_size  # restliche 10%

train_df = df.iloc[:train_size].copy()
val_df   = df.iloc[train_size : train_size + val_size].copy()
test_df  = df.iloc[train_size + val_size :].copy()

print(f"Train: {train_df.shape}, Val: {val_df.shape}, Test: {test_df.shape}")

Train: (77386, 37), Val: (9673, 37), Test: (9673, 37)


In [17]:
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

train_df[columns_X] = scaler_X.fit_transform(train_df[columns_X])
train_df[columns_y] = scaler_y.fit_transform(train_df[columns_y])

val_df[columns_X] = scaler_X.transform(val_df[columns_X])
val_df[columns_y] = scaler_y.transform(val_df[columns_y])

test_df[columns_X] = scaler_X.transform(test_df[columns_X])
test_df[columns_y] = scaler_y.transform(test_df[columns_y])

In [18]:
joblib.dump(scaler_X, scaler_x_path)
joblib.dump(scaler_y, scaler_y_path)

['/content/drive/My Drive/Colab_Notebooks/LSTM_Model/scaler_y.pkl']

In [19]:
def make_sequences(df_in: pd.DataFrame, X_cols, y_cols, seq_len: int):
    X_seq, y_seq = [], []
    values_X = df_in[X_cols].values
    values_y = df_in[y_cols].values
    for i in range(len(df_in) - seq_len):
        X_seq.append(values_X[i:i+seq_len])
        # One-step-ahead: nächster Zeitpunkt als Ziel
        y_seq.append(values_y[i+seq_len])
    return np.array(X_seq, dtype=np.float32), np.array(y_seq, dtype=np.float32)

SEQ_LEN = fit_config["sequence_length"]

X_train, y_train = make_sequences(train_df, columns_X, columns_y, SEQ_LEN)
X_val,   y_val   = make_sequences(val_df,   columns_X, columns_y, SEQ_LEN)
X_test,  y_test  = make_sequences(test_df,  columns_X, columns_y, SEQ_LEN)

print("X_train:", X_train.shape, "y_train:", y_train.shape)
print("X_val:", X_val.shape, "y_val:", y_val.shape)
print("X_test:", X_test.shape, "y_test:", y_test.shape)

X_train: (77370, 16, 7) y_train: (77370, 1)
X_val: (9657, 16, 7) y_val: (9657, 1)
X_test: (9657, 16, 7) y_test: (9657, 1)


In [20]:
pd.DataFrame(X_test.reshape(X_test.shape[0], -1)).to_csv(testdata_path_X, index=False)
pd.DataFrame(y_test, columns=columns_y).to_csv(testdata_path_Y, index=False)

In [21]:
n_features = len(columns_X)
n_targets  = len(columns_y)

inputs = keras.Input(shape=(SEQ_LEN, n_features))
x = layers.LSTM(model_config["units_1"], return_sequences=True)(inputs)
x = layers.Dropout(model_config["dropout_rate"])(x)
x = layers.LSTM(model_config["units_2"])(x)
x = layers.Dropout(model_config["dropout_rate"])(x)
outputs = layers.Dense(n_targets, activation=model_config["final_activation"])(x)

model = keras.Model(inputs, outputs, name="lstm_uv_forecast")
model.compile(
    loss=training_config["loss"],
    optimizer=training_config["optimizer"],
    metrics=training_config["metrics"]
)
model.summary()

In [22]:
# API-KEY: e414a24f0e911b489706e194692432c001883cd3
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mrobin-janowitz[0m ([33mrobin-janowitz-hochschule-bonn-rhein-sieg[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [23]:
wandb_run = wandb.init(
    project="uv-forecasting",
    name=f"lstm-{dt.datetime.now().strftime('%Y%m%d-%H%M%S')}",
    config={
        "model_config": model_config,
        "training_config": {
            "loss": training_config["loss"],
            "optimizer": "adam",
            "metrics": ["mse", "mae", "mape", "rmse"]
        },
        "fit_config": fit_config,
        "n_features": n_features,
        "n_targets": n_targets,
        "sequence_length": SEQ_LEN,
        "split_sizes": {
            "train": train_size, "val": val_size, "test": test_size
        },
        "columns_X": columns_X,
        "columns_y": columns_y
    }
)

# =========================
# Callbacks (inkl. W&B)
# =========================
callbacks = [
    WandbCallback(
        save_model=False,     # wir speichern selbst unten
        log_weights=False,    # optional
        save_graph=False      # <-- Graph-Logging ausschalten (Fix für Keras 3)
    ),
    keras.callbacks.EarlyStopping(monitor="val_rmse", patience=5, mode="min", restore_best_weights=True),
    keras.callbacks.ModelCheckpoint(
        filepath=weights_path, save_weights_only=True,
        monitor="val_rmse", mode="min", save_best_only=True, verbose=1
    ),
    keras.callbacks.CSVLogger(text_file_path.replace(".txt", "_history.csv"))
]



In [24]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=fit_config["epochs"],
    batch_size=fit_config["batch_size"],
    callbacks=callbacks,
    verbose=1
)

Epoch 1/10
[1m2416/2418[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 0.0035 - mae: 0.0341 - mape: 11435.1025 - mse: 0.0035 - rmse: 0.0562
Epoch 1: val_rmse improved from inf to 0.04849, saving model to /content/drive/My Drive/Colab_Notebooks/LSTM_Model/model_weights.weights.h5
[1m2418/2418[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 22ms/step - loss: 0.0035 - mae: 0.0341 - mape: 11429.6934 - mse: 0.0035 - rmse: 0.0562 - val_loss: 0.0024 - val_mae: 0.0289 - val_mape: 31.3760 - val_mse: 0.0024 - val_rmse: 0.0485
Epoch 2/10
[1m2416/2418[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 24ms/step - loss: 0.0011 - mae: 0.0194 - mape: 5965.3584 - mse: 0.0011 - rmse: 0.0327
Epoch 2: val_rmse improved from 0.04849 to 0.04386, saving model to /content/drive/My Drive/Colab_Notebooks/LSTM_Model/model_weights.weights.h5
[1m2418/2418[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 25ms/step - loss: 0.0011 - mae: 0.0194 - mape: 5963.6230 - mse: 0.

In [25]:
eval_results = model.evaluate(X_test, y_test, verbose=0)
metrics_names = model.metrics_names
results_dict = {name: float(val) for name, val in zip(metrics_names, eval_results)}
print("Test-Ergebnisse:", results_dict)

Test-Ergebnisse: {'loss': 0.0027228479739278555, 'compile_metrics': 0.0027228479739278555}


In [26]:
wandb.log({f"test/{k}": v for k, v in results_dict.items()})

In [27]:
if os.path.exists(weights_path):
    model.load_weights(weights_path)

In [28]:
model.save(model_path)

In [29]:
with open(text_file_path, "w") as f:
    f.write("=== LSTM UV-Forecasting – Ergebnisse ===\n")
    f.write(f"Zeit: {dt.datetime.now()}\n\n")
    f.write("Konfigurationen:\n")
    f.write(f"model_config: {model_config}\n")
    f.write(f"training_config: {training_config}\n")
    f.write(f"fit_config: {fit_config}\n")
    f.write(f"Features (X): {columns_X}\nZiel (y): {columns_y}\n\n")
    f.write("Test-Performance:\n")
    for k, v in results_dict.items():
        f.write(f"  {k}: {v:.6f}\n")
    f.write("\nPfade:\n")
    f.write(f"  model_path:   {model_path}\n")
    f.write(f"  weights_path: {weights_path}\n")
    f.write(f"  test_X_csv:   {testdata_path_X}\n")
    f.write(f"  test_Y_csv:   {testdata_path_Y}\n")
    f.write(f"  scaler_X:     {scaler_x_path}\n")
    f.write(f"  scaler_y:     {scaler_y_path}\n")

wandb.finish()

0,1
epoch,▁▂▃▄▅▅▆▇█
loss,█▂▂▂▁▁▁▁▁
mae,█▃▂▂▂▁▁▁▁
mape,█▂▂▃▃▂▂▁▂
mse,█▂▂▂▁▁▁▁▁
rmse,█▃▂▂▂▁▁▁▁
test/compile_metrics,▁
test/loss,▁
val_loss,█▃▃▁▂▆▃▃▃
val_mae,█▄▃▁▂▅▃▃▃

0,1
best_epoch,3.0
best_val_loss,0.00174
epoch,8.0
loss,0.00077
mae,0.01641
mape,4562.20703
mse,0.00077
rmse,0.0277
test/compile_metrics,0.00272
test/loss,0.00272
