<a href="https://colab.research.google.com/github/rjanow/Masterarbeit/blob/main/Modeling_and_Prediction_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
!pip -q install wandb --upgrade

In [31]:
import os
import datetime as dt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import layers
from sklearn.preprocessing import MinMaxScaler
import joblib
import wandb
from wandb.integration.keras import WandbCallback

In [32]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [33]:
# Reproduzierbarkeit
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [34]:
name_Messwerte   = 'Messdaten_CAMS_GHI.csv'
name_Vorhersage  = 'Vorhersagedaten_CAMS_VarIdx.csv'
folder_import    = '/content/drive/My Drive/Colab_Notebooks/Clean_Data/'

model_path       = '/content/drive/My Drive/Colab_Notebooks/LSTM_Model/full_model.keras'
weights_path     = '/content/drive/My Drive/Colab_Notebooks/LSTM_Model/model_weights.weights.h5'

testdata_path_X  = '/content/drive/My Drive/Colab_Notebooks/LSTM_Model/model_testdata_X.csv'
testdata_path_Y  = '/content/drive/My Drive/Colab_Notebooks/LSTM_Model/model_testdata_Y.csv'

text_file_path   = "/content/drive/MyDrive/Colab_Notebooks/LSTM_Model/model_results.txt"

# Optional: Scaler speichern
scaler_x_path    = '/content/drive/My Drive/Colab_Notebooks/LSTM_Model/scaler_X.pkl'
scaler_y_path    = '/content/drive/My Drive/Colab_Notebooks/LSTM_Model/scaler_y.pkl'

In [35]:
model_config = {
    "units_1": 64,
    # "units_2": 64,
    "units_3" : 32,
    "dropout_rate": 0.1,
    "final_activation": "linear"
}

In [36]:
training_config = {
    "loss": "mean_squared_error",
    "optimizer": "adam",
    "metrics": [
        "mse",
        "mae",
        "mape",
        keras.metrics.RootMeanSquaredError(name="rmse")
    ]
}

In [37]:
fit_config = {
    "epochs": 10,
    "batch_size": 32,        # die ersten Modelle liefen mit 32
    "sequence_length": 64
}

In [38]:
path_mess   = os.path.join(folder_import, name_Messwerte)
path_vorher = os.path.join(folder_import, name_Vorhersage)

df_mess   = pd.read_csv(path_mess)
df_vorher = pd.read_csv(path_vorher)

In [39]:
df_mess.set_index('Datetime', inplace=True)
df_mess.index = pd.to_datetime(df_mess.index)

In [40]:
df_vorher.set_index('Datetime', inplace=True)
df_vorher.index = pd.to_datetime(df_vorher.index)

In [41]:

def merge_high_low_freq(
    df_high,                # z.B. df_mess (2-Min-Index)
    df_low,                 # z.B. df_vorher (1h-Index)
    direction="backward",   # "backward" | "forward" | "nearest"
    tolerance="31min",      # max. Abstand, sonst NaN
    suffix_low="_f"
):
    # Sicherstellen: DatetimeIndex, sortiert
    df_high = df_high.sort_index()
    df_low  = df_low.sort_index()

    # Index -> Spalte für merge_asof
    hi = df_high.rename_axis("ts").reset_index()
    lo = df_low.rename_axis("ts").reset_index()

    merged = pd.merge_asof(
        hi, lo, on="ts",
        direction=direction,
        tolerance=pd.Timedelta(tolerance),
        suffixes=("", suffix_low)
    )

    merged = merged.set_index("ts").sort_index()
    return merged

In [42]:
df = merge_high_low_freq(df_mess, df_vorher, direction="backward", tolerance="40min")

In [43]:
print(df.isnull().sum())

Observation_period        0
Clear_sky_GHI             0
Clear_sky_BHI             0
GHI                       0
BHI                       0
UVI                       0
UVA                       0
UVB                       0
erythem                   0
Datum                     0
Uhrzeit                   0
Messzeitpunkt             0
ghi                       0
Dif                       0
Glo_SPLite                0
Dir                       0
Temp                      0
DiffGreater2              0
SZA                       0
time_sin                  0
time_cos                  0
date_sin                  0
date_cos                  0
Date                      0
Hour                      0
aod469                34413
aod550                34413
gtco3                 34413
uvbed                 34413
uvbedcs               34413
hcc                   34413
lcc                   34413
tcc                   34413
index_sigma           65751
index_coimbra         65751
index_stein         

In [44]:
candidate_X = [c for c in ['GHI','BHI','Clear_sky_GHI','Clear_sky_BHI','SZA','time_sin','time_cos','date_sin','date_cos','Temp']
               if c in df.columns]
columns_X = candidate_X  # <- hier bei Bedarf erweitern/ändern
columns_y = ["UVI"] if "UVI" in df.columns else ["UV"]  # wähle 'UVI' oder ersatzweise 'UV'

print("Features (X):", columns_X)
print("Target (y):", columns_y)

Features (X): ['GHI', 'BHI', 'Clear_sky_GHI', 'Clear_sky_BHI', 'SZA', 'time_sin', 'time_cos', 'date_sin', 'date_cos', 'Temp']
Target (y): ['UVI']


In [45]:
total_length = len(df)
train_size   = round(total_length * 0.80)
val_size     = round(total_length * 0.10)
test_size    = total_length - train_size - val_size  # restliche 10%

train_df = df.iloc[:train_size].copy()
val_df   = df.iloc[train_size : train_size + val_size].copy()
test_df  = df.iloc[train_size + val_size :].copy()

print(f"Train: {train_df.shape}, Val: {val_df.shape}, Test: {test_df.shape}")

Train: (77386, 37), Val: (9673, 37), Test: (9673, 37)


In [46]:
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

train_df[columns_X] = scaler_X.fit_transform(train_df[columns_X])
train_df[columns_y] = scaler_y.fit_transform(train_df[columns_y])

val_df[columns_X] = scaler_X.transform(val_df[columns_X])
val_df[columns_y] = scaler_y.transform(val_df[columns_y])

test_df[columns_X] = scaler_X.transform(test_df[columns_X])
test_df[columns_y] = scaler_y.transform(test_df[columns_y])

In [47]:
joblib.dump(scaler_X, scaler_x_path)
joblib.dump(scaler_y, scaler_y_path)

['/content/drive/My Drive/Colab_Notebooks/LSTM_Model/scaler_y.pkl']

In [48]:
def make_sequences(df_in: pd.DataFrame, X_cols, y_cols, seq_len: int):
    X_seq, y_seq = [], []
    values_X = df_in[X_cols].values
    values_y = df_in[y_cols].values
    for i in range(len(df_in) - seq_len):
        X_seq.append(values_X[i:i+seq_len])
        # One-step-ahead: nächster Zeitpunkt als Ziel
        y_seq.append(values_y[i+seq_len])
    return np.array(X_seq, dtype=np.float32), np.array(y_seq, dtype=np.float32)

SEQ_LEN = fit_config["sequence_length"]

X_train, y_train = make_sequences(train_df, columns_X, columns_y, SEQ_LEN)
X_val,   y_val   = make_sequences(val_df,   columns_X, columns_y, SEQ_LEN)
X_test,  y_test  = make_sequences(test_df,  columns_X, columns_y, SEQ_LEN)

print("X_train:", X_train.shape, "y_train:", y_train.shape)
print("X_val:", X_val.shape, "y_val:", y_val.shape)
print("X_test:", X_test.shape, "y_test:", y_test.shape)

X_train: (77322, 64, 10) y_train: (77322, 1)
X_val: (9609, 64, 10) y_val: (9609, 1)
X_test: (9609, 64, 10) y_test: (9609, 1)


In [49]:
pd.DataFrame(X_test.reshape(X_test.shape[0], -1)).to_csv(testdata_path_X, index=False)
pd.DataFrame(y_test, columns=columns_y).to_csv(testdata_path_Y, index=False)

In [50]:
n_features = len(columns_X)
n_targets  = len(columns_y)

inputs = keras.Input(shape=(SEQ_LEN, n_features))

x = layers.LSTM(model_config["units_1"], return_sequences=True)(inputs)
x = layers.Dropout(model_config["dropout_rate"])(x)

# NEU: zusätzliche LSTM-Schicht (Sequenzen bleiben erhalten)
x = layers.LSTM(model_config.get("units_1b", model_config["units_1"]), return_sequences=True)(x)
x = layers.Dropout(model_config["dropout_rate"])(x)

# # Bestehende 2. LSTM muss Sequenzen zurückgeben, weil noch eine LSTM folgt
# x = layers.LSTM(model_config["units_2"], return_sequences=True)(x)
# x = layers.Dropout(model_config["dropout_rate"])(x)

# Letzte LSTM ohne return_sequences (default=False)
x = layers.LSTM(model_config["units_3"])(x)
x = layers.Dropout(model_config["dropout_rate"])(x)

outputs = layers.Dense(n_targets, activation=model_config["final_activation"])(x)

model = keras.Model(inputs, outputs, name="lstm_uv_forecast")
model.compile(
    loss=training_config["loss"],
    optimizer=training_config["optimizer"],
    metrics=training_config["metrics"]
)
model.summary()

In [51]:
# API-KEY: e414a24f0e911b489706e194692432c001883cd3
wandb.login(key= 'e414a24f0e911b489706e194692432c001883cd3')

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [52]:
wandb_run = wandb.init(
    project="uv-forecasting",
    name=f"lstm-{dt.datetime.now().strftime('%Y%m%d-%H%M%S')}",
    config={
        "model_config": model_config,
        "training_config": {
            "loss": training_config["loss"],
            "optimizer": "adam",
            "metrics": ["mse", "mae", "mape", "rmse"]
        },
        "fit_config": fit_config,
        "n_features": n_features,
        "n_targets": n_targets,
        "sequence_length": SEQ_LEN,
        "split_sizes": {
            "train": train_size, "val": val_size, "test": test_size
        },
        "columns_X": columns_X,
        "columns_y": columns_y
    }
)

# =========================
# Callbacks (inkl. W&B)
# =========================
callbacks = [
    WandbCallback(
        save_model=False,     # wir speichern selbst unten
        log_weights=False,    # optional
        save_graph=False      # <-- Graph-Logging ausschalten (Fix für Keras 3)
    ),
    keras.callbacks.EarlyStopping(monitor="val_rmse", patience=5, mode="min", restore_best_weights=True),
    keras.callbacks.ModelCheckpoint(
        filepath=weights_path, save_weights_only=True,
        monitor="val_rmse", mode="min", save_best_only=True, verbose=1
    ),
    keras.callbacks.CSVLogger(text_file_path.replace(".txt", "_history.csv"))
]

In [53]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=fit_config["epochs"],
    batch_size=fit_config["batch_size"],
    callbacks=callbacks,
    verbose=1
)

Epoch 1/10
[1m2417/2417[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step - loss: 0.0036 - mae: 0.0343 - mape: 13430.7773 - mse: 0.0036 - rmse: 0.0586
Epoch 1: val_rmse improved from inf to 0.06738, saving model to /content/drive/My Drive/Colab_Notebooks/LSTM_Model/model_weights.weights.h5
[1m2417/2417[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 106ms/step - loss: 0.0036 - mae: 0.0343 - mape: 13428.5703 - mse: 0.0036 - rmse: 0.0586 - val_loss: 0.0045 - val_mae: 0.0409 - val_mape: 40.3865 - val_mse: 0.0045 - val_rmse: 0.0674
Epoch 2/10
[1m2417/2417[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step - loss: 0.0021 - mae: 0.0258 - mape: 4650.7607 - mse: 0.0021 - rmse: 0.0461
Epoch 2: val_rmse improved from 0.06738 to 0.06463, saving model to /content/drive/My Drive/Colab_Notebooks/LSTM_Model/model_weights.weights.h5
[1m2417/2417[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 110ms/step - loss: 0.0021 - mae: 0.0258 - mape: 4650.8652 - m

In [54]:
eval_results = model.evaluate(X_test, y_test, verbose=0)
metrics_names = model.metrics_names
results_dict = {name: float(val) for name, val in zip(metrics_names, eval_results)}
print("Test-Ergebnisse:", results_dict)

Test-Ergebnisse: {'loss': 0.004982481710612774, 'compile_metrics': 0.004982481710612774}


In [55]:
wandb.log({f"test/{k}": v for k, v in results_dict.items()})

In [56]:
if os.path.exists(weights_path):
    model.load_weights(weights_path)

In [57]:
model.save(model_path)

In [58]:
with open(text_file_path, "w") as f:
    f.write("=== LSTM UV-Forecasting – Ergebnisse ===\n")
    f.write(f"Zeit: {dt.datetime.now()}\n\n")
    f.write("Konfigurationen:\n")
    f.write(f"model_config: {model_config}\n")
    f.write(f"training_config: {training_config}\n")
    f.write(f"fit_config: {fit_config}\n")
    f.write(f"Features (X): {columns_X}\nZiel (y): {columns_y}\n\n")
    f.write("Test-Performance:\n")
    for k, v in results_dict.items():
        f.write(f"  {k}: {v:.6f}\n")
    f.write("\nPfade:\n")
    f.write(f"  model_path:   {model_path}\n")
    f.write(f"  weights_path: {weights_path}\n")
    f.write(f"  test_X_csv:   {testdata_path_X}\n")
    f.write(f"  test_Y_csv:   {testdata_path_Y}\n")
    f.write(f"  scaler_X:     {scaler_x_path}\n")
    f.write(f"  scaler_y:     {scaler_y_path}\n")

wandb.finish()

0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▃▂▂▂▂▂▁▁
mae,█▄▃▃▂▂▂▂▁▁
mape,█▂▃▄▂▁▁▁▁▁
mse,█▄▃▂▂▂▂▂▁▁
rmse,█▄▃▃▂▂▂▂▁▁
test/compile_metrics,▁
test/loss,▁
val_loss,█▅▃▃▃▃▂▂▁▂
val_mae,█▆▃▄▃▂▂▁▁▁

0,1
best_epoch,8
best_val_loss,0.00368
epoch,9
loss,0.00169
mae,0.02297
mape,4365.48584
mse,0.00169
rmse,0.04114
test/compile_metrics,0.00498
test/loss,0.00498
