<a href="https://colab.research.google.com/github/rjanow/Masterarbeit/blob/main/Modeling_and_Prediction_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip -q install wandb --upgrade

In [None]:
import os
import datetime as dt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import MinMaxScaler
import joblib
import wandb
from wandb.keras import WandbCallback

In [None]:
# Reproducibility: one call seeds Python's `random`, NumPy and TensorFlow together
# (the previous version left Python's `random` module unseeded).
SEED = 42
keras.utils.set_random_seed(SEED)

In [None]:
# Input data: file names and the Drive folder they are read from.
name_Messwerte   = 'Messdaten_CAMS_GHI.csv'
name_Vorhersage  = 'Vorhersagedaten_CAMS_VarIdx.csv'

# Single Drive root so that every path points into the same mount spelling
# (the results file previously used "MyDrive" while all other paths used "My Drive").
folder_drive     = '/content/drive/My Drive/Colab_Notebooks'
folder_import    = f'{folder_drive}/Clean_Data/'
folder_model     = f'{folder_drive}/LSTM_Model'

# Model artefacts: full model and best-weights checkpoint.
model_path       = f'{folder_model}/full_model.keras'
weights_path     = f'{folder_model}/model_weights.weights.h5'

# Exported test sequences (inputs / targets).
testdata_path_X  = f'{folder_model}/model_testdata_X.csv'
testdata_path_Y  = f'{folder_model}/model_testdata_Y.csv'

# Plain-text summary of the run.
text_file_path   = f'{folder_model}/model_results.txt'

# Optional: persisted scalers
scaler_x_path    = f'{folder_model}/scaler_X.pkl'
scaler_y_path    = f'{folder_model}/scaler_y.pkl'

In [None]:
# Architecture hyper-parameters read when the network is built.
model_config = dict(
    units_1=64,
    units_2=32,
    dropout_rate=0.1,
    final_activation="linear",
)

In [None]:
# Compile settings: loss, optimizer and the metrics tracked during training.
# RMSE is given as a metric object so that it is reported under the name "rmse".
training_config = dict(
    loss="mean_squared_error",
    optimizer="adam",
    metrics=["mse", "mae", "mape", keras.metrics.RootMeanSquaredError(name="rmse")],
)

In [None]:
# Training-loop settings plus the input window length used to build sequences.
fit_config = dict(
    epochs=10,
    batch_size=32,
    sequence_length=16,
)

In [None]:
# Mount Google Drive at /content/drive; the data and model paths of this
# notebook all live below that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Resolve both CSV locations inside the import folder, then read them.
path_mess, path_vorher = (
    os.path.join(folder_import, file_name)
    for file_name in (name_Messwerte, name_Vorhersage)
)
df_mess, df_vorher = pd.read_csv(path_mess), pd.read_csv(path_vorher)

In [None]:
# Pick the first known timestamp column that is present in the measurement data.
time_cols = [c for c in ["Observation_period", "time", "timestamp", "Datetime"] if c in df_mess.columns]
assert len(time_cols) >= 1, "Keine Zeitspalte in Messdaten gefunden – bitte anpassen."
tcol = time_cols[0]

# Both frames are merged on this column afterwards, so a missing column in the
# forecast data is reported here with a clear message instead of surfacing as
# an unspecific KeyError inside the merge.
if tcol not in df_vorher.columns:
    raise KeyError(f"Zeitspalte '{tcol}' fehlt in den Vorhersagedaten – bitte anpassen.")

# Parse the join key to datetime in both frames so the keys compare equal.
df_mess[tcol] = pd.to_datetime(df_mess[tcol])
df_vorher[tcol] = pd.to_datetime(df_vorher[tcol])

In [None]:
# Inner-join measurements and forecasts on the time column (clashing forecast
# columns receive the "_f" suffix), then order by time with a fresh index.
df = (
    df_mess
    .merge(df_vorher, on=tcol, how="inner", suffixes=("", "_f"))
    .sort_values(tcol)
    .reset_index(drop=True)
)

In [None]:
# Keep only those candidate feature columns that actually exist in the merged frame.
candidate_X = [c for c in ["GHI", "ghi", "Clear_sky_GHI", "Dif", "Dir", "SZA", "BHI", "UVA", "UVB"]
               if c in df.columns]
columns_X = candidate_X  # <- extend/change here if needed
columns_y = ["UVI"] if "UVI" in df.columns else ["UV"]  # prefer 'UVI', fall back to 'UV'

# Fail fast: without features or target the scaling/training cells would only
# break later with a less helpful error.
if not columns_X:
    raise ValueError("Keine der erwarteten Feature-Spalten gefunden – bitte columns_X anpassen.")
missing_y = [c for c in columns_y if c not in df.columns]
if missing_y:
    raise KeyError(f"Zielspalte(n) {missing_y} nicht in den Daten gefunden – bitte columns_y anpassen.")

print("Features (X):", columns_X)
print("Target (y):", columns_y)

In [None]:
# Chronological 80/10/10 split: the frame is cut by position, not shuffled.
total_length = len(df)
train_size, val_size = round(total_length * 0.80), round(total_length * 0.10)
test_size = total_length - train_size - val_size  # whatever remains (~10%)

val_end = train_size + val_size
train_df, val_df, test_df = (
    df.iloc[start:stop].copy()
    for start, stop in ((None, train_size), (train_size, val_end), (val_end, None))
)

print(f"Train: {train_df.shape}, Val: {val_df.shape}, Test: {test_df.shape}")

In [None]:
# Fit both scalers on the training split only, then apply the same
# transformation to every split.
scaler_X = MinMaxScaler().fit(train_df[columns_X])
scaler_y = MinMaxScaler().fit(train_df[columns_y])

for split_df in (train_df, val_df, test_df):
    split_df[columns_X] = scaler_X.transform(split_df[columns_X])
    split_df[columns_y] = scaler_y.transform(split_df[columns_y])

In [None]:
# This is the first write into the model folder; create it if it does not
# exist yet so that the dump does not fail on a fresh Drive.
for scaler_path in (scaler_x_path, scaler_y_path):
    scaler_dir = os.path.dirname(scaler_path)
    if scaler_dir:
        os.makedirs(scaler_dir, exist_ok=True)

# Persist the fitted scalers so predictions can be mapped back to original units.
joblib.dump(scaler_X, scaler_x_path)
joblib.dump(scaler_y, scaler_y_path)

In [None]:
def make_sequences(df_in: pd.DataFrame, X_cols, y_cols, seq_len: int):
    """Build sliding input windows and one-step-ahead targets from a frame.

    Window ``i`` holds rows ``i .. i + seq_len - 1`` of ``X_cols``; its target
    is row ``i + seq_len`` of ``y_cols``. Rows are consumed in the order in
    which they appear in ``df_in``.

    Args:
        df_in: Frame containing the feature and target columns.
        X_cols: Column selection used as model input.
        y_cols: Column selection used as target.
        seq_len: Number of consecutive rows per input window (must be >= 1).

    Returns:
        Tuple ``(X, y)`` of float32 arrays with ``n = max(len(df_in) - seq_len, 0)``
        samples. For list-like column selections ``X`` has shape
        ``(n, seq_len, len(X_cols))`` and ``y`` has shape ``(n, len(y_cols))``;
        this also holds for ``n == 0``.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len muss >= 1 sein, erhalten: {seq_len}")

    values_X = np.asarray(df_in[X_cols].values, dtype=np.float32)
    values_y = np.asarray(df_in[y_cols].values, dtype=np.float32)

    n_samples = len(df_in) - seq_len
    if n_samples <= 0:
        # Too few rows for a single window: return empty arrays that keep the
        # expected rank instead of shape-(0,) arrays.
        return (
            np.empty((0, seq_len) + values_X.shape[1:], dtype=np.float32),
            np.empty((0,) + values_y.shape[1:], dtype=np.float32),
        )

    # sliding_window_view appends the window axis last; move it next to the
    # sample axis. The final window is dropped because it has no following target.
    windows = np.lib.stride_tricks.sliding_window_view(values_X, seq_len, axis=0)
    X_seq = np.array(np.moveaxis(windows[:n_samples], -1, 1), dtype=np.float32, order="C")
    # One-step-ahead: the row right after each window is its target.
    y_seq = values_y[seq_len:].copy()
    return X_seq, y_seq

# Window length shared by all splits.
SEQ_LEN = fit_config["sequence_length"]

# Turn every split into (windows, targets) with the same column selection.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    make_sequences(split_frame, columns_X, columns_y, SEQ_LEN)
    for split_frame in (train_df, val_df, test_df)
)

# Report the resulting array shapes per split.
for split_name, X_split, y_split in (
    ("train", X_train, y_train),
    ("val", X_val, y_val),
    ("test", X_test, y_test),
):
    print(f"X_{split_name}:", X_split.shape, f"y_{split_name}:", y_split.shape)

In [None]:
# Export the test split: each input window is flattened into one CSV row,
# the targets keep their column names.
n_test_samples = X_test.shape[0]
X_test_flat = X_test.reshape(n_test_samples, -1)
pd.DataFrame(X_test_flat).to_csv(testdata_path_X, index=False)

y_test_frame = pd.DataFrame(y_test, columns=columns_y)
y_test_frame.to_csv(testdata_path_Y, index=False)

In [None]:
# Model dimensions follow from the selected feature and target columns.
n_features, n_targets = len(columns_X), len(columns_y)
dropout_rate = model_config["dropout_rate"]

# Two stacked LSTM layers, each followed by dropout, then a dense output head.
inputs = keras.Input(shape=(SEQ_LEN, n_features))
x = layers.LSTM(units=model_config["units_1"], return_sequences=True)(inputs)
x = layers.Dropout(rate=dropout_rate)(x)
x = layers.LSTM(units=model_config["units_2"])(x)
x = layers.Dropout(rate=dropout_rate)(x)
outputs = layers.Dense(units=n_targets, activation=model_config["final_activation"])(x)

# Assemble and compile with the settings from training_config.
model = keras.Model(inputs=inputs, outputs=outputs, name="lstm_uv_forecast")
model.compile(
    optimizer=training_config["optimizer"],
    loss=training_config["loss"],
    metrics=training_config["metrics"],
)
model.summary()

In [None]:
# Log in to Weights & Biases before the run is initialised in the next cell.
wandb.login()

In [None]:
# Derive the logged optimizer/metric names from training_config instead of
# repeating them as literals, so the W&B config cannot drift from what the
# model was compiled with. Metric objects are logged by their `name`.
_optimizer = training_config["optimizer"]
logged_optimizer = _optimizer if isinstance(_optimizer, str) else type(_optimizer).__name__
logged_metrics = [m if isinstance(m, str) else m.name for m in training_config["metrics"]]

# Start a W&B run named with the current timestamp and record the full setup.
wandb_run = wandb.init(
    project="uv-forecasting",
    name=f"lstm-{dt.datetime.now().strftime('%Y%m%d-%H%M%S')}",
    config={
        "model_config": model_config,
        "training_config": {
            "loss": training_config["loss"],
            "optimizer": logged_optimizer,
            "metrics": logged_metrics
        },
        "fit_config": fit_config,
        "n_features": n_features,
        "n_targets": n_targets,
        "sequence_length": SEQ_LEN,
        "split_sizes": {
            "train": train_size, "val": val_size, "test": test_size
        },
        "columns_X": columns_X,
        "columns_y": columns_y
    }
)

# =========================
# Callbacks (incl. W&B)
# =========================
# Build the history CSV path from the results path by swapping the extension.
# splitext only touches the real extension, whereas str.replace(".txt", ...)
# would leave the path unchanged for any other extension and make CSVLogger
# write into the results file itself.
history_csv_path = os.path.splitext(text_file_path)[0] + "_history.csv"

callbacks = [
    # NOTE(review): WandbCallback is the older W&B Keras integration - confirm it
    # works with the installed wandb/Keras versions.
    WandbCallback(save_model=False),  # the model is saved manually further down
    # Stop when validation RMSE has not improved for 5 epochs and restore the best weights.
    keras.callbacks.EarlyStopping(monitor="val_rmse", patience=5, mode="min", restore_best_weights=True),
    # Keep only the weights of the epoch with the lowest validation RMSE.
    keras.callbacks.ModelCheckpoint(
        filepath=weights_path, save_weights_only=True,
        monitor="val_rmse", mode="min", save_best_only=True, verbose=1
    ),
    # Per-epoch metrics as CSV next to the results text file.
    keras.callbacks.CSVLogger(history_csv_path)
]

In [None]:
# Train on the training windows, validating on the validation split after each epoch.
fit_kwargs = dict(
    epochs=fit_config["epochs"],
    batch_size=fit_config["batch_size"],
    callbacks=callbacks,
    verbose=1,
)
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), **fit_kwargs)

In [None]:
# Evaluate the checkpointed best weights (if a checkpoint exists) so that the
# reported test metrics belong to the same weights that get saved afterwards.
if os.path.exists(weights_path):
    model.load_weights(weights_path)

# return_dict=True yields one entry per metric by name. Pairing
# model.metrics_names with the result list can silently drop metrics when
# metrics_names only exposes an aggregated "compile_metrics" entry.
results_dict = {
    name: float(val)
    for name, val in model.evaluate(X_test, y_test, verbose=0, return_dict=True).items()
}
# Kept for cells that still expect the list-style variables.
metrics_names = list(results_dict.keys())
eval_results = list(results_dict.values())
print("Test-Ergebnisse:", results_dict)

In [None]:
# Send the test metrics to W&B under a "test/" prefix.
test_metrics = {f"test/{metric_name}": metric_value for metric_name, metric_value in results_dict.items()}
wandb.log(test_metrics)

In [None]:
# Load the checkpointed weights back into the model when a checkpoint file exists.
checkpoint_available = os.path.exists(weights_path)
if checkpoint_available:
    model.load_weights(weights_path)

In [None]:
# Write the model in its current state to model_path.
model.save(model_path)

In [None]:
# Write a plain-text summary of the run. The text contains non-ASCII characters
# (the dash in the header), so the encoding is set explicitly instead of
# relying on the platform default.
with open(text_file_path, "w", encoding="utf-8") as f:
    # Header and timestamp
    f.write("=== LSTM UV-Forecasting – Ergebnisse ===\n")
    f.write(f"Zeit: {dt.datetime.now()}\n\n")
    # Configuration dictionaries and selected columns
    f.write("Konfigurationen:\n")
    f.write(f"model_config: {model_config}\n")
    f.write(f"training_config: {training_config}\n")
    f.write(f"fit_config: {fit_config}\n")
    f.write(f"Features (X): {columns_X}\nZiel (y): {columns_y}\n\n")
    # Test metrics, one per line with six decimals
    f.write("Test-Performance:\n")
    for k, v in results_dict.items():
        f.write(f"  {k}: {v:.6f}\n")
    # Locations of all artefacts produced by this notebook
    f.write("\nPfade:\n")
    f.write(f"  model_path:   {model_path}\n")
    f.write(f"  weights_path: {weights_path}\n")
    f.write(f"  test_X_csv:   {testdata_path_X}\n")
    f.write(f"  test_Y_csv:   {testdata_path_Y}\n")
    f.write(f"  scaler_X:     {scaler_x_path}\n")
    f.write(f"  scaler_y:     {scaler_y_path}\n")

# Close the W&B run once everything has been logged and written.
wandb.finish()