In [13]:
# visualize.py (robust, startet ETL falls nötig)
import os
import json
import pandas as pd
import matplotlib.pyplot as plt

def timeseries_plot(df, out_dir='outputs'):
    """Plot ``df['target']`` against ``df['date']`` and save the chart as PNG.

    The image is written to ``<out_dir>/plots/timeseries_target.png``; the
    ``plots`` directory is created when it does not exist yet.

    Args:
        df: Object indexable by the keys ``'date'`` and ``'target'``
            (x and y values of the line plot).
        out_dir: Base output directory.
    """
    plots_dir = os.path.join(out_dir, 'plots')
    os.makedirs(plots_dir, exist_ok=True)
    fig = plt.figure(figsize=(10, 4))
    try:
        plt.plot(df['date'], df['target'], label='target')
        plt.title('Zielvariable über Zeit')
        plt.xlabel('Datum')
        plt.tight_layout()
        plt.savefig(os.path.join(plots_dir, 'timeseries_target.png'))
    finally:
        # Release the figure even when a column is missing or saving fails;
        # otherwise pyplot keeps the half-built figure alive.
        plt.close(fig)

def feature_importance_plot(fi, out_dir='outputs'):
    """Save a bar chart of feature importances, largest first.

    With a non-empty mapping the chart is written to
    ``<out_dir>/plots/feature_importance.png``. With an empty or ``None``
    ``fi`` no chart is drawn; instead the marker file
    ``<out_dir>/plots/feature_importance_missing.txt`` is written.

    Args:
        fi: Mapping of feature name to importance value, or a falsy value
            when no importances are available.
        out_dir: Base output directory; ``plots`` is created below it.
    """
    plots_dir = os.path.join(out_dir, 'plots')
    os.makedirs(plots_dir, exist_ok=True)
    if not fi:
        # Leave a marker so the absence of the chart is explained on disk.
        marker_path = os.path.join(plots_dir, 'feature_importance_missing.txt')
        with open(marker_path, 'w', encoding='utf-8') as f:
            f.write("Keine Feature-Importances gefunden.")
        return

    ranked = sorted(fi.items(), key=lambda x: x[1], reverse=True)
    names = [name for name, _ in ranked]
    values = [value for _, value in ranked]
    positions = range(len(names))

    fig = plt.figure(figsize=(8, 4))
    try:
        plt.bar(positions, values)
        plt.xticks(positions, names, rotation=45, ha='right')
        plt.title('Feature Importances (numeric features approx.)')
        plt.tight_layout()
        plt.savefig(os.path.join(plots_dir, 'feature_importance.png'))
    finally:
        # Close this figure even if drawing or saving raised.
        plt.close(fig)

def load_dataframe(out_dir='outputs'):
    """Load the observations table from previously written outputs.

    Sources are tried in this order:

    1. ``<out_dir>/sample_data.csv``
    2. the ``observations`` table in the SQLite file ``<out_dir>/data.db``

    In both cases the ``date`` column is parsed as datetime. A source that
    exists but cannot be read prints a warning and falls through to the
    next one.

    Args:
        out_dir: Directory that holds the CSV and/or the SQLite database.

    Returns:
        The loaded DataFrame, or ``None`` when neither source could be
        loaded (the caller decides what to do then).
    """
    csv_path = os.path.join(out_dir, 'sample_data.csv')
    db_path = os.path.join(out_dir, 'data.db')

    # 1) Try the CSV first.
    if os.path.exists(csv_path):
        try:
            return pd.read_csv(csv_path, parse_dates=['date'])
        except Exception as e:
            print(f"Warnung: CSV konnte nicht gelesen werden: {e}")

    # 2) Fall back to the SQLite database.
    if os.path.exists(db_path):
        try:
            import sqlite3
            conn = sqlite3.connect(db_path)
            try:
                return pd.read_sql_query(
                    "SELECT * FROM observations", conn, parse_dates=['date']
                )
            finally:
                # Always release the handle, including when the query fails
                # (e.g. the table does not exist).
                conn.close()
        except Exception as e:
            print(f"Warnung: DB konnte nicht gelesen werden: {e}")

    # 3) Nothing usable found.
    return None

if __name__ == '__main__':
    # Script entry point: load existing outputs (running the ETL first when
    # there are none), then render the time-series and feature-importance
    # plots. Each plotting step is best-effort: a failure is reported and
    # the script continues.
    out = 'outputs'
    df = load_dataframe(out_dir=out)
    if df is None:
        # No outputs available yet — run the ETL automatically.
        print("Keine Outputs gefunden — starte ETL (etl.run_etl)...")
        try:
            from etl import run_etl
        except Exception as e:
            # Keep the original exception attached as the cause.
            raise RuntimeError("etl.py nicht gefunden oder importiert: " + str(e)) from e
        df, db_path = run_etl(out_dir=out)
        print("ETL abgeschlossen.")

    # Create the visualisations.
    try:
        timeseries_plot(df, out_dir=out)
        print("Timeseries-Plot erstellt:", os.path.join(out, 'plots', 'timeseries_target.png'))
    except Exception as e:
        print("Fehler beim Timeseries-Plot:", e)

    # Read feature importances from metrics.json when the file exists.
    metrics_path = os.path.join(out, 'metrics.json')
    fi = {}
    if os.path.exists(metrics_path):
        try:
            # Read as UTF-8 explicitly; the platform default encoding
            # (e.g. cp1252 on Windows) would garble non-ASCII feature names.
            with open(metrics_path, 'r', encoding='utf-8') as f:
                metrics = json.load(f)
            # A JSON null is treated like a missing entry.
            fi = metrics.get('feature_importance') or {}
        except Exception as e:
            print("Warnung: metrics.json konnte nicht gelesen werden:", e)

    try:
        feature_importance_plot(fi, out_dir=out)
        if fi:
            print("Feature-Importance-Plot erstellt:", os.path.join(out, 'plots', 'feature_importance.png'))
    except Exception as e:
        print("Fehler beim Feature-Importance-Plot:", e)


Timeseries-Plot erstellt: outputs\plots\timeseries_target.png
