{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Traffic Prediction & Optimization - Analyse Notebook\n",
    "Experimentelle Analyse mit verschiedenen ML-Modellen für Verkehrsprognosen"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Imports & Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from datetime import datetime, timedelta\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# ML Libraries\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
    "from prophet import Prophet\n",
    "import mlflow\n",
    "import mlflow.sklearn\n",
    "\n",
    "# TensorFlow f√ºr LSTM\n",
    "import tensorflow as tf\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.layers import LSTM, Dense, Dropout\n",
    "\n",
    "plt.style.use('seaborn-v0_8-darkgrid')\n",
    "sns.set_palette(\"husl\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Datengenerierung mit realistischen Mustern"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_realistic_traffic_data(days=60, seed=None):\n",
    "    \"\"\"Generate synthetic hourly traffic data with a daily and a weekly cycle.\n",
    "\n",
    "    The load is 50 plus a 24-hour sinusoid (amplitude 30), a 168-hour\n",
    "    sinusoid (amplitude 10), a linear trend rising from 0 to 5 and Gaussian\n",
    "    noise (sigma 4); the sum is clipped to [5, 100].\n",
    "\n",
    "    Args:\n",
    "        days: Number of days to generate. The frame has days * 24 hourly\n",
    "            rows starting at 2024-01-01 00:00.\n",
    "        seed: Optional seed for the random generator. With the default\n",
    "            None every call draws different noise and weather.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame with the columns ds, y, Geschwindigkeit, Wetter,\n",
    "        Feiertag, Stunde and Wochentag.\n",
    "    \"\"\"\n",
    "    rng = np.random.default_rng(seed)\n",
    "    n_hours = days * 24\n",
    "    dates = pd.date_range(start=\"2024-01-01\", periods=n_hours, freq=\"h\")\n",
    "    hours = np.arange(n_hours)\n",
    "\n",
    "    # Daily cycle: one sinusoid with a 24-hour period (a single peak per day)\n",
    "    daily_pattern = 30 * np.sin(hours * 2 * np.pi / 24)\n",
    "\n",
    "    # Weekly cycle: one sinusoid with a 7-day period\n",
    "    weekly_pattern = 10 * np.sin(hours * 2 * np.pi / (24*7))\n",
    "\n",
    "    # Slow linear upward trend over the whole range\n",
    "    trend = np.linspace(0, 5, n_hours)\n",
    "\n",
    "    # Gaussian noise\n",
    "    noise = rng.normal(0, 4, n_hours)\n",
    "\n",
    "    traffic = 50 + daily_pattern + weekly_pattern + trend + noise\n",
    "    traffic = np.clip(traffic, 5, 100)\n",
    "\n",
    "    # Weather category, drawn uniformly and independently of the traffic\n",
    "    weather = rng.choice([\"Sonnig\", \"Bew√∂lkt\", \"Regen\"], n_hours)\n",
    "\n",
    "    # Simulated holidays: flag every hour of every Sunday. Stepping through\n",
    "    # the array in strides of 168 would only mark one hour per week, and\n",
    "    # that hour is a Monday midnight because the series starts on Monday\n",
    "    # 2024-01-01.\n",
    "    is_holiday = (dates.dayofweek == 6).astype(float)\n",
    "\n",
    "    df = pd.DataFrame({\n",
    "        \"ds\": dates,\n",
    "        \"y\": traffic,\n",
    "        # Speed falls with load, plus its own Gaussian noise (sigma 5)\n",
    "        \"Geschwindigkeit\": 120 - traffic*0.5 + rng.normal(0, 5, n_hours),\n",
    "        \"Wetter\": weather,\n",
    "        \"Feiertag\": is_holiday,\n",
    "        \"Stunde\": dates.hour,\n",
    "        \"Wochentag\": dates.dayofweek\n",
    "    })\n",
    "\n",
    "    return df\n",
    "\n",
    "# Build the 60-day dataset and report its size and time span\n",
    "df = generate_realistic_traffic_data(days=60)\n",
    "print(\n",
    "    f\"Datensatz erstellt: {len(df)} Datenpunkte\",\n",
    "    f\"\\nZeitraum: {df['ds'].min()} bis {df['ds'].max()}\",\n",
    "    sep=\"\\n\",\n",
    ")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Explorative Datenanalyse (EDA)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary statistics of the traffic load\n",
    "print(\"Verkehrsaufkommen-Statistiken:\")\n",
    "print(df['y'].describe())\n",
    "\n",
    "# 2x2 overview figure; name each panel once instead of indexing repeatedly\n",
    "fig, axes = plt.subplots(2, 2, figsize=(14, 8))\n",
    "ax_series, ax_hourly = axes[0, 0], axes[0, 1]\n",
    "ax_weekday, ax_hist = axes[1, 0], axes[1, 1]\n",
    "\n",
    "# Full time series\n",
    "ax_series.plot(df['ds'], df['y'], linewidth=1)\n",
    "ax_series.set(title='Verkehrsaufkommen - Zeitreihe', ylabel='Aufkommen (%)')\n",
    "\n",
    "# Mean load per hour of the day\n",
    "hourly = df.groupby('Stunde')['y'].mean()\n",
    "ax_hourly.bar(hourly.index, hourly.values, color='skyblue')\n",
    "ax_hourly.set(title='Durchschnittliches Verkehrsmuster pro Stunde', xlabel='Stunde des Tages')\n",
    "\n",
    "# Mean load per weekday (0 = Monday ... 6 = Sunday)\n",
    "daily = df.groupby('Wochentag')['y'].mean()\n",
    "ax_weekday.plot(daily.index, daily.values, marker='o', linewidth=2, markersize=8)\n",
    "ax_weekday.set_title('Verkehrsmuster nach Wochentag')\n",
    "ax_weekday.set_xticks(range(7))\n",
    "ax_weekday.set_xticklabels(['Mo', 'Di', 'Mi', 'Do', 'Fr', 'Sa', 'So'])\n",
    "\n",
    "# Distribution of the load values\n",
    "ax_hist.hist(df['y'], bins=30, color='coral', edgecolor='black')\n",
    "ax_hist.set(title='Verteilung des Verkehrsaufkommens', xlabel='Aufkommen (%)')\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Train-Test Split & Feature Engineering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chronological 80/20 split: the most recent 20 % of the rows form the test set\n",
    "split_idx = int(len(df) * 0.8)\n",
    "train_df = df.iloc[:split_idx].copy()\n",
    "test_df = df.iloc[split_idx:].copy()\n",
    "\n",
    "for label, part in ((\"Training-Set\", train_df), (\"Test-Set\", test_df)):\n",
    "    print(f\"{label}: {len(part)} Datenpunkte ({part['ds'].min()} bis {part['ds'].max()})\")\n",
    "\n",
    "# Feature Engineering\n",
    "def create_features(data):\n",
    "    \"\"\"Build the feature matrix for the tabular ML models.\n",
    "\n",
    "    Args:\n",
    "        data: DataFrame with the columns y, Stunde, Wochentag, Feiertag and\n",
    "            Wetter, ordered by time.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame on the same index as data with calendar features, lagged\n",
    "        targets (1, 6, 24 rows back), rolling means of the previous 6 and\n",
    "        24 targets and two weather indicator columns.\n",
    "\n",
    "    Note:\n",
    "        Rows at the start that lack enough history are back-filled from the\n",
    "        first available value, so they carry information from slightly\n",
    "        later rows of the same frame.\n",
    "    \"\"\"\n",
    "    X = pd.DataFrame(index=data.index)\n",
    "    X['hour'] = data['Stunde']\n",
    "    X['day_of_week'] = data['Wochentag']\n",
    "    X['is_holiday'] = data['Feiertag']\n",
    "\n",
    "    # Lag features: target value 1, 6 and 24 rows earlier\n",
    "    for lag in [1, 6, 24]:\n",
    "        X[f'lag_{lag}'] = data['y'].shift(lag)\n",
    "\n",
    "    # Rolling means over the PREVIOUS rows only. Shifting by one first keeps\n",
    "    # the current row's target out of its own feature; rolling directly on\n",
    "    # y would leak the value the model is supposed to predict.\n",
    "    past_y = data['y'].shift(1)\n",
    "    for window in [6, 24]:\n",
    "        X[f'rolling_mean_{window}'] = past_y.rolling(window).mean()\n",
    "\n",
    "    # Weather indicators (the remaining category is the implicit baseline)\n",
    "    X['is_rain'] = (data['Wetter'] == 'Regen').astype(int)\n",
    "    X['is_cloudy'] = (data['Wetter'] == 'Bew√∂lkt').astype(int)\n",
    "\n",
    "    # DataFrame.bfill() replaces the deprecated fillna(method='bfill')\n",
    "    return X.bfill()\n",
    "\n",
    "# Feature matrices and target vectors for both partitions\n",
    "X_train, X_test = (create_features(part) for part in (train_df, test_df))\n",
    "y_train, y_test = train_df['y'].values, test_df['y'].values\n",
    "\n",
    "n_features = X_train.shape[1]\n",
    "print(f\"\\nFeatures erstellt: {n_features} Features\")\n",
    "X_train.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Baseline-Modelle (Lineare Regression, Random Forest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# All runs below are recorded under one MLflow experiment\n",
    "mlflow.set_experiment(\"Traffic Prediction\")\n",
    "\n",
    "# Metrics and test-set predictions, keyed by model name\n",
    "results = {}\n",
    "\n",
    "# 1. Linear regression\n",
    "with mlflow.start_run(run_name=\"Linear Regression\"):\n",
    "    lr_model = LinearRegression()\n",
    "    lr_model.fit(X_train, y_train)\n",
    "    y_pred_lr = lr_model.predict(X_test)\n",
    "\n",
    "    mae_lr = mean_absolute_error(y_test, y_pred_lr)\n",
    "    rmse_lr = np.sqrt(mean_squared_error(y_test, y_pred_lr))\n",
    "    r2_lr = r2_score(y_test, y_pred_lr)\n",
    "\n",
    "    for metric_name, metric_value in ((\"mae\", mae_lr), (\"rmse\", rmse_lr), (\"r2\", r2_lr)):\n",
    "        mlflow.log_metric(metric_name, metric_value)\n",
    "    mlflow.sklearn.log_model(lr_model, \"linear_regression_model\")\n",
    "\n",
    "    results['Linear Regression'] = {'MAE': mae_lr, 'RMSE': rmse_lr, 'R¬≤': r2_lr, 'predictions': y_pred_lr}\n",
    "    print(f\"Linear Regression - MAE: {mae_lr:.3f}, RMSE: {rmse_lr:.3f}, R¬≤: {r2_lr:.3f}\")\n",
    "\n",
    "# 2. Random forest\n",
    "with mlflow.start_run(run_name=\"Random Forest\"):\n",
    "    # Tunable settings are kept in one dict so the model and the MLflow log share them\n",
    "    rf_params = {\"n_estimators\": 100, \"max_depth\": 15}\n",
    "    rf_model = RandomForestRegressor(random_state=42, n_jobs=-1, **rf_params)\n",
    "    rf_model.fit(X_train, y_train)\n",
    "    y_pred_rf = rf_model.predict(X_test)\n",
    "\n",
    "    mae_rf = mean_absolute_error(y_test, y_pred_rf)\n",
    "    rmse_rf = np.sqrt(mean_squared_error(y_test, y_pred_rf))\n",
    "    r2_rf = r2_score(y_test, y_pred_rf)\n",
    "\n",
    "    for param_name, param_value in rf_params.items():\n",
    "        mlflow.log_param(param_name, param_value)\n",
    "    for metric_name, metric_value in ((\"mae\", mae_rf), (\"rmse\", rmse_rf), (\"r2\", r2_rf)):\n",
    "        mlflow.log_metric(metric_name, metric_value)\n",
    "    mlflow.sklearn.log_model(rf_model, \"random_forest_model\")\n",
    "\n",
    "    results['Random Forest'] = {'MAE': mae_rf, 'RMSE': rmse_rf, 'R¬≤': r2_rf, 'predictions': y_pred_rf}\n",
    "    print(f\"Random Forest - MAE: {mae_rf:.3f}, RMSE: {rmse_rf:.3f}, R¬≤: {r2_rf:.3f}\")\n",
    "\n",
    "# Feature importances of the fitted forest, most important first\n",
    "feature_importance = pd.DataFrame(\n",
    "    {'Feature': X_train.columns, 'Importance': rf_model.feature_importances_}\n",
    ").sort_values('Importance', ascending=False)\n",
    "\n",
    "print(\"\\nTop 5 wichtige Features:\")\n",
    "print(feature_importance.head())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Time Series Modell - Prophet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prophet expects exactly two columns: ds (timestamp) and y (target)\n",
    "prophet_train = train_df[['ds', 'y']].copy()\n",
    "\n",
    "with mlflow.start_run(run_name=\"Prophet\"):\n",
    "    # Prophet's constructor has no `monthly_seasonality` argument; passing\n",
    "    # it raises a TypeError before the model is built. Weekly seasonality\n",
    "    # is left at its default.\n",
    "    model_prophet = Prophet(yearly_seasonality=False,\n",
    "                           daily_seasonality=True,\n",
    "                           interval_width=0.95)\n",
    "    model_prophet.fit(prophet_train)\n",
    "\n",
    "    # Forecast exactly the test timestamps, so the predictions line up\n",
    "    # row by row with y_test without re-predicting the training range\n",
    "    future = test_df[['ds']].copy()\n",
    "    forecast = model_prophet.predict(future)\n",
    "    y_pred_prophet = forecast['yhat'].values\n",
    "\n",
    "    mae_prophet = mean_absolute_error(y_test, y_pred_prophet)\n",
    "    rmse_prophet = np.sqrt(mean_squared_error(y_test, y_pred_prophet))\n",
    "    r2_prophet = r2_score(y_test, y_pred_prophet)\n",
    "\n",
    "    mlflow.log_metric(\"mae\", mae_prophet)\n",
    "    mlflow.log_metric(\"rmse\", rmse_prophet)\n",
    "    mlflow.log_metric(\"r2\", r2_prophet)\n",
    "\n",
    "    results['Prophet'] = {'MAE': mae_prophet, 'RMSE': rmse_prophet, 'R¬≤': r2_prophet, 'predictions': y_pred_prophet}\n",
    "    print(f\"Prophet - MAE: {mae_prophet:.3f}, RMSE: {rmse_prophet:.3f}, R¬≤: {r2_prophet:.3f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Deep Learning - LSTM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# LSTM Vorbereitung - Sequenzen erstellen\n",
    "def create_sequences(data, seq_length=24):\n",
    "    \"\"\"Cut a series into overlapping windows and their next-step targets.\n",
    "\n",
    "    Window k holds data[k : k + seq_length]; its target is\n",
    "    data[k + seq_length]. A series with at most seq_length entries yields\n",
    "    two empty arrays.\n",
    "\n",
    "    Returns:\n",
    "        Tuple (windows, targets) as NumPy arrays.\n",
    "    \"\"\"\n",
    "    n_windows = len(data) - seq_length\n",
    "    windows = [data[start:start + seq_length] for start in range(n_windows)]\n",
    "    targets = [data[start + seq_length] for start in range(n_windows)]\n",
    "    return np.array(windows), np.array(targets)\n",
    "\n",
    "# Normalisation: fit the scaler on the training rows only, then apply it\n",
    "# to the whole series. Fitting on all rows would let the value range of\n",
    "# the test period leak into the training inputs.\n",
    "scaler = MinMaxScaler()\n",
    "scaler.fit(train_df['y'].values.reshape(-1, 1))\n",
    "traffic_scaled = scaler.transform(df['y'].values.reshape(-1, 1))\n",
    "\n",
    "# Train/test split for the LSTM, aligned with the tabular split: sequence i\n",
    "# predicts row i + 24, so the first test target is row split_idx and the\n",
    "# LSTM test targets cover the same rows as y_test.\n",
    "X_lstm, y_lstm = create_sequences(traffic_scaled, seq_length=24)\n",
    "split_lstm = split_idx - 24\n",
    "\n",
    "X_lstm_train = X_lstm[:split_lstm]\n",
    "y_lstm_train = y_lstm[:split_lstm]\n",
    "X_lstm_test = X_lstm[split_lstm:]\n",
    "y_lstm_test = y_lstm[split_lstm:]\n",
    "\n",
    "# Guard the alignment that the later comparison plots rely on\n",
    "assert len(y_lstm_test) == len(test_df), \"LSTM test targets must match the tabular test set\"\n",
    "\n",
    "print(f\"LSTM Training-Set: {X_lstm_train.shape}\")\n",
    "print(f\"LSTM Test-Set: {X_lstm_test.shape}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train the LSTM; the training settings are named once so the fit call\n",
    "# and the MLflow log use the same values\n",
    "lstm_epochs = 50\n",
    "lstm_batch_size = 32\n",
    "\n",
    "with mlflow.start_run(run_name=\"LSTM\"):\n",
    "    # Two stacked LSTM layers with dropout, followed by a small dense head\n",
    "    lstm_layers = [\n",
    "        LSTM(64, activation='relu', input_shape=(24, 1), return_sequences=True),\n",
    "        Dropout(0.2),\n",
    "        LSTM(32, activation='relu', return_sequences=False),\n",
    "        Dropout(0.2),\n",
    "        Dense(16, activation='relu'),\n",
    "        Dense(1)\n",
    "    ]\n",
    "    model_lstm = Sequential(lstm_layers)\n",
    "    model_lstm.compile(optimizer='adam', loss='mse', metrics=['mae'])\n",
    "\n",
    "    history = model_lstm.fit(\n",
    "        X_lstm_train, y_lstm_train,\n",
    "        epochs=lstm_epochs,\n",
    "        batch_size=lstm_batch_size,\n",
    "        validation_split=0.2,\n",
    "        verbose=0\n",
    "    )\n",
    "\n",
    "    # Predict on the scaled test windows and map both predictions and\n",
    "    # targets back to the original units\n",
    "    y_pred_lstm_scaled = model_lstm.predict(X_lstm_test, verbose=0)\n",
    "    y_pred_lstm = scaler.inverse_transform(y_pred_lstm_scaled).flatten()\n",
    "    y_test_lstm = scaler.inverse_transform(y_lstm_test.reshape(-1, 1)).flatten()\n",
    "\n",
    "    mae_lstm = mean_absolute_error(y_test_lstm, y_pred_lstm)\n",
    "    rmse_lstm = np.sqrt(mean_squared_error(y_test_lstm, y_pred_lstm))\n",
    "    r2_lstm = r2_score(y_test_lstm, y_pred_lstm)\n",
    "\n",
    "    mlflow.log_param(\"epochs\", lstm_epochs)\n",
    "    mlflow.log_param(\"batch_size\", lstm_batch_size)\n",
    "    for metric_name, metric_value in ((\"mae\", mae_lstm), (\"rmse\", rmse_lstm), (\"r2\", r2_lstm)):\n",
    "        mlflow.log_metric(metric_name, metric_value)\n",
    "\n",
    "    results['LSTM'] = {'MAE': mae_lstm, 'RMSE': rmse_lstm, 'R¬≤': r2_lstm, 'predictions': y_pred_lstm[:len(y_test)]}\n",
    "    print(f\"LSTM - MAE: {mae_lstm:.3f}, RMSE: {rmse_lstm:.3f}, R¬≤: {r2_lstm:.3f}\")\n",
    "\n",
    "# Training history: loss on the training and the validation part per epoch\n",
    "history_fig, history_ax = plt.subplots(figsize=(12, 4))\n",
    "for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):\n",
    "    history_ax.plot(history.history[history_key], label=curve_label)\n",
    "history_ax.set(title='LSTM Training History', xlabel='Epoch', ylabel='Loss')\n",
    "history_ax.legend()\n",
    "history_ax.grid(True)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Modell-Vergleich & Visualisierung"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the scalar metrics per model. The prediction arrays are left out\n",
    "# before the frame is built: with them included every column becomes\n",
    "# object-dtype, round() then leaves the values untouched and idxmin() is\n",
    "# not reliable on such columns. The explicit float cast keeps the metrics\n",
    "# numeric.\n",
    "results_df = pd.DataFrame({\n",
    "    name: {metric: value for metric, value in entry.items() if metric != 'predictions'}\n",
    "    for name, entry in results.items()\n",
    "}).T.astype(float)\n",
    "print(\"\\n=== Modell-Vergleich ===\")\n",
    "print(results_df.round(3))\n",
    "\n",
    "# One bar chart per metric: (column, title, y-label, colour)\n",
    "metric_panels = [\n",
    "    ('MAE', 'Mean Absolute Error (MAE)', 'MAE', 'skyblue'),\n",
    "    ('RMSE', 'Root Mean Squared Error (RMSE)', 'RMSE', 'lightcoral'),\n",
    "    ('R¬≤', 'R¬≤ Score', 'R¬≤', 'lightgreen'),\n",
    "]\n",
    "\n",
    "fig, axes = plt.subplots(1, 3, figsize=(15, 4))\n",
    "for ax, (column, title, ylabel, color) in zip(axes, metric_panels):\n",
    "    ax.bar(results_df.index, results_df[column], color=color)\n",
    "    ax.set_title(title)\n",
    "    ax.set_ylabel(ylabel)\n",
    "    ax.tick_params(axis='x', rotation=45)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# The model with the lowest MAE is reported as the best one\n",
    "best_model = results_df['MAE'].idxmin()\n",
    "print(f\"\\n‚úÖ Bestes Modell: {best_model} (MAE: {results_df.loc[best_model, 'MAE']:.3f})\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 9. Vorhersagen visualisieren"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compare forecasts with the actual values over the first 7 test days\n",
    "test_window = 7 * 24  # 7 days of hourly values\n",
    "hours_axis = range(test_window)\n",
    "\n",
    "fig, axes = plt.subplots(2, 2, figsize=(16, 10))\n",
    "\n",
    "# One panel per model, in the order the models were added to `results`\n",
    "for ax, (model_name, model_result) in zip(axes.flat, results.items()):\n",
    "    ax.plot(hours_axis, y_test[:test_window], label='Tats√§chlich', linewidth=2, marker='o', markersize=3)\n",
    "    ax.plot(hours_axis, model_result['predictions'][:test_window],\n",
    "            label='Vorhersage', linewidth=2, linestyle='--', marker='s', markersize=3)\n",
    "\n",
    "    ax.set_title(f\"{model_name} - MAE: {model_result['MAE']:.2f}\")\n",
    "    ax.set_xlabel('Stunde')\n",
    "    ax.set_ylabel('Verkehrsaufkommen (%)')\n",
    "    ax.legend()\n",
    "    ax.grid(True, alpha=0.3)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 10. Hyperparameter-Tuning (Grid Search für Random Forest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import GridSearchCV, TimeSeriesSplit\n",
    "\n",
    "# Search space for the random forest\n",
    "param_grid = {\n",
    "    'n_estimators': [50, 100, 200],\n",
    "    'max_depth': [10, 15, 20],\n",
    "    'min_samples_split': [5, 10]\n",
    "}\n",
    "\n",
    "with mlflow.start_run(run_name=\"Random Forest - GridSearchCV\"):\n",
    "    rf_grid = GridSearchCV(\n",
    "        # Parallelism is applied across grid candidates only; n_jobs=-1 on\n",
    "        # both the forest and the search would oversubscribe the cores\n",
    "        RandomForestRegressor(random_state=42, n_jobs=1),\n",
    "        param_grid,\n",
    "        # The rows are time-ordered: each validation fold must lie after\n",
    "        # its training rows, otherwise the forest is scored on data that\n",
    "        # precedes what it was trained on\n",
    "        cv=TimeSeriesSplit(n_splits=5),\n",
    "        scoring='neg_mean_absolute_error',\n",
    "        n_jobs=-1\n",
    "    )\n",
    "\n",
    "    rf_grid.fit(X_train, y_train)\n",
    "\n",
    "    print(f\"Beste Parameter: {rf_grid.best_params_}\")\n",
    "\n",
    "    # GridSearchCV refits the best candidate on the full training set\n",
    "    y_pred_grid = rf_grid.predict(X_test)\n",
    "    mae_grid = mean_absolute_error(y_test, y_pred_grid)\n",
    "    rmse_grid = np.sqrt(mean_squared_error(y_test, y_pred_grid))\n",
    "\n",
    "    mlflow.log_params(rf_grid.best_params_)\n",
    "    mlflow.log_metric(\"mae\", mae_grid)\n",
    "    mlflow.log_metric(\"rmse\", rmse_grid)\n",
    "\n",
    "    print(f\"\\nOptimiertes Random Forest - MAE: {mae_grid:.3f}, RMSE: {rmse_grid:.3f}\")\n",
    "    print(f\"Verbesserung gegen√ºber Baseline: {((mae_rf - mae_grid) / mae_rf * 100):.1f}%\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 11. Zusammenfassung & Empfehlungen"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Relative MAE change of the tuned forest against the baseline forest,\n",
    "# taken from the metrics computed above instead of a hard-coded figure\n",
    "rf_gain_pct = (mae_rf - mae_grid) / mae_rf * 100\n",
    "\n",
    "# The frame is sized from the title so the box always closes correctly\n",
    "banner_title = \" TRAFFIC PREDICTION & OPTIMIZATION - ANALYSE-ZUSAMMENFASSUNG \"\n",
    "banner_rule = \"═\" * len(banner_title)\n",
    "\n",
    "print(\"\"\"\\n╔{rule}╗\n",
    "║{title}║\n",
    "╚{rule}╝\n",
    "\n",
    "📊 DATASET-ÜBERSICHT:\n",
    "  • Gesamte Datenpunkte: {n_rows}\n",
    "  • Zeitraum: {start} bis {end}\n",
    "  • Zeitauflösung: Stündlich\n",
    "  • Features: Tages-/Wochenmuster, Wetter, Feiertage\n",
    "\n",
    "🤖 MODELL-PERFORMANCE:\n",
    "{metrics}\n",
    "\n",
    "✅ EMPFEHLUNGEN:\n",
    "  1. Random Forest zeigt beste Balance zwischen Genauigkeit und Geschwindigkeit\n",
    "  2. Prophet eignet sich für mittelfristige Prognosen (1-7 Tage)\n",
    "  3. LSTM erfasst komplexe Muster, benötigt aber mehr Training\n",
    "  4. Feature Engineering (Lag, Rolling Average) ist entscheidend\n",
    "  5. Hyperparameter-Tuning via GridSearch: MAE-Verbesserung gegenüber Baseline {gain:.1f}%\n",
    "\n",
    "🚀 NÄCHSTE SCHRITTE:\n",
    "  • Echte Verkehrsdaten (z.B. von Open Data Portalen) integrieren\n",
    "  • Externe Features: Wetter-API, Feiertag-Kalender, Events\n",
    "  • Ensemble-Methoden (Voting, Stacking) kombinieren\n",
    "  • MLflow-Integration für kontinuierliches Monitoring\n",
    "  • Deployment via Docker + FastAPI REST-API\n",
    "\"\"\".format(rule=banner_rule, title=banner_title, n_rows=len(df),\n",
    "            start=df['ds'].min().strftime('%Y-%m-%d'),\n",
    "            end=df['ds'].max().strftime('%Y-%m-%d'),\n",
    "            metrics=results_df.to_string(), gain=rf_gain_pct))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 12. MLflow UI starten"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This notebook does not start the UI itself, so the command comes first\n",
    "# and the URL is given as the place it will be served afterwards. The\n",
    "# command binds to the loopback address because the MLflow UI has no login;\n",
    "# use --host 0.0.0.0 only when remote access (e.g. from a container) is intended.\n",
    "print(\"\\nZum Starten der MLflow UI im Terminal ausführen:\")\n",
    "print(\"mlflow ui --host 127.0.0.1 --port 5000\")\n",
    "print(\"\\n🔍 Danach ist die MLflow UI erreichbar unter: http://localhost:5000\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}