In [41]:
import plotly.express as px
import plotly.subplots as sp
import numpy as np

# Visualization code starts here

In [42]:

de_prices.index = de_prices.index.tz_convert('UTC')
hu_prices.index = hu_prices.index.tz_convert('UTC')

merged_prices = de_prices.join(hu_prices, how='outer')
merged_prices['PRice Delta'] = merged_prices['HU_Price'] - merged_prices['DE_Price']
merged_prices['Price_Delta_HU_DE'] = merged_prices['HU_Price'] - merged_prices['DE_Price']

print(" merge ok.")
print(merged_prices.head())

 merge ok.
                           DE_Price  HU_Price  PRice Delta  Price_Delta_HU_DE
2025-07-06 22:00:00+00:00    118.84    117.99        -0.85              -0.85
2025-07-06 23:00:00+00:00    107.42    105.64        -1.78              -1.78
2025-07-07 00:00:00+00:00    101.92    100.98        -0.94              -0.94
2025-07-07 01:00:00+00:00     99.12     97.67        -1.45              -1.45
2025-07-07 02:00:00+00:00    101.00     99.34        -1.66              -1.66


In [43]:
# --- 3. Time Series Plot ---
fig_line = px.line(
    merged_prices,
    x=merged_prices.index,
    y=['DE_Price', 'HU_Price'],
    title='DAM Spot Prices: DE_LU vs. HU',
    labels={'value': 'Price (EUR/MWh)', 'timestamp': 'Date'},
    template='plotly_white'
)

# Week 29, 2025: Jul 14 - Jul 21
# Week 30, 2025: Jul 21 - Jul 28

# Use datetime objects that match the data format
week_29_start = pd.Timestamp("2025-07-14", tz="UTC")
week_29_end = pd.Timestamp("2025-07-21", tz="UTC")
week_30_end = pd.Timestamp("2025-07-28", tz="UTC")

# Add vertical lines without annotations to avoid compatibility issues
fig_line.add_vline(x=week_29_start, line_dash="dash", line_color="green")
fig_line.add_vline(x=week_29_end, line_dash="dash", line_color="orange")
fig_line.add_vline(x=week_30_end, line_dash="dash", line_color="red")

# Add a text annotation separately
fig_line.add_annotation(x=week_29_start, y=merged_prices[['DE_Price', 'HU_Price']].max().max(),
                       text="Week 29 Start", showarrow=True, arrowhead=2)

fig_line.show()

In [44]:
fig_box = px.box(
    merged_prices,
    y=['DE_Price', 'HU_Price'],
    title="Sktala z brki box plot",
    labels={'value': 'Price (EUR/MWh)', 'variable': 'Country'},
    points='all', 
    template='plotly_dark'  

)

fig_box.show()

In [45]:
fig_delta = px.line(
    merged_prices,
    x=merged_prices.index,
    y='Price_Delta_HU_DE',
    title='Spread: HU - DE',
    labels={'Price_Delta_HU_DE': 'Price Delta (EUR/MWh)', 'timestamp': 'Date'},
    template='plotly_dark'
)

fig_delta.add_hline(y=0, line_dash="dash", line_color="red")

# Add week boundary markers
week_29_start = pd.Timestamp("2025-07-14", tz="UTC")
week_29_end = pd.Timestamp("2025-07-21", tz="UTC")
week_30_end = pd.Timestamp("2025-07-28", tz="UTC")

# Add vertical lines for week boundaries
fig_delta.add_vline(x=week_29_start, line_dash="dash", line_color="green")
fig_delta.add_vline(x=week_29_end, line_dash="dash", line_color="orange")
fig_delta.add_vline(x=week_30_end, line_dash="dash", line_color="red")

fig_delta.show()

In [46]:
from plotly.subplots import make_subplots

week_29_start = pd.Timestamp("2025-07-14", tz="UTC")
week_29_end = pd.Timestamp("2025-07-21", tz="UTC")
week_30_start = pd.Timestamp("2025-07-21", tz="UTC")
week_30_end = pd.Timestamp("2025-07-28", tz="UTC")

week_29_data = merged_prices.loc[week_29_start:week_29_end]
week_30_data = merged_prices.loc[week_30_start:week_30_end]

fig_weeks = make_subplots(
    rows=2, cols=1,
    subplot_titles=("week 29 (July 14-21, 2025)", "week 30 (July 21-28, 2025)"),
    vertical_spacing=0.1,
    shared_xaxes=False
)

# Wk29 plot (top)
fig_weeks.add_scatter(x=week_29_data.index, y=week_29_data['DE_Price'], 
                     name='DE Price', line=dict(color='blue'), row=1, col=1)
fig_weeks.add_scatter(x=week_29_data.index, y=week_29_data['HU_Price'], 
                     name='HU Price', line=dict(color='red'), row=1, col=1)

# Wk30 plot (bottom)
fig_weeks.add_scatter(x=week_30_data.index, y=week_30_data['DE_Price'], 
                     name='DE Price', line=dict(color='blue'), 
                     showlegend=False, row=2, col=1)
fig_weeks.add_scatter(x=week_30_data.index, y=week_30_data['HU_Price'], 
                     name='HU Price', line=dict(color='red'), 
                     showlegend=False, row=2, col=1)

fig_weeks.update_layout(
    title="DAM spot prices: week/week picture",
    height=600,
    template='plotly_white'
)

fig_weeks.update_yaxes(title_text="Price (EUR/MWh)", row=1, col=1)
fig_weeks.update_yaxes(title_text="Price (EUR/MWh)", row=2, col=1)

fig_weeks.update_xaxes(title_text="Date", row=2, col=1)

fig_weeks.show()

In [47]:

fig_spread = make_subplots(
    rows=2, cols=1,
    subplot_titles=("Week 29 Spread: HU - DE (July 14-21, 2025)", "Week 30 Spread: HU - DE (July 21-28, 2025)"),
    vertical_spacing=0.1,
    shared_xaxes=False
)

fig_spread.add_scatter(x=week_29_data.index, y=week_29_data['Price_Delta_HU_DE'], 
                      name='HU - DE Spread', line=dict(color='purple'), row=1, col=1)

fig_spread.add_scatter(x=week_30_data.index, y=week_30_data['Price_Delta_HU_DE'], 
                      name='HU - DE Spread', line=dict(color='purple'), 
                      showlegend=False, row=2, col=1)

fig_spread.add_hline(y=0, line_dash="dash", line_color="red", row=1, col=1)
fig_spread.add_hline(y=0, line_dash="dash", line_color="red", row=2, col=1)

fig_spread.update_layout(
    title="Spread Analysis: Week-by-Week Comparison",
    height=600,
    template='plotly_white'
)

fig_spread.update_yaxes(title_text="Price Delta (ERU/MWh)", row=1, col=1)
fig_spread.update_yaxes(title_text="Price Delta (EUR/MWh)", row=2, col=1)

fig_spread.update_xaxes(title_text="Date", row=2, col=1)

fig_spread.show()

In [48]:
from scipy import stats
import plotly.graph_objects as go
from plotly.subplots import make_subplots

spread_data = merged_prices['Price_Delta_HU_DE'].dropna()
week_29_spread = week_29_data['Price_Delta_HU_DE'].dropna()
week_30_spread = week_30_data['Price_Delta_HU_DE'].dropna()

def calculate_risk_metrics(data, name):
    metrics = {
        'mean': data.mean(),
        'std': data.std(),
        'skewness': stats.skew(data),
        'kurtosis': stats.kurtosis(data),
        'var_95': np.percentile(data, 5),  #
        'var_99': np.percentile(data, 1),  # 1% VaR (extreme left tail)
        'cvar_95': data[data <= np.percentile(data, 5)].mean(),  # Conditional VaR
        'max_loss': data.min(),
        'max_gain': data.max(),
    }
    print(f"\n{name} Risk Metrics:")
    print(f"  Mean: {metrics['mean']:.2f} EUR/MWh")
    print(f"  Std Dev: {metrics['std']:.2f} EUR/MWh")
    print(f"  Skewness: {metrics['skewness']:.3f} ({'left' if metrics['skewness'] < 0 else 'right'} tail)")
    print(f"  Kurtosis: {metrics['kurtosis']:.3f} ({'fat' if metrics['kurtosis'] > 0 else 'thin'} tails)")
    print(f"  95% VaR: {metrics['var_95']:.2f} EUR/MWh")
    print(f"  99% VaR: {metrics['var_99']:.2f} EUR/MWh")
    print(f"  95% CVaR: {metrics['cvar_95']:.2f} EUR/MWh")
    print(f"  Max Loss: {metrics['max_loss']:.2f} EUR/MWh")
    print(f"  Max Gain: {metrics['max_gain']:.2f} EUR/MWh")
    return metrics

print("=== FAT TAILS ===")
full_metrics = calculate_risk_metrics(spread_data, "Full Period")
w29_metrics = calculate_risk_metrics(week_29_spread, "Week 29")
w30_metrics = calculate_risk_metrics(week_30_spread, "Week 30")

# Create comprehensive risk visualization
fig_risk = make_subplots(
    rows=2, cols=2,
    subplot_titles=(
        "Probability Density Function (PDF)", 
        "Quantile-Quantile Plot vs Normal",
        "Tail Risk Heatmap", 
        "Extreme Value Timeline"
    ),
    specs=[[{"secondary_y": False}, {"secondary_y": False}],
           [{"secondary_y": False}, {"secondary_y": False}]]
)

# 1. PDF with normal overlay
x_range = np.linspace(spread_data.min(), spread_data.max(), 100)
kde = stats.gaussian_kde(spread_data)
pdf_values = kde(x_range)

normal_pdf = stats.norm.pdf(x_range, spread_data.mean(), spread_data.std())

fig_risk.add_trace(
    go.Scatter(x=x_range, y=pdf_values, name='Actual PDF', line=dict(color='purple', width=3)),
    row=1, col=1
)
fig_risk.add_trace(
    go.Scatter(x=x_range, y=normal_pdf, name='Normal PDF', line=dict(color='gray', dash='dash')),
    row=1, col=1
)

# Add VaR lines
fig_risk.add_vline(x=np.percentile(spread_data, 5), line_dash="dot", line_color="red", 
                   annotation_text="95% VaR", row=1, col=1)
fig_risk.add_vline(x=np.percentile(spread_data, 1), line_dash="solid", line_color="darkred",
                   annotation_text="99% VaR", row=1, col=1)

sorted_data = np.sort(spread_data)
theoretical_quantiles = stats.norm.ppf(np.linspace(0.01, 0.99, len(sorted_data)), 
                                      spread_data.mean(), spread_data.std())

fig_risk.add_trace(
    go.Scatter(x=theoretical_quantiles, y=sorted_data, mode='markers', 
               name='Q-Q Plot', marker=dict(color='blue', size=4)),
    row=1, col=2
)
fig_risk.add_trace(
    go.Scatter(x=[theoretical_quantiles.min(), theoretical_quantiles.max()],
               y=[theoretical_quantiles.min(), theoretical_quantiles.max()],
               mode='lines', name='Perfect Normal', line=dict(color='red', dash='dash')),
    row=1, col=2
)

# 3. Tail Risk Heatmap (percentiles by week)
percentiles = [1, 5, 10, 25, 50, 75, 90, 95, 99]
weeks_data = [week_29_spread, week_30_spread]
week_names = ['Week 29', 'Week 30']

heatmap_data = []
for week_data in weeks_data:
    heatmap_data.append([np.percentile(week_data, p) for p in percentiles])

fig_risk.add_trace(
    go.Heatmap(
        z=heatmap_data,
        x=[f'{p}%' for p in percentiles],
        y=week_names,
        colorscale='RdBu_r',
        showscale=True,
        text=[[f'{val:.1f}' for val in row] for row in heatmap_data],
        texttemplate='%{text}',
        textfont={"size": 10}
    ),
    row=2, col=1
)

extreme_threshold = np.percentile(np.abs(spread_data), 95)  # Top 5% extreme values
extreme_mask = np.abs(merged_prices['Price_Delta_HU_DE']) >= extreme_threshold
extreme_times = merged_prices[extreme_mask].index
extreme_values = merged_prices[extreme_mask]['Price_Delta_HU_DE']

fig_risk.add_trace(
    go.Scatter(x=extreme_times, y=extreme_values, mode='markers',
               name='Extreme Events', marker=dict(color='red', size=8, symbol='x')),
    row=2, col=2
)
fig_risk.add_hline(y=0, line_dash="dash", line_color="gray", row=2, col=2)

# new layout
fig_risk.update_layout(
    title="Fat Tail Risk Analysis: HU-DE Spread",
    height=800,
    showlegend=True,
    template='plotly_white'
)

fig_risk.update_xaxes(title_text="Spread (EUR/MWh)", row=1, col=1)
fig_risk.update_yaxes(title_text="Density", row=1, col=1)
fig_risk.update_xaxes(title_text="Theoretical Quantiles", row=1, col=2)
fig_risk.update_yaxes(title_text="Sample Quantiles", row=1, col=2)
fig_risk.update_xaxes(title_text="Percentile", row=2, col=1)
fig_risk.update_xaxes(title_text="Time", row=2, col=2)
fig_risk.update_yaxes(title_text="Spread (EUR/MWh)", row=2, col=2)

fig_risk.show()

print(f"\n=== TAIL RISK INSIGHTS ===")
print(f"  - Kurtosis > 0: {'YES' if full_metrics['kurtosis'] > 0 else 'NO'} ({full_metrics['kurtosis']:.3f})")
print(f"  - Extreme events (>95th %ile): {(np.abs(spread_data) >= extreme_threshold).sum()} out of {len(spread_data)}")
print(f"  - Tail ratio (99%/95% VaR): {abs(full_metrics['var_99']/full_metrics['var_95']):.2f}")

print(f"\nWorst Case Scenarios:")
worst_negative = spread_data.nsmallest(3)
worst_positive = spread_data.nlargest(3)
print(f"  - Top 3 negative spikes: {worst_negative.values}")
print(f"  - Top 3 positive spikes: {worst_positive.values}")

print(f"\nWeek Comparison:")
print(f"  - Week 29 vs 30 volatility ratio: {w29_metrics['std']/w30_metrics['std']:.2f}")
print(f"  - Week 29 vs 30 skewness: {w29_metrics['skewness']:.3f} vs {w30_metrics['skewness']:.3f}")

=== FAT TAILS ===

Full Period Risk Metrics:
  Mean: 13.84 EUR/MWh
  Std Dev: 29.65 EUR/MWh
  Skewness: 4.078 (right tail)
  Kurtosis: 24.210 (fat tails)
  95% VaR: -4.62 EUR/MWh
  99% VaR: -10.98 EUR/MWh
  95% CVaR: -9.32 EUR/MWh
  Max Loss: -32.67 EUR/MWh
  Max Gain: 276.75 EUR/MWh

Week 29 Risk Metrics:
  Mean: 9.76 EUR/MWh
  Std Dev: 16.03 EUR/MWh
  Skewness: 1.627 (right tail)
  Kurtosis: 3.231 (fat tails)
  95% VaR: -3.98 EUR/MWh
  99% VaR: -6.81 EUR/MWh
  95% CVaR: -6.20 EUR/MWh
  Max Loss: -10.21 EUR/MWh
  Max Gain: 88.02 EUR/MWh

Week 30 Risk Metrics:
  Mean: 27.15 EUR/MWh
  Std Dev: 47.15 EUR/MWh
  Skewness: 2.927 (right tail)
  Kurtosis: 9.797 (fat tails)
  95% VaR: -5.19 EUR/MWh
  99% VaR: -11.11 EUR/MWh
  95% CVaR: -8.91 EUR/MWh
  Max Loss: -15.81 EUR/MWh
  Max Gain: 276.75 EUR/MWh



=== TAIL RISK INSIGHTS ===
  - Kurtosis > 0: YES (24.210)
  - Extreme events (>95th %ile): 34 out of 673
  - Tail ratio (99%/95% VaR): 2.38

Worst Case Scenarios:
  - Top 3 negative spikes: [-32.67 -29.05 -17.4 ]
  - Top 3 positive spikes: [276.75 244.92 225.83]

Week Comparison:
  - Week 29 vs 30 volatility ratio: 0.34
  - Week 29 vs 30 skewness: 1.627 vs 2.927


In [49]:
import requests
import pandas as pd
import logging
import time
import os

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Define representative cities for Hungary and Germany
CITIES = {
    "DE": ["Berlin", "Munich", "Frankfurt"],  
    "HU": ["Budapest", "Debrecen", "Szeged"]
}

# 29 & 30 of 2025
START_DATE = "2025-07-14"
END_DATE = "2025-07-28"

# API endpoints
GEOCODING_API_URL = "https://geocoding-api.open-meteo.com/v1/search"
HISTORICAL_API_URL = "https://archive-api.open-meteo.com/v1/archive"

OUTPUT_HOURLY_CSV = "hourly_weather_HU_DE_weeks29_30.csv"
OUTPUT_DAILY_CSV = "daily_max_weather_HU_DE_weeks29_30.csv"

def get_city_coordinates(city_name: str, country_code: str) -> tuple[float, float] | None:
    """
    Fetches latitude and longitude for a city using the Open-Meteo Geocoding API.
    """
    try:
        params = {'name': city_name, 'count': 1, 'language': 'en', 'format': 'json'}
        response = requests.get(GEOCODING_API_URL, params=params, timeout=10)
        response.raise_for_status()
        
        data = response.json()
        if 'results' in data:
            for result in data['results']:
                if result.get('country_code', '').upper() == country_code.upper():
                    return result['latitude'], result['longitude']
        
        logging.warning(f"Could not find coordinates for {city_name}, {country_code}.")
        return None
    except requests.exceptions.RequestException as e:
        logging.error(f"Geocoding request failed for {city_name}: {e}")
        return None

def fetch_hourly_weather_data(latitude: float, longitude: float, start_date: str, end_date: str) -> pd.DataFrame | None:
    """
    Fetches historical hourly average temperature for a given location and date range.
    """
    try:
        params = {
            'latitude': latitude,
            'longitude': longitude,
            'start_date': start_date,
            'end_date': end_date,
            'hourly': 'temperature_2m',  # Hourly temperature
            'timezone': 'Europe/Berlin'  # Use Central European Time
        }
        response = requests.get(HISTORICAL_API_URL, params=params, timeout=30)
        response.raise_for_status()

        data = response.json()
        if 'hourly' not in data or 'time' not in data['hourly']:
            logging.error(f"No hourly data available for lat:{latitude}, lon:{longitude}")
            return None

        # Convert to DataFrame
        df = pd.DataFrame(data['hourly'])
        df.rename(columns={
            'time': 'datetime',
            'temperature_2m': 'temperature'
        }, inplace=True)
        
        # Convert datetime to pandas datetime
        df['datetime'] = pd.to_datetime(df['datetime'])
        
        return df

    except requests.exceptions.RequestException as e:
        logging.error(f"Hourly weather request failed for lat:{latitude}, lon:{longitude}: {e}")
        return None
    except (KeyError, TypeError) as e:
        logging.error(f"Error parsing hourly weather data: {e}")
        return None

def fetch_daily_max_weather_data(latitude: float, longitude: float, start_date: str, end_date: str) -> pd.DataFrame | None:
    """
    Fetches historical daily max temperature for a given location and date range.
    """
    try:
        params = {
            'latitude': latitude,
            'longitude': longitude,
            'start_date': start_date,
            'end_date': end_date,
            'daily': 'temperature_2m_max',  # Daily maximum temperature
            'timezone': 'Europe/Berlin'
        }
        response = requests.get(HISTORICAL_API_URL, params=params, timeout=30)
        response.raise_for_status()

        data = response.json()
        if 'daily' not in data or 'time' not in data['daily']:
            logging.error(f"No daily data available for lat:{latitude}, lon:{longitude}")
            return None

        # Convert to DataFrame
        df = pd.DataFrame(data['daily'])
        df.rename(columns={
            'time': 'date',
            'temperature_2m_max': 'max_temperature'
        }, inplace=True)
        
        # Convert date to pandas datetime
        df['date'] = pd.to_datetime(df['date'])
        
        return df

    except requests.exceptions.RequestException as e:
        logging.error(f"Daily weather request failed for lat:{latitude}, lon:{longitude}: {e}")
        return None
    except (KeyError, TypeError) as e:
        logging.error(f"Error parsing daily weather data: {e}")
        return None

def main():
    """
    Main function to fetch hourly and daily weather data for weeks 29-30 of 2025.
    """
    logging.info("Starting weather data fetching for weeks 29-30, 2025...")
    hourly_data = []
    daily_data = []

    for country_code, cities in CITIES.items():
        for city in cities:
            logging.info(f"Processing {city}, {country_code}...")
            
            # Get coordinates
            coords = get_city_coordinates(city, country_code)
            if not coords:
                continue
            
            lat, lon = coords
            logging.info(f"Coordinates for {city}: Lat={lat:.4f}, Lon={lon:.4f}")
            
            # Fetch hourly data
            hourly_df = fetch_hourly_weather_data(lat, lon, START_DATE, END_DATE)
            if hourly_df is not None and not hourly_df.empty:
                hourly_df['city'] = city
                hourly_df['country'] = country_code
                hourly_data.append(hourly_df)
                logging.info(f"Hourly data fetched for {city}: {len(hourly_df)} records")
            
            # Fetch daily max data
            daily_df = fetch_daily_max_weather_data(lat, lon, START_DATE, END_DATE)
            if daily_df is not None and not daily_df.empty:
                daily_df['city'] = city
                daily_df['country'] = country_code
                daily_data.append(daily_df)
                logging.info(f"Daily max data fetched for {city}: {len(daily_df)} records")
            
            # Respectful delay
            time.sleep(1)

    # Process and save hourly data
    if hourly_data:
        final_hourly_df = pd.concat(hourly_data, ignore_index=True)
        final_hourly_df = final_hourly_df[['country', 'city', 'datetime', 'temperature']]
        final_hourly_df.to_csv(OUTPUT_HOURLY_CSV, index=False)
        
        # Calculate country averages by hour
        hourly_country_avg = final_hourly_df.groupby(['country', 'datetime'])['temperature'].mean().reset_index()
        hourly_country_avg['temperature'] = hourly_country_avg['temperature'].round(2)
        
        print(f"\n=== HOURLY TEMPERATURE AVERAGES ===")
        print(f"Total hourly records: {len(final_hourly_df)}")
        print(f"Date range: {final_hourly_df['datetime'].min()} to {final_hourly_df['datetime'].max()}")
        print("\nSample hourly country averages:")
        print(hourly_country_avg.head(10))
        
        logging.info(f"Hourly data saved to {OUTPUT_HOURLY_CSV}")
    
    # Process and save daily max data
    if daily_data:
        final_daily_df = pd.concat(daily_data, ignore_index=True)
        final_daily_df = final_daily_df[['country', 'city', 'date', 'max_temperature']]
        final_daily_df.to_csv(OUTPUT_DAILY_CSV, index=False)
        
        # Calculate country averages by day
        daily_country_avg = final_daily_df.groupby(['country', 'date'])['max_temperature'].mean().reset_index()
        daily_country_avg['max_temperature'] = daily_country_avg['max_temperature'].round(2)
        
        print(f"\n=== DAILY MAX TEMPERATURE AVERAGES ===")
        print(f"Total daily records: {len(final_daily_df)}")
        print("\nDaily country averages:")
        print(daily_country_avg)
        
        # Overall period averages
        period_avg = final_daily_df.groupby('country')['max_temperature'].agg(['mean', 'min', 'max']).round(2)
        print(f"\n=== WEEKS 29-30 SUMMARY ===")
        print("Average daily max temperatures by country:")
        print(period_avg)
        
        logging.info(f"Daily max data saved to {OUTPUT_DAILY_CSV}")
    
    if not hourly_data and not daily_data:
        logging.warning("No weather data was collected.")
    
    return final_hourly_df if hourly_data else None, final_daily_df if daily_data else None

# Execute the data fetching
hourly_weather, daily_weather = main()

2025-08-04 23:32:53,062 - INFO - Starting weather data fetching for weeks 29-30, 2025...
2025-08-04 23:32:53,062 - INFO - Processing Berlin, DE...
2025-08-04 23:32:53,062 - INFO - Processing Berlin, DE...
2025-08-04 23:32:53,160 - INFO - Coordinates for Berlin: Lat=52.5244, Lon=13.4105
2025-08-04 23:32:53,160 - INFO - Coordinates for Berlin: Lat=52.5244, Lon=13.4105
2025-08-04 23:32:53,277 - INFO - Hourly data fetched for Berlin: 360 records
2025-08-04 23:32:53,277 - INFO - Hourly data fetched for Berlin: 360 records
2025-08-04 23:32:53,379 - INFO - Daily max data fetched for Berlin: 15 records
2025-08-04 23:32:53,379 - INFO - Daily max data fetched for Berlin: 15 records
2025-08-04 23:32:54,380 - INFO - Processing Munich, DE...
2025-08-04 23:32:54,380 - INFO - Processing Munich, DE...
2025-08-04 23:32:54,471 - INFO - Coordinates for Munich: Lat=48.1374, Lon=11.5755
2025-08-04 23:32:54,471 - INFO - Coordinates for Munich: Lat=48.1374, Lon=11.5755
2025-08-04 23:32:54,585 - INFO - Hourly


=== HOURLY TEMPERATURE AVERAGES ===
Total hourly records: 2160
Date range: 2025-07-14 00:00:00 to 2025-07-28 23:00:00

Sample hourly country averages:
  country            datetime  temperature
0      DE 2025-07-14 00:00:00        18.20
1      DE 2025-07-14 01:00:00        17.80
2      DE 2025-07-14 02:00:00        17.43
3      DE 2025-07-14 03:00:00        16.70
4      DE 2025-07-14 04:00:00        16.13
5      DE 2025-07-14 05:00:00        15.90
6      DE 2025-07-14 06:00:00        16.17
7      DE 2025-07-14 07:00:00        17.53
8      DE 2025-07-14 08:00:00        19.30
9      DE 2025-07-14 09:00:00        21.10

=== DAILY MAX TEMPERATURE AVERAGES ===
Total daily records: 90

Daily country averages:
   country       date  max_temperature
0       DE 2025-07-14            24.33
1       DE 2025-07-15            23.50
2       DE 2025-07-16            21.70
3       DE 2025-07-17            19.73
4       DE 2025-07-18            23.57
5       DE 2025-07-19            27.30
6       DE 20

In [54]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

try:
    hourly_weather = pd.read_csv("../data/raw/hourly_weather_HU_DE_weeks29_30.csv")
    daily_weather = pd.read_csv("../data/raw/daily_max_weather_HU_DE_weeks29_30.csv")
    
    hourly_weather['datetime'] = pd.to_datetime(hourly_weather['datetime'])
    daily_weather['date'] = pd.to_datetime(daily_weather['date'])
    
    print(f"Hourly records: {len(hourly_weather)}")
    print(f"Daily records: {len(daily_weather)}")
    
except FileNotFoundError:
    print("data not found, run fetching first")
    hourly_weather = None
    daily_weather = None

if hourly_weather is not None and daily_weather is not None:
    
    hourly_country_avg = hourly_weather.groupby(['country', 'datetime'])['temperature'].mean().reset_index()
    hourly_country_avg['temperature'] = hourly_country_avg['temperature'].round(2)
    
    daily_country_avg = daily_weather.groupby(['country', 'date'])['max_temperature'].mean().reset_index()
    daily_country_avg['max_temperature'] = daily_country_avg['max_temperature'].round(2)
    
    de_hourly_temp = hourly_country_avg[hourly_country_avg['country'] == 'DE'].copy()
    hu_hourly_temp = hourly_country_avg[hourly_country_avg['country'] == 'HU'].copy()
    
    de_daily_max = daily_country_avg[daily_country_avg['country'] == 'DE'].copy()
    hu_daily_max = daily_country_avg[daily_country_avg['country'] == 'HU'].copy()
    
    de_hourly_temp['datetime'] = pd.to_datetime(de_hourly_temp['datetime']).dt.tz_localize('Europe/Berlin').dt.tz_convert('UTC')
    hu_hourly_temp['datetime'] = pd.to_datetime(hu_hourly_temp['datetime']).dt.tz_localize('Europe/Berlin').dt.tz_convert('UTC')
    de_daily_max['date'] = pd.to_datetime(de_daily_max['date']).dt.tz_localize('Europe/Berlin').dt.tz_convert('UTC')
    hu_daily_max['date'] = pd.to_datetime(hu_daily_max['date']).dt.tz_localize('Europe/Berlin').dt.tz_convert('UTC')
    
    print("\nWeather data processed and timezone converted!")
    print(f"DE hourly temperature range: {de_hourly_temp['temperature'].min():.1f}°C to {de_hourly_temp['temperature'].max():.1f}°C")
    print(f"HU hourly temperature range: {hu_hourly_temp['temperature'].min():.1f}°C to {hu_hourly_temp['temperature'].max():.1f}°C")
    print(f"DE daily max temperature range: {de_daily_max['max_temperature'].min():.1f}°C to {de_daily_max['max_temperature'].max():.1f}°C")
    print(f"HU daily max temperature range: {hu_daily_max['max_temperature'].min():.1f}°C to {hu_daily_max['max_temperature'].max():.1f}°C")
    
    #  price, TEMP visual ---
    
    # 1. Overall time series with dual y-axis (price + temperature)
    fig_temp_price = make_subplots(
        rows=1, cols=1,
        specs=[[{"secondary_y": True}]],
        subplot_titles=("price vs temp: weeks 29-30, 2025",)
    )
    
    # Add price data (primary y-axis)
    fig_temp_price.add_trace(
        go.Scatter(x=merged_prices.index, y=merged_prices['DE_Price'], 
                  name='DE Price', line=dict(color='blue', width=2)),
        secondary_y=False
    )
    fig_temp_price.add_trace(
        go.Scatter(x=merged_prices.index, y=merged_prices['HU_Price'], 
                  name='HU Price', line=dict(color='red', width=2)),
        secondary_y=False
    )
    
    # Add hourly temperature data (secondary y-axis)
    fig_temp_price.add_trace(
        go.Scatter(x=de_hourly_temp['datetime'], y=de_hourly_temp['temperature'], 
                  name='DE Temperature', line=dict(color='lightblue', width=1, dash='dot')),
        secondary_y=True
    )
    fig_temp_price.add_trace(
        go.Scatter(x=hu_hourly_temp['datetime'], y=hu_hourly_temp['temperature'], 
                  name='HU Temperature', line=dict(color='pink', width=1, dash='dot')),
        secondary_y=True
    )
    
    # Add week boundaries
    fig_temp_price.add_vline(x=week_29_start, line_dash="dash", line_color="green")
    fig_temp_price.add_vline(x=week_29_end, line_dash="dash", line_color="orange") 
    fig_temp_price.add_vline(x=week_30_end, line_dash="dash", line_color="red")
    
    # Update axes
    fig_temp_price.update_yaxes(title_text="Price (EUR/MWh)", secondary_y=False)
    fig_temp_price.update_yaxes(title_text="Temperature (°C)", secondary_y=True)
    fig_temp_price.update_xaxes(title_text="Date")
    
    fig_temp_price.update_layout(
        title="Price vs vs TEMP: Weeks 28-31, 2025",
        height=600,
        template='plotly_white',
        hovermode='x unified'
    )
    
    fig_temp_price.show()
    
    # 2. Enhanced spread analysis with temperature
    fig_spread_temp = make_subplots(
        rows=2, cols=1,
        specs=[[{"secondary_y": True}], [{"secondary_y": True}]],
        subplot_titles=("Week 29: Spread vs Temperature", "Week 30: Spread vs Temperature"),
        vertical_spacing=0.1
    )
    
    # Week 29 data
    week_29_de_temp = de_hourly_temp[(de_hourly_temp['datetime'] >= week_29_start) & 
                                    (de_hourly_temp['datetime'] <= week_29_end)]
    week_29_hu_temp = hu_hourly_temp[(hu_hourly_temp['datetime'] >= week_29_start) & 
                                    (hu_hourly_temp['datetime'] <= week_29_end)]
    
    # Week 30 data  
    week_30_de_temp = de_hourly_temp[(de_hourly_temp['datetime'] >= week_30_start) & 
                                    (de_hourly_temp['datetime'] <= week_30_end)]
    week_30_hu_temp = hu_hourly_temp[(hu_hourly_temp['datetime'] >= week_30_start) & 
                                    (hu_hourly_temp['datetime'] <= week_30_end)]
    
    # Calculate temperature deltas for each week
    week_29_temp_delta = pd.merge(week_29_de_temp, week_29_hu_temp, on='datetime', suffixes=('_de', '_hu'))
    week_29_temp_delta['temp_delta'] = week_29_temp_delta['temperature_hu'] - week_29_temp_delta['temperature_de']
    
    week_30_temp_delta = pd.merge(week_30_de_temp, week_30_hu_temp, on='datetime', suffixes=('_de', '_hu'))
    week_30_temp_delta['temp_delta'] = week_30_temp_delta['temperature_hu'] - week_30_temp_delta['temperature_de']
    
    # Week 29 plots
    fig_spread_temp.add_trace(
        go.Scatter(x=week_29_data.index, y=week_29_data['Price_Delta_HU_DE'], 
                  name='Spread (HU-DE)', line=dict(color='purple', width=2)),
        row=1, col=1, secondary_y=False
    )
    fig_spread_temp.add_trace(
        go.Scatter(x=week_29_de_temp['datetime'], y=week_29_de_temp['temperature'], 
                  name='DE Temp', line=dict(color='lightblue', dash='dot')),
        row=1, col=1, secondary_y=True
    )
    fig_spread_temp.add_trace(
        go.Scatter(x=week_29_hu_temp['datetime'], y=week_29_hu_temp['temperature'], 
                  name='HU Temp', line=dict(color='pink', dash='dot')),
        row=1, col=1, secondary_y=True
    )
    fig_spread_temp.add_trace(
        go.Scatter(x=week_29_temp_delta['datetime'], y=week_29_temp_delta['temp_delta'], 
                  name='Temp Delta (HU-DE)', line=dict(color='orange', width=2)),
        row=1, col=1, secondary_y=True
    )
    
    # Week 30 plots
    fig_spread_temp.add_trace(
        go.Scatter(x=week_30_data.index, y=week_30_data['Price_Delta_HU_DE'], 
                  name='Spread (HU-DE)', line=dict(color='purple', width=2), showlegend=False),
        row=2, col=1, secondary_y=False
    )
    fig_spread_temp.add_trace(
        go.Scatter(x=week_30_de_temp['datetime'], y=week_30_de_temp['temperature'], 
                  name='DE Temp', line=dict(color='lightblue', dash='dot'), showlegend=False),
        row=2, col=1, secondary_y=True
    )
    fig_spread_temp.add_trace(
        go.Scatter(x=week_30_hu_temp['datetime'], y=week_30_hu_temp['temperature'], 
                  name='HU Temp', line=dict(color='pink', dash='dot'), showlegend=False),
        row=2, col=1, secondary_y=True
    )
    fig_spread_temp.add_trace(
        go.Scatter(x=week_30_temp_delta['datetime'], y=week_30_temp_delta['temp_delta'], 
                  name='Temp Delta (HU-DE)', line=dict(color='orange', width=2), showlegend=False),
        row=2, col=1, secondary_y=True
    )
    
    # y = 0 premica
    fig_spread_temp.add_hline(y=0, line_dash="dash", line_color="gray", row=1, col=1)
    fig_spread_temp.add_hline(y=0, line_dash="dash", line_color="gray", row=2, col=1)
    
    # 
    fig_spread_temp.update_yaxes(title_text="Spread (EUR/MWh)", secondary_y=False, row=1, col=1)
    fig_spread_temp.update_yaxes(title_text="Temperature (°C)", secondary_y=True, row=1, col=1)
    fig_spread_temp.update_yaxes(title_text="Spread (EUR/MWh)", secondary_y=False, row=2, col=1)
    fig_spread_temp.update_yaxes(title_text="Temperature (°C)", secondary_y=True, row=2, col=1)
    fig_spread_temp.update_xaxes(title_text="Date", row=2, col=1)
    
    fig_spread_temp.update_layout(
        title="Spread vs Temperature: Week-by-Week Analysis",
        height=800,
        template='plotly_white',
        showlegend=True
    )
    
    fig_spread_temp.show()
    
    # temp, price / correlation
    print("\n=== TEMPERATURE-PRICE CORRELATION ANALYSIS ===")
    
    # Prepare merged_prices with proper datetime column for merging
    price_data_for_merge = merged_prices.reset_index()
    price_data_for_merge.columns = ['datetime', 'DE_Price', 'HU_Price', 'PRice Delta', 'Price_Delta_HU_DE']
    
    # Merge price and temperature data for correlation
    de_combined = pd.merge_asof(
        price_data_for_merge.sort_values('datetime'),
        de_hourly_temp.sort_values('datetime'),
        on='datetime',
        direction='nearest'
    )
    
    hu_combined = pd.merge_asof(
        price_data_for_merge.sort_values('datetime'),
        hu_hourly_temp.sort_values('datetime'), 
        on='datetime',
        direction='nearest'
    )
    
    de_valid = de_combined.dropna(subset=['DE_Price', 'temperature'])
    hu_valid = hu_combined.dropna(subset=['HU_Price', 'temperature'])
    
    if len(de_valid) > 0 and len(hu_valid) > 0:
        de_price_temp_corr = de_valid['DE_Price'].corr(de_valid['temperature'])
        hu_price_temp_corr = hu_valid['HU_Price'].corr(hu_valid['temperature'])
        spread_de_temp_corr = de_valid['Price_Delta_HU_DE'].corr(de_valid['temperature'])
        spread_hu_temp_corr = hu_valid['Price_Delta_HU_DE'].corr(hu_valid['temperature'])
        
        print(f"DE Price vs DE temp. correlation: {de_price_temp_corr:.3f}")
        print(f"HU Price vs HU temp correlation: {hu_price_temp_corr:.3f}")
        print(f"Spread vs DE temp. correlation: {spread_de_temp_corr:.3f}")
        print(f"Spread vs HU temp: {spread_hu_temp_corr:.3f}")
        
        # Temperature difference impact
        temp_diff = pd.merge(de_hourly_temp, hu_hourly_temp, on='datetime', suffixes=('_de', '_hu'))
        temp_diff['temp_delta'] = temp_diff['temperature_hu'] - temp_diff['temperature_de']
        
        spread_temp_combined = pd.merge_asof(
            price_data_for_merge.sort_values('datetime'),
            temp_diff.sort_values('datetime'),
            on='datetime',
            direction='nearest'
        )
        
        spread_temp_valid = spread_temp_combined.dropna(subset=['Price_Delta_HU_DE', 'temp_delta'])
        if len(spread_temp_valid) > 0:
            temp_delta_spread_corr = spread_temp_valid['Price_Delta_HU_DE'].corr(spread_temp_valid['temp_delta'])
            print(f"Spread vs Temperature Difference (HU-DE) correlation: {temp_delta_spread_corr:.3f}")
        
        print(f"\nKey Insights:")
        print(f"- Average DE temperature during period: {de_hourly_temp['temperature'].mean():.1f}°C")
        print(f"- Average HU temperature during period: {hu_hourly_temp['temperature'].mean():.1f}°C") 
        print(f"- Average temperature difference (HU-DE): {temp_diff['temp_delta'].mean():.1f}°C")
        print(f"- Max daily temperature difference: {temp_diff['temp_delta'].max():.1f}°C")
        print(f"- Min daily temperature difference: {temp_diff['temp_delta'].min():.1f}°C")
    else:
        print("Insufficient data for correlation analysis.")

else:
    pass

Hourly records: 2160
Daily records: 90

Weather data processed and timezone converted!
DE hourly temperature range: 13.3°C to 29.8°C
HU hourly temperature range: 14.0°C to 35.2°C
DE daily max temperature range: 19.7°C to 29.8°C
HU daily max temperature range: 23.0°C to 35.6°C



=== TEMPERATURE-PRICE CORRELATION ANALYSIS ===
DE Price vs DE temp. correlation: -0.143
HU Price vs HU temp correlation: 0.150
Spread vs DE temp. correlation: 0.170
Spread vs HU temp: 0.312
Spread vs Temperature Difference (HU-DE) correlation: 0.275

Key Insights:
- Average DE temperature during period: 19.1°C
- Average HU temperature during period: 24.4°C
- Average temperature difference (HU-DE): 5.3°C
- Max daily temperature difference: 16.0°C
- Min daily temperature difference: -1.5°C


In [51]:
import pandas as pd
import logging

# --- 1. LOAD PRE-PROCESSED GENERATION DATA ---
try:
    # Load the data, using the first column as the index and parsing dates
    gen_de = pd.read_csv(
        r"C:\Users\micha\code\power-market-analysis-de-hu\data\processed\DE_generation.csv", 
        index_col=0, 
        parse_dates=True
    )
    gen_hu = pd.read_csv(
        r"C:\Users\micha\code\power-market-analysis-de-hu\data\processed\HU_generation.csv", 
        index_col=0, 
        parse_dates=True
    )
    
    # Rename the index to 'datetime' for clarity
    gen_de.index.name = 'datetime'
    gen_hu.index.name = 'datetime'
    
    # Convert the timezone-naive index to UTC, matching the price data
    if gen_de.index.tz is None:
        gen_de.index = gen_de.index.tz_localize('UTC')
    else:
        gen_de.index = gen_de.index.tz_convert('UTC')

    if gen_hu.index.tz is None:
        gen_hu.index = gen_hu.index.tz_localize('UTC')
    else:
        gen_hu.index = gen_hu.index.tz_convert('UTC')

    logging.info("Successfully loaded generation data from local CSV files.")
    print("DE and HU generation data loaded successfully.")
    print("\nGerman Generation Data Sample:")
    print(gen_de.head())
    print("\nHungarian Generation Data Sample:")
    print(gen_hu.head())

except FileNotFoundError as e:
    logging.error(f"Generation data CSV not found: {e}")
    print(f"Error: Generation data CSV not found. Please ensure the files exist at the specified path.")
    gen_de = None
    gen_hu = None
except Exception as e:
    logging.error(f"An error occurred while loading generation data: {e}")
    print(f"An error occurred: {e}")
    gen_de = None
    gen_hu = None

2025-08-04 23:33:01,189 - INFO - Successfully loaded generation data from local CSV files.


DE and HU generation data loaded successfully.

German Generation Data Sample:
                                     Biomass Fossil Brown coal/Lignite  \
datetime                                                                 
NaT                        Actual Aggregated         Actual Aggregated   
2025-07-06 22:00:00+00:00             3941.0                   10610.0   
2025-07-06 22:15:00+00:00             3927.0                   10471.0   
2025-07-06 22:30:00+00:00             3927.0                   10464.0   
2025-07-06 22:45:00+00:00             3917.0                   10479.0   

                          Fossil Coal-derived gas         Fossil Gas  \
datetime                                                               
NaT                             Actual Aggregated  Actual Aggregated   
2025-07-06 22:00:00+00:00                   426.0             4461.0   
2025-07-06 22:15:00+00:00                   334.0             4400.0   
2025-07-06 22:30:00+00:00                 

In [52]:
import pandas as pd
import io
import logging

# --- ROBUST DATA LOADING FUNCTION ---
def load_and_clean_generation_data(filepath):
    """
    Loads generation data from a CSV file, cleans it, and prepares it for plotting.
    This function correctly handles the multi-level header which is the source of the errors.
    """
    try:
        df = pd.read_csv(filepath, header=[0, 1], index_col=0, parse_dates=True)

        # --- This is the key cleaning step ---
        # We only want the 'Actual Aggregated' data for our generation plot.
        # This also elegantly solves the duplicate 'Fossil Oil' column issue.
        if 'Actual Aggregated' in df.columns.get_level_values(1):
             df = df.xs('Actual Aggregated', axis=1, level=1)
        else:
            # If the secondary header doesn't exist, just use the main columns
             df.columns = df.columns.droplevel(1)


        # Rename columns to be more friendly for the plot legend
        df = df.rename(columns={
            'Fossil Brown coal/Lignite': 'Lignite',
            'Fossil Hard coal': 'Hard Coal',
            'Fossil Gas': 'Gas',
            'Fossil Oil': 'Oil',
            'Hydro Run-of-river and poundage': 'Hydro Run-of-river',
            'Hydro Water Reservoir': 'Hydro Reservoir',
            'Wind Onshore': 'Wind'
        })

        # Fill any missing values with 0, essential for stacking
        df.fillna(0, inplace=True)

        # Ensure all columns are numeric (this prevents the weird y-axis labels)
        for col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

        return df.fillna(0)

    except FileNotFoundError:
        logging.error(f"Error: The file was not found at {filepath}")
        return None
    except Exception as e:
        logging.error(f"An error occurred while processing the file: {e}")
        return None




gen_de_processed = load_and_clean_generation_data(r"C:\Users\micha\code\power-market-analysis-de-hu\data\processed\DE_generation.csv")

if gen_de_processed is not None:
    logging.info("Successfully loaded and cleaned detailed generation data.")
    print("\nCleaned and Detailed German Generation Data (Sample):")
    print(gen_de_processed.head())
else:
    logging.warning("Could not load or process generation data.")

2025-08-04 23:33:01,231 - INFO - Successfully loaded and cleaned detailed generation data.



Cleaned and Detailed German Generation Data (Sample):
                           Biomass  Lignite  Fossil Coal-derived gas     Gas  \
2025-07-07 00:00:00+02:00   3941.0  10610.0                    426.0  4461.0   
2025-07-07 00:15:00+02:00   3927.0  10471.0                    334.0  4400.0   
2025-07-07 00:30:00+02:00   3927.0  10464.0                    339.0  4380.0   
2025-07-07 00:45:00+02:00   3917.0  10479.0                    341.0  4546.0   
2025-07-07 01:00:00+02:00   3909.0  10509.0                    336.0  4246.0   

                           Hard Coal    Oil  Geothermal  Hydro Pumped Storage  \
2025-07-07 00:00:00+02:00     3049.0  281.0        23.0                1180.0   
2025-07-07 00:15:00+02:00     3034.0  281.0        23.0                 863.0   
2025-07-07 00:30:00+02:00     2967.0  281.0        23.0                 164.0   
2025-07-07 00:45:00+02:00     2950.0  281.0        23.0                 195.0   
2025-07-07 01:00:00+02:00     2948.0  282.0        23.0    

In [53]:
import plotly.graph_objects as go

def plot_professional_generation_mix(df, country_name):

    if df is None or df.empty:
        logging.warning(f"No data available to plot for {country_name}.")
        return

    stack_order = [
        'Nuclear', 'Lignite', 'Hard Coal', 'Biomass', 'Waste', 'Other',
        'Gas', 'Oil', 'Geothermal', 'Hydro Reservoir', 'Hydro Run-of-river',
        'Other renewable', 'Wind', 'Solar'
    ]

    # do it like a por woud do it ww professional color palette
    colors = {
        'Solar': '#FFD700',      # gold
        'Wind': '#87CEEB',       # skyblue
        'Other renewable': '#228B22', # forestgreen
        'Hydro Reservoir': '#4682B4',# steelblue
        'Hydro Run-of-river': '#00BFFF', # deepskyblue
        'Geothermal': '#FF4500',  # orangered
        'Gas': '#A0522D',        # sienna
        'Oil': '#404040',        # dark grey
        'Biomass': '#8B4513',    # saddlebrown
        'Waste': '#696969',      # dimgrey
        'Hard Coal': '#2F4F4F',   # darkslategrey
        'Lignite': '#708090',     # slategrey
        'Nuclear': '#800080',    # purple
        'Other': '#D3D3D3'       # lightgrey
    }

    fig = go.Figure()

    # 3. Add a trace for each generation type in the specified order
    # We loop through our defined order, not the dataframe's column order
    for category in stack_order:
        if category in df.columns:
            fig.add_trace(go.Scatter(
                x=df.index,
                y=df[category],
                name=category,
                mode='lines',
                # Key for solid area look: line width is 0
                line=dict(width=0),
                stackgroup='one', # Group all traces into one stack
                fillcolor=colors.get(category, '#CCCCCC'),
                hoverinfo='x+y',
                hovertemplate=f'<b>{category}</b><br>%{{y:,.0f}} MW<extra></extra>'
            ))

    # --- 4. Refine the Aesthetics for a Professional Look ---
    # Add a vertical line to indicate the current time
    current_time = pd.Timestamp('2025-08-04 23:21:34', tz='Europe/Berlin')
    fig.add_vline(
        x=current_time,
        line_width=2,
        line_dash="dash",
        line_color="red"
    )
    fig.add_annotation(
        x=current_time, y=0.9, yref='paper',
        text="<b>Actual</b>", showarrow=False, xanchor="right", xshift=-5
    )
    fig.add_annotation(
        x=current_time, y=0.9, yref='paper',
        text="<b>Forecast</b>", showarrow=False, xanchor="left", xshift=5
    )

    fig.update_layout(
        title=dict(text=f'<b>Electricity Generation Mix - {country_name}</b>', font=dict(size=24), x=0.5),
        yaxis_title='Generation (MW)',
        xaxis_title=None,
        height=600,
        template='plotly_dark',
        hovermode='x unified',
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        ),
        xaxis=dict(
            tickformat='%a %d %b\n%H:%M'
        )
    )

    fig.show()

if 'gen_de_processed' in locals() and gen_de_processed is not None:
    plot_professional_generation_mix(gen_de_processed, "Germany")