# Direct Normal Irradiance (DNI) Comparison

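This notebook compares DNI data from weather models (treated here as the measured reference) with clear-sky DNI values calculated using the `solarpy` library and two `pvlib` models (Ineichen and Simplified Solis), each adjusted for cloud cover. The comparison uses the sunniest day of each of the four equinox and solstice months (March, June, September, December).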

## Summary of Findings

PVLib's Simplified Solis model most accurately fits the OpenMeteo DNI data.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from dni_models import (
    DniModel,
    calculate_direct_normal_irradiance,
    calculate_pvlib_dni_bulk,
    load_dqydj_data,
    get_cloud_cover_dni_coefficient
)

## Model Configuration

In [None]:

# DNI models to compare, as (DniModel member, matplotlib linestyle) pairs.
# Note: Haurwitz model is not included as it only provides GHI, not DNI
# The column name and display name used for each model are derived by
# get_adjusted_model_info(), which appends the cloud-cover "adjusted" suffix.
DNI_MODELS_TO_PLOT = [
    (DniModel.SOLARPY, '--'),
    (DniModel.PVLIB_INEICHEN, '-.'),
    (DniModel.PVLIB_SIMPLIFIED_SOLIS, ':'),
    # (DniModel.DQYDJ, (0, (3, 5))),  # Uncomment to include DQYDJ data
]

# Helper function to get adjusted column name and display name
def get_adjusted_model_info(model):
    """Return the cloud-cover-adjusted ``(column_name, display_name)`` pair for a model.

    Args:
        model: Object exposing ``column_name`` and ``display_name`` string
            attributes (``DniModel`` members are passed in this notebook).

    Returns:
        tuple: ``model.column_name`` with ``'_adjusted'`` appended, and
        ``model.display_name`` with ``' (adjusted)'`` appended.
    """
    return (
        model.column_name + '_adjusted',
        model.display_name + ' (adjusted)',
    )


# Pre-identified sunniest days for each equinox/solstice month
class Dates:
    """ISO-formatted (``YYYY-MM-DD``) day selected for each analysed month.

    The values are parsed with ``'%Y-%m-%d'`` further down the notebook, so
    any replacement must keep that format.
    """

    MARCH = '2023-03-02'      # spring equinox month
    JUNE = '2023-06-08'       # summer solstice month
    SEPTEMBER = '2023-09-05'  # autumn equinox month
    DECEMBER = '2022-12-15'   # winter solstice month (note: previous calendar year)

## Configuration

In [None]:
# Identifier of the PV site; used as the filename prefix of its CSV files
PV_SITE_ID = 24667

# Weather model whose DNI and cloud cover columns are used in the plots.
# Options: best_match, dmi_seamless, gem_seamless, gfs_seamless, icon_seamless,
#          jma_seamless, kma_seamless, knmi_seamless, meteofrance_seamless,
#          metno_seamless, ukmo_seamless
WEATHER_MODEL = 'ukmo_seamless'

# Directory containing the timeseries CSV files - adjust path as needed
DATA_DIR = Path('..') / 'data' / 'data-1' / 'timeseries'

# Site location passed to the DNI calculations - adjust to the actual site.
# NOTE(review): presumably decimal degrees and metres - confirm against dni_models.
LATITUDE = 51.8992
LONGITUDE = -2.1288
ALTITUDE = 0

# Offset (in hours) added to the loaded timestamps for alignment.
# A positive value shifts the measured data forward in time.
TIME_OFFSET_HOURS = 0

## Load Data

In [None]:
# Find all CSV files for the specified PV site (sorted for a reproducible load order)
csv_files = sorted(DATA_DIR.glob(f'{PV_SITE_ID}_*.csv'))
print(f"Found {len(csv_files)} CSV files for site {PV_SITE_ID}")

# pd.concat raises an opaque "No objects to concatenate" on an empty list, so
# fail early with a message that points at the likely misconfiguration.
if not csv_files:
    raise FileNotFoundError(
        f"No CSV files matching '{PV_SITE_ID}_*.csv' found in {DATA_DIR.resolve()}; "
        "check PV_SITE_ID and DATA_DIR."
    )

# Load every file and combine them into a single frame
dfs = [pd.read_csv(csv_file) for csv_file in csv_files]
data = pd.concat(dfs, ignore_index=True)

# Convert time to datetime and apply the alignment offset in one step
data['time'] = pd.to_datetime(data['time']) + pd.Timedelta(hours=TIME_OFFSET_HOURS)

# Sort by time and remove duplicates. A stable sort keeps rows with the same
# timestamp in load order, so drop_duplicates (which keeps the first row)
# deterministically retains the row from the earliest file.
data = (
    data
    .sort_values('time', kind='mergesort')
    .drop_duplicates(subset='time')
    .reset_index(drop=True)
)

print(f"\nTotal records: {len(data)}")
print(f"Date range: {data['time'].min()} to {data['time'].max()}")
print(f"Applied time offset: {TIME_OFFSET_HOURS} hour(s)")

## Extract Date and Add Month Information

In [None]:
# Derive calendar fields from the timestamp column (added to `data` in place)
timestamp_fields = data['time'].dt
data['date'] = timestamp_fields.date                   # calendar date, no time part
data['month'] = timestamp_fields.month                 # 1-12, aggregated across years
data['month_name'] = timestamp_fields.strftime('%B')   # full month name
data['year'] = timestamp_fields.year

# Keep only the equinox and solstice months (March, June, September, December)
equinox_solstice_months = [3, 6, 9, 12]
month_names_map = {3: 'March', 6: 'June', 9: 'September', 12: 'December'}

in_target_month = data['month'].isin(equinox_solstice_months)
data_filtered = data.loc[in_target_month].copy()  # copy so new columns do not touch `data`

# Human-readable label for each retained month
data_filtered['month_label'] = data_filtered['month'].map(month_names_map)

print(f"\nFiltered to equinox/solstice months: {len(data_filtered)} records")
print("\nMonths available in data:")
month_summary = data_filtered.groupby('month_label')['date'].agg(['min', 'max', 'count'])
print(month_summary)

## Prepare Data Columns

In [None]:
# Columns the analysis needs from the selected weather model
dni_col = 'direct_normal_irradiance_' + WEATHER_MODEL
cloud_cover_col = 'cloud_cover_' + WEATHER_MODEL
required_cols = [dni_col, cloud_cover_col]

# Report (without raising) any required column that is absent, and list the
# columns that do exist for this weather model to help spot a naming mismatch.
missing_cols = [name for name in required_cols if name not in data_filtered.columns]
if not missing_cols:
    print(f"Found all required columns: {required_cols}")
else:
    print(f"Warning: Missing columns: {missing_cols}")
    print(f"\nAvailable columns containing '{WEATHER_MODEL}':")
    matching_cols = [name for name in data_filtered.columns if WEATHER_MODEL in name]
    for col in matching_cols:
        print(f"  - {col}")

## Use Pre-identified Sunniest Days

In [None]:
# Parse the pre-identified ISO date strings into a {month label: datetime.date} mapping
from datetime import datetime

sunniest_dates = {
    label: datetime.strptime(iso_day, '%Y-%m-%d').date()
    for label, iso_day in (
        ('March', Dates.MARCH),
        ('June', Dates.JUNE),
        ('September', Dates.SEPTEMBER),
        ('December', Dates.DECEMBER),
    )
}

print("\nUsing pre-identified sunniest days:")
for month_label, date in sunniest_dates.items():
    print(f"  {month_label}: {date}")


## Load Third-Party DQYDJ Data

In [None]:
# Load DQYDJ data files using the dni_models library.
# The plotting cell looks entries up by month label (e.g. 'March') and reads the
# 'time' and 'dni_wm2' columns of each entry; the data is only consulted when
# DniModel.DQYDJ is enabled in DNI_MODELS_TO_PLOT.
# NOTE(review): the directory is a relative path, so it resolves against the
# current working directory - confirm it is correct for how the notebook is run.
dqydj_data = load_dqydj_data(dqydj_dir=Path('resources/dqydj'), verbose=True)

## Calculate Modelled DNI and Apply Cloud Cover Adjustment

In [None]:
# --- solarpy: evaluated one timestamp at a time -----------------------------
def _solarpy_dni(timestamp):
    """Evaluate the solarpy-based DNI for one timestamp at the configured site."""
    return calculate_direct_normal_irradiance(timestamp, LATITUDE, LONGITUDE, ALTITUDE)


print("Calculating DNI using solarpy...")
data_filtered['dni_calculated'] = data_filtered['time'].apply(_solarpy_dni)
print("Solarpy DNI calculation complete!")

# --- pvlib: evaluated in bulk over a UTC-localized index --------------------
# Only the Ineichen and Simplified Solis models provide DNI; Haurwitz provides
# GHI only and is therefore not included.
# NOTE(review): tz_localize requires timezone-naive timestamps - confirm the
# CSV 'time' column carries no UTC offset.
times_utc = pd.DatetimeIndex(data_filtered['time']).tz_localize('UTC')

pvlib_runs = (
    # (output column, pvlib model key, label in start message, label in done message)
    ('dni_pvlib_ineichen', 'ineichen', 'Ineichen', 'pvlib-Ineichen'),
    ('dni_pvlib_simplified_solis', 'simplified_solis', 'Simplified Solis', 'pvlib-SimplifiedSolis'),
)
for output_column, model_key, start_label, done_label in pvlib_runs:
    print(f"Calculating DNI using pvlib ({start_label} model)...")
    bulk_result = calculate_pvlib_dni_bulk(
        times_utc, LATITUDE, LONGITUDE, ALTITUDE, model=model_key
    )
    # .values drops the result's own index so assignment is positional
    data_filtered[output_column] = bulk_result.values
    print(f"{done_label} DNI calculation complete!")

# --- cloud cover adjustment --------------------------------------------------
print("\nApplying cloud cover adjustments to modelled DNI...")
cloud_cover = data_filtered[cloud_cover_col].values
cloud_cover_coefficient = get_cloud_cover_dni_coefficient(cloud_cover)

# Scale every modelled column by the same coefficient into a '<column>_adjusted' twin
for base_column in ('dni_calculated', 'dni_pvlib_ineichen', 'dni_pvlib_simplified_solis'):
    data_filtered[base_column + '_adjusted'] = data_filtered[base_column] * cloud_cover_coefficient

print("Cloud cover adjustment complete!")
print(f"  Cloud cover range: {cloud_cover.min():.2f} to {cloud_cover.max():.2f}")
print(f"  Coefficient range: {cloud_cover_coefficient.min():.2f} to {cloud_cover_coefficient.max():.2f}")

In [None]:
# Colour used for each month's curves, chosen for seasonal context.
# Entries are listed chronologically; `months` below relies on that order.
month_colors = dict(
    March='#88CC88',      # Spring Equinox - light green
    June='#FFD700',       # Summer Solstice - gold
    September='#FF8C00',  # Autumn Equinox - orange
    December='#4169E1',   # Winter Solstice - blue
)

# Months in chronological order (dicts preserve insertion order)
months = list(month_colors)


## Weather Model Comparison - RMS Error Analysis

In [None]:
# Calculate the RMS error for each weather model and DNI model combination.
# Per-sample error is (modeled - measured) / peak_measured * 100, where
# peak_measured is the peak measured DNI of the same day; the errors of all
# dates of interest are pooled before taking the root mean square.
def _find_weather_models(columns):
    """Return sorted weather-model names having both a DNI and a cloud cover column."""
    dni_prefix = 'direct_normal_irradiance_'
    found = set()
    for col in columns:
        if col.startswith(dni_prefix) and not col.endswith('_adjusted'):
            weather_model_name = col.replace(dni_prefix, '')
            if f'cloud_cover_{weather_model_name}' in columns:
                found.add(weather_model_name)
    return sorted(found)


def _day_errors_percent(day_data, weather_model, base_col_name):
    """Return one day's errors as % of that day's peak measured DNI, or None if unusable.

    A day is unusable when it has no rows, a needed column is absent, or the
    peak measured DNI is not positive.
    """
    dni_col_wm = f'direct_normal_irradiance_{weather_model}'
    if len(day_data) == 0 or dni_col_wm not in day_data.columns:
        return None

    measured_dni = day_data[dni_col_wm].values
    peak_measurement = np.max(measured_dni)
    # A NaN peak fails this comparison and is therefore NOT skipped: its NaN
    # errors propagate into the pooled RMS, which is then reported as N/A.
    if peak_measurement <= 0:
        return None

    if base_col_name not in day_data.columns:
        return None

    # Apply the cloud cover adjustment specific to this weather model to the
    # unadjusted (base) DNI model column.
    cloud_cover_wm = day_data[f'cloud_cover_{weather_model}'].values
    cloud_cover_coefficient_wm = get_cloud_cover_dni_coefficient(cloud_cover_wm)
    modeled_dni_adjusted = day_data[base_col_name].values * cloud_cover_coefficient_wm

    return (modeled_dni_adjusted - measured_dni) / peak_measurement * 100


def _pooled_rms_error(day_frames, weather_model, base_col_name):
    """Return the RMS of the errors pooled over all usable days, or NaN if none is usable."""
    per_day = [_day_errors_percent(day, weather_model, base_col_name) for day in day_frames]
    usable = [errors for errors in per_day if errors is not None]
    if not usable:
        return np.nan
    return np.sqrt(np.mean(np.concatenate(usable) ** 2))


def _best_by_rms(candidates):
    """Return the (label, rms) pair with the smallest non-NaN rms; (None, inf) if there is none.

    Ties keep the first candidate encountered.
    """
    best_label, best_rms = None, float('inf')
    for label, rms in candidates:
        if not np.isnan(rms) and rms < best_rms:
            best_label, best_rms = label, rms
    return best_label, best_rms


print("=" * 80)
print("WEATHER MODEL COMPARISON - RMS ERROR ANALYSIS")
print("=" * 80)
print("\nCalculating RMS errors for each weather model and DNI model combination...")
print("across the four dates of interest.\n")

# Identify all available weather models from the data columns
weather_models = _find_weather_models(data_filtered.columns)
print(f"Found {len(weather_models)} weather models with both DNI and cloud cover data:")
for wm in weather_models:
    print(f"  - {wm}")
print()

# DNI models to analyze (excluding DQYDJ), each paired with its
# (adjusted column name, adjusted display name)
dni_models_to_analyze = [
    (model, get_adjusted_model_info(model))
    for model, _ in DNI_MODELS_TO_PLOT
    if model != DniModel.DQYDJ
]
dni_model_names = [display_name for _, (_, display_name) in dni_models_to_analyze]
dni_models_list = [dni_model for dni_model, _ in dni_models_to_analyze]

# Slice each date of interest once: the slices do not depend on the models,
# so there is no need to re-filter inside the model loops.
day_frames = [
    data_filtered[data_filtered['date'] == sunniest_dates[month_label]]
    for month_label in months
]

# RMS errors: {weather_model: {dni_model: rms_error}}
rms_results = {
    weather_model: {
        dni_model: _pooled_rms_error(day_frames, weather_model, dni_model.column_name)
        for dni_model in dni_models_list
    }
    for weather_model in weather_models
}

# Create a formatted table
print("RMS Error (%) - Weather Models vs DNI Models")
print("=" * 80)

header = f"{'Weather Model':<30}" + "".join(f"{name:>20}" for name in dni_model_names)
print(header)
print("-" * (30 + 20 * len(dni_model_names)))

for weather_model in weather_models:
    row = f"{weather_model:<30}"
    for dni_model in dni_models_list:
        rms = rms_results[weather_model][dni_model]
        row += f"{'N/A':>20}" if np.isnan(rms) else f"{rms:>20.2f}"
    print(row)

print("=" * 80)

# Find best weather model for each DNI model
print("\nBest Weather Model for Each DNI Model:")
print("-" * 80)
for dni_model, dni_name in zip(dni_models_list, dni_model_names):
    best_weather_model, best_rms = _best_by_rms(
        (weather_model, rms_results[weather_model][dni_model])
        for weather_model in weather_models
    )
    if best_weather_model is not None:
        print(f"  {dni_name:<40} -> {best_weather_model:<25} (RMS: {best_rms:.2f}%)")
    else:
        print(f"  {dni_name:<40} -> No valid data")

# Find best DNI model for each weather model
print("\nBest DNI Model for Each Weather Model:")
print("-" * 80)
for weather_model in weather_models:
    best_dni_model, best_rms = _best_by_rms(
        (dni_name, rms_results[weather_model][dni_model])
        for dni_model, dni_name in zip(dni_models_list, dni_model_names)
    )
    if best_dni_model is not None:
        print(f"  {weather_model:<30} -> {best_dni_model:<40} (RMS: {best_rms:.2f}%)")
    else:
        print(f"  {weather_model:<30} -> No valid data")

print("=" * 80 + "\n")


## Plot DNI Comparison - Separate Subplots by Month

In [None]:
# Draw two 2x2 figures - DNI curves and model errors - with one subplot per
# month, each showing that month's pre-identified sunniest day.
from IPython.display import display


def _hour_of_day(times):
    """Return the fractional hour of day (hour + minute / 60) of a datetime Series."""
    return times.dt.hour + times.dt.minute / 60


# Temporarily disable interactive plotting to prevent the inline backend
# from auto-rendering figures (which can cause double outputs). The previous
# interactive state is restored in the `finally` clause, even if plotting fails.
_prev_interactive_state = plt.isinteractive()
plt.ioff()
try:
    # Create first figure with DNI plots (2x2)
    fig_dni, axes_dni = plt.subplots(2, 2, figsize=(16, 12))
    fig_dni.suptitle('Direct Normal Irradiance: Measured vs Calculated by Month',
                     fontsize=16, fontweight='bold')
    dni_axes = axes_dni.flatten()

    # Create second figure with error plots (2x2)
    fig_error, axes_error = plt.subplots(2, 2, figsize=(16, 12))
    fig_error.suptitle('Model Errors: (Calculated - Measured) / Peak Measurement',
                       fontsize=16, fontweight='bold')
    error_axes = axes_error.flatten()

    # Plot each month's sunniest day in its own subplot
    for idx, month_label in enumerate(months):
        date = sunniest_dates[month_label]
        dni_ax = dni_axes[idx]
        error_ax = error_axes[idx]

        # Get hourly data for this day, in time order
        day_data = data_filtered[data_filtered['date'] == date].sort_values('time')

        if day_data.empty:
            # np.max below would raise on an empty array; the RMS analysis cell
            # skips dates with no rows, so label the panels and move on here too.
            for ax, title in ((dni_ax, f'{month_label} - {date}'),
                              (error_ax, f'{month_label} - Error')):
                ax.set_title(title, fontsize=12, fontweight='bold')
                ax.text(0.5, 0.5, 'No data', transform=ax.transAxes,
                        ha='center', va='center')
            continue

        # Fractional hour for the x-axis
        hours = _hour_of_day(day_data['time'])
        color = month_colors.get(month_label, 'blue')

        # Measured DNI from the configured weather model, as circle markers only (no line)
        measured_dni = day_data[dni_col].values
        dni_ax.plot(hours, measured_dni, linestyle='', marker='o',
                    label='Measured', markersize=6, color=color, alpha=0.8)

        # Plot models based on DNI_MODELS_TO_PLOT configuration
        for model, linestyle in DNI_MODELS_TO_PLOT:
            col_name, display_name = get_adjusted_model_info(model)
            if model == DniModel.DQYDJ:
                # Special handling for DQYDJ data (different source)
                if month_label in dqydj_data:
                    dqydj_day = dqydj_data[month_label]
                    dni_ax.plot(_hour_of_day(dqydj_day['time']), dqydj_day['dni_wm2'],
                                linestyle=linestyle, label=display_name,
                                linewidth=2.5, color=color, alpha=0.9)
            elif col_name in day_data.columns:
                # Calculated DNI from the data columns
                dni_ax.plot(hours, day_data[col_name], linestyle=linestyle,
                            label=display_name, linewidth=2.5, color=color, alpha=0.9)

        # Configure DNI subplot
        dni_ax.set_title(f'{month_label} - {date}', fontsize=12, fontweight='bold')
        dni_ax.set_xlabel('Hour of Day', fontsize=10)
        dni_ax.set_ylabel('DNI (W/m²)', fontsize=10)
        dni_ax.legend(loc='best', fontsize=9)
        dni_ax.grid(True, alpha=0.3)
        dni_ax.set_xlim(0, 24)
        dni_ax.set_ylim(bottom=0)

        # Percentage errors: (calculated - measured) / peak_measurement * 100,
        # where peak_measurement is the peak measured DNI for this day
        peak_measurement = np.max(measured_dni)

        # A zero, negative or NaN peak makes the normalization meaningless, so
        # the error subplot is left untouched in that case.
        if peak_measurement > 0:
            for model, linestyle in DNI_MODELS_TO_PLOT:
                col_name, display_name = get_adjusted_model_info(model)
                if model == DniModel.DQYDJ:
                    # Special handling for DQYDJ data (different source)
                    if month_label in dqydj_data:
                        dqydj_day = dqydj_data[month_label]
                        dqydj_hours = _hour_of_day(dqydj_day['time']).to_numpy()
                        dqydj_dni = dqydj_day['dni_wm2'].values

                        # Match each measured sample to the closest DQYDJ sample,
                        # keeping only matches within 30 minutes
                        dqydj_error_list = []
                        dqydj_hours_matched = []
                        for h, m_dni in zip(hours, measured_dni):
                            idx_closest = np.argmin(np.abs(dqydj_hours - h))
                            if np.abs(dqydj_hours[idx_closest] - h) < 0.5:
                                dqydj_error = ((dqydj_dni[idx_closest] - m_dni) / peak_measurement) * 100
                                dqydj_error_list.append(dqydj_error)
                                dqydj_hours_matched.append(h)

                        if dqydj_error_list:
                            error_ax.plot(dqydj_hours_matched, dqydj_error_list,
                                          linestyle=linestyle, label=display_name,
                                          linewidth=2.5, color=color, alpha=0.9)
                elif col_name in day_data.columns:
                    # Error computed from the data columns
                    model_error = ((day_data[col_name].values - measured_dni) / peak_measurement) * 100
                    error_ax.plot(hours, model_error, linestyle=linestyle,
                                  label=display_name, linewidth=2.5, color=color, alpha=0.9)

            # Add zero reference line
            error_ax.axhline(y=0, color='black', linestyle='-', linewidth=1, alpha=0.3)

            # Configure error subplot
            error_ax.set_title(f'{month_label} - Error', fontsize=12, fontweight='bold')
            error_ax.set_xlabel('Hour of Day', fontsize=10)
            error_ax.set_ylabel('Error (%)', fontsize=10)
            error_ax.legend(loc='best', fontsize=9)
            error_ax.grid(True, alpha=0.3)
            error_ax.set_xlim(0, 24)

    # Ensure nice layout, then explicitly display the DNI figure first,
    # followed by the error figure; close each so it is not rendered twice.
    fig_dni.tight_layout()
    display(fig_dni)
    plt.close(fig_dni)

    fig_error.tight_layout()
    display(fig_error)
    plt.close(fig_error)
finally:
    # Restore interactive plotting state
    if _prev_interactive_state:
        plt.ion()
