# GHI Analysis

This notebook explores the relationship between Direct Normal Irradiance (DNI), Diffuse Horizontal Irradiance (DHI) and Global Horizontal Irradiance (GHI) using the formula: `GHI = DNI * cos(apparent_zenith) + DHI`.

The notebook then tries a Plane-Of-Array (POA) irradiance computation for a zero-tilted solar panel, to check consistency with GHI.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import pvlib


## Configuration

In [None]:
# --- Site and weather source -------------------------------------------------
# PV site identifier; the loader uses it as the CSV filename prefix.
PV_SITE_ID = 24667

# Weather model whose columns are analysed. Available options:
#   best_match, dmi_seamless, gem_seamless, gfs_seamless, icon_seamless,
#   jma_seamless, kma_seamless, knmi_seamless, meteofrance_seamless,
#   metno_seamless, ukmo_seamless
WEATHER_MODEL = 'ukmo_seamless'

# Folder holding the per-site timeseries CSV files (adjust to your checkout).
DATA_DIR = Path('..') / 'data' / 'data-1' / 'timeseries'

# --- Geographic location handed to pvlib -------------------------------------
LATITUDE = 51.8992
LONGITUDE = -2.1288
ALTITUDE = 0

# --- Time alignment ----------------------------------------------------------
# Number of hours added to every timestamp when the data are loaded.
TIME_OFFSET_HOURS = 0

# --- Panel geometry ----------------------------------------------------------
PANEL_TILT = 0       # degrees from horizontal
PANEL_AZIMUTH = 180  # degrees (180 = south-facing)


class Dates:
    """Pre-identified sunniest day for each equinox/solstice month.

    Every attribute is a date string in ``YYYY-MM-DD`` format.
    """

    MARCH = '2023-03-02'
    JUNE = '2023-06-08'
    SEPTEMBER = '2023-09-05'
    DECEMBER = '2022-12-15'


## Load Data

In [None]:
# Find all CSV files for the specified PV site
csv_files = sorted(DATA_DIR.glob(f'{PV_SITE_ID}_*.csv'))
print(f"Found {len(csv_files)} CSV files for site {PV_SITE_ID}")

# Fail early with an actionable message: with no files, pd.concat([]) would
# raise an opaque "No objects to concatenate" ValueError further down.
if not csv_files:
    raise FileNotFoundError(
        f"No CSV files matching '{PV_SITE_ID}_*.csv' found in {DATA_DIR.resolve()}"
    )

# Load every file and combine them into a single frame
dfs = [pd.read_csv(csv_file) for csv_file in csv_files]
data = pd.concat(dfs, ignore_index=True)

# Convert time to datetime
data['time'] = pd.to_datetime(data['time'])

# Apply the configured time offset. The shift is applied to the shared 'time'
# column, so it moves every column of the loaded data, not a subset of them.
data['time'] = data['time'] + pd.Timedelta(hours=TIME_OFFSET_HOURS)

# Sort by time and remove duplicate timestamps (the first occurrence is kept)
data = data.sort_values('time').drop_duplicates(subset='time').reset_index(drop=True)

print(f"\nTotal records: {len(data)}")
print(f"Date range: {data['time'].min()} to {data['time'].max()}")
print(f"Applied time offset: {TIME_OFFSET_HOURS} hour(s)")


## Filter for Sunniest Days

In [None]:
from datetime import datetime

# Season label -> datetime.date of the pre-identified sunniest day
sunniest_dates = {
    label: datetime.strptime(day_string, '%Y-%m-%d').date()
    for label, day_string in (
        ('March', Dates.MARCH),
        ('June', Dates.JUNE),
        ('September', Dates.SEPTEMBER),
        ('December', Dates.DECEMBER),
    )
}

print("Using pre-identified sunniest days:")
for month_label, date in sunniest_dates.items():
    print(f"  {month_label}: {date}")

# Calendar date (time of day dropped) used as the filter key
data['date'] = data['time'].dt.date

# data_filtered holds one independent, time-sorted dataframe per month,
# in the same order as `months`
months = ['March', 'June', 'September', 'December']
data_filtered = [
    data[data['date'] == sunniest_dates[month_label]]
    .copy()
    .sort_values('time')
    .reset_index(drop=True)
    for month_label in months
]

for month_label, day_data in zip(months, data_filtered):
    print(f"  {month_label} ({sunniest_dates[month_label]}): {len(day_data)} records")

print(f"\nTotal records across all dates: {sum(len(df) for df in data_filtered)}")


## Prepare Column Names

In [None]:
# Every weather-model column carries the model name as a suffix
dni_col = f'direct_normal_irradiance_{WEATHER_MODEL}'
cloud_cover_col = f'cloud_cover_{WEATHER_MODEL}'
dhi_col = f'diffuse_radiation_{WEATHER_MODEL}'

# Report which columns the rest of the notebook will read
print(f"\nUsing columns:")
for quantity_label, column_name in (
    ('DNI', dni_col),
    ('Cloud Cover', cloud_cover_col),
    ('DHI', dhi_col),
):
    print(f"  {quantity_label}: {column_name}")



## Calculate DNI and GHI using pvlib Simplified Solis Model

In [None]:
# Calculate using pvlib Simplified Solis model
print("\nCalculating DNI and GHI using pvlib Simplified Solis model...")

# Create location object (reused by the later solar-position cells)
location = pvlib.location.Location(LATITUDE, LONGITUDE, altitude=ALTITUDE, tz='UTC')

# Process each date separately
for day_data in data_filtered:
    # Note: the data have right-labeled hourly intervals, e.g. the
    # 10AM to 11AM interval is labeled 11. The clear-sky model is therefore
    # evaluated in the middle of the interval (10:30), using the same
    # 30-minute shift as the solar-position calculations, so that modelled and
    # measured values refer to the same instant.
    times = day_data['time'] - pd.Timedelta('30min')
    times_utc = pd.DatetimeIndex(times).tz_localize('UTC')

    # Get clear sky irradiance (includes both DNI and GHI)
    clearsky = location.get_clearsky(times_utc, model='simplified_solis')

    # Store positionally: .values drops the tz-aware index of `clearsky`, which
    # would not align with the frame's integer index.
    day_data['dni_modelled'] = clearsky['dni'].values
    day_data['ghi_modelled'] = clearsky['ghi'].values

print("Calculation complete!")

# Show statistics across all dates
all_dni = pd.concat([df['dni_modelled'] for df in data_filtered])
all_ghi = pd.concat([df['ghi_modelled'] for df in data_filtered])
print(f"  Modelled DNI range: {all_dni.min():.2f} to {all_dni.max():.2f} W/m²")
print(f"  Modelled GHI range: {all_ghi.min():.2f} to {all_ghi.max():.2f} W/m²")


## Calculate Solar Position for GHI Calculation

In [None]:
# Solar position supplies the apparent_zenith needed by the GHI formula
print("\nCalculating solar position for GHI calculation...")

# The data have right-labeled hourly intervals (the 10AM to 11AM interval is
# labeled 11), so the sun's position is evaluated at the interval midpoint
# (10:30), i.e. half an hour before each label.
half_interval = pd.Timedelta('30min')

for day_data in data_filtered:
    midpoint_times_utc = pd.DatetimeIndex(day_data['time'] - half_interval).tz_localize('UTC')

    # Keep only apparent_zenith; .values makes the assignment positional
    day_data['apparent_zenith'] = (
        location.get_solarposition(midpoint_times_utc)['apparent_zenith'].values
    )

print("Solar position calculated!")


## Calculate GHI using DNI * cos(apparent_zenith) + DHI

In [None]:
# Calculate GHI using the formula: GHI = DNI * cos(apparent_zenith) + DHI
# This uses measured DNI and DHI from the weather model
print("\nCalculating GHI using formula: GHI = DNI * cos(apparent_zenith) + DHI...")

for day_data in data_filtered:
    # Convert apparent_zenith from degrees to radians for cos calculation
    zenith_radians = np.radians(day_data['apparent_zenith'])

    # Projection of the beam onto the horizontal plane. When the mid-interval
    # sun is below the horizon (zenith > 90°) the cosine is negative; a negative
    # beam contribution is unphysical and would subtract from DHI, so clamp it
    # at zero. pvlib clamps the direct plane-of-array component the same way.
    cos_zenith = np.maximum(np.cos(zenith_radians), 0)

    # Calculate GHI using the formula
    day_data['ghi_calculated'] = day_data[dni_col] * cos_zenith + day_data[dhi_col]

print("GHI calculation complete!")

# Show statistics across all dates
all_ghi_calc = pd.concat([df['ghi_calculated'] for df in data_filtered])
print(f"  Calculated GHI range: {all_ghi_calc.min():.2f} to {all_ghi_calc.max():.2f} W/m²")


## Apply Cloud Cover Adjustments to Modelled DNI and GHI

In [None]:
from dni_models import get_cloud_cover_dni_coefficient


def _pooled(column):
    """Concatenate one column across all per-day frames in data_filtered."""
    return pd.concat([df[column] for df in data_filtered])


print("\nApplying cloud cover adjustments to modelled values...")

# Scale the modelled DNI and GHI of each day by the same cloud-cover coefficient
for day_data in data_filtered:
    cloud_cover_coefficient = get_cloud_cover_dni_coefficient(
        day_data[cloud_cover_col].values
    )
    day_data['dni_modelled_adjusted'] = day_data['dni_modelled'] * cloud_cover_coefficient
    day_data['ghi_modelled_adjusted'] = day_data['ghi_modelled'] * cloud_cover_coefficient

print(f"Cloud cover adjustment complete!")

# Summary statistics pooled over the four days
all_cloud_cover = _pooled(cloud_cover_col)
all_dni_adjusted = _pooled('dni_modelled_adjusted')
all_ghi_adjusted = _pooled('ghi_modelled_adjusted')
print(f"  Cloud cover range: {all_cloud_cover.min():.2f} to {all_cloud_cover.max():.2f}")
print(f"  Adjusted DNI range: {all_dni_adjusted.min():.2f} to {all_dni_adjusted.max():.2f} W/m²")
print(f"  Adjusted GHI range: {all_ghi_adjusted.min():.2f} to {all_ghi_adjusted.max():.2f} W/m²")



## Solar Array Geometry and POA Irradiance

In [None]:
# Section banner followed by the panel geometry taken from the configuration
banner_rule = "=" * 80
print("\n" + banner_rule)
print("SOLAR ARRAY GEOMETRY AND POA IRRADIANCE")
print(banner_rule)

print(f"\nPanel configuration:")
print(f"  Tilt: {PANEL_TILT}°")
print(f"  Azimuth: {PANEL_AZIMUTH}° (180° = south-facing)")


### Calculate Solar Position and POA Irradiance

In [None]:
print("\nCalculating solar position and POA irradiance for each date...")

for day_data in data_filtered:
    # The data have right-labeled hourly intervals (the 10AM to 11AM interval
    # is labeled 11), so the sun's position is taken at the interval midpoint
    # (10:30), i.e. 30 minutes before each label.
    times_utc = pd.DatetimeIndex(
        day_data['time'] - pd.Timedelta('30min')
    ).tz_localize('UTC')
    solar_position = location.get_solarposition(times_utc)

    # Plane-of-array irradiance from the weather model's DNI/DHI and the GHI
    # derived earlier, using pvlib's isotropic sky-diffuse model
    df_poa = pvlib.irradiance.get_total_irradiance(
        surface_tilt=PANEL_TILT,
        surface_azimuth=PANEL_AZIMUTH,
        solar_zenith=solar_position['apparent_zenith'],
        solar_azimuth=solar_position['azimuth'],
        dni=day_data[dni_col].values,
        ghi=day_data['ghi_calculated'].values,
        dhi=day_data[dhi_col].values,
        model='isotropic',
    )

    # Copy the total and its components into the day's frame (positionally)
    day_data['poa_modelled'] = df_poa['poa_global'].values
    day_data['poa_direct'] = df_poa['poa_direct'].values
    day_data['poa_diffuse'] = df_poa['poa_diffuse'].values

print("Solar position and POA irradiance calculated!")

# Summary statistics pooled over all dates
for component_label, column_name in (
    ('total', 'poa_modelled'),
    ('direct', 'poa_direct'),
    ('diffuse', 'poa_diffuse'),
):
    pooled = pd.concat([df[column_name] for df in data_filtered])
    print(f"  POA {component_label} range: {pooled.min():.2f} to {pooled.max():.2f} W/m²")


### POA Irradiance Comparison (2x2)

In [None]:
# One colour per season
month_colors = {
    'March': '#88CC88',  # Spring Equinox - light green
    'June': '#FFD700',  # Summer Solstice - gold
    'September': '#FF8C00',  # Autumn Equinox - orange
    'December': '#4169E1'  # Winter Solstice - blue
}

# Input series are drawn as bare markers, derived series as lines.
marker_style = {'linestyle': '', 'markersize': 6, 'alpha': 0.8}
line_style = {'linewidth': 2.5, 'alpha': 0.9}

# (column, legend label, style) for every curve, in drawing order
series_specs = [
    (dni_col, 'Measured DNI', {**marker_style, 'marker': '^'}),
    (dhi_col, 'Measured DHI', {**marker_style, 'marker': 's'}),
    ('ghi_calculated', 'Calculated GHI (DNI*cos(z)+DHI)', {**marker_style, 'marker': 'o'}),
    ('dni_modelled_adjusted', 'Modelled DNI (adjusted)', {**line_style, 'linestyle': '-'}),
    ('poa_direct', 'Calculated Direct POA', {**line_style, 'linestyle': '--'}),
    ('poa_diffuse', 'Calculated Diffuse POA', {**line_style, 'linestyle': ':'}),
    ('poa_modelled', 'Calculated Total POA', {**line_style, 'linestyle': '-.'}),
]

# Switch interactive mode off while building the figure; the previous state is
# restored at the end of the cell
_prev_interactive_state = plt.isinteractive()
plt.ioff()

# 2x2 grid: one panel per month
fig_poa, axes_poa = plt.subplots(2, 2, figsize=(16, 12))
fig_poa.suptitle(f'POA Irradiance Comparison - Panel Tilt={PANEL_TILT}°, Azimuth={PANEL_AZIMUTH}°',
                 fontsize=16, fontweight='bold')
poa_axes = axes_poa.flatten()

for ax, month_label, day_data in zip(poa_axes, months, data_filtered):
    date = sunniest_dates[month_label]
    color = month_colors.get(month_label, 'blue')

    # Fractional hour of day for the x-axis
    hours = day_data['time'].dt.hour + day_data['time'].dt.minute / 60

    for column_name, legend_label, style in series_specs:
        ax.plot(hours, day_data[column_name], color=color, label=legend_label, **style)

    # Panel cosmetics
    ax.set_title(f'{month_label} - {date}', fontsize=12, fontweight='bold')
    ax.set_xlabel('Hour of Day', fontsize=10)
    ax.set_ylabel('Irradiance (W/m²)', fontsize=10)
    ax.legend(loc='best', fontsize=9)
    ax.grid(True, alpha=0.3)
    ax.set_xlim(0, 24)
    ax.set_ylim(bottom=0)

# Render once, then close the figure
fig_poa.tight_layout()
display(fig_poa)
plt.close(fig_poa)

# Restore interactive plotting state
if _prev_interactive_state:
    plt.ion()

print("\nAnalysis complete!")


The plots above show that when the total POA irradiance is computed for a solar array with zero tilt, it matches the calculated GHI.