# Sunniest Days Analysis - Equinox & Solstice Months

This notebook identifies the sunniest day (lowest daily-mean cloud cover) in each equinox and solstice month (March, June, September, December) and plots that day's cloud cover, direct normal irradiance, and diffuse radiation.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

## Configuration

In [None]:
# PV site ID; the loader globs DATA_DIR for files named '<PV_SITE_ID>_*.csv'
PV_SITE_ID = 24667

# Weather model to analyze; appended as a suffix to build the cloud-cover,
# direct-normal-irradiance and diffuse-radiation column names
WEATHER_MODEL = 'best_match'  # Options: best_match, dmi_seamless, gem_seamless, gfs_seamless, icon_seamless, jma_seamless, kma_seamless, knmi_seamless, meteofrance_seamless, metno_seamless, ukmo_seamless

# Data directory - a relative path, so it is resolved against the current
# working directory; adjust as needed
DATA_DIR = Path('../data/data-1/timeseries')

# Number of sunniest days to identify per month (passed to nsmallest on the
# daily-mean cloud cover)
N_DAYS_PER_MONTH = 1  # Only the top sunniest day

## Load Data

In [None]:
# Find all CSV files for the specified PV site (sorted so the load order is
# reproducible between runs)
csv_files = sorted(DATA_DIR.glob(f'{PV_SITE_ID}_*.csv'))
print(f"Found {len(csv_files)} CSV files for site {PV_SITE_ID}")

# Fail early with an actionable message: pd.concat on an empty list would
# otherwise raise an opaque "No objects to concatenate" error.
if not csv_files:
    raise FileNotFoundError(
        f"No files matching '{PV_SITE_ID}_*.csv' found in {DATA_DIR.resolve()}; "
        "check PV_SITE_ID and DATA_DIR"
    )

# Load every file and stack them into a single frame
dfs = [pd.read_csv(csv_file) for csv_file in csv_files]
data = pd.concat(dfs, ignore_index=True)

# Convert time to datetime
data['time'] = pd.to_datetime(data['time'])

# Sort by time and remove duplicate timestamps. A stable sort keeps rows with
# equal timestamps in load order, so drop_duplicates (keep='first') always
# retains the row from the earliest-loaded file.
data = (
    data.sort_values('time', kind='stable')
    .drop_duplicates(subset='time')
    .reset_index(drop=True)
)

print(f"\nTotal records: {len(data)}")
print(f"Date range: {data['time'].min()} to {data['time'].max()}")

## Extract Date and Add Month Information

In [None]:
# Calendar fields derived from the timestamp column
time_accessor = data['time'].dt
data['date'] = time_accessor.date                  # calendar day, time of day dropped
data['month'] = time_accessor.month                # 1-12
data['month_name'] = time_accessor.strftime('%B')  # full month name
data['year'] = time_accessor.year

# Keep only the equinox and solstice months (March, June, September, December)
equinox_solstice_months = [3, 6, 9, 12]
in_target_month = data['month'].isin(equinox_solstice_months)
data_filtered = data.loc[in_target_month].copy()

# Human-readable label for each retained month
month_names_map = {3: 'March', 6: 'June', 9: 'September', 12: 'December'}
data_filtered['month_label'] = data_filtered['month'].map(month_names_map)

# Report how much data survived the filter and the date span per month
print(f"\nFiltered to equinox/solstice months: {len(data_filtered)} records")
print("\nMonths available in data:")
month_coverage = data_filtered.groupby('month_label')['date'].agg(['min', 'max', 'count'])
print(month_coverage)

## Calculate Daily Cloud Cover

In [None]:
# Column names for the selected weather model
cloud_cover_col = f'cloud_cover_{WEATHER_MODEL}'
dni_col = f'direct_normal_irradiance_{WEATHER_MODEL}'
diffuse_col = f'diffuse_radiation_{WEATHER_MODEL}'

# Check that every required column exists before aggregating
required_cols = [cloud_cover_col, dni_col, diffuse_col]
missing_cols = [col for col in required_cols if col not in data_filtered.columns]
if missing_cols:
    print(f"Warning: Missing columns: {missing_cols}")
    print(f"\nAvailable columns containing '{WEATHER_MODEL}':")
    matching_cols = [col for col in data_filtered.columns if WEATHER_MODEL in col]
    for col in matching_cols:
        print(f"  - {col}")
    # The aggregation below cannot run without these columns, so stop here
    # with a clear message instead of failing later inside .agg().
    raise KeyError(
        f"Required columns missing for model '{WEATHER_MODEL}': {missing_cols}"
    )

print(f"All required columns found for model '{WEATHER_MODEL}'")

# Calculate daily averages: one row per calendar date. 'date' includes the
# year, so days from different years are kept separate; 'month' and
# 'month_label' are carried along as grouping keys so they survive as columns.
daily_data = (
    data_filtered
    .groupby(['date', 'month', 'month_label'])
    .agg({col: 'mean' for col in required_cols})
    .reset_index()
)

print("\nDaily data summary:")
print(daily_data.describe())

## Identify Sunniest Day Per Month (Across All Years)

In [None]:
# Find the N_DAYS_PER_MONTH sunniest days (lowest daily-mean cloud cover) for
# each equinox/solstice month, searching across every year present in the data
sunniest_days = []

for month_label in ('March', 'June', 'September', 'December'):
    month_data = daily_data[daily_data['month_label'] == month_label]

    # Months with no data are skipped rather than contributing empty frames
    if month_data.empty:
        continue

    # nsmallest returns the rows already ordered by ascending cloud cover
    sunniest_days.append(month_data.nsmallest(N_DAYS_PER_MONTH, cloud_cover_col))

# pd.concat([]) raises an opaque "No objects to concatenate"; raise the same
# exception type with a message that explains what is actually missing.
if not sunniest_days:
    raise ValueError(
        "No daily data found for March, June, September or December; "
        "cannot determine the sunniest days"
    )

sunniest_days_df = pd.concat(sunniest_days, ignore_index=True)

print(f"\nSunniest day per equinox/solstice month (across all years):")
print(sunniest_days_df[['date', 'month_label', cloud_cover_col, dni_col, diffuse_col]])

## Plot Sunniest Days

In [None]:
# One figure with three stacked panels: cloud cover, DNI and diffuse radiation
fig, axes = plt.subplots(3, 1, figsize=(14, 12))
fig.suptitle(f'Sunniest Days in Equinox & Solstice Months (Model: {WEATHER_MODEL})', fontsize=16, fontweight='bold')

# Months that actually have a sunniest day, kept in calendar order
months = [
    m for m in ['March', 'June', 'September', 'December']
    if m in sunniest_days_df['month_label'].values
]

# Line colour per month, chosen for seasonal context
month_colors = {
    'March': '#88CC88',      # Spring Equinox - light green
    'June': '#FFD700',       # Summer Solstice - gold
    'September': '#FF8C00',  # Autumn Equinox - orange
    'December': '#4169E1'    # Winter Solstice - blue
}

# (axis, data column, y-axis label, panel title) for each of the three panels
day_panels = [
    (axes[0], cloud_cover_col, 'Cloud Cover (%)', 'Cloud Cover Throughout the Day'),
    (axes[1], dni_col, 'Direct Normal Irradiance (W/m²)', 'Direct Normal Irradiance Throughout the Day'),
    (axes[2], diffuse_col, 'Diffuse Radiation (W/m²)', 'Diffuse Radiation Throughout the Day'),
]

# Draw one line per selected day on every panel
for month_label in months:
    month_sunniest = sunniest_days_df[sunniest_days_df['month_label'] == month_label]

    # One row per selected day (N_DAYS_PER_MONTH rows per month)
    for _, row in month_sunniest.iterrows():
        date = row['date']

        # All records of that calendar day, in time order
        day_data = data_filtered[data_filtered['date'] == date].sort_values('time')

        # Fractional hour of day for the x-axis
        day_times = day_data['time'].dt
        hours = day_times.hour + day_times.minute / 60

        label = f"{month_label}: {date} (avg: {row[cloud_cover_col]:.1f}%)"
        color = month_colors.get(month_label)

        for ax, column, _, _ in day_panels:
            ax.plot(hours, day_data[column], marker='o', label=label, linewidth=2, markersize=4, color=color)

# Shared cosmetics for every panel
for ax, _, y_label, title in day_panels:
    ax.set_ylabel(y_label, fontsize=12)
    ax.set_title(title, fontsize=13, fontweight='bold')
    ax.legend(loc='best', fontsize=9)
    ax.grid(True, alpha=0.3)
    ax.set_xlim(0, 24)

# Only the bottom panel carries the x-axis label
axes[2].set_xlabel('Hour of Day', fontsize=12)

plt.tight_layout()
plt.show()

In [None]:
## Summary Statistics

# Mean / min / max of each variable over the selected days of every month
stats_per_column = {
    column: ['mean', 'min', 'max']
    for column in (cloud_cover_col, dni_col, diffuse_col)
}
summary = sunniest_days_df.groupby('month_label').agg(stats_per_column)
summary = summary.round(2)

# groupby orders the labels alphabetically; put them back in calendar order,
# keeping only the months that are present
month_order = ['March', 'June', 'September', 'December']
present_months = [m for m in month_order if m in summary.index]
summary = summary.reindex(present_months)

print("\nSummary statistics for sunniest days by equinox/solstice month:")
print(summary)

In [None]:
## Compare Months

# One row of three box plots: cloud cover, DNI and diffuse radiation by month
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# Make month_label an ordered categorical so sorting follows the calendar
# rather than the alphabet.
# NOTE: this converts the column on the shared sunniest_days_df in place.
month_order = ['March', 'June', 'September', 'December']
sunniest_days_df['month_label'] = pd.Categorical(sunniest_days_df['month_label'], categories=month_order, ordered=True)
sunniest_days_df_sorted = sunniest_days_df.sort_values('month_label')

# (data column, panel title, y-axis label) for each box plot
month_panels = [
    (cloud_cover_col, 'Cloud Cover', 'Cloud Cover (%)'),
    (dni_col, 'Direct Normal Irradiance', 'DNI (W/m²)'),
    (diffuse_col, 'Diffuse Radiation', 'Diffuse Radiation (W/m²)'),
]

for ax, (column, title, y_label) in zip(axes, month_panels):
    sunniest_days_df_sorted.boxplot(column=column, by='month_label', ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Month')
    ax.set_ylabel(y_label)

# Set the figure title once, after all panels are drawn: each boxplot(by=...)
# call replaces the figure title with its automatic one, so a title set
# before plotting would be lost.
fig.suptitle(f'Comparison of Sunniest Days in Equinox & Solstice Months (Model: {WEATHER_MODEL})', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

In [None]:
# Create comparison plots across quarters
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# The box plots group by 'quarter_label'. When that column is absent, derive
# it from the calendar month (months 1-3 -> Q1, ..., 10-12 -> Q4) on a copy,
# so grouping does not fail with a KeyError and sunniest_days_df itself is
# left untouched.
quarter_df = sunniest_days_df
if 'quarter_label' not in quarter_df.columns:
    quarter_number = (quarter_df['month'] - 1) // 3 + 1
    quarter_df = quarter_df.assign(quarter_label='Q' + quarter_number.astype(str))

# (data column, panel title, y-axis label) for each box plot
quarter_panels = [
    (cloud_cover_col, 'Cloud Cover', 'Cloud Cover (%)'),
    (dni_col, 'Direct Normal Irradiance', 'DNI (W/m²)'),
    (diffuse_col, 'Diffuse Radiation', 'Diffuse Radiation (W/m²)'),
]

for ax, (column, title, y_label) in zip(axes, quarter_panels):
    quarter_df.boxplot(column=column, by='quarter_label', ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Quarter')
    ax.set_ylabel(y_label)

# Set the figure title once, after all panels are drawn: each boxplot(by=...)
# call replaces the figure title with its automatic one, so a title set
# before plotting would be lost.
fig.suptitle(f'Comparison of Sunniest Days Across Quarters (Model: {WEATHER_MODEL})', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()