# Worst 3-Day Correlation Analysis

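This notebook finds the 3-day period with the worst correlation between POA (Plane of Array)
irradiance and power output. It splits the timeline into consecutive, non-overlapping 3-day periods,
computes the correlation within each period, and reports the one where the correlation is poorest.
It then compares that period with the best one and breaks the correlation down by month and by weather code.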


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
from datetime import timedelta

# Set style for better-looking plots.
# NOTE(review): the 'seaborn-v0_8-*' style names depend on the installed Matplotlib
# version - confirm the target environment provides this style.
plt.style.use('seaborn-v0_8-darkgrid')


## Configuration

Set the file path, column names, and optional data filters.


In [None]:
# Path to the CSV file (a relative path, so it is resolved against the current working directory)
CSV_FILE = Path('../../data/data-3/timeseries/61272.csv')

# Column names (adjust these based on your CSV structure)
TIME_COLUMN = 'time'  # Name of the time/datetime column (parsed with pd.to_datetime after loading)
POA_COLUMN = 'plane_of_array_irradiance'  # Name of the POA irradiance column
POWER_COLUMN = 'power'  # Name of the power column
CLOUD_COVER_COLUMN = 'cloud_cover'  # Name of the cloud cover column (percentage; later divided by 100)

# Filtering options (set to None to disable filtering)
# Bounds are inclusive (applied with >= / <=). The filtered columns are not configurable here:
# the filter cell looks for columns literally named 'weather_code' and 'wind_speed_180m'.
FILTER_WEATHER_CODE_MIN = None  # e.g., 0 or None for no minimum
FILTER_WEATHER_CODE_MAX = None  # e.g., 3 or None for no maximum
FILTER_WIND_SPEED_MIN = None  # e.g., 0.0 or None for no minimum
FILTER_WIND_SPEED_MAX = None  # e.g., 10.0 or None for no maximum


## Load Data

Read the CSV file and parse the time column.


In [None]:
# Load the raw time series from disk into `df`.
print(f"Loading data from: {CSV_FILE}")
df = pd.read_csv(CSV_FILE)

# Quick sanity check of what was loaded: preview rows, dimensions, column names.
print("\nFirst few rows of the dataset:", df.head(), sep="\n")
print("\nDataset shape:", df.shape)
print("\nColumn names:", list(df.columns), sep="\n")


In [None]:
# Parse the time column as datetime
df[TIME_COLUMN] = pd.to_datetime(df[TIME_COLUMN])

# Sort chronologically so the later date-based chunking sees ordered data
df = df.sort_values(TIME_COLUMN).reset_index(drop=True)

# Keep an unfiltered copy: the plotting cells draw from this, not from the filtered frame.
# NOTE: this cell rebinds `df`. Re-running it without first re-running the load cell
# would make `df_original` (and `original_count`) reflect the already-filtered data.
df_original = df.copy()

# Record count before filtering, used for the "records after filtering" report below
original_count = len(df)


def _describe_range(lower, upper):
    """Format an inclusive bound pair as '<lower> to <upper>', using 'any' for an open side."""
    return f"{lower if lower is not None else 'any'} to {upper if upper is not None else 'any'}"


def _apply_range_filter(frame, column, lower, upper, range_text):
    """Return the rows of `frame` whose `column` value lies within the inclusive bounds.

    A bound of None is not applied. When `column` is missing, a warning is printed
    and `frame` is returned unchanged. Rows whose value is NaN fail the comparison,
    so they are dropped whenever a bound is applied.
    """
    if column not in frame.columns:
        print(f"  WARNING: '{column}' column not found in data")
        return frame
    if lower is not None:
        frame = frame[frame[column] >= lower]
    if upper is not None:
        frame = frame[frame[column] <= upper]
    print(f"  Filtered by {column}: {range_text}")
    return frame


# A filter is active when at least one of its two bounds is set
_weather_filter_on = FILTER_WEATHER_CODE_MIN is not None or FILTER_WEATHER_CODE_MAX is not None
_wind_filter_on = FILTER_WIND_SPEED_MIN is not None or FILTER_WIND_SPEED_MAX is not None

# Build the filter description; the two *_range strings stay "" when their filter is disabled
filter_desc = []
weather_code_range = ""
wind_range = ""

if _weather_filter_on:
    weather_code_range = _describe_range(FILTER_WEATHER_CODE_MIN, FILTER_WEATHER_CODE_MAX)
    filter_desc.append(f"weather_code: {weather_code_range}")

if _wind_filter_on:
    wind_range = _describe_range(FILTER_WIND_SPEED_MIN, FILTER_WIND_SPEED_MAX)
    filter_desc.append(f"wind_speed_180m: {wind_range}")

if filter_desc:
    print(f"\nApplying filters: {', '.join(filter_desc)}")
    print("  (Note: Filters applied for correlation calculations only; plots will show all data)")

    if _weather_filter_on:
        df = _apply_range_filter(df, 'weather_code',
                                 FILTER_WEATHER_CODE_MIN, FILTER_WEATHER_CODE_MAX,
                                 weather_code_range)

    if _wind_filter_on:
        df = _apply_range_filter(df, 'wind_speed_180m',
                                 FILTER_WIND_SPEED_MIN, FILTER_WIND_SPEED_MAX,
                                 wind_range)

    df = df.reset_index(drop=True)

    # Guard the percentage against an empty input file (original_count == 0)
    kept_pct = len(df) / original_count * 100 if original_count else 0.0
    print(f"  Records after filtering: {len(df)}/{original_count} ({kept_pct:.1f}%)")
else:
    print("\nNo filters applied")

print(f"\nData time range: {df[TIME_COLUMN].min()} to {df[TIME_COLUMN].max()}")
print(f"Total records: {len(df)}")


## Find Worst 3-Day Correlation Period

Chunk the timeline into 3-day periods and identify the period with the worst correlation
between POA irradiance and power.


In [None]:
# Minimum number of samples a period needs (before and after cleaning) to be scored
MIN_POINTS_PER_PERIOD = 10

# Calendar date of each record, used to assign rows to 3-day periods
df['date'] = df[TIME_COLUMN].dt.date

# Get the date range
min_date = df['date'].min()
max_date = df['date'].max()

# Build consecutive, non-overlapping periods as (start, end) dates, both inclusive.
# The final period is clipped to max_date, so it may span fewer than 3 days.
three_day_periods = []
current_date = min_date

while current_date <= max_date:
    end_date = min(current_date + timedelta(days=2), max_date)  # 3 consecutive days
    three_day_periods.append((current_date, end_date))
    current_date = current_date + timedelta(days=3)  # Move to next 3-day period

print(f"Total 3-day periods to analyze: {len(three_day_periods)}")

# Calculate correlation for each 3-day period
correlations = []
skipped_undefined = 0  # periods whose correlation came out as NaN

for start, end in three_day_periods:
    period_data = df[(df['date'] >= start) & (df['date'] <= end)]
    if len(period_data) < MIN_POINTS_PER_PERIOD:
        continue

    # Keep only rows where both POA and power are positive; `> 0` is False for NaN,
    # so rows with missing values are dropped as well
    period_clean = period_data[(period_data[POA_COLUMN] > 0) &
                               (period_data[POWER_COLUMN] > 0)]
    if len(period_clean) < MIN_POINTS_PER_PERIOD:
        continue

    corr = period_clean[POA_COLUMN].corr(period_clean[POWER_COLUMN])

    # The correlation is NaN when POA or power is constant over the period. NaN sorts
    # last (so it would be picked as the "best" period) and cannot be histogrammed,
    # so such periods are excluded from the ranking.
    if pd.isna(corr):
        skipped_undefined += 1
        continue

    correlations.append({
        'start_date': start,
        'end_date': end,
        'correlation': corr,
        'num_points': len(period_clean),
        'num_total_points': len(period_data)
    })

if skipped_undefined:
    print(f"Skipped {skipped_undefined} period(s) with an undefined (NaN) correlation")

# Create DataFrame of correlations; explicit columns keep the schema even when empty
corr_df = pd.DataFrame(
    correlations,
    columns=['start_date', 'end_date', 'correlation', 'num_points', 'num_total_points'],
)

# Fail with a clear message instead of an opaque KeyError / IndexError further down
if corr_df.empty:
    raise ValueError(
        f"No 3-day period has at least {MIN_POINTS_PER_PERIOD} rows with positive "
        f"'{POA_COLUMN}' and '{POWER_COLUMN}' and a defined correlation"
    )

# Ascending sort: worst correlation first, best last
corr_df = corr_df.sort_values('correlation')

print("\n" + "="*70)
print("WORST 10 THREE-DAY PERIODS (by correlation):")
print("="*70)
print(corr_df.head(10).to_string(index=False))

print("\n" + "="*70)
print("BEST 10 THREE-DAY PERIODS (by correlation):")
print("="*70)
print(corr_df.tail(10).to_string(index=False))

# Store the worst period for later use
worst_period = corr_df.iloc[0]
print("\n" + "="*70)
print("WORST CORRELATION PERIOD:")
print("="*70)
print(f"Date Range: {worst_period['start_date']} to {worst_period['end_date']}")
print(f"Correlation: {worst_period['correlation']:.4f}")
print(f"Data Points: {worst_period['num_points']} (out of {worst_period['num_total_points']} total)")
print("="*70)


## Correlation Distribution

Visualize the distribution of correlations across all 3-day periods.


In [None]:
# Temporarily disable interactive plotting; the figure is rendered explicitly with display() below
_prev_interactive_state = plt.isinteractive()
plt.ioff()

try:
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

    # Left panel: histogram of the per-period correlations, with the worst value marked
    ax1.hist(corr_df['correlation'], bins=30, color='tab:blue', alpha=0.7, edgecolor='black')
    ax1.axvline(worst_period['correlation'], color='red', linestyle='--', linewidth=2,
                label=f'Worst: {worst_period["correlation"]:.4f}')
    ax1.set_xlabel('Correlation Coefficient', fontsize=12)
    ax1.set_ylabel('Frequency', fontsize=12)
    ax1.set_title('Distribution of 3-Day Period Correlations', fontsize=13, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Right panel: correlations in chronological order (x is the period's position, not a date)
    corr_df_sorted_by_date = corr_df.sort_values('start_date')
    ax2.plot(range(len(corr_df_sorted_by_date)), corr_df_sorted_by_date['correlation'],
             marker='o', markersize=4, linewidth=1, alpha=0.7)

    # Locate the worst period by its row label. `worst_period` is a row taken from
    # `corr_df`, so its `.name` is that row's index label. Matching on the float
    # correlation value instead could select a different period on ties and fails for NaN.
    worst_idx = worst_period.name
    worst_pos = corr_df_sorted_by_date.index.get_loc(worst_idx)
    ax2.scatter([worst_pos], [worst_period['correlation']], color='red', s=100, zorder=5,
                label='Worst Period')
    ax2.set_xlabel('Period Index (chronological)', fontsize=12)
    ax2.set_ylabel('Correlation Coefficient', fontsize=12)
    ax2.set_title('Correlation Over Time', fontsize=13, fontweight='bold')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    display(fig)
    plt.close(fig)
finally:
    # Restore interactive plotting state, even if plotting raised
    if _prev_interactive_state:
        plt.ion()


## Visualize Worst 3-Day Period

Plot the worst correlation period to understand what's happening.


In [None]:
# Get data for the worst period (use ORIGINAL unfiltered data for plotting)
worst_start = pd.to_datetime(worst_period['start_date'])
worst_end = pd.to_datetime(worst_period['end_date']) + timedelta(days=1)  # exclusive end, kept for reference

# Select rows by calendar date with both ends inclusive - the same rule used when the
# periods were scored. For timezone-naive data this picks exactly the rows in
# [worst_start, worst_end); unlike comparing against tz-naive timestamps, it also
# works when the time column is timezone-aware.
_worst_record_dates = df_original[TIME_COLUMN].dt.date
worst_data = df_original[(_worst_record_dates >= worst_period['start_date']) &
                         (_worst_record_dates <= worst_period['end_date'])].copy()

# Cloud-free fraction derived from the cloud cover percentage.
# NOTE(review): labelled "Clear Sky Index", but it is computed purely as 1 - cloud_cover/100.
worst_data['clear_sky_index'] = 1 - (worst_data[CLOUD_COVER_COLUMN] / 100)

# Scale POA and power by their peak within the period; fall back to 0 when the peak is not positive
_worst_poa_peak = worst_data[POA_COLUMN].max()
_worst_power_peak = worst_data[POWER_COLUMN].max()
worst_data['poa_normalized'] = worst_data[POA_COLUMN] / _worst_poa_peak if _worst_poa_peak > 0 else 0
worst_data['power_normalized'] = worst_data[POWER_COLUMN] / _worst_power_peak if _worst_power_peak > 0 else 0

# Rows with positive POA and power, used for the scatter plot here and for the summary cell later
clean_worst = worst_data[(worst_data[POA_COLUMN] > 0) & (worst_data[POWER_COLUMN] > 0)]

# Temporarily disable interactive plotting; the figure is rendered explicitly with display() below
_prev_interactive_state = plt.isinteractive()
plt.ioff()

try:
    # Create figure with two subplots
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))

    # Plot 1: Time series
    ax1.plot(worst_data[TIME_COLUMN], worst_data['poa_normalized'],
             color='tab:orange', linewidth=1.5, label='POA Irradiance (normalized)', alpha=0.8)
    ax1.plot(worst_data[TIME_COLUMN], worst_data['power_normalized'],
             color='tab:blue', linewidth=1.5, label='Power (normalized)', alpha=0.8)
    ax1.plot(worst_data[TIME_COLUMN], worst_data['clear_sky_index'],
             color='gray', linewidth=1.5, linestyle=':', label='Clear Sky Index', alpha=0.8)

    ax1.set_xlabel('Time', fontsize=12)
    ax1.set_ylabel('Normalized Value', fontsize=12)
    ax1.set_ylim([0, 1.05])
    ax1.grid(True, alpha=0.3)
    ax1.set_title(f'WORST 3-Day Period: {worst_period["start_date"]} to {worst_period["end_date"]}\n' +
                  f'Correlation: {worst_period["correlation"]:.4f}',
                  fontsize=14, fontweight='bold', pad=20)
    ax1.legend(loc='upper left', fontsize=10)
    plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45, ha='right')

    # Plot 2: Scatter plot
    ax2.scatter(clean_worst[POA_COLUMN], clean_worst[POWER_COLUMN],
                alpha=0.5, s=20, c='tab:red')

    ax2.set_xlabel('POA Irradiance (W/m²)', fontsize=12)
    ax2.set_ylabel('Power (W)', fontsize=12)
    ax2.set_title('POA vs Power Scatter Plot', fontsize=12, fontweight='bold')
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    display(fig)
    plt.close(fig)
finally:
    # Restore interactive plotting state, even if plotting raised
    if _prev_interactive_state:
        plt.ion()


## Compare with Best 3-Day Period

For comparison, let's also visualize the best correlation period.


In [None]:
# Get the best period. `corr_df` is sorted ascending and NaN correlations sort last,
# so rows with an undefined correlation are dropped before taking the final row.
best_period = corr_df.dropna(subset=['correlation']).iloc[-1]

print("="*70)
print("BEST CORRELATION PERIOD:")
print("="*70)
print(f"Date Range: {best_period['start_date']} to {best_period['end_date']}")
print(f"Correlation: {best_period['correlation']:.4f}")
print(f"Data Points: {best_period['num_points']} (out of {best_period['num_total_points']} total)")
print("="*70)

# Get data for the best period (use ORIGINAL unfiltered data for plotting)
best_start = pd.to_datetime(best_period['start_date'])
best_end = pd.to_datetime(best_period['end_date']) + timedelta(days=1)  # exclusive end, kept for reference

# Select rows by calendar date with both ends inclusive - the same rule used when the
# periods were scored. For timezone-naive data this picks exactly the rows in
# [best_start, best_end); unlike comparing against tz-naive timestamps, it also
# works when the time column is timezone-aware.
_best_record_dates = df_original[TIME_COLUMN].dt.date
best_data = df_original[(_best_record_dates >= best_period['start_date']) &
                        (_best_record_dates <= best_period['end_date'])].copy()

# Cloud-free fraction derived from the cloud cover percentage.
# NOTE(review): labelled "Clear Sky Index", but it is computed purely as 1 - cloud_cover/100.
best_data['clear_sky_index'] = 1 - (best_data[CLOUD_COVER_COLUMN] / 100)

# Scale POA and power by their peak within the period; fall back to 0 when the peak is not positive
_best_poa_peak = best_data[POA_COLUMN].max()
_best_power_peak = best_data[POWER_COLUMN].max()
best_data['poa_normalized'] = best_data[POA_COLUMN] / _best_poa_peak if _best_poa_peak > 0 else 0
best_data['power_normalized'] = best_data[POWER_COLUMN] / _best_power_peak if _best_power_peak > 0 else 0

# Rows with positive POA and power, used for the scatter plot here and for the summary cell later
clean_best = best_data[(best_data[POA_COLUMN] > 0) & (best_data[POWER_COLUMN] > 0)]

# Temporarily disable interactive plotting; the figure is rendered explicitly with display() below
_prev_interactive_state = plt.isinteractive()
plt.ioff()

try:
    # Create figure with two subplots
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))

    # Plot 1: Time series
    ax1.plot(best_data[TIME_COLUMN], best_data['poa_normalized'],
             color='tab:orange', linewidth=1.5, label='POA Irradiance (normalized)', alpha=0.8)
    ax1.plot(best_data[TIME_COLUMN], best_data['power_normalized'],
             color='tab:blue', linewidth=1.5, label='Power (normalized)', alpha=0.8)
    ax1.plot(best_data[TIME_COLUMN], best_data['clear_sky_index'],
             color='gray', linewidth=1.5, linestyle=':', label='Clear Sky Index', alpha=0.8)

    ax1.set_xlabel('Time', fontsize=12)
    ax1.set_ylabel('Normalized Value', fontsize=12)
    ax1.set_ylim([0, 1.05])
    ax1.grid(True, alpha=0.3)
    ax1.set_title(f'BEST 3-Day Period: {best_period["start_date"]} to {best_period["end_date"]}\n' +
                  f'Correlation: {best_period["correlation"]:.4f}',
                  fontsize=14, fontweight='bold', pad=20)
    ax1.legend(loc='upper left', fontsize=10)
    plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45, ha='right')

    # Plot 2: Scatter plot
    ax2.scatter(clean_best[POA_COLUMN], clean_best[POWER_COLUMN],
                alpha=0.5, s=20, c='tab:green')

    ax2.set_xlabel('POA Irradiance (W/m²)', fontsize=12)
    ax2.set_ylabel('Power (W)', fontsize=12)
    ax2.set_title('POA vs Power Scatter Plot', fontsize=12, fontweight='bold')
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    display(fig)
    plt.close(fig)
finally:
    # Restore interactive plotting state, even if plotting raised
    if _prev_interactive_state:
        plt.ion()


## Monthly Correlation Analysis

Analyze the correlation between POA and power for each month of the year.


In [None]:
# Tag every record with its calendar month (number and full name)
df['month'] = df[TIME_COLUMN].dt.month
df['month_name'] = df[TIME_COLUMN].dt.strftime('%B')

# One record per month that has enough usable samples
monthly_correlations = []

for month in range(1, 13):
    month_data = df[df['month'] == month]
    if len(month_data) < 10:
        continue  # too few samples in this month

    # Keep rows where both POA and power are positive (NaN fails `> 0`, so it is dropped too)
    month_clean = month_data[(month_data[POA_COLUMN] > 0) &
                             (month_data[POWER_COLUMN] > 0)].copy()
    if len(month_clean) < 10:
        continue  # too few usable samples after cleaning

    corr = month_clean[POA_COLUMN].corr(month_clean[POWER_COLUMN])
    month_name = month_data['month_name'].iloc[0]
    monthly_correlations.append({
        'month': month,
        'month_name': month_name,
        'correlation': corr,
        'num_points': len(month_clean),
        'num_total_points': len(month_data)
    })

# Collect the per-month records into a table
monthly_corr_df = pd.DataFrame(monthly_correlations)

print("="*70)
print("CORRELATION BY MONTH:")
print("="*70)
print(monthly_corr_df.to_string(index=False))
print("="*70)

# Temporarily disable interactive plotting; the figure is rendered explicitly with display() below
_prev_interactive_state = plt.isinteractive()
plt.ioff()

# Bar chart with one bar per month
fig, ax = plt.subplots(figsize=(14, 6))

# Colour by band: blue at or above 0.9, orange at or above 0.8, red otherwise
colors = []
for corr in monthly_corr_df['correlation']:
    if corr >= 0.9:
        colors.append('tab:blue')
    elif corr >= 0.8:
        colors.append('tab:orange')
    else:
        colors.append('tab:red')

bars = ax.bar(monthly_corr_df['month_name'], monthly_corr_df['correlation'],
              color=colors, alpha=0.7, edgecolor='black', linewidth=1.5)

# Write each correlation value just above its bar
for bar, corr in zip(bars, monthly_corr_df['correlation']):
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width()/2., height,
            f'{corr:.4f}',
            ha='center', va='bottom', fontsize=10, fontweight='bold')

ax.set_xlabel('Month', fontsize=12, fontweight='bold')
ax.set_ylabel('Correlation Coefficient', fontsize=12, fontweight='bold')
ax.set_title('POA vs Power Correlation by Month', fontsize=14, fontweight='bold', pad=20)
ax.set_ylim([0, 1.05])
ax.grid(True, alpha=0.3, axis='y')
plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha='right')

# Reference lines at the two colour-band thresholds
ax.axhline(y=0.9, color='green', linestyle='--', linewidth=1.5, alpha=0.5, label='Good correlation (0.9)')
ax.axhline(y=0.8, color='orange', linestyle='--', linewidth=1.5, alpha=0.5, label='Moderate correlation (0.8)')
ax.legend(loc='lower right')

plt.tight_layout()
display(fig)
plt.close(fig)

# Restore interactive plotting state
if _prev_interactive_state:
    plt.ion()

print(f"\nBest month for correlation: {monthly_corr_df.loc[monthly_corr_df['correlation'].idxmax(), 'month_name']} "
      f"({monthly_corr_df['correlation'].max():.4f})")
print(f"Worst month for correlation: {monthly_corr_df.loc[monthly_corr_df['correlation'].idxmin(), 'month_name']} "
      f"({monthly_corr_df['correlation'].min():.4f})")


## Correlation by Weather Code

Analyze the correlation between POA and power for each weather code.


In [None]:
# WMO weather interpretation codes (WW), mapping numeric code -> short description
WMO_WEATHER_CODES = {
    # Sky condition
    0: "Clear sky",
    1: "Mainly clear",
    2: "Partly cloudy",
    3: "Overcast",
    # Fog
    45: "Fog",
    48: "Depositing rime fog",
    # Drizzle
    51: "Drizzle: Light",
    53: "Drizzle: Moderate",
    55: "Drizzle: Dense",
    56: "Freezing Drizzle: Light",
    57: "Freezing Drizzle: Dense",
    # Rain
    61: "Rain: Slight",
    63: "Rain: Moderate",
    65: "Rain: Heavy",
    66: "Freezing Rain: Light",
    67: "Freezing Rain: Heavy",
    # Snow
    71: "Snow fall: Slight",
    73: "Snow fall: Moderate",
    75: "Snow fall: Heavy",
    77: "Snow grains",
    # Showers
    80: "Rain showers: Slight",
    81: "Rain showers: Moderate",
    82: "Rain showers: Violent",
    85: "Snow showers: Slight",
    86: "Snow showers: Heavy",
    # Thunderstorms
    95: "Thunderstorm: Slight or moderate",
    96: "Thunderstorm with slight hail",
    99: "Thunderstorm with heavy hail",
}


def get_weather_description(code):
    """Return the description for a WMO weather code.

    `code` is converted with int() before the lookup, so float codes such as 3.0
    are accepted (fractional parts are truncated). Codes missing from
    WMO_WEATHER_CODES yield "Unknown (<code>)".
    """
    code_int = int(code)
    if code_int in WMO_WEATHER_CODES:
        return WMO_WEATHER_CODES[code_int]
    return f"Unknown ({code_int})"

# The analysis only runs when the data has a 'weather_code' column
if 'weather_code' in df.columns:
    # One record per weather code that has enough usable samples
    weather_code_correlations = []

    # Distinct codes present in the data (missing values excluded), ascending
    weather_codes = sorted(df['weather_code'].dropna().unique())

    for code in weather_codes:
        code_data = df[df['weather_code'] == code]
        if len(code_data) < 10:
            continue  # too few samples for this code

        # Keep rows where both POA and power are positive (NaN fails `> 0`, so it is dropped too)
        code_clean = code_data[(code_data[POA_COLUMN] > 0) &
                               (code_data[POWER_COLUMN] > 0)].copy()
        if len(code_clean) < 10:
            continue  # too few usable samples after cleaning

        corr = code_clean[POA_COLUMN].corr(code_clean[POWER_COLUMN])
        weather_code_correlations.append({
            'weather_code': int(code),
            'description': get_weather_description(code),
            'correlation': corr,
            'num_points': len(code_clean),
            'num_total_points': len(code_data)
        })

    # Create DataFrame of weather code correlations
    weather_corr_df = pd.DataFrame(weather_code_correlations)

    print("="*70)
    print("CORRELATION BY WEATHER CODE:")
    print("="*70)
    print(weather_corr_df.to_string(index=False))
    print("="*70)

    if weather_corr_df.empty:
        # Without this guard the plotting code fails with KeyError: 'correlation'
        print("No weather code has at least 10 usable data points; skipping plot")
    else:
        # Temporarily disable interactive plotting; the figure is rendered explicitly with display() below
        _prev_interactive_state = plt.isinteractive()
        plt.ioff()

        try:
            # Create bar plot of weather code correlations
            fig, ax = plt.subplots(figsize=(16, 7))

            # Colour by band: blue at or above 0.9, orange at or above 0.8, red otherwise
            colors = ['tab:blue' if corr >= 0.9 else 'tab:orange' if corr >= 0.8 else 'tab:red'
                      for corr in weather_corr_df['correlation']]

            # Two-line tick labels: numeric code on top, description below
            x_labels = [f"{code}\n{description}"
                        for code, description in zip(weather_corr_df['weather_code'],
                                                     weather_corr_df['description'])]

            bars = ax.bar(range(len(weather_corr_df)), weather_corr_df['correlation'],
                          color=colors, alpha=0.7, edgecolor='black', linewidth=1.5)

            # Set x-axis labels
            ax.set_xticks(range(len(weather_corr_df)))
            ax.set_xticklabels(x_labels, fontsize=9)

            # Write each correlation value just above its bar
            for bar, corr in zip(bars, weather_corr_df['correlation']):
                height = bar.get_height()
                ax.text(bar.get_x() + bar.get_width()/2., height,
                        f'{corr:.4f}',
                        ha='center', va='bottom', fontsize=9, fontweight='bold')

            ax.set_xlabel('Weather Code', fontsize=12, fontweight='bold')
            ax.set_ylabel('Correlation Coefficient', fontsize=12, fontweight='bold')
            ax.set_title('POA vs Power Correlation by Weather Code', fontsize=14, fontweight='bold', pad=20)
            ax.set_ylim([0, 1.05])
            ax.grid(True, alpha=0.3, axis='y')

            # Reference lines at the two colour-band thresholds
            ax.axhline(y=0.9, color='green', linestyle='--', linewidth=1.5, alpha=0.5, label='Good correlation (0.9)')
            ax.axhline(y=0.8, color='orange', linestyle='--', linewidth=1.5, alpha=0.5, label='Moderate correlation (0.8)')
            ax.legend(loc='lower right')

            plt.tight_layout()
            display(fig)
            plt.close(fig)
        finally:
            # Restore interactive plotting state, even if plotting raised
            if _prev_interactive_state:
                plt.ion()

        best_weather = weather_corr_df.loc[weather_corr_df['correlation'].idxmax()]
        worst_weather = weather_corr_df.loc[weather_corr_df['correlation'].idxmin()]

        print(f"\nBest weather code for correlation: {best_weather['weather_code']} - {best_weather['description']} "
              f"({best_weather['correlation']:.4f})")
        print(f"Worst weather code for correlation: {worst_weather['weather_code']} - {worst_weather['description']} "
              f"({worst_weather['correlation']:.4f})")
else:
    print("WARNING: 'weather_code' column not found in data")


## Summary Statistics

Compare key statistics between the worst and best periods.


In [None]:
# Assemble the whole report first, then emit it with a single print.
# The POA / power statistics come from clean_worst / clean_best (rows with positive POA
# and power) and the clear-sky mean from worst_data / best_data.
# NOTE(review): those frames are built from the unfiltered data, while the correlations
# were computed on the filtered `df`; the two can differ when filters are active.
summary_lines = [
    "="*70,
    "COMPARISON: WORST vs BEST PERIODS",
    "="*70,

    f"\nWORST PERIOD ({worst_period['start_date']} to {worst_period['end_date']}):",
    f"  Correlation: {worst_period['correlation']:.4f}",
    f"  POA Mean: {clean_worst[POA_COLUMN].mean():.2f} W/m²",
    f"  POA Std: {clean_worst[POA_COLUMN].std():.2f} W/m²",
    f"  Power Mean: {clean_worst[POWER_COLUMN].mean():.2f} W",
    f"  Power Std: {clean_worst[POWER_COLUMN].std():.2f} W",
    f"  Clear Sky Index Mean: {worst_data['clear_sky_index'].mean():.3f}",

    f"\nBEST PERIOD ({best_period['start_date']} to {best_period['end_date']}):",
    f"  Correlation: {best_period['correlation']:.4f}",
    f"  POA Mean: {clean_best[POA_COLUMN].mean():.2f} W/m²",
    f"  POA Std: {clean_best[POA_COLUMN].std():.2f} W/m²",
    f"  Power Mean: {clean_best[POWER_COLUMN].mean():.2f} W",
    f"  Power Std: {clean_best[POWER_COLUMN].std():.2f} W",
    f"  Clear Sky Index Mean: {best_data['clear_sky_index'].mean():.3f}",

    "\n" + "="*70,
]
print("\n".join(summary_lines))
