# POA Irradiance and Power Plotting

This notebook reads a CSV file containing POA (Plane of Array) irradiance and power data,
and plots them together over a specified time interval.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np

# Set style for better-looking plots (applies to every figure created afterwards).
# NOTE(review): the 'seaborn-v0_8-*' style names presumably require
# matplotlib >= 3.6 — confirm against the environment this notebook runs in.
plt.style.use('seaborn-v0_8-darkgrid')


## Configuration

Set the file path and parameters for the plot.


In [None]:
# --- Input file ---------------------------------------------------------
# Time-series CSV to analyse (relative path)
CSV_FILE = Path('../../data') / 'data-3' / 'timeseries' / '61272.csv'

# --- Time window --------------------------------------------------------
# Bounds of the interval to analyse; set a bound to None to leave it open
# (both None = use all data).
# Alternative windows kept for convenience:
#   START_DATE, END_DATE = None, None   # e.g., '2023-06-01', '2023-06-07'
#   START_DATE, END_DATE = '2025-06-14', '2025-06-18'
START_DATE, END_DATE = '2024-12-09', '2024-12-15'

# --- CSV column names (adjust to match the file) -------------------------
TIME_COLUMN = 'time'                      # timestamp of each record
POA_COLUMN = 'plane_of_array_irradiance'  # POA irradiance
POWER_COLUMN = 'power'                    # power
CLOUD_COVER_COLUMN = 'cloud_cover'        # cloud cover, as a percentage

# --- Optional value filters (None disables that bound) --------------------
# e.g., 0 / 3 for the weather-code bounds, 0.0 / 10.0 for the wind-speed bounds
FILTER_WEATHER_CODE_MIN, FILTER_WEATHER_CODE_MAX = None, None
FILTER_WIND_SPEED_MIN, FILTER_WIND_SPEED_MAX = None, None


## Load Data

Read the CSV file and parse the time column.


In [None]:
# Load the raw CSV into a DataFrame (the time column is parsed in a later cell)
print(f"Loading data from: {CSV_FILE}")
df = pd.read_csv(CSV_FILE)

# Quick inspection: leading rows, dimensions, and the available column names
print("\nFirst few rows of the dataset:", df.head(), sep="\n")
print(f"\nDataset shape: {df.shape}")
print("\nColumn names:", df.columns.tolist(), sep="\n")


In [None]:
# Parse the time column as datetime and order rows chronologically for plotting
df[TIME_COLUMN] = pd.to_datetime(df[TIME_COLUMN])
df = df.sort_values(TIME_COLUMN).reset_index(drop=True)

# Snapshot taken before any value filter is applied: later cells plot from
# `df_original` and compute correlations from the filtered `df`.
df_original = df.copy()
original_count = len(df)


def _range_text(lower, upper):
    """Return 'LOWER to UPPER', writing 'any' for a bound that is None."""
    lower_text = lower if lower is not None else 'any'
    upper_text = upper if upper is not None else 'any'
    return f"{lower_text} to {upper_text}"


def _apply_range_filter(frame, column, lower, upper, range_text):
    """Keep the rows of *frame* whose *column* value lies within [lower, upper].

    A bound that is None is not applied. When *column* is missing from
    *frame*, a warning is printed and *frame* is returned unchanged.
    """
    if column not in frame.columns:
        print(f"  WARNING: '{column}' column not found in data")
        return frame
    if lower is not None:
        frame = frame[frame[column] >= lower]
    if upper is not None:
        frame = frame[frame[column] <= upper]
    print(f"  Filtered by {column}: {range_text}")
    return frame


# (column, lower bound, upper bound) for each supported value filter, listed
# in the order they are reported and applied.
_value_filters = [
    ('weather_code', FILTER_WEATHER_CODE_MIN, FILTER_WEATHER_CODE_MAX),
    ('wind_speed_180m', FILTER_WIND_SPEED_MIN, FILTER_WIND_SPEED_MAX),
]
# A filter is active as soon as at least one of its bounds is set
_active_filters = [
    (column, lower, upper, _range_text(lower, upper))
    for column, lower, upper in _value_filters
    if lower is not None or upper is not None
]

# Human-readable descriptions; the two *_range names stay defined (empty when
# the filter is inactive) because they are initialised here for later use.
filter_desc = [f"{column}: {text}" for column, _, _, text in _active_filters]
_range_by_column = {column: text for column, _, _, text in _active_filters}
weather_code_range = _range_by_column.get('weather_code', "")
wind_range = _range_by_column.get('wind_speed_180m', "")

if _active_filters:
    print(f"\nApplying filters: {', '.join(filter_desc)}")
    print("  (Note: Filters applied for correlation calculations only; plots will show all data)")

    for _column, _lower, _upper, _text in _active_filters:
        df = _apply_range_filter(df, _column, _lower, _upper, _text)

    df = df.reset_index(drop=True)
    # An empty input file would otherwise raise ZeroDivisionError here
    _kept_pct = len(df) / original_count * 100 if original_count else 0.0
    print(f"  Records after filtering: {len(df)}/{original_count} ({_kept_pct:.1f}%)")
else:
    print("\nNo filters applied")

print(f"\nData time range: {df[TIME_COLUMN].min()} to {df[TIME_COLUMN].max()}")
print(f"Total records: {len(df)}")


## Filter Data by Time Interval

Optionally filter the data to a specific time range.


In [None]:
# Restrict both datasets to the configured time window.
# Plots use the ORIGINAL unfiltered data; correlations use the value-filtered data.


def _select_time_window(frame, start, end):
    """Return a copy of the rows of *frame* with start <= frame[TIME_COLUMN] <= end.

    A bound that is None leaves that side of the window open.
    """
    timestamps = frame[TIME_COLUMN]
    # Build the mask on the frame's own index so boolean indexing stays aligned
    # even when the frame does not carry a default 0..n-1 index.
    keep = pd.Series(True, index=frame.index)
    if start is not None:
        keep &= timestamps >= start
    if end is not None:
        keep &= timestamps <= end
    return frame[keep].copy()


if START_DATE is not None or END_DATE is not None:
    start_dt = None if START_DATE is None else pd.to_datetime(START_DATE)
    end_dt = None if END_DATE is None else pd.to_datetime(END_DATE)

    if START_DATE is not None:
        print(f"Filtering data from: {start_dt}")
    if END_DATE is not None:
        # NOTE: a date-only END_DATE parses to midnight at the start of that
        # day, so records later on the end date are excluded by the `<=` test.
        print(f"Filtering data until: {end_dt}")

    df_plot = _select_time_window(df_original, start_dt, end_dt)  # for plotting (all data)
    df_filtered = _select_time_window(df, start_dt, end_dt)  # for correlation (filtered data)
    print(f"\nFiltered records for plotting: {len(df_plot)}")
    print(f"Filtered records for correlation: {len(df_filtered)}")
else:
    df_plot = df_original.copy()  # for plotting (all data)
    df_filtered = df.copy()  # for correlation (filtered data)
    print("Using all available data")

# Summary statistics of the data the correlation will be computed from
print("\nSummary statistics (filtered data for correlation):")
print(df_filtered[[POA_COLUMN, POWER_COLUMN]].describe())

# Clear sky index = 1 - cloud cover fraction (cloud cover is given in percent)
df_plot['clear_sky_index'] = 1 - (df_plot[CLOUD_COVER_COLUMN] / 100)
df_filtered['clear_sky_index'] = 1 - (df_filtered[CLOUD_COVER_COLUMN] / 100)
print("\nClear Sky Index statistics (filtered data for correlation):")
print(df_filtered['clear_sky_index'].describe())


## Plot Normalized POA, Power, and Clear Sky Index

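Create a plot showing normalized POA irradiance, normalized power, and the clear sky index over time.
POA and power are each divided by their maximum within the plotted window, and the clear sky index is computed as 1 − cloud cover / 100, so the three series can be compared on a common 0–1 scale.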


In [None]:
# Scale POA and power by their maxima within the plotted window so that both
# can share one axis with the clear sky index (uses the unfiltered plot data).
df_plot['poa_normalized'] = df_plot[POA_COLUMN] / df_plot[POA_COLUMN].max()
df_plot['power_normalized'] = df_plot[POWER_COLUMN] / df_plot[POWER_COLUMN].max()

# Title, with the configured time window appended when one is set
title = 'Normalized POA Irradiance, Power, and Clear Sky Index Over Time'
if START_DATE and END_DATE:
    title += f'\n({START_DATE} to {END_DATE})'
elif START_DATE:
    title += f'\n(from {START_DATE})'
elif END_DATE:
    title += f'\n(until {END_DATE})'

# Temporarily disable interactive plotting so the figure is only rendered by
# the explicit display() call below.
_prev_interactive_state = plt.isinteractive()
plt.ioff()

fig, ax = plt.subplots(figsize=(14, 6))
try:
    # Plot normalized POA, power, and clear sky index (using unfiltered data)
    ax.plot(df_plot[TIME_COLUMN], df_plot['poa_normalized'],
            color='tab:orange', linewidth=1.5, label='POA Irradiance (normalized)', alpha=0.8)
    ax.plot(df_plot[TIME_COLUMN], df_plot['power_normalized'],
            color='tab:blue', linewidth=1.5, label='Power (normalized)', alpha=0.8)
    ax.plot(df_plot[TIME_COLUMN], df_plot['clear_sky_index'],
            color='gray', linewidth=1.5, linestyle=':', label='Clear Sky Index', alpha=0.8)

    ax.set_xlabel('Time', fontsize=12)
    ax.set_ylabel('Normalized Value', fontsize=12)
    ax.set_ylim([0, 1.05])
    ax.grid(True, alpha=0.3)
    ax.set_title(title, fontsize=14, fontweight='bold', pad=20)
    ax.legend(loc='upper left', fontsize=10)

    # Rotate x-axis labels for better readability; plt.xticks acts on the
    # current axes, which is `ax` because the figure was just created.
    plt.xticks(rotation=45, ha='right')

    # Adjust layout to prevent label cutoff
    fig.tight_layout()

    display(fig)
finally:
    # Runs even when plotting fails, so the figure is not leaked and the
    # caller's interactive mode is always restored.
    plt.close(fig)
    if _prev_interactive_state:
        plt.ion()


## Interactive Time Range Selection

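Select a specific date range to zoom in on the data. The example cell below is commented out: uncomment it and edit `specific_start` / `specific_end` to plot that range.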


In [None]:
# Example: Plot a specific week
# Uncomment and modify the dates below to plot a specific time range

# specific_start = '2023-06-15'
# specific_end = '2023-06-22'
#
# mask = (df[TIME_COLUMN] >= pd.to_datetime(specific_start)) & \
#        (df[TIME_COLUMN] <= pd.to_datetime(specific_end))
# df_week = df[mask].copy()
#
# # Calculate clear sky index and normalized values for the week
# df_week['clear_sky_index'] = 1 - (df_week[CLOUD_COVER_COLUMN] / 100)
# df_week['poa_normalized'] = df_week[POA_COLUMN] / df_week[POA_COLUMN].max()
# df_week['power_normalized'] = df_week[POWER_COLUMN] / df_week[POWER_COLUMN].max()
#
# # Temporarily disable interactive plotting
# _prev_interactive_state = plt.isinteractive()
# plt.ioff()
#
# fig, ax = plt.subplots(figsize=(14, 6))
#
# ax.plot(df_week[TIME_COLUMN], df_week['poa_normalized'],
#         color='tab:orange', linewidth=2, label='POA Irradiance (normalized)', alpha=0.8)
# ax.plot(df_week[TIME_COLUMN], df_week['power_normalized'],
#         color='tab:blue', linewidth=2, label='Power (normalized)', alpha=0.8)
# ax.plot(df_week[TIME_COLUMN], df_week['clear_sky_index'],
#         color='gray', linewidth=2, linestyle=':', label='Clear Sky Index', alpha=0.8)
#
# ax.set_xlabel('Time', fontsize=12)
# ax.set_ylabel('Normalized Value', fontsize=12)
# ax.set_ylim([0, 1.05])
# ax.grid(True, alpha=0.3)
#
# plt.title(f'Normalized POA Irradiance, Power, and Clear Sky Index\n({specific_start} to {specific_end})',
#           fontsize=14, fontweight='bold', pad=20)
#
# ax.legend(loc='upper left', fontsize=10)
#
# plt.xticks(rotation=45, ha='right')
# plt.tight_layout()
#
# display(fig)
# plt.close(fig)
#
# # Restore interactive plotting state
# if _prev_interactive_state:
#     plt.ion()


## Correlation Analysis

Analyze the correlation between POA irradiance and power.


In [None]:
# Correlation between POA irradiance and power, computed on the
# value- and time-filtered data (Series.corr default method).
correlation = df_filtered[POA_COLUMN].corr(df_filtered[POWER_COLUMN])
print(f"Correlation between POA Irradiance and Power: {correlation:.4f}")

# Temporarily disable interactive plotting so the figure is only rendered by
# the explicit display() call below.
_prev_interactive_state = plt.isinteractive()
plt.ioff()

fig, ax = plt.subplots(figsize=(10, 6))
try:
    # One point per record: POA irradiance on x, power on y
    scatter = ax.scatter(df_filtered[POA_COLUMN], df_filtered[POWER_COLUMN],
                         alpha=0.5, s=10, c='tab:blue')

    # '\u00b2' is the superscript two; written as an escape so the label
    # survives re-encoding of this file (it had been garbled to 'mÂ²').
    ax.set_xlabel('POA Irradiance (W/m\u00b2)', fontsize=12)
    ax.set_ylabel('Power (W)', fontsize=12)
    ax.set_title(f'POA Irradiance vs Power\nCorrelation: {correlation:.4f}',
                 fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3)

    fig.tight_layout()

    display(fig)
finally:
    # Runs even when plotting fails, so the figure is not leaked and the
    # caller's interactive mode is always restored.
    plt.close(fig)
    if _prev_interactive_state:
        plt.ion()
