In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the merged dataset, keep June 2024 onwards, and plot a centred moving
# average of each unit's "% of capacity bid into D*" against delivery time.
df = pd.read_csv('merged 17 unit 2.csv')

# Parse timestamps so they can be compared with the cut-off date and plotted
df['deliveryStart'] = pd.to_datetime(df['deliveryStart'])

# Filter data from June 2024 onwards
df = df[df['deliveryStart'] >= '2024-06-01']

# Order rows by unit and then by time so each unit's rolling window runs over
# consecutive rows in chronological order
df = df.sort_values(['Unit code', 'deliveryStart'])

value_col = '% of capacity bid into D*'

# 7 days * 6 EFA blocks per day. The window counts rows, not elapsed time, so
# it spans exactly 7 days only where a unit has no missing blocks.
window_blocks = 7 * 6

# Create the plot
plt.figure(figsize=(16, 10))

# Unique units and one colour per unit (kept at module level for later use)
units = df['Unit code'].unique()
colors = plt.cm.tab20(np.linspace(0, 1, len(units)))

# A single groupby pass replaces re-filtering the whole frame for every unit.
# sort=False yields groups in first-appearance order, which is the order of
# `units`, so colors[i] is the same colour each unit received before. Rows
# with a missing unit code form no group (previously they matched no rows
# and were skipped as well).
for i, (unit, unit_data) in enumerate(df.groupby('Unit code', sort=False)):
    # Shrink the window for units that have fewer rows than a full window
    window_size = min(window_blocks, len(unit_data))
    moving_avg = unit_data[value_col].rolling(window=window_size, center=True).mean()

    # With the default min_periods (== window), a unit shorter than the full
    # window ends up with exactly one averaged value. A line through a single
    # point draws nothing, so give that lone point a marker to keep the unit
    # visible next to its label.
    point_style = {'marker': 'o'} if moving_avg.count() == 1 else {}

    plt.plot(unit_data['deliveryStart'], moving_avg,
             color=colors[i], linewidth=2, alpha=0.8, label=unit,
             **point_style)

    # Annotate with the unit name at the last non-NaN point of the line;
    # last_valid_index() is None when the whole average is NaN.
    last_valid_idx = moving_avg.last_valid_index()
    if last_valid_idx is not None:
        plt.annotate(unit,
                     xy=(unit_data.loc[last_valid_idx, 'deliveryStart'],
                         moving_avg.loc[last_valid_idx]),
                     xytext=(10, 0), textcoords='offset points',
                     fontsize=9, color=colors[i], fontweight='bold',
                     va='center')

plt.xlabel('Time (4-hour EFA blocks)', fontsize=12)
plt.ylabel('% of Capacity Bid into D*', fontsize=12)
plt.title('Moving Average % Capacity Bid into D* Markets by Unit (June 2024 onwards)', fontsize=14, fontweight='bold')
plt.grid(True, alpha=0.3)
plt.ylim(0, 100)

# Rotate x-axis labels for better readability
plt.xticks(rotation=45)

# Add a legend (optional, since the lines are also annotated), placed outside
# the axes on the right
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8)

plt.tight_layout()
plt.show()

# Report each unit's mean "% of capacity bid into D*" over the filtered rows.
print("\nSummary Statistics (June 2024 onwards):")
print("=" * 50)

# groupby(sort=False) walks the units in first-appearance order, the same
# order as `units`; rows without a unit code form no group and are skipped.
for unit_code, unit_rows in df.groupby('Unit code', sort=False):
    mean_pct = unit_rows['% of capacity bid into D*'].mean()
    print(f"{unit_code}: Average {mean_pct:.1f}% capacity")