### Arugam Bay Surf Analysis Pipeline - Full Year 2022
##### Extracts and analyzes wave height, wind speed/direction for the famous Sri Lankan surf spot
##### Target: 6.81°N, 81.83°E (Arugam Bay Main Point)

In [1]:
import cdsapi
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import zipfile
import os
from datetime import datetime, timedelta
import seaborn as sns

#### Config

In [2]:
# Arugam Bay coordinates (famous surf spot)
TARGET_LAT = 6.81  # °N 
TARGET_LON = 81.83  # °E

# Data parameters
YEAR = 2022
AREA = [TARGET_LAT + 0.25, TARGET_LON - 0.25, TARGET_LAT - 0.25, TARGET_LON + 0.25]  # Small grid around Arugam Bay
TIMES = ['00:00', '06:00', '12:00', '18:00']  # 6-hourly data
MONTHS = [f'{i:02d}' for i in range(1, 13)]

#### Data Extraction & Preparation

In [3]:
def download_era5_data():
    """Download ERA5 reanalysis data for full year 2022"""
    c = cdsapi.Client()
    
    # Atmospheric data
    print("Downloading atmospheric data...")
    c.retrieve('reanalysis-era5-single-levels', {
        'product_type': 'reanalysis',
        'variable': ['10m_u_component_of_wind', '10m_v_component_of_wind', 'mean_sea_level_pressure'],
        'year': str(YEAR),
        'month': MONTHS,
        'day': [f'{i:02d}' for i in range(1, 32)],
        'time': TIMES,
        'area': AREA,
        'format': 'netcdf',
    }, 'era5_atmos_2022.nc')
    
    # Wave data
    print("Downloading wave data...")
    c.retrieve('reanalysis-era5-single-levels', {
        'product_type': 'reanalysis',
        'variable': 'significant_height_of_combined_wind_waves_and_swell',
        'year': str(YEAR),
        'month': MONTHS,
        'day': [f'{i:02d}' for i in range(1, 32)],
        'time': TIMES,
        'area': AREA,
        'format': 'netcdf',
    }, 'era5_waves_2022.nc')

In [4]:
def extract_and_clean_data():
    """Load, merge and extract time series at target point"""
    print("Loading and processing data...")
    
    # Load datasets
    ds_atmos = xr.open_dataset('era5_atmos_2022.nc')
    ds_waves = xr.open_dataset('era5_waves_2022.nc')
    
    # Merge datasets
    ds = xr.merge([ds_atmos, ds_waves])
    
    # Extract nearest point to Arugam Bay
    point = ds.sel(latitude=TARGET_LAT, longitude=TARGET_LON, method='nearest')
    
    # Calculate derived variables
    wind_speed = np.sqrt(point.u10**2 + point.v10**2)
    wind_direction = (180/np.pi) * np.arctan2(point.u10, point.v10) % 360  # Meteorological convention
    
    # Create DataFrame
    df = pd.DataFrame({
        'datetime': pd.to_datetime(point.time.values),
        'swh': point.swh.values,
        'wind_speed': wind_speed.values,
        'wind_direction': wind_direction.values,
        'u_wind': point.u10.values,
        'v_wind': point.v10.values,
        'mslp': point.msl.values / 100  # Convert to hPa
    })
    
    df.set_index('datetime', inplace=True)
    return df

#### Analysis Function

In [5]:
def analyze_surf_seasons(df):
    """Analyze seasonal patterns in wave height"""
    df['month'] = df.index.month
    df['season'] = df['month'].map({
        12: 'NE_Monsoon', 1: 'NE_Monsoon', 2: 'NE_Monsoon',
        3: 'Inter_Monsoon', 4: 'Inter_Monsoon', 5: 'Inter_Monsoon',
        6: 'SW_Monsoon', 7: 'SW_Monsoon', 8: 'SW_Monsoon',
        9: 'SW_Monsoon', 10: 'SW_Monsoon', 11: 'SW_Monsoon'
    })
    
    monthly_stats = df.groupby('month').agg({
        'swh': ['mean', 'std', 'max'],
        'wind_speed': ['mean', 'std'],
        'wind_direction': lambda x: np.degrees(np.arctan2(np.sin(np.radians(x)).mean(), 
                                                          np.cos(np.radians(x)).mean())) % 360
    })
    
    return monthly_stats

In [6]:
def create_visualizations(df):
    """Create comprehensive surf analysis plots"""
    fig = plt.figure(figsize=(16, 12))
    
    # 1. Full year wave height time series
    ax1 = plt.subplot(3, 2, 1)
    df['swh'].plot(ax=ax1, color='steelblue', alpha=0.7, linewidth=0.8)
    df['swh'].rolling(window=30*4, center=True).mean().plot(ax=ax1, color='red', linewidth=2, label='30-day average')
    ax1.set_ylabel('Significant Wave Height (m)')
    ax1.set_title('Arugam Bay - Wave Height 2022')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # Highlight surf season (May-October)
    surf_season = df[(df.index.month >= 5) & (df.index.month <= 10)]
    ax1.axvspan(surf_season.index.min(), surf_season.index.max(), alpha=0.2, color='green', label='Surf Season')
    
    # 2. Monthly wave height box plot
    ax2 = plt.subplot(3, 2, 2)
    df['month'] = df.index.month
    monthly_data = [df[df['month'] == m]['swh'].values for m in range(1, 13)]
    bp = ax2.boxplot(monthly_data, labels=['J','F','M','A','M','J','J','A','S','O','N','D'])
    ax2.set_ylabel('Wave Height (m)')
    ax2.set_title('Monthly Wave Height Distribution')
    ax2.grid(True, alpha=0.3)
    
    # Color surf season boxes
    for i, patch in enumerate(bp['boxes']):
        if i+1 >= 5 and i+1 <= 10:  # May-October
            patch.set_facecolor('lightgreen')
    
    # 3. Wind speed time series
    ax3 = plt.subplot(3, 2, 3)
    df['wind_speed'].plot(ax=ax3, color='orange', alpha=0.7, linewidth=0.8)
    df['wind_speed'].rolling(window=30*4, center=True).mean().plot(ax=ax3, color='darkred', linewidth=2)
    ax3.set_ylabel('Wind Speed (m/s)')
    ax3.set_title('Wind Speed 2022')
    ax3.grid(True, alpha=0.3)
    
    # 4. Wind direction scatter plot (by month)
    ax4 = plt.subplot(3, 2, 4)
    colors = plt.cm.viridis(df.index.month / 12)
    scatter = ax4.scatter(df.index.dayofyear, df['wind_direction'], c=df.index.month, 
                         cmap='viridis', alpha=0.6, s=8)
    ax4.set_ylabel('Wind Direction (°)')
    ax4.set_xlabel('Day of Year')
    ax4.set_title('Wind Direction Pattern 2022')
    ax4.set_ylim(0, 360)
    ax4.set_yticks([0, 90, 180, 270, 360])
    ax4.set_yticklabels(['N', 'E', 'S', 'W', 'N'])
    cbar = plt.colorbar(scatter, ax=ax4)
    cbar.set_label('Month')
    
    # 5. Wave height vs Wind speed
    ax5 = plt.subplot(3, 2, 5)
    ax5.scatter(df['wind_speed'], df['swh'], c=df.index.month, cmap='viridis', alpha=0.5, s=8)
    ax5.set_xlabel('Wind Speed (m/s)')
    ax5.set_ylabel('Wave Height (m)')
    ax5.set_title('Wave Height vs Wind Speed')
    ax5.grid(True, alpha=0.3)
    
    # 6. Seasonal wind rose (surf season focus)
    ax6 = plt.subplot(3, 2, 6, projection='polar')
    surf_season = df[(df.index.month >= 5) & (df.index.month <= 10)]
    
    # Convert to radians and create wind rose
    wind_dir_rad = np.radians(surf_season['wind_direction'])
    wind_speeds = surf_season['wind_speed']
    
    # Create bins for wind direction
    dir_bins = np.arange(0, 2*np.pi + np.pi/8, np.pi/8)
    speed_bins = [0, 3, 6, 9, 12, 20]
    
    for i in range(len(speed_bins)-1):
        mask = (wind_speeds >= speed_bins[i]) & (wind_speeds < speed_bins[i+1])
        if mask.sum() > 0:
            hist, _ = np.histogram(wind_dir_rad[mask], bins=dir_bins)
            width = 2*np.pi/len(hist)
            theta = dir_bins[:-1] + width/2
            ax6.bar(theta, hist, width=width, alpha=0.7, 
                   label=f'{speed_bins[i]}-{speed_bins[i+1]} m/s')
    
    ax6.set_theta_zero_location('N')
    ax6.set_theta_direction(-1)
    ax6.set_title('Surf Season Wind Rose\\n(May-October)', pad=20)
    ax6.legend(loc='upper left', bbox_to_anchor=(-0.1, 1.1))
    
    plt.tight_layout()
    plt.show()
    
    return fig

In [7]:
def print_surf_analysis(df, target_lat, target_lon):
    """Print key surf statistics for Arugam Bay"""
    print("ARUGAM BAY SURF ANALYSIS - 2022")
    print("="*60)
    
    # Overall statistics
    print(f"\nTarget Location: {target_lat:.2f}°N, {target_lon:.2f}°E")
    print(f"Data Points: {len(df):,} (6-hourly for full year)")
    
    # Wave statistics
    print("\nWAVE STATISTICS:")
    print(f"  Annual Mean SWH: {df['swh'].mean():.2f} m")
    print(f"  Annual Max SWH: {df['swh'].max():.2f} m")
    print(f"  Days with SWH > 2m: {(df['swh'] > 2.0).sum()/4:.0f} days")
    
    # Seasonal analysis
    surf_season = df[(df.index.month >= 5) & (df.index.month <= 10)]
    off_season = df[(df.index.month < 5) | (df.index.month > 10)]
    
    print("\nSEASONAL COMPARISON:")
    print(f"  Surf Season (May-Oct): {surf_season['swh'].mean():.2f}m avg, {surf_season['swh'].max():.2f}m max")
    print(f"  Off Season (Nov-Apr): {off_season['swh'].mean():.2f}m avg, {off_season['swh'].max():.2f}m max")
    
    # Best months
    monthly_avg = df.groupby(df.index.month)['swh'].mean()
    best_month = monthly_avg.idxmax()
    month_names = ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    
    print("\nBEST SURF MONTHS:")
    print(f"  Peak Month: {month_names[best_month]} ({monthly_avg[best_month]:.2f}m avg)")
    print("  Top 3 Months: ", end="")
    for i, month_idx in enumerate(monthly_avg.nlargest(3).index):
        print(f"{month_names[month_idx]}({monthly_avg[month_idx]:.2f}m)", end="")
        if i < 2:
            print(", ", end="")
    print()  # final newline
    
    # Wind analysis
    print("\nWIND PATTERNS:")
    print(f"  Annual Mean Wind: {df['wind_speed'].mean():.1f} m/s")
    print(f"  Surf Season Mean Wind: {surf_season['wind_speed'].mean():.1f} m/s")
    
    # Dominant wind directions by season
    surf_wind_dir = np.degrees(np.arctan2(surf_season['v_wind'].mean(), surf_season['u_wind'].mean())) % 360
    off_wind_dir = np.degrees(np.arctan2(off_season['v_wind'].mean(), off_season['u_wind'].mean())) % 360
    
    print(f"  Surf Season Dominant Wind: {surf_wind_dir:.0f}° (SW Monsoon)")
    print(f"  Off Season Dominant Wind: {off_wind_dir:.0f}° (NE Monsoon)")

#### Main Execution

In [8]:
def main():
    """Run complete Arugam Bay surf analysis pipeline"""
    
    print("Arugam Bay Surf Analysis Pipeline - Starting...")
    print(f"Target: {TARGET_LAT:.2f}°N, {TARGET_LON:.2f}°E")
    
    # Step 1: Download data (uncomment to download)
    print("\n1. Downloading ERA5 data...")
    download_era5_data()
    
    # Step 2: Process data
    print("\n2. Processing data...")
    try:
        df = extract_and_clean_data()
    except FileNotFoundError:
        print("Data files not found. Please run download_era5_data() first.")
        return
    
    # Step 3: Analyze
    print("\n3. Running analysis...")
    monthly_stats = analyze_surf_seasons(df)
    
    # Step 4: Visualize
    print("\n4. Creating visualizations...")
    fig = create_visualizations(df)
    
    # Step 5: Print analysis
    print_surf_analysis(df)
    
    # Save processed data
    output_file = f"arugam_bay_surf_data_{YEAR}.csv"
    df.to_csv(output_file)
    print(f"\nProcessed data saved to: {output_file}")
    
    return df, fig


if __name__ == "__main__":
    # Run the complete analysis
    surf_data, analysis_plot = main()
    
    # Additional quick analysis
    print("\nQuick Monthly Averages:")
    monthly = surf_data.groupby(surf_data.index.month)[['swh', 'wind_speed']].mean()
    monthly.index = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
    print(monthly.round(2))

Arugam Bay Surf Analysis Pipeline - Starting...
Target: 6.81°N, 81.83°E

1. Downloading ERA5 data...


2025-08-25 23:26:31,012 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.


Downloading atmospheric data...


2025-08-25 23:26:31,922 INFO Request ID is 9b2c1dde-7524-4c85-b446-3fc3193a2945
2025-08-25 23:26:32,158 INFO status has been updated to accepted
2025-08-25 23:26:37,434 INFO status has been updated to running
2025-08-25 23:32:53,812 INFO status has been updated to successful


1193a70e16e815edbefb6b680a71235c.nc:   0%|          | 0.00/245k [00:00<?, ?B/s]

Downloading wave data...


2025-08-25 23:32:56,569 INFO Request ID is cb4f4b95-6fa2-48f5-9389-2c6e79f6d2d7
2025-08-25 23:32:56,747 INFO status has been updated to accepted
2025-08-25 23:33:11,038 INFO status has been updated to running
2025-08-25 23:33:18,798 INFO status has been updated to accepted
2025-08-25 23:33:30,369 INFO status has been updated to running
2025-08-25 23:34:52,314 INFO status has been updated to successful


ffece230fd0b6248f17d1df7f4345f7.nc:   0%|          | 0.00/134k [00:00<?, ?B/s]


2. Processing data...
Loading and processing data...


AttributeError: 'Dataset' object has no attribute 'time'