### What this script does

- Scans your mast/sonic and cloud radar (rain) folders by month, loads daily CSVs, and inner-joins them on TIMESTAMP.

- Builds QC flags:

    - Flag_Rain from rain rate,

    - Flag_Temp from mast–sonic temperature difference,

    - Flag_WS from mast–sonic horizontal wind speed difference.

- Summarizes QC pass/fail vs rain and overall usable fraction.

- Creates a rain/temperature-filtered dataset, computes radiometer surface temperature from IR20Up via Stefan–Boltzmann, and plots:

    - time series of u/v/w components,

    - stats for vertical wind (Average_Wind_Uz),

    - daily albedo,

    - instrument temperature comparisons & diurnal bias,

    - mast vs sonic wind speed (including calm/windy day comparisons),

    - direction comparisons (mast vs sonic) with circular metrics and wind-rose plots,

    - mast vs KNMI wind direction hourly circular mean + circular error metrics and a rose of Δθ.

#### Edit these lines before running
1) Root folders for mast/sonic data and cloud radar rain data
- BASE_MAST  = r"C:\path\to\Sonic"        
- BASE_RADAR = r"C:\path\to\Cloud_radar"  

2) Folder names under each root:
   MONTHS = ['2024-03', '2024-04', '2024-05']   # add/remove as needed

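3) QC thresholds: the data-loading cell flags |mast − sonic temperature| > 1 K (`Flag_Temp`), |mast − sonic wind speed| > 1 m/s (`Flag_WS`) and `Rain` > 0 (`Flag_Rain`). Adjust these if your instruments differ.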


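4) Optional filtering for plots: for example, enable the commented-out `SR15D1Dn_Irr > 10` filter on `filtered`, or add `Flag_WS == 0` to the `wind_data` selection.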

5) KNMI reference file:
   file_path = r"C:\path\to\KNMI\uurgeg_235_2021-2030.txt"  # update to your KNMI file location


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import matplotlib.lines as mlines

In [None]:
# Stefan–Boltzmann constant in W m^-2 K^-4; used below to turn IR20Up into a temperature.
sigma = 5.67e-8


In [None]:

# 3) Helper: cross-tabulate a QC flag against the rain flag
def summarize_flag(big, flag_col, rain_flag_col):
    """Count QC passes and failures separately for rainy and dry intervals.

    Parameters
    ----------
    big : DataFrame holding both flag columns.
    flag_col : name of the QC flag column; a value of 1 counts as a failure.
    rain_flag_col : name of the rain flag column; 1 is rainy, 0 is dry.

    Returns
    -------
    dict with the keys ``N_rain``, ``rain_fail``, ``rain_pass``,
    ``rain_fail_%``, ``N_dry``, ``dry_fail``, ``dry_pass`` and ``dry_fail_%``.
    A percentage is 0 when its group is empty.
    """
    def _percent(part, whole):
        # Guard the empty group so the ratio never divides by zero.
        return 100 * part / whole if whole else 0

    rainy = big[rain_flag_col] == 1
    dry = big[rain_flag_col] == 0
    failed = big[flag_col] == 1

    n_rainy = rainy.sum()
    n_dry = dry.sum()
    n_rainy_failed = (rainy & failed).sum()
    n_dry_failed = (dry & failed).sum()

    summary = {}
    summary['N_rain'] = n_rainy
    summary['rain_fail'] = n_rainy_failed
    summary['rain_pass'] = n_rainy - n_rainy_failed
    summary['rain_fail_%'] = _percent(n_rainy_failed, n_rainy)
    summary['N_dry'] = n_dry
    summary['dry_fail'] = n_dry_failed
    summary['dry_pass'] = n_dry - n_dry_failed
    summary['dry_fail_%'] = _percent(n_dry_failed, n_dry)
    return summary


In [None]:
# Edit these lines before running!
# 1) Root folders for mast/sonic data and cloud radar rain data
BASE_MAST  = r"C:\path\to\Sonic"
BASE_RADAR = r"C:\path\to\Cloud_radar"

# 2) Months to scan (sub-folder names under each root)
MONTHS = ['2024-03', '2024-04', '2024-05']

# 3) QC thresholds: an interval is flagged when the absolute mast-minus-sonic
#    difference exceeds the threshold. Units follow the input columns
#    (presumably K and m/s — confirm against the CSV headers).
TEMP_DIFF_MAX = 1
WS_DIFF_MAX = 1

all_days = []

for month in MONTHS:
    mast_month_dir  = os.path.join(BASE_MAST, month)
    radar_month_dir = os.path.join(BASE_RADAR, month)
    if not (os.path.isdir(mast_month_dir) and os.path.isdir(radar_month_dir)):
        # Say so instead of skipping silently: a mistyped root otherwise looks like "no data".
        print(f"Skipping {month}: month folder missing under BASE_MAST or BASE_RADAR")
        continue

    # Each sub-folder is one day; sorted() because os.listdir returns entries in arbitrary order.
    for date_dir in sorted(os.listdir(mast_month_dir)):
        mast_path  = os.path.join(mast_month_dir, date_dir, 'merged_data_10min.csv')
        radar_path = os.path.join(radar_month_dir, date_dir, 'Rain_10min_Averages.csv')
        # A day is used only when both instruments delivered a file.
        if not (os.path.isfile(mast_path) and os.path.isfile(radar_path)):
            continue

        df_mast = pd.read_csv(mast_path,  parse_dates=['TIMESTAMP'])
        df_rain = pd.read_csv(radar_path, parse_dates=['TIMESTAMP'])

        # Inner join: keep only timestamps present in both files, and bring in
        # nothing but the Rain column from the radar file.
        df = pd.merge(
            df_mast,
            df_rain[['TIMESTAMP', 'Rain']],
            on='TIMESTAMP',
            how='inner',
        )

        # Differences and QC flags (1 = flagged, 0 = passed).
        # Note: a NaN difference compares as "not greater" and therefore gets flag 0.
        df['Temp_Diff'] = df['Temperature_K_2.99'] - df['Average_Temperature_Corr']
        df['Flag_Temp'] = (df['Temp_Diff'].abs() > TEMP_DIFF_MAX).astype(int)
        df['Flag_Rain'] = (df['Rain'] > 0).astype(int)
        # Horizontal sonic wind speed from the two horizontal components.
        df['wind_speed_sonic_hor'] = np.sqrt(df['Average_Wind_Ux']**2 + df['Average_Wind_Uy']**2)
        df['Windspeed_Diff'] = df['WS_ms_D15014_Avg'] - df['wind_speed_sonic_hor']
        df['Flag_WS'] = (df['Windspeed_Diff'].abs() > WS_DIFF_MAX).astype(int)
        df['Flag_Rain_WS'] = df['Flag_Rain']  # same rain mask, kept under its own name for the wind-speed summary

        all_days.append(df)

if not all_days:
    # pd.concat would fail with "No objects to concatenate"; name the actual cause instead.
    raise FileNotFoundError(
        "No day folder contained both 'merged_data_10min.csv' and "
        f"'Rain_10min_Averages.csv'. Check BASE_MAST={BASE_MAST!r}, "
        f"BASE_RADAR={BASE_RADAR!r} and MONTHS={MONTHS!r}."
    )

# Concatenate the campaign and put it in time order so that line plots
# against TIMESTAMP do not jump back and forth between days.
big = (
    pd.concat(all_days, ignore_index=True)
    .sort_values('TIMESTAMP', kind='stable')
    .reset_index(drop=True)
)




In [None]:
# List the columns of the merged campaign table.
print(big.columns)

In [None]:
# Cross-tabulate the temperature and wind-speed QC flags against the rain flag
temp_stats = summarize_flag(big, 'Flag_Temp', 'Flag_Rain')
ws_stats = summarize_flag(big, 'Flag_WS', 'Flag_Rain_WS')

# Same two-line report for both flags; only the heading differs.
for heading, st in (
    ("=== Campaign‐Wide Sonic QC vs Rain (Temperature) ===", temp_stats),
    ("\n=== Campaign‐Wide Sonic QC vs Rain (Wind Speed) ===", ws_stats),
):
    print(heading)
    print(f"Rainy intervals : {st['N_rain']}  |  Fail: {st['rain_fail']} ({st['rain_fail_%']:.1f}%)  |  Pass: {st['rain_pass']}")
    print(f"Dry intervals   : {st['N_dry']}   |  Fail: {st['dry_fail']} ({st['dry_fail_%']:.1f}%)  |  Pass: {st['dry_pass']}")

# Share of all intervals that are both dry and pass the temperature check
dry_and_temp_ok = (big['Flag_Rain'] == 0) & (big['Flag_Temp'] == 0)
usable = dry_and_temp_ok.sum()
total = len(big)
usable_pct = 100 * usable / total
print(f"\nOverall usable sonic‐temp intervals (dry & ΔT≤1 K): {usable}/{total} = {usable_pct:.1f}%")

In [None]:
# Number of distinct calendar days present in the merged table
# (each timestamp is truncated to midnight before counting).
unique_days = big['TIMESTAMP'].dt.normalize().nunique()
print(f"Total unique days in merged data: {unique_days}")


In [None]:
# Keep only intervals that are dry and pass the mast-vs-sonic temperature check
keep = big['Flag_Rain'].eq(0) & big['Flag_Temp'].eq(0)
filtered = big.loc[keep].copy()

# Radiometer-derived temperature: Stefan–Boltzmann law inverted with emissivity 1,
# T = (IR20Up / sigma) ** 0.25.
# NOTE(review): assumes IR20Up is a longwave irradiance in W m^-2 — confirm.
filtered['T_radiometer'] = (filtered['IR20Up'] / sigma) ** 0.25
# Optional extra filter (disabled):
#filtered = filtered[filtered['SR15D1Dn_Irr'] > 10]

In [None]:
# Time series of the three sonic wind components (w, u, v) after QC filtering
fig_uvw, ax_uvw = plt.subplots(figsize=(12, 6))
for column, name, colour in (
    ("Average_Wind_Uz", 'w', 'tab:red'),
    ("Average_Wind_Ux", 'u', 'tab:green'),
    ("Average_Wind_Uy", 'v', 'tab:blue'),
):
    ax_uvw.plot(filtered["TIMESTAMP"], filtered[column], label=name, color=colour, alpha=0.7)

ax_uvw.set_xlabel("Time")
ax_uvw.set_ylabel("Wind Speed (m/s)")
ax_uvw.set_title("Wind Speed Over Time")
ax_uvw.grid(True)
fig_uvw.tight_layout()
ax_uvw.legend()
plt.show()

In [None]:
# Vertical-wind sample: rows of `filtered` with a non-missing Average_Wind_Uz
has_uz = filtered['Average_Wind_Uz'].notna()
w = filtered.loc[has_uz, ['TIMESTAMP', 'Average_Wind_Uz']].copy()
w['abs_w'] = w['Average_Wind_Uz'].abs()
uz = w['Average_Wind_Uz']

# Core statistics (insertion order is also the print order)
stats = {'count': len(w)}
stats['mean_w'] = uz.mean()
stats['std_w'] = uz.std(ddof=1)
stats['median_w'] = uz.median()
stats['median_abs_w'] = w['abs_w'].median()
stats['rms_w'] = np.sqrt(np.mean(uz**2))
stats['min_w'] = uz.min()
for pct in (10, 25, 75, 90):
    stats[f'p{pct}_w'] = uz.quantile(pct / 100)
stats['max_w'] = uz.max()

# Percentage of intervals whose |w| stays within each small threshold
thresholds = [0.05, 0.10, 0.20, 0.50]
fractions = {}
for thr in thresholds:
    fractions[f'frac_|w|<={thr}'] = (w['abs_w'] <= thr).mean() * 100

# Report
print('--- Statistics for Average_Wind_Uz (10-min means) ---')
for k, v in stats.items():
    if 'count' in k:
        print(f'{k:>14}: {v}')       # plain count, no unit
    else:
        print(f'{k:>14}: {v: .3f} m/s')
print('--- Fraction within thresholds ---')
for k, v in fractions.items():
    print(f'{k:>14}: {v:5.1f} %')

# Median of |w| on its own line, handy for quoting in text
print(f"\nMedian |w̄|: {w['abs_w'].median():.3f} m/s")

In [None]:
# Select one representative day; change `day_start` to look at another date.
# The plot title is derived from it so the label can never disagree with the data shown.
day_start = pd.Timestamp('2024-05-03')
day_end = day_start + pd.Timedelta(days=1)
in_day = (filtered['TIMESTAMP'] >= day_start) & (filtered['TIMESTAMP'] < day_end)
day_data = filtered[in_day].copy()
day_label = f"{day_start:%B} {day_start.day}, {day_start.year}"

# Plot vertical wind speed (Uz) over the selected day
# NOTE(review): the x-label says UTC, but the KNMI cells localise TIMESTAMP as
# Europe/Amsterdam — confirm which time base the logger uses.
plt.figure(figsize=(10, 5))
plt.plot(day_data['TIMESTAMP'], day_data['Average_Wind_Uz'], label='Vertical Wind Speed (Uz)', color='darkred')
plt.axhline(0, color='gray', linestyle='--', linewidth=1)  # zero reference line
plt.xlabel('Time (UTC)')
plt.ylabel('Uz (m/s)')
plt.title(f'Diurnal Variation of Vertical Wind Speed (Uz) on {day_label}')
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Disabled cell (kept as a string literal, so nothing here runs): 10-minute albedo scatter.
# NOTE(review): it plots 1/filtered['Albedo'] under the axis label 'Albedo' — confirm how
# the 'Albedo' column is defined before re-enabling.
'''
# Create scatter plot again
plt.figure(figsize=(10, 5))
plt.scatter(filtered['TIMESTAMP'], 1/filtered['Albedo'], 
            color='tab:orange', s=10, alpha=0.7, edgecolor='k', linewidth=0.2)

# Formatting
plt.title('Surface Albedo (10-Minute)', fontsize=16)
plt.xlabel('Date', fontsize=14)
plt.ylabel('Albedo', fontsize=14)
plt.grid(True, linestyle='--', alpha=0.6)
plt.xticks(rotation=45, fontsize=11)
plt.yticks(fontsize=11)
plt.tight_layout()
'''

In [None]:
# Disabled cell (kept as a string literal, so nothing here runs): daily-mean albedo scatter.
# NOTE(review): it reads `daily_albedo`, which is only built by the disabled cell that
# follows this one — that cell would have to run first if both are re-enabled.
'''
# --- Plotting ---
plt.figure(figsize=(10, 5))
plt.scatter(daily_albedo.index, 1/daily_albedo.values,
            color='tab:orange', edgecolor='k', s=35, alpha=0.8)

# Formatting
plt.title('Daily Average Surface Albedo ', fontsize=16)
plt.xlabel('Date', fontsize=14)
plt.ylabel('Albedo', fontsize=14)
plt.grid(True, linestyle='--', alpha=0.6)
plt.xticks(rotation=45, fontsize=11)
plt.yticks(fontsize=11)
plt.tight_layout()

# Optional: save to file
# plt.savefig('daily_albedo_scatter.png', dpi=300)
plt.show()
'''

In [None]:
# Disabled cell (kept as a string literal, so nothing here runs): builds `daily_albedo`
# as the daily mean of the 'Albedo' column.
# NOTE(review): set_index('TIMESTAMP', inplace=True) would move TIMESTAMP out of the
# columns of `filtered`, while other cells read filtered['TIMESTAMP'] — work on a copy
# if this is re-enabled.
'''
# Convert TIMESTAMP to datetime if not already
filtered['TIMESTAMP'] = pd.to_datetime(filtered['TIMESTAMP'])

# Set TIMESTAMP as index for resampling
filtered.set_index('TIMESTAMP', inplace=True)

# Daily average of Albedo
daily_albedo = filtered['Albedo'].resample('D').mean().dropna()
'''

In [None]:
# 3) Sonic temperature against mast temperature, with a 1:1 reference line
T_mast = filtered['Temperature_K_2.99']
T_sonic = filtered['Average_Temperature_Corr']
T_rad = filtered['T_radiometer']

# The line spans the combined range of all three series.
# NOTE(review): T_rad still widens this range although its scatter is commented out.
range_sources = (T_mast, T_sonic, T_rad)
lims = [min(s.min() for s in range_sources),
        max(s.max() for s in range_sources)]

fig_scatter, ax_scatter = plt.subplots(figsize=(8, 6))
ax_scatter.scatter(T_mast, T_sonic, s=30, alpha=0.6, label='Sonic')
#ax_scatter.scatter(T_mast, T_rad, s=30, alpha=0.6, label='Radiometer')
ax_scatter.plot(lims, lims, 'k--', linewidth=1.5, label='1:1 line')
ax_scatter.set_xlabel('Mast Temperature (K)')
ax_scatter.set_ylabel('Instrument Temperature (K)')
ax_scatter.set_title('Scatter: Sonic & Radiometer vs Mast')
ax_scatter.legend(loc='upper left')
fig_scatter.tight_layout()
plt.show()


In [None]:

# 5) Bias, spread and correlation for each temperature comparison
# Sonic vs mast at 2.99 m (difference taken as mast minus sonic)
T_mast_299 = filtered['Temperature_K_2.99']
T_sonic = filtered['Average_Temperature_Corr']
delta_sonic = T_mast_299 - T_sonic
mean_sonic, std_sonic = delta_sonic.mean(), delta_sonic.std()
r_sonic = T_mast_299.corr(T_sonic)

# Radiometer vs mast at 2 m (difference taken as radiometer minus mast)
T_mast_2 = filtered['Temperature_K_2']
T_rad = filtered['T_radiometer']
delta_rad = T_rad - T_mast_2
mean_rad, std_rad = delta_rad.mean(), delta_rad.std()
r_rad = T_mast_2.corr(T_rad)

# 6) Plot styling (changes matplotlib's global rcParams, so later figures inherit it)
plt.rcParams.update({
    'font.size': 20,
    'axes.labelsize': 20,
    'axes.titlesize': 20,
    'legend.fontsize': 14,
    'xtick.labelsize': 18,
    'ytick.labelsize': 18
})

# 7) + 8) One scatter per comparison; both share the same layout
panels = (
    {
        'x': T_mast_299, 'y': T_sonic, 'color': '#1f77b4',
        'xlabel': 'Mast Temperature at 2.99 m (K)',
        'ylabel': 'Sonic Temperature (K)',
        'title': 'Sonic vs. Mast (2.99 m) Temperature',
        'note': f'ΔT mean = {mean_sonic:.2f} K\nσ = {std_sonic:.2f} K\nr = {r_sonic:.2f}',
    },
    {
        'x': T_mast_2, 'y': T_rad, 'color': '#ff7f0e',
        'xlabel': 'Mast Temperature at 2 m (K)',
        'ylabel': 'Radiometer-derived Temperature (K)',
        'title': 'Radiometer vs. Mast (2 m) Temperature',
        'note': f'ΔT mean = {mean_rad:.2f} K\nσ = {std_rad:.2f} K\nr = {r_rad:.2f}',
    },
)
for panel in panels:
    fig, ax = plt.subplots(figsize=(8,6))
    ax.scatter(panel['x'], panel['y'], s=30, alpha=0.6, color=panel['color'])
    # Equality line across the joint range of both axes
    lims = [min(panel['x'].min(), panel['y'].min()),
            max(panel['x'].max(), panel['y'].max())]
    ax.plot(lims, lims, 'k--', linewidth=1.5, label='Equality line')
    ax.set_xlabel(panel['xlabel'])
    ax.set_ylabel(panel['ylabel'])
    ax.set_title(panel['title'])
    # Metrics box in the upper-left corner (axes coordinates)
    ax.text(0.05, 0.95, panel['note'],
            transform=ax.transAxes, verticalalignment='top',
            bbox=dict(facecolor='white', alpha=0.8))
    ax.legend(loc='lower right')
    plt.tight_layout()
    plt.show()

In [None]:
# Show the QC-filtered table (dry intervals that pass the temperature check).
print(filtered)

In [None]:
# 3) Hour of day and mast-minus-sonic temperature difference (both stored on `filtered`)
filtered['Hour'] = filtered['TIMESTAMP'].dt.hour
filtered['DeltaSonic'] = filtered['Temperature_K_2.99'] - filtered['Average_Temperature_Corr']
# filtered['DeltaRad'] = filtered['T_radiometer'] - filtered['Temperature_K_2']

# 4) Mean and standard deviation of the difference for each hour of day.
#    To include the radiometer, aggregate 'DeltaRad' the same way into R_mean / R_std.
hourly = (
    filtered.groupby('Hour')['DeltaSonic']
    .agg(S_mean='mean', S_std='std')
    .reset_index()
)

# 5) Diurnal cycle: hourly mean with ±1 standard deviation error bars
fig_bias, ax_bias = plt.subplots(figsize=(8, 6))
ax_bias.errorbar(hourly['Hour'], hourly['S_mean'], yerr=hourly['S_std'],
                 marker='o', linestyle='-', label='Mast (2.99m) – Sonic', capsize=3)
# ax_bias.errorbar(hourly['Hour'], hourly['R_mean'], yerr=hourly['R_std'],
#                  marker='s', linestyle='--', label='Radiometer – Mast (2m)', capsize=3)

ax_bias.set_xticks(range(0, 24, 2))
ax_bias.set_xlabel('Hour of Day')
ax_bias.set_ylabel('Temperature Bias (K)')
ax_bias.set_title('Diurnal Variation of Temperature Bias')
ax_bias.legend()
ax_bias.grid(True)
fig_bias.tight_layout()
plt.show()


In [None]:

# 1) Keep dry intervals only. Add `& (big.Flag_WS == 0)` to the mask to also drop
#    intervals that fail the wind-speed check.
wind_data = big[(big.Flag_Rain == 0)].copy()

# 2) Mast anemometer compared against the sonic in this cell. Title, metrics and the
#    plotted difference all follow these two settings, so they cannot drift apart.
MAST_WS_COL = 'WS_ms_D15008_Avg'
# NOTE(review): the diurnal wind-speed cell calls this sensor "Mast 2.0 m" (Delta_WS_2);
# confirm the mounting height and update the label if you switch MAST_WS_COL.
MAST_WS_HEIGHT = '2 m'

# 3) Metrics: mean and std of (mast - sonic), correlation, share within ±1 m/s
ws_mast     = wind_data[MAST_WS_COL]
ws_sonic    = wind_data['wind_speed_sonic_hor']
delta_ws    = ws_mast - ws_sonic
mean_ws     = delta_ws.mean()
std_ws      = delta_ws.std()
r_ws        = ws_mast.corr(ws_sonic)
pct_within1 = (delta_ws.abs() <= 1).mean() * 100
# Keep the difference of the series actually plotted. 'Windspeed_Diff' from the loading
# cell is built from WS_ms_D15014_Avg, i.e. a different anemometer.
wind_data['Delta_WS_mast_sonic'] = delta_ws

# 4) Styled scatter plot (changes matplotlib's global rcParams)
plt.rcParams.update({
    'font.size': 20,
    'axes.labelsize': 18,
    'axes.titlesize': 18,
    'legend.fontsize': 12,
})
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(ws_mast, ws_sonic, s=1, alpha=0.6, color='#1f77b4')
lims = [min(ws_mast.min(), ws_sonic.min()), max(ws_mast.max(), ws_sonic.max())]
ax.plot(lims, lims, 'k--', linewidth=1.5, label='1:1 equality')

ax.text(0.02, 0.98,
        f'Δw_s mean = {mean_ws:.2f} m/s\nσ = {std_ws:.2f} m/s\nr = {r_ws:.2f}\n±1 m/s = {pct_within1:.1f}%',
        transform=ax.transAxes, ha='left', va='top',
        bbox=dict(facecolor='white', alpha=0.8))

ax.set_xlabel('Mast Wind Speed (m/s)')
ax.set_ylabel('Sonic Wind Speed (m/s)')
ax.set_title(f'Mast ({MAST_WS_HEIGHT}) vs. Sonic Wind Speed')
ax.legend(loc='lower right')
plt.tight_layout()
plt.show()

# 5) Calmest and windiest calendar day, ranked by daily-mean mast wind speed
wind_data['Date'] = wind_data['TIMESTAMP'].dt.date
daily = wind_data.groupby('Date')[MAST_WS_COL].mean().reset_index()
calmest  = daily.nsmallest(1, MAST_WS_COL)['Date'].iloc[0]
windiest = daily.nlargest(1,  MAST_WS_COL)['Date'].iloc[0]

# 6) Per-day subsets with time since midnight in fractional hours
day_frames = {}
for label, date in [('Calm', calmest), ('Windy', windiest)]:
    df = wind_data[wind_data['Date'] == date].copy()
    df['HourOfDay'] = df['TIMESTAMP'].dt.hour + df['TIMESTAMP'].dt.minute/60.0
    day_frames[label] = df
df_calm, df_wind = day_frames['Calm'], day_frames['Windy']

# 7) Both days on one hour-of-day axis: mast (blue), sonic (orange) and their difference (green).
#    Calm day: star markers, dashed difference line. Windy day: plus markers, solid line.
plt.figure(figsize=(8,6))
for day_name, day_date, day_df, marker, alpha, linestyle in (
    ('Calm',  calmest,  df_calm, '*', 0.6, '--'),
    ('Windy', windiest, df_wind, '+', 0.8, '-'),
):
    plt.scatter(day_df['HourOfDay'], day_df[MAST_WS_COL],
                s=50, marker=marker, color='#1f77b4', alpha=alpha,
                label=f'{day_name} Mast ({day_date:%m-%d})')
    plt.scatter(day_df['HourOfDay'], day_df['wind_speed_sonic_hor'],
                s=50, marker=marker, color='#ff7f0e', alpha=alpha,
                label=f'{day_name} Sonic ({day_date:%m-%d})')
    plt.plot(day_df['HourOfDay'], day_df['Delta_WS_mast_sonic'],
             color='#2a9d8f', linestyle=linestyle, label=f'{day_name} Δw_s')

# Formatting
plt.xticks(np.arange(0,24+1,2))
plt.xlim(-0.5, 23.5)
plt.ylim(-1,10)  # fixed y-range; widen it if your site sees stronger winds
plt.xlabel('Hour of Day')
plt.ylabel('Wind Speed (m/s)')
plt.title('Calm vs. Windy: Mast & Sonic Wind Speed vs Time')
plt.legend(ncol=1, loc='upper right')
plt.tight_layout()
plt.show()

In [None]:
# 1) Keep dry intervals only
wind_data = big[(big.Flag_Rain == 0)].copy()

# 2) Hour of day and mast-minus-sonic wind speed difference per mast level.
#    Everything is computed on `wind_data` (the whole dry campaign), not on a `df`
#    left over from an earlier cell, which would hold a single day only.
# NOTE(review): the reference here is the 'Wind_Speed' column, while the scatter cell
# uses 'wind_speed_sonic_hor' — confirm which sonic speed is intended.
wind_data["Hour"] = wind_data["TIMESTAMP"].dt.hour
mast_ws_columns = {
    "4.47": "WS_ms_D15014_Avg",
    "10":   "WS_ms_D15463_Avg",
    "2":    "WS_ms_D15008_Avg",
}
for level, mast_col in mast_ws_columns.items():
    wind_data[f"Delta_WS_{level}"] = wind_data[mast_col] - wind_data["Wind_Speed"]

# 3) Mean and standard deviation of each difference for every hour of day
grouped = wind_data.groupby("Hour").agg({
    "Delta_WS_4.47": ["mean", "std"],
    "Delta_WS_10": ["mean", "std"],
    "Delta_WS_2": ["mean", "std"]
})
# Flatten the two-level column index into plain names
grouped.columns = ["Delta_4.47_mean", "Delta_4.47_std", "Delta_10_mean", "Delta_10_std", "Delta_2_mean", "Delta_2_std"]
grouped = grouped.reset_index()

# 4) Plot: hourly mean with ±1 standard deviation error bars for each level
plt.figure(figsize=(8, 6))
plt.errorbar(grouped["Hour"], grouped["Delta_2_mean"], yerr=grouped["Delta_2_std"],
             label="Mast 2.0 m – Sonic", fmt='-o', capsize=3)
plt.errorbar(grouped["Hour"], grouped["Delta_4.47_mean"], yerr=grouped["Delta_4.47_std"],
             label="Mast 4.47 m – Sonic", fmt='-o', capsize=3)
plt.errorbar(grouped["Hour"], grouped["Delta_10_mean"], yerr=grouped["Delta_10_std"],
             label="Mast 10.0 m – Sonic", fmt='-o', capsize=3)

plt.axhline(0, linestyle='--', color='gray', linewidth=1)  # zero-difference reference
plt.xlabel("Hour of Day")
plt.ylabel("Wind Speed Difference (m/s)")
plt.title("Diurnal Variation of Wind Speed Difference (Mast – Sonic)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:

# 1. Sonic wind direction from the horizontal components, wrapped to [0, 360).
#    NOTE(review): the 270 - atan2(uy, ux) form assumes ux is positive eastward and
#    uy positive northward — confirm against the sonic's mounting.
ux = wind_data['Average_Wind_Ux']
uy = wind_data['Average_Wind_Uy']
wind_data['sonic_dir'] = (270 - np.degrees(np.arctan2(uy, ux))) % 360

# 2. Mast wind direction, copied under a shorter name
wind_data['mast_dir'] = wind_data['WindDir_D15014_Avg']

# 3. Signed angular difference Δθ = sonic – mast, wrapped to [-180, 180)
delta = (wind_data['sonic_dir'] - wind_data['mast_dir'] + 180) % 360 - 180
wind_data['delta_dir'] = delta

# 4. Circular metrics of Δθ
cos_mean = np.mean(np.cos(np.radians(delta)))
sin_mean = np.mean(np.sin(np.radians(delta)))
R = np.hypot(cos_mean, sin_mean)  # mean resultant length
circ_mean = (np.degrees(np.arctan2(sin_mean, cos_mean))) % 360  # kept in [0, 360); the correction cell subtracts it
# Clip R before the log: R == 0 would give an infinite spread and a rounding
# overshoot above 1 would give NaN. Same lower guard as the later metric cells.
circ_std  = np.degrees(np.sqrt(-2 * np.log(np.clip(R, 1e-12, 1.0))))
pct_within10 = (np.abs(delta) <= 10).mean() * 100

print(f"Circular bias (mean Δθ):   {circ_mean:.1f}°")
print(f"Circular std dev:         {circ_std:.1f}°")
print(f"Resultant vector length R: {R:.3f}")
print(f"% within ±10°:            {pct_within10:.1f}%")

# =========== Plot 1: Hexbin Mast vs Sonic Direction ===========
plt.figure(figsize=(6,6))
hb = plt.hexbin(wind_data['mast_dir'], wind_data['sonic_dir'],
                gridsize=36, cmap='viridis', extent=[0,360,0,360])
plt.plot([0,360],[0,360],'k--',linewidth=1)  # 1:1 line
plt.xlim(0,360); plt.ylim(0,360)
plt.xlabel('Mast Direction (°)')
plt.ylabel('Sonic Direction (°)')
plt.title('Mast vs Sonic Wind Direction')
plt.colorbar(hb, label='Counts')
plt.tight_layout()
plt.show()

# =========== Plot 2: Wind Roses ===========

def plot_rose(ax, directions, bins=16, title=''):
    """Draw a circular histogram (rose) of directions on a polar axis.

    Parameters
    ----------
    ax : polar axes to draw on; it is set to 0° at the top, increasing clockwise.
    directions : array-like of angles in degrees. Non-finite values are ignored
        and the rest are wrapped into [0, 360), so 360° counts as north.
    bins : number of equal-width sectors.
    title : axes title.
    """
    values = np.asarray(directions, dtype=float)
    values = np.mod(values[np.isfinite(values)], 360.0)

    # Equal-width sectors covering the full circle
    edges = np.linspace(0, 360, bins + 1)
    counts, _ = np.histogram(values, bins=edges)
    sector = 360 / bins
    centers = np.radians(edges[:-1] + sector / 2)
    # The x positions are sector centres, so the bars must be centre-aligned.
    # With align='edge' every bar would start at its centre and sit half a sector off.
    ax.bar(centers, counts, width=np.radians(sector),
           bottom=0, edgecolor='k', align='center')
    ax.set_theta_zero_location('N')
    ax.set_theta_direction(-1)
    ax.set_title(title)

# Three polar panels side by side: mast rose, sonic rose, and a rose of their difference
fig, rose_axes = plt.subplots(1, 3, figsize=(14, 4),
                              subplot_kw={'projection': 'polar'})
ax1, ax2, ax3 = rose_axes

# Δθ lives in [-180, 180); shift it to [0, 360) so it fits the rose's bin range
delta_pos = (wind_data['delta_dir'] + 360) % 360

rose_inputs = (
    (ax1, wind_data['mast_dir'], 'Mast Wind Rose'),
    (ax2, wind_data['sonic_dir'], 'Sonic Wind Rose'),
    (ax3, delta_pos, 'Δθ = Sonic–Mast'),
)
for axis, values, heading in rose_inputs:
    plot_rose(axis, values, bins=16, title=heading)

plt.tight_layout()
plt.show()


In [None]:
# 1) Sonic direction from the horizontal components, wrapped to [0, 360)
ux, uy = wind_data['Average_Wind_Ux'], wind_data['Average_Wind_Uy']  # +east, +north
sonic_math_angle = np.degrees(np.arctan2(uy, ux))
wind_data['sonic_dir'] = (270 - sonic_math_angle) % 360

# Optional sanity check: median |w| of the 10-min means, NaN-aware
if 'Average_Wind_Uz' in wind_data.columns:
    w_med = np.nanmedian(np.abs(wind_data['Average_Wind_Uz']))
    print(f"Median |w̄| over 10-min means: {w_med:.3f} m/s")

# 2) Mast direction (same rows)
wind_data['mast_dir'] = wind_data['WindDir_D15014_Avg']

# 3) Circular difference Δθ = sonic – mast, wrapped to [-180, 180)
delta = (wind_data['sonic_dir'] - wind_data['mast_dir'] + 180) % 360 - 180
wind_data['delta_dir'] = delta

# 4) Circular metrics: rotation angle and spread
cos_mean = np.mean(np.cos(np.radians(delta)))
sin_mean = np.mean(np.sin(np.radians(delta)))
R = np.hypot(cos_mean, sin_mean)
theta_rot = np.degrees(np.arctan2(sin_mean, cos_mean))
# Fold the boundary so the result lies in (-180, 180]
if theta_rot <= -180:
    theta_rot += 360
elif theta_rot > 180:
    theta_rot -= 360
circ_std = np.degrees(np.sqrt(-2 * np.log(max(R, 1e-12))))  # floor R so log() stays finite
pct_within10 = (np.abs(delta) <= 10).mean() * 100

print(f"Rotation (circular mean Δθ): {theta_rot:.1f}°")
print(f"Circular std dev:            {circ_std:.1f}°")
print(f"Resultant vector length R:   {R:.3f}")
print(f"% within ±10°:               {pct_within10:.1f}%")

# Optional: most populated 10° bin of Δθ, reported by its centre
bins = np.linspace(-180, 180, 37)
hist, edges = np.histogram(delta, bins=bins)
mode_bin_idx = np.argmax(hist)
mode_center = 0.5 * (edges[mode_bin_idx] + edges[mode_bin_idx+1])
print(f"Circular mode (histogram center): ~{mode_center:.1f}°")

# 5) Rotate the sonic directions by theta_rot and look at what is left
wind_data['sonic_dir_corr'] = (wind_data['sonic_dir'] - theta_rot) % 360
residual = (wind_data['sonic_dir_corr'] - wind_data['mast_dir'] + 180) % 360 - 180
wind_data['residual_dir'] = residual
print(f"Post-rotation median |residual|: {np.nanmedian(np.abs(residual)):.1f}°")


def _with_shifted_copies(values):
    """Return the values followed by copies shifted by +360 and -360."""
    base = values.to_numpy()
    return np.concatenate([base, base + 360, base - 360])


# Each point is tiled three times (original, +360, -360) on both axes before binning
x_mast = _with_shifted_copies(wind_data['mast_dir'])
y_sonic = _with_shifted_copies(wind_data['sonic_dir'])
y_sonic_corr = _with_shifted_copies(wind_data['sonic_dir_corr'])

# ========= Plot A (before rotation) and Plot B (after rotation) =========
hexbin_panels = (
    (y_sonic, 'Sonic Direction (°)', 'Mast vs Sonic Direction (before rotation)'),
    (y_sonic_corr, 'Sonic Direction (°, rotated)', f'Mast vs Sonic (after rotation {theta_rot:.1f}°)'),
)
for y_values, y_label, heading in hexbin_panels:
    plt.figure(figsize=(6,6))
    hb = plt.hexbin(x_mast, y_values, gridsize=36, cmap='viridis',
                    extent=[-60, 420, -60, 420])
    plt.plot([-60, 420], [-60, 420], 'k--', linewidth=1)  # 1:1 line
    plt.xlim(0, 360); plt.ylim(0, 360)  # show one full turn only
    plt.xlabel('Mast Direction (°)')
    plt.ylabel(y_label)
    plt.title(heading)
    plt.colorbar(hb, label='Counts')
    plt.tight_layout()
    plt.show()


In [None]:
# ======== Apply Correction to Sonic Direction =========
# Subtract the circular-mean offset from the sonic direction and wrap to [0, 360).
# NOTE(review): `circ_mean` is also reassigned by the mast-vs-KNMI cell, so run this
# cell before that one (or re-run the sonic-vs-mast metrics first).
wind_data['sonic_corrected'] = (wind_data['sonic_dir'] - circ_mean) % 360

# ======== Plot: Corrected Hexbin Mast vs Sonic =========
fig_corr, ax_corr = plt.subplots(figsize=(6, 6))
hb_corr = ax_corr.hexbin(wind_data['mast_dir'], wind_data['sonic_corrected'],
                         gridsize=36, cmap='viridis', extent=[0, 360, 0, 360])
ax_corr.plot([0, 360], [0, 360], 'k--', linewidth=1)  # 1:1 line
ax_corr.set_xlim(0, 360)
ax_corr.set_ylim(0, 360)
ax_corr.set_xlabel('Mast Direction (°)')
ax_corr.set_ylabel('Corrected Sonic Direction (°)')
ax_corr.set_title('Mast vs Corrected Sonic Wind Direction')
fig_corr.colorbar(hb_corr, ax=ax_corr, label='Counts')
fig_corr.tight_layout()
plt.show()


In [None]:
# Signed difference between corrected sonic and mast direction, wrapped to [-180, 180)
wind_data['delta_dir_corrected'] = (
    (wind_data['sonic_corrected'] - wind_data['mast_dir'] + 180) % 360 - 180
)
delta_corrected = wind_data['delta_dir_corrected']

# The rose bins span [0, 360), so move the negative half up by one full turn
delta_corr_pos = (delta_corrected + 360) % 360

# ======== Plot: Corrected Difference Wind Rose =========
fig, ax = plt.subplots(figsize=(4.5, 4), subplot_kw={'projection': 'polar'})
plot_rose(ax, delta_corr_pos, bins=16, title='Corrected Δθ = Sonic – Mast')
plt.tight_layout()
plt.show()

In [None]:
# Edit before running!

# ======== Load KNMI Wind Direction Data =========
# KNMI reference file (hourly export):
file_path = r"C:\path\to\KNMI\uurgeg_235_2021-2030.txt"  # update to your KNMI file location

# Number of leading lines to skip so that the column-name row becomes the header
header_row = 31

df_knmi = pd.read_csv(file_path, skiprows=header_row, sep=",", engine='python', on_bad_lines='skip')
df_knmi.columns = df_knmi.columns.str.strip()

# Build the observation time as date + HH hours.
# KNMI numbers the hours 1..24, so HH=24 is 00:00 of the next day. Parsing
# YYYYMMDD+HH with '%H' rejects hour 24 and would silently drop it for every day.
knmi_date = pd.to_datetime(df_knmi['YYYYMMDD'].astype(str).str.strip(),
                           format='%Y%m%d', errors='coerce')
knmi_hour = pd.to_numeric(df_knmi['HH'], errors='coerce')
df_knmi['Timestamp'] = (knmi_date + pd.to_timedelta(knmi_hour, unit='h')).dt.tz_localize('UTC')

# Keep the campaign window (.copy() so the column edits below act on an independent frame)
in_window = (df_knmi['Timestamp'] >= '2024-02-28') & (df_knmi['Timestamp'] <= '2024-06-13')
df_knmi = df_knmi[in_window].copy()
df_knmi = df_knmi.rename(columns={'DD': 'knmi_dir'})

# In KNMI's DD column 0 means calm and 990 means variable; neither is a direction,
# so blank them out. The later merge drops rows with a missing direction.
knmi_dir = pd.to_numeric(df_knmi['knmi_dir'], errors='coerce')
df_knmi['knmi_dir'] = knmi_dir.mask(knmi_dir.isin([0, 990]))

# Put the mast timestamps on UTC as well. Only localise while they are still naive,
# so re-running this cell does not fail on already-converted timestamps.
# NOTE(review): assumes the logger records Europe/Amsterdam wall-clock time — confirm.
wind_data['TIMESTAMP'] = pd.to_datetime(wind_data['TIMESTAMP'])
if wind_data['TIMESTAMP'].dt.tz is None:
    wind_data['TIMESTAMP'] = (
        wind_data['TIMESTAMP']
        # Wall times that do not exist (DST gap) or are ambiguous become NaT instead of raising
        .dt.tz_localize('Europe/Amsterdam', ambiguous='NaT', nonexistent='NaT')
        .dt.tz_convert('UTC')
    )


In [None]:
# Show the KNMI table after the date filter and the DD -> knmi_dir rename.
print(df_knmi)

In [None]:
# ======== Compute Hourly Circular Mean of Mast Direction (convert local -> UTC) =========
# NOTE(review): WindDir_D15463_Avg shares its sensor id with WS_ms_D15463_Avg, which the
# diurnal wind-speed cell labels "Mast 10.0 m" — so this is presumably the 10 m vane.

# Keep intervals that are dry and pass the wind-speed check
wind_data = big[(big.Flag_Rain == 0) & (big.Flag_WS == 0)].copy()

# Mast wind direction with its timestamp
mast_dir_deg = wind_data[['TIMESTAMP', 'WindDir_D15463_Avg']].copy()

# 1) Treat the timestamps as Europe/Amsterdam wall-clock time and convert to UTC.
#    Wall times inside the spring DST gap (02:00–03:00 on 2024-03-31) do not exist and
#    would make tz_localize raise; they become NaT and are dropped instead.
#    NOTE(review): if the logger runs on a fixed offset or UTC, localise accordingly.
mast_dir_deg['TIMESTAMP'] = pd.to_datetime(mast_dir_deg['TIMESTAMP'])
mast_dir_deg['TIMESTAMP'] = (
    mast_dir_deg['TIMESTAMP']
    .dt.tz_localize('Europe/Amsterdam', ambiguous='NaT', nonexistent='NaT')
    .dt.tz_convert('UTC')
)
mast_dir_deg = mast_dir_deg.dropna(subset=['TIMESTAMP'])

# Use UTC index from here on
mast_dir_deg = mast_dir_deg.set_index('TIMESTAMP')

print("\nRaw mast wind direction (sample, UTC):")
print(mast_dir_deg.head())

# Convert to radians
mast_rad = np.radians(mast_dir_deg['WindDir_D15463_Avg'])

# Unit-vector components of the direction angle (not wind components); averaging
# these instead of the angles keeps the mean correct across the 0/360 wrap.
df_vector = pd.DataFrame({
    'u': np.cos(mast_rad),
    'v': np.sin(mast_rad)
}, index=mast_dir_deg.index)

print("\nVector components before resample (u, v):")
print(df_vector.head())

# Resample to hourly mean (still UTC); each bin is labelled by the start of its hour.
# NOTE(review): check that this labelling matches the hour convention of the KNMI timestamps.
df_hourly = df_vector.resample('H').mean()

print("\nHourly-averaged vector components (UTC):")
print(df_hourly[['u', 'v']].head())

# Convert back to direction (degrees, 0–360)
df_hourly['mast_dir'] = (np.degrees(np.arctan2(df_hourly['v'], df_hourly['u'])) + 360) % 360

# Final cleanup: drop empty hours, keep the UTC timestamp as a column named 'Timestamp'
df_hourly = df_hourly[['mast_dir']].dropna().reset_index().rename(columns={'TIMESTAMP': 'Timestamp'})

print("\nHourly mast wind direction (circular mean, UTC):")
print(df_hourly[['Timestamp', 'mast_dir']].head())

# ======== Merge with KNMI Data on Overlapping Timestamps (both UTC) =========

print("\nKNMI data sample (UTC):")
print(df_knmi[['Timestamp', 'knmi_dir']].head())

# Inner join on the hour, then drop hours where either direction is missing
df_merged = pd.merge(df_hourly, df_knmi[['Timestamp', 'knmi_dir']],
                     on='Timestamp', how='inner').dropna()

print("\nMerged mast + KNMI wind directions (UTC):")
print(df_merged.head())

# ======== Circular Difference and Metrics (mast - KNMI) =========

# Signed difference wrapped to [-180, 180)
delta_knmi = (df_merged['mast_dir'] - df_merged['knmi_dir'] + 180) % 360 - 180
df_merged['delta_dir'] = delta_knmi

print("\nΔθ (mast - KNMI) sample:")
print(df_merged[['Timestamp', 'mast_dir', 'knmi_dir', 'delta_dir']].head())

cos_mean = np.mean(np.cos(np.radians(delta_knmi)))
sin_mean = np.mean(np.sin(np.radians(delta_knmi)))
R = np.hypot(cos_mean, sin_mean)
circ_mean = (np.degrees(np.arctan2(sin_mean, cos_mean))) % 360
circ_std  = np.degrees(np.sqrt(-2 * np.log(max(R, 1e-12))))  # floor R so log() stays finite
pct_within10 = (np.abs(delta_knmi) <= 10).mean() * 100

print("\n--- Mast vs KNMI Direction Comparison (UTC) ---")
print(f"Circular bias (mean Δθ):   {circ_mean:.1f}°")
print(f"Circular std dev:          {circ_std:.1f}°")
print(f"Resultant vector length R: {R:.3f}")
print(f"% within ±10°:             {pct_within10:.1f}%")


In [None]:
# Disabled cell (kept as a string literal, so nothing here runs): an earlier version of
# the mast-vs-KNMI comparison. It has no tz_localize/tz_convert step, unlike the active
# cell above, which converts the mast timestamps to UTC first.
'''
# ======== Compute Hourly Circular Mean of Mast Direction @ 2 m =========

# Filter out flagged data
wind_data = big[(big.Flag_Rain == 0) & (big.Flag_WS == 0)].copy()

# Select 10 m wind direction and convert timestamp
mast_dir_deg = wind_data[['TIMESTAMP', 'WindDir_D15463_Avg']].copy()
mast_dir_deg['TIMESTAMP'] = pd.to_datetime(mast_dir_deg['TIMESTAMP'])
mast_dir_deg.set_index('TIMESTAMP', inplace=True)
# Print a preview of the original mast direction data
print("\nRaw mast wind direction (sample):")
print(mast_dir_deg.head())
# Convert to radians
mast_rad = np.radians(mast_dir_deg['WindDir_D15463_Avg'])

# Compute vector components and resample hourly
df_vector = pd.DataFrame({
    'u': np.cos(mast_rad),
    'v': np.sin(mast_rad)
}, index=mast_dir_deg.index)

# Print the vector components before resampling
print("\nVector components before resample (u, v):")
print(df_vector.head())

df_hourly = df_vector.resample('H').mean()
# After resampling hourly
print("\nHourly-averaged vector components:")
print(df_hourly[['u', 'v']].head())

# Convert back to degrees
df_hourly['mast_dir'] = (np.degrees(np.arctan2(df_hourly['v'], df_hourly['u'])) + 360) % 360


# Final cleanup
df_hourly = df_hourly[['mast_dir']].dropna().reset_index()
df_hourly = df_hourly.rename(columns={df_hourly.columns[0]: 'Timestamp'})

# After converting back to degrees
print("\nHourly mast wind direction (circular mean):")
print(df_hourly[['Timestamp', 'mast_dir']].head())

# ======== Merge with KNMI Data on Overlapping Timestamps =========

print("\nKNMI data sample:")
print(df_knmi[['Timestamp', 'knmi_dir']].head())

df_merged = pd.merge(df_hourly, df_knmi[['Timestamp', 'knmi_dir']], on='Timestamp', how='inner').dropna()
print("\nMerged mast + KNMI wind directions:")
print(df_merged.head())
# ======== Circular Difference and Metrics =========

delta_knmi = (df_merged['mast_dir'] - df_merged['knmi_dir'] + 180) % 360 - 180

df_merged['delta_dir'] = delta_knmi
# Print raw directional differences
print("\nΔθ (mast - KNMI) sample:")
print(df_merged[['Timestamp', 'mast_dir', 'knmi_dir', 'delta_dir']].head())
cos_mean = np.mean(np.cos(np.radians(delta_knmi)))
sin_mean = np.mean(np.sin(np.radians(delta_knmi)))
R = np.hypot(cos_mean, sin_mean)
circ_mean = (np.degrees(np.arctan2(sin_mean, cos_mean))) % 360
circ_std = np.degrees(np.sqrt(-2 * np.log(R)))
pct_within10 = (np.abs(delta_knmi) <= 10).mean() * 100

print("\n--- Mast vs KNMI Direction Comparison ---")
print(f"Circular bias (mean Δθ):   {circ_mean:.1f}°")
print(f"Circular std dev:          {circ_std:.1f}°")
print(f"Resultant vector length R: {R:.3f}")
print(f"% within ±10°:             {pct_within10:.1f}%")
'''

In [None]:
# ======== Plot Wind Rose of Δθ (Mast – KNMI) =========

def plot_rose(ax, directions, bins=16, title=''):
    """Draw a circular histogram (rose) of directions on a polar axis.

    Parameters
    ----------
    ax : polar axes to draw on; it is set to 0° at the top, increasing clockwise.
    directions : array-like of angles in degrees. Non-finite values are ignored
        and the rest are wrapped into [0, 360), so 360° counts as north.
    bins : number of equal-width sectors.
    title : axes title.
    """
    values = np.asarray(directions, dtype=float)
    values = np.mod(values[np.isfinite(values)], 360.0)

    # Equal-width sectors covering the full circle
    edges = np.linspace(0, 360, bins + 1)
    counts, _ = np.histogram(values, bins=edges)
    sector = 360 / bins
    centers = np.radians(edges[:-1] + sector / 2)
    # The x positions are sector centres, so the bars must be centre-aligned.
    # With align='edge' every bar would start at its centre and sit half a sector off.
    ax.bar(centers, counts, width=np.radians(sector),
           bottom=0, edgecolor='k', align='center')
    ax.set_theta_zero_location('N')
    ax.set_theta_direction(-1)
    ax.set_title(title)

# Δθ is stored in [-180, 180); add one full turn and wrap so it fits the rose's [0, 360) bins
delta_pos = (df_merged['delta_dir'] + 360) % 360

fig, ax = plt.subplots(figsize=(5, 5), subplot_kw={'projection': 'polar'})
plot_rose(ax, delta_pos, bins=16, title='Δθ = Mast – KNMI')
plt.tight_layout()
plt.show()