In [1]:
# # Clean the requirements file by removing file-based dependencies 
# !grep -v "@ file:" requirements.txt > clean_requirements.txt

# # Install packages from the cleaned requirements file
# !pip install -r clean_requirements.txt

# install

In [2]:
!pip install --upgrade "numpy<1.25.0" scipy numba

Defaulting to user installation because normal site-packages is not writeable
Requirement already up-to-date: numpy<1.25.0 in /home/jovyan/.local/lib/python3.8/site-packages (1.24.4)
Requirement already up-to-date: scipy in /home/jovyan/.local/lib/python3.8/site-packages (1.10.1)
Requirement already up-to-date: numba in /home/jovyan/.local/lib/python3.8/site-packages (0.58.1)


In [3]:
# Fix importlib_metadata compatibility issue
!pip install importlib_metadata==4.13.0

# Install xarray dependencies
!pip install netcdf4 cftime

Defaulting to user installation because normal site-packages is not writeable
Collecting importlib_metadata==4.13.0
  Downloading importlib_metadata-4.13.0-py3-none-any.whl (23 kB)
Installing collected packages: importlib-metadata
Successfully installed importlib-metadata-4.13.0
Defaulting to user installation because normal site-packages is not writeable


In [4]:
# Reinstall xarray with compatible dependencies
!pip install xarray==2023.1.0 --force-reinstall

Defaulting to user installation because normal site-packages is not writeable
Collecting xarray==2023.1.0
  Using cached xarray-2023.1.0-py3-none-any.whl (973 kB)
Collecting packaging>=21.3
  Using cached packaging-24.2-py3-none-any.whl (65 kB)
Collecting numpy>=1.20
  Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
Collecting pandas>=1.3
  Using cached pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.4 MB)
Collecting tzdata>=2022.1
  Using cached tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Collecting python-dateutil>=2.8.2
  Downloading python_dateutil-2.9.0.post0-py2.py3-none-any.whl (229 kB)
[K     |████████████████████████████████| 229 kB 8.2 MB/s eta 0:00:01
[?25hCollecting pytz>=2020.1
  Using cached pytz-2025.2-py2.py3-none-any.whl (509 kB)
Collecting six>=1.5
  Downloading six-1.17.0-py2.py3-none-any.whl (11 kB)
Installing collected packages: packaging, numpy, tzdata, six, python-dateutil, pytz, pandas, xar

In [5]:
# !pip install netCDF4 xarray matplotlib

In [6]:
!pip install --upgrade numexpr bottleneck

Defaulting to user installation because normal site-packages is not writeable
Requirement already up-to-date: numexpr in /home/jovyan/.local/lib/python3.8/site-packages (2.8.6)
Requirement already up-to-date: bottleneck in /home/jovyan/.local/lib/python3.8/site-packages (1.4.0)


In [7]:
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import numpy as np
from scipy import stats
import matplotlib.gridspec as gridspec
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
from matplotlib.legend_handler import HandlerTuple
import matplotlib.path as mpath
import matplotlib.patches as mpatches

In [8]:

def process_latent_heat_data(obs_file: str) -> pd.DataFrame:
    """Load the observed latent-heat variable (``Qle_cor``) from a NetCDF file.

    Args:
        obs_file: Path of the observation file. It must expose a ``time``
            coordinate and a ``Qle_cor`` variable.

    Returns:
        pd.DataFrame: ``ds['Qle_cor'].to_dataframe()``, i.e. a frame holding
        the ``Qle_cor`` values indexed by the variable's coordinates.
    """
    # Use the dataset as a context manager so the underlying file handle is
    # released even when the time conversion below raises.
    with xr.open_dataset(obs_file) as ds:
        raw_times = ds.time.values
        if isinstance(raw_times[0], np.floating):
            # Floating-point stamps are parsed as zero-padded YYYYMMDDHH numbers.
            ds['time'] = pd.to_datetime(
                [f"{int(t):010d}" for t in raw_times],
                format='%Y%m%d%H',
            )
        # to_dataframe() materialises the values, so the frame stays valid
        # after the dataset is closed.
        return ds['Qle_cor'].to_dataframe()

def process_sensible_heat_data(flux_file: str) -> pd.DataFrame:
    """Load the observed sensible-heat variable (``Qh_cor``) from a NetCDF file.

    Args:
        flux_file: Path of the flux file. It must expose a ``time``
            coordinate and a ``Qh_cor`` variable.

    Returns:
        pd.DataFrame: One column named ``Qh`` holding the ``Qh_cor`` values,
        indexed by the (possibly converted) time coordinate.
    """
    # Use the dataset as a context manager so the underlying file handle is
    # released even when the time conversion below raises.
    with xr.open_dataset(flux_file) as ds_flux:
        raw_times = ds_flux.time.values
        if isinstance(raw_times[0], np.floating):
            # Floating-point stamps are parsed as zero-padded YYYYMMDDHH numbers.
            ds_flux['time'] = pd.to_datetime(
                [f"{int(t):010d}" for t in raw_times],
                format='%Y%m%d%H',
            )
        # .values loads the data into memory before the file is closed.
        return pd.DataFrame({'Qh': ds_flux['Qh_cor'].values},
                            index=ds_flux.time.values)

def process_gpp_data(flux_file: str) -> pd.DataFrame:
    """Load the observed GPP variable (``GPP_DT``) from a NetCDF file.

    Args:
        flux_file: Path of the flux file. It must expose a ``time``
            coordinate and a ``GPP_DT`` variable.

    Returns:
        pd.DataFrame: One column named ``GPP`` holding the ``GPP_DT`` values,
        indexed by the (possibly converted) time coordinate.
    """
    # Use the dataset as a context manager so the underlying file handle is
    # released even when the time conversion below raises.
    with xr.open_dataset(flux_file) as ds_flux:
        raw_times = ds_flux.time.values
        if isinstance(raw_times[0], np.floating):
            # Floating-point stamps are parsed as zero-padded YYYYMMDDHH numbers.
            ds_flux['time'] = pd.to_datetime(
                [f"{int(t):010d}" for t in raw_times],
                format='%Y%m%d%H',
            )
        # .values loads the data into memory before the file is closed.
        return pd.DataFrame({'GPP': ds_flux['GPP_DT'].values},
                            index=ds_flux.time.values)

def process_model_data(mod_file: str) -> tuple:
    """Load the modelled LH, HFX and PSN series from a model output file.

    Args:
        mod_file: Path of the model output file. It must contain a ``Times``
            variable of byte strings formatted ``%Y-%m-%d_%H:%M:%S`` and the
            variables ``LH``, ``HFX`` and ``PSN``.

    Returns:
        tuple: ``(df_lh, df_hfx, df_psn)`` — three single-column frames
        (columns ``LH``, ``HFX``, ``PSN``) sharing the parsed time index.
    """
    # Use the dataset as a context manager so the file handle is released
    # once the values have been copied into memory.
    with xr.open_dataset(mod_file) as ds:
        times = [datetime.strptime(t.decode('utf-8'), '%Y-%m-%d_%H:%M:%S')
                 for t in ds['Times'].values]
        # [:, 0, 0] keeps every step along the first axis and picks element
        # (0, 0) of the two trailing axes.
        return (
            pd.DataFrame({'LH': ds['LH'][:, 0, 0].values}, index=times),
            pd.DataFrame({'HFX': ds['HFX'][:, 0, 0].values}, index=times),
            pd.DataFrame({'PSN': ds['PSN'][:, 0, 0].values}, index=times),
        )

def split_data_by_index(data: pd.DataFrame, split_index: int) -> tuple:
    """Cut a frame into two consecutive pieces at a positional index.

    Args:
        data: Frame to split.
        split_index: Row position of the first row of the second piece.

    Returns:
        tuple: ``(rows before split_index, rows from split_index onward)``.
    """
    rows = data.iloc
    return rows[:split_index], rows[split_index:]

def analyze_data_statistics(data: pd.DataFrame, column: str, name: str) -> None:
    """
    Print the maximum, minimum and mean of one column plus the index labels
    at which the maximum and minimum occur.

    Args:
        data (pd.DataFrame): Input dataframe
        column (str): Column whose statistics are reported
        name (str): Dataset name used in the printed header
    """
    print(f"\n{name} Statistics:")
    series = data[column]

    # Magnitudes are printed with two decimals.
    for caption, reducer in (("Max", series.max),
                             ("Min", series.min),
                             ("Mean", series.mean)):
        print(f"{caption} value: {reducer():.2f}")

    # Index labels of the extremes are printed with their default formatting.
    for extreme, locator in (("max", series.idxmax),
                             ("min", series.idxmin)):
        print(f"Time of {extreme} value: {locator()}")


In [9]:

def create_seasonal_data(df: pd.DataFrame, column: str) -> tuple:
    """
    Aggregate a time series by day of year.

    Args:
        df (pd.DataFrame): Input dataframe with a datetime index
        column (str): Column to aggregate

    Returns:
        tuple: (seasonal_years, daily_stats) where ``seasonal_years`` holds
        the mean of ``column`` per day of year (rows) and calendar year
        (columns), and ``daily_stats`` holds the ``mean``/``min``/``max`` of
        ``column`` per day of year across all years.
    """
    # Work on a tagged copy so the caller's frame is left untouched.
    tagged = df.assign(year=df.index.year, doy=df.index.dayofyear)

    # One column per year, one row per day of year.
    per_year = tagged.groupby(['year', 'doy'])[column].mean().unstack(0)

    # Statistics pooled over every year.
    pooled = tagged.groupby('doy')[column].agg(['mean', 'min', 'max'])

    return per_year, pooled

def create_diurnal_data(df: pd.DataFrame, column: str, local_offset: float = -6) -> tuple:
    """
    Aggregate a time series by local hour of day.

    Args:
        df (pd.DataFrame): Input DataFrame with a datetime index; the stamps
            are treated as UTC.
        column (str): Column name to process.
        local_offset (float): Hours added to the UTC stamps to obtain local
            time. The default of -6 is the offset for a site at 90°W
            (-90 / 15 = -6); pass a different value for sites in other
            time zones.

    Returns:
        tuple: (diurnal_years, hourly_stats) where ``diurnal_years`` holds the
        mean of ``column`` per local hour (rows) and local calendar year
        (columns), and ``hourly_stats`` holds the ``mean``/``min``/``max`` of
        ``column`` per local hour across all years.
    """
    # Shift the complete timestamp instead of only the hour, so that the
    # calendar year rolls over together with the local clock (e.g. 03:00 UTC
    # on 1 January with an offset of -6 belongs to the previous local year).
    local_index = df.index + pd.Timedelta(hours=local_offset)

    df_diurnal = df.copy()
    df_diurnal['year'] = local_index.year
    df_diurnal['hour'] = local_index.hour

    # Group by local hour and calculate statistics
    hourly_stats = df_diurnal.groupby('hour')[column].agg(['mean', 'min', 'max'])

    # Per-year means for plotting (grouped by local year and local hour)
    diurnal_years = df_diurnal.groupby(['year', 'hour'])[column].mean().unstack(0)

    return diurnal_years, hourly_stats

In [10]:
def plot_seasonal_old(ax, seasonal_data, daily_stats, is_obs=True):
    """
    Draw the day-of-year panel for one data source; no legend is added.

    Legacy variant that hands pandas objects straight to matplotlib.

    Args:
        ax: Axis to draw on.
        seasonal_data: Frame with one column per year, indexed by day of year.
        daily_stats: Frame with a ``mean`` column, indexed by day of year.
        is_obs: Truthy selects the observation colour, falsy the model colour.
    """
    # Shared palette: observations #1f77b4, model #ff7f0e.
    tint = '#ff7f0e' if not is_obs else '#1f77b4'

    # One faint point cloud per year.
    for year_label in seasonal_data.columns:
        ax.scatter(seasonal_data.index, seasonal_data[year_label],
                   color=tint, alpha=0.3, s=1)

    # Multi-year mean as a line (the min/max envelope is intentionally not drawn).
    ax.plot(daily_stats.index, daily_stats['mean'],
            color=tint, linewidth=2, alpha=0.79)

    ax.set_xlabel('Day of Year')
    ax.grid(True, alpha=0.3)



In [11]:
def plot_seasonal(ax, seasonal_data, daily_stats, is_obs=True):
    """
    Draw the day-of-year panel for one data source; no legend is added.

    Index and values are converted to NumPy arrays before being handed to
    matplotlib.

    Args:
        ax: Axis to draw on.
        seasonal_data: Frame with one column per year, indexed by day of year.
        daily_stats: Frame with a ``mean`` column, indexed by day of year.
        is_obs: Truthy selects the observation colour, falsy the model colour.
    """
    # Shared palette: observations #1f77b4, model #ff7f0e.
    tint = '#ff7f0e' if not is_obs else '#1f77b4'

    # One faint point cloud per year; the x positions are the same for all years.
    days = seasonal_data.index.to_numpy()
    for year_label in seasonal_data.columns:
        ax.scatter(days, seasonal_data[year_label].values,
                   color=tint, alpha=0.3, s=1)

    # Multi-year mean as a line (the min/max envelope is intentionally not drawn).
    ax.plot(daily_stats.index.to_numpy(), daily_stats['mean'].values,
            color=tint, linewidth=2, alpha=0.79)

    ax.set_xlabel('Day of Year')
    ax.grid(True, alpha=0.3)

In [12]:
def plot_diurnal_old(ax, diurnal_obs, hourly_stats_obs, diurnal_mod, hourly_stats_mod, show_legend=False):
    """
    Draw observed and modelled diurnal cycles on one axis.

    Legacy variant that hands pandas objects straight to matplotlib.

    Args:
        ax: Matplotlib axis (typically a twin x-axis) to plot diurnal data.
        diurnal_obs: Observed per-year values, one column per year, index=hour.
        hourly_stats_obs: Observed hourly statistics with a ``mean`` column.
        diurnal_mod: Modelled per-year values, one column per year, index=hour.
        hourly_stats_mod: Modelled hourly statistics with a ``mean`` column.
        show_legend: Boolean; if True, add the legend.
    """
    # Diurnal colours differ from the seasonal ones: grey for observations,
    # magenta/pink for the model. Observations are drawn first.
    sources = (
        (diurnal_obs, hourly_stats_obs, '#7f7f7f', 'Obs Diurnal'),
        (diurnal_mod, hourly_stats_mod, '#e377c2', 'Mod Diurnal'),
    )

    for per_year, hourly, tint, legend_text in sources:
        last_pos = len(per_year.columns) - 1
        for pos, year_label in enumerate(per_year.columns):
            # Only the final year carries a label, so each source appears once
            # in a legend.
            ax.scatter(per_year.index, per_year[year_label], color=tint,
                       alpha=0.3, s=5,
                       label=legend_text if pos == last_pos else None)

        # Mean cycle as an unlabeled line (no min/max envelope is drawn).
        ax.plot(hourly.index, hourly['mean'], color=tint, linewidth=2, label=None)

    ax.set_xlabel('Local Hour of Day', labelpad=10)
    ax.grid(True, alpha=0.3)

    if show_legend:
        ax.legend(loc='upper left', bbox_to_anchor=(0, 1), ncol=2, framealpha=1, markerscale=5)

In [13]:
def plot_diurnal(ax, diurnal_obs, hourly_stats_obs, diurnal_mod, hourly_stats_mod, show_legend=False):
    """
    Draw observed and modelled diurnal cycles on one axis.

    Index and values are converted to NumPy arrays before being handed to
    matplotlib.

    Args:
        ax: Matplotlib axis (typically a twin x-axis) to plot diurnal data.
        diurnal_obs: Observed per-year values, one column per year, index=hour.
        hourly_stats_obs: Observed hourly statistics with a ``mean`` column.
        diurnal_mod: Modelled per-year values, one column per year, index=hour.
        hourly_stats_mod: Modelled hourly statistics with a ``mean`` column.
        show_legend: Boolean; if True, add the legend.
    """
    # Diurnal colours differ from the seasonal ones: grey for observations,
    # magenta/pink for the model. Observations are drawn first.
    sources = (
        (diurnal_obs, hourly_stats_obs, '#7f7f7f', 'Obs Diurnal'),
        (diurnal_mod, hourly_stats_mod, '#e377c2', 'Mod Diurnal'),
    )

    for per_year, hourly, tint, legend_text in sources:
        last_pos = len(per_year.columns) - 1
        for pos, year_label in enumerate(per_year.columns):
            # Only the final year carries a label, so each source appears once
            # in a legend.
            ax.scatter(per_year.index.to_numpy(), per_year[year_label].values,
                       color=tint, alpha=0.3, s=5,
                       label=legend_text if pos == last_pos else None)

        # Mean cycle as an unlabeled line.
        ax.plot(hourly.index.to_numpy(), hourly['mean'].values,
                color=tint, linewidth=2, label=None)

    ax.set_xlabel('Local Hour of Day', labelpad=10)
    ax.grid(True, alpha=0.3)

    if show_legend:
        ax.legend(loc='upper left', bbox_to_anchor=(0, 1), ncol=2, framealpha=1, markerscale=5)

In [14]:
def plot_time_series_old(ax, obs1, obs2, mod1, mod2, obs_col, mod_col, ylabel, show_legend=False):
    """
    Draw observed and modelled point series for two periods on one axis.

    Legacy variant that hands pandas objects straight to matplotlib.

    Args:
        ax: Axis to draw on.
        obs1, obs2: Observation frames for the first and second period.
        mod1, mod2: Model frames for the first and second period.
        obs_col: Column to read from the observation frames.
        mod_col: Column to read from the model frames.
        ylabel: Label for the y axis.
        show_legend: If True, add a four-column legend at the upper right.
    """
    obs_tint, mod_tint = '#1f77b4', '#ff7f0e'

    # Drawing order matters for stacking: first period (strong), then the
    # second period (faded).
    layers = (
        (obs1, obs_col, obs_tint, 'Obs (’02-’07)', 0.79),
        (mod1, mod_col, mod_tint, 'Mod (’02-’07)', 0.69),
        (obs2, obs_col, obs_tint, 'Obs (’08-’09)', 0.2),
        (mod2, mod_col, mod_tint, 'Mod (’08-’09)', 0.2),
    )
    for frame, col, tint, legend_text, opacity in layers:
        ax.plot(frame.index, frame[col], '.', color=tint,
                label=legend_text, alpha=opacity, markersize=2)

    ax.set_xlabel('Time')
    ax.set_ylabel(ylabel)
    ax.grid(True, alpha=0.3)

    if show_legend:
        ax.legend(loc='upper right', ncol=4, framealpha=0.5, markerscale=10)


In [15]:
def plot_time_series(ax, obs1, obs2, mod1, mod2, obs_col, mod_col, ylabel, show_legend=False):
    """
    Draw observed and modelled point series for two periods on one axis.

    Each index and column is converted to a NumPy array before plotting,
    which avoids the multi-dimensional indexing error that occurs when the
    pandas DatetimeIndex is passed to matplotlib directly.

    Args:
        ax: Axis to draw on.
        obs1, obs2: Observation frames for the first and second period.
        mod1, mod2: Model frames for the first and second period.
        obs_col: Column to read from the observation frames.
        mod_col: Column to read from the model frames.
        ylabel: Label for the y axis.
        show_legend: If True, add a four-column legend at the upper right.
    """
    obs_tint, mod_tint = '#1f77b4', '#ff7f0e'

    # Drawing order matters for stacking: first period (strong), then the
    # second period (faded).
    layers = (
        (obs1, obs_col, obs_tint, "Obs ('02-'07)", 0.79),
        (mod1, mod_col, mod_tint, "Mod ('02-'07)", 0.69),
        (obs2, obs_col, obs_tint, "Obs ('08-'09)", 0.2),
        (mod2, mod_col, mod_tint, "Mod ('08-'09)", 0.2),
    )
    for frame, col, tint, legend_text, opacity in layers:
        ax.plot(frame.index.to_numpy(), frame[col].values, '.', color=tint,
                label=legend_text, alpha=opacity, markersize=2)

    ax.set_xlabel('Time')
    ax.set_ylabel(ylabel)
    ax.grid(True, alpha=0.3)

    if show_legend:
        ax.legend(loc='upper right', ncol=4, framealpha=0.5, markerscale=10)


In [16]:
def plot_scatter(ax, obs1, mod1, obs_col, mod_col, show_legend=False):
    """
    Scatter modelled against observed values and return their Pearson correlation.

    Only timestamps present in both frames are used. Pairs in which either
    value is NaN/inf are excluded from the correlation, so gaps in either
    series do not turn the result into NaN.

    Args:
        ax: Axis to draw on.
        obs1: Observation frame.
        mod1: Model frame.
        obs_col: Column to read from ``obs1``.
        mod_col: Column to read from ``mod1``.
        show_legend: If True, add a legend (it contains the 1:1 line).

    Returns:
        Pearson correlation coefficient of the valid pairs, or NaN when fewer
        than two valid pairs exist.
    """
    common_times = obs1.index.intersection(mod1.index)
    # Plain float arrays, consistent with the other plotting helpers that
    # convert pandas objects before handing them to matplotlib.
    obs_vals = obs1.loc[common_times, obs_col].to_numpy(dtype=float)
    mod_vals = mod1.loc[common_times, mod_col].to_numpy(dtype=float)

    # pearsonr needs at least two points; non-finite pairs are masked out first.
    valid = np.isfinite(obs_vals) & np.isfinite(mod_vals)
    if np.count_nonzero(valid) >= 2:
        correlation = stats.pearsonr(obs_vals[valid], mod_vals[valid])[0]
    else:
        correlation = float('nan')

    ax.scatter(obs_vals, mod_vals, alpha=0.5, color='#1f77b4', s=10)

    # Span the 1:1 line across the combined extent of both axes as they stand
    # after the scatter call.
    lims = [
        np.min([ax.get_xlim(), ax.get_ylim()]),
        np.max([ax.get_xlim(), ax.get_ylim()])
    ]
    ax.plot(lims, lims, 'k-', alpha=1, zorder=0, label='1:1 line')
    ax.grid(True, alpha=0.3)
    if show_legend:
        ax.legend()
    return correlation

def create_combined_visualization(df_obs_lh: pd.DataFrame, df_mod_lh: pd.DataFrame,
                                  df_obs_sh: pd.DataFrame, df_mod_sh: pd.DataFrame,
                                  df_obs_gpp: pd.DataFrame, df_mod_psn: pd.DataFrame,
                                  split_index: int,
                                  figure_dpi: int = 60,
                                  savefig_dpi: int = 60) -> None:
    """Build and show a 3x3 figure comparing observations with model output.

    Rows are latent heat, sensible heat and GPP. Columns are the time series
    (both periods), the seasonal cycle with the diurnal cycle overlaid on a
    second x-axis, and an observed-vs-modelled scatter plot whose title
    carries the Pearson correlation. Summary statistics of the first period
    are printed before plotting.

    Note: this function overwrites several entries of the global
    ``plt.rcParams`` (dpi and font sizes) and does not restore them.

    Args:
        df_obs_lh: Observed latent heat; column ``Qle_cor`` is read.
        df_mod_lh: Modelled latent heat; column ``LH`` is read.
        df_obs_sh: Observed sensible heat; column ``Qh`` is read.
        df_mod_sh: Modelled sensible heat; column ``HFX`` is read.
        df_obs_gpp: Observed GPP; column ``GPP`` is read.
        df_mod_psn: Modelled photosynthesis; column ``PSN`` is read.
        split_index: Row position at which every frame is cut into a first
            and a second period. Statistics, seasonal/diurnal aggregates and
            scatter plots use the first period only.
        figure_dpi: Value written to ``plt.rcParams['figure.dpi']``.
        savefig_dpi: Value written to ``plt.rcParams['savefig.dpi']``.
    """
    # Figure parameters (global matplotlib state)
    plt.rcParams['figure.dpi'] = figure_dpi
    plt.rcParams['savefig.dpi'] = savefig_dpi
    plt.rcParams['font.size'] = 14
    plt.rcParams['axes.titlesize'] = 16
    plt.rcParams['axes.labelsize'] = 14
    plt.rcParams['xtick.labelsize'] = 13
    plt.rcParams['ytick.labelsize'] = 13
    plt.rcParams['legend.fontsize'] = 13
    
    # Build the GridSpec layout: three rows, columns in a 3:2:1 width ratio
    fig = plt.figure(figsize=(20, 15))
    gs = gridspec.GridSpec(3, 3, width_ratios=[3, 2, 1])
    
    # Split every frame into two periods at the same row position
    obs_lh1, obs_lh2 = split_data_by_index(df_obs_lh, split_index)
    mod_lh1, mod_lh2 = split_data_by_index(df_mod_lh, split_index)
    obs_sh1, obs_sh2 = split_data_by_index(df_obs_sh, split_index)
    mod_sh1, mod_sh2 = split_data_by_index(df_mod_sh, split_index)
    obs_gpp1, obs_gpp2 = split_data_by_index(df_obs_gpp, split_index)
    mod_psn1, mod_psn2 = split_data_by_index(df_mod_psn, split_index)
    
    # Print statistics of the original (first-period) data
    print("\n=== Original Data Statistics ===")
    analyze_data_statistics(obs_lh1, 'Qle_cor', 'Observed Latent Heat')
    analyze_data_statistics(mod_lh1, 'LH', 'Modeled Latent Heat')
    analyze_data_statistics(obs_sh1, 'Qh', 'Observed Sensible Heat')
    analyze_data_statistics(mod_sh1, 'HFX', 'Modeled Sensible Heat')
    analyze_data_statistics(obs_gpp1, 'GPP', 'Observed GPP')
    analyze_data_statistics(mod_psn1, 'PSN', 'Modeled PSN')
    
    # Seasonal (day-of-year) aggregates, first period only
    seasonal_obs_lh, daily_stats_obs_lh = create_seasonal_data(obs_lh1, 'Qle_cor')
    seasonal_mod_lh, daily_stats_mod_lh = create_seasonal_data(mod_lh1, 'LH')
    seasonal_obs_sh, daily_stats_obs_sh = create_seasonal_data(obs_sh1, 'Qh')
    seasonal_mod_sh, daily_stats_mod_sh = create_seasonal_data(mod_sh1, 'HFX')
    seasonal_obs_gpp, daily_stats_obs_gpp = create_seasonal_data(obs_gpp1, 'GPP')
    seasonal_mod_gpp, daily_stats_mod_gpp = create_seasonal_data(mod_psn1, 'PSN')
    
    # Diurnal (hour-of-day) aggregates, first period only
    diurnal_obs_lh, hourly_stats_obs_lh = create_diurnal_data(obs_lh1, 'Qle_cor')
    diurnal_mod_lh, hourly_stats_mod_lh = create_diurnal_data(mod_lh1, 'LH')
    diurnal_obs_sh, hourly_stats_obs_sh = create_diurnal_data(obs_sh1, 'Qh')
    diurnal_mod_sh, hourly_stats_mod_sh = create_diurnal_data(mod_sh1, 'HFX')
    diurnal_obs_gpp, hourly_stats_obs_gpp = create_diurnal_data(obs_gpp1, 'GPP')
    diurnal_mod_gpp, hourly_stats_mod_gpp = create_diurnal_data(mod_psn1, 'PSN')
    
    # (Further statistics-printing code was omitted here.)
    
    # Define the subplots
    # Row 1: Latent Heat
    ax_lh_ts = fig.add_subplot(gs[0, 0])      # time series
    ax_lh_seasonal = fig.add_subplot(gs[0, 1])  # seasonal panel (primary x-axis is day of year)
    ax_lh_scatter = fig.add_subplot(gs[0, 2])   # scatter plot
    
    # Row 2: Sensible Heat
    ax_sh_ts = fig.add_subplot(gs[1, 0])
    ax_sh_seasonal = fig.add_subplot(gs[1, 1])
    ax_sh_scatter = fig.add_subplot(gs[1, 2])
    
    # Row 3: GPP
    ax_gpp_ts = fig.add_subplot(gs[2, 0])
    ax_gpp_seasonal = fig.add_subplot(gs[2, 1])
    ax_gpp_scatter = fig.add_subplot(gs[2, 2])
    
    # Time-series panels; only the first row gets a legend
    plot_time_series(ax_lh_ts, obs_lh1, obs_lh2, mod_lh1, mod_lh2,
                     'Qle_cor', 'LH', 'Latent Heat (W/m²)', True)
    plot_time_series(ax_sh_ts, obs_sh1, obs_sh2, mod_sh1, mod_sh2,
                     'Qh', 'HFX', 'Sensible Heat (W/m²)', False)
    plot_time_series(ax_gpp_ts, obs_gpp1, obs_gpp2, mod_psn1, mod_psn2,
                     'GPP', 'PSN', 'GPP (μmol/m²/s)', False)
    
    # --- Latent heat (first row) ---
    # Seasonal data (observations and model) on ax_lh_seasonal
    plot_seasonal(ax_lh_seasonal, seasonal_obs_lh, daily_stats_obs_lh, is_obs=True)
    plot_seasonal(ax_lh_seasonal, seasonal_mod_lh, daily_stats_mod_lh, is_obs=False)
    
    # twiny() adds a second x-axis (sharing the y-axis) for the diurnal data,
    # which create_diurnal_data has already converted to local time
    ax_lh_diurnal = ax_lh_seasonal.twiny()
    ax_lh_diurnal.set_xlim(0, 23)
    plot_diurnal(ax_lh_diurnal, diurnal_obs_lh, hourly_stats_obs_lh, diurnal_mod_lh, hourly_stats_mod_lh)
    
    # --- Custom legend ---

    # Colours (kept in sync by hand with plot_seasonal and plot_diurnal)
    obs_season_color = '#1f77b4'
    mod_season_color = '#ff7f0e'
    obs_diurnal_color = '#7f7f7f'  # e.g., grey for observations
    mod_diurnal_color = '#e377c2'  # e.g., magenta/pink for model data

    # Helper that builds a composite legend handle made of a line and a
    # scatter marker (the filled-area component is currently disabled)
    def create_composite_handle(color):
        # fill = Patch(facecolor=color, edgecolor=color, alpha=0.2)
        line = Line2D([0], [0], color=color, linewidth=2)
        scatter = Line2D([0], [0], marker='o', color=color, linestyle='None', markersize=5)
        # return (fill, line, scatter)
        return (line, scatter)

#     # Disabled experiment: define vertices and the matching drawing codes to
#     # build a wavy, irregular patch for the legend. For example:
#     Path = mpath.Path
#     vertices = [
#         (0.0, 0.0), (0.2, 0.1), (0.4, -0.1), (0.6, 0.1), (0.8, -0.1), (1.0, 0.0),
#         (1.0, 1.0), (0.8, 0.9), (0.6, 1.1), (0.4, 0.9), (0.2, 1.1), (0.0, 1.0),
#         (0.0, 0.0)  # close the shape
#     ]
#     codes = [
#         Path.MOVETO, Path.CURVE3, Path.CURVE3, Path.CURVE3, Path.CURVE3, Path.CURVE3,
#         Path.LINETO, Path.CURVE3, Path.CURVE3, Path.CURVE3, Path.CURVE3, Path.CURVE3,
#         Path.CLOSEPOLY
#     ]
    
#     wavy_path = mpath.Path(vertices, codes)
#     wavy_patch = mpatches.PathPatch(wavy_path, facecolor=obs_season_color, 
#                                      edgecolor=obs_season_color, alpha=0.2)

    # Legend entries: observed seasonal, modelled seasonal, observed diurnal,
    # modelled diurnal, followed by one marker-only and one line-only key
    legend_handles = [
        create_composite_handle(obs_season_color),
        create_composite_handle(mod_season_color),
        create_composite_handle(obs_diurnal_color),
        create_composite_handle(mod_diurnal_color),
        # Patch(facecolor=obs_season_color, edgecolor=obs_season_color, alpha=0.2), # square
        # wavy_patch,
        Line2D([0], [0], marker='o', color=obs_season_color, linestyle='None', markersize=5),
        Line2D([0], [0], color=obs_season_color, linewidth=2)
    ]
    legend_labels = ['Observation Seasonal', 'Modeling Seasonal', 'Observation Diurnal', 'Modeling Diurnal', 'Daily Avg', '6-yr Avg']
    
    # Place the legend in the upper-left corner of the first-row seasonal panel;
    # HandlerTuple renders each tuple handle as a single composite key
    ax_lh_seasonal.legend(legend_handles, legend_labels,
                          handler_map={tuple: HandlerTuple(ndivide=None)},
                          loc='upper left', ncol=1)
    
    # Scatter plot with correlation for Latent Heat
    corr_lh = plot_scatter(ax_lh_scatter, obs_lh1, mod_lh1, 'Qle_cor', 'LH', show_legend=True)
    ax_lh_seasonal.set_title('(b) Latent Heat Seasonal+Diurnal (’02-’07)')
    ax_lh_ts.set_title('(a) Latent Heat Time Series (resolution: hour)')
    ax_lh_scatter.set_title(f'(c) Latent Heat Scatter\nCorrelation (’02-’07): {corr_lh:.3f}')
    ax_lh_scatter.set_xlabel('Observed Latent Heat (W/m²)')
    ax_lh_scatter.set_ylabel('Modeled Latent Heat (W/m²)')
    
    # --- Sensible heat and GPP rows (same drawing steps, no custom legend) ---
    plot_seasonal(ax_sh_seasonal, seasonal_obs_sh, daily_stats_obs_sh, is_obs=True)
    plot_seasonal(ax_sh_seasonal, seasonal_mod_sh, daily_stats_mod_sh, is_obs=False)
    ax_sh_diurnal = ax_sh_seasonal.twiny()
    ax_sh_diurnal.set_xlim(0, 23)
    plot_diurnal(ax_sh_diurnal, diurnal_obs_sh, hourly_stats_obs_sh, diurnal_mod_sh, hourly_stats_mod_sh)
    
    corr_sh = plot_scatter(ax_sh_scatter, obs_sh1, mod_sh1, 'Qh', 'HFX', show_legend=False)
    ax_sh_ts.set_title('(d) Sensible Heat Time Series')
    ax_sh_seasonal.set_title('(e) Sensible Heat Seasonal+Diurnal (’02-’07)')
    ax_sh_scatter.set_title(f'(f) Sensible Heat Scatter\nCorrelation (’02-’07): {corr_sh:.3f}')
    ax_sh_scatter.set_xlabel('Observed Sensible Heat (W/m²)')
    ax_sh_scatter.set_ylabel('Modeled Sensible Heat (W/m²)')
    
    plot_seasonal(ax_gpp_seasonal, seasonal_obs_gpp, daily_stats_obs_gpp, is_obs=True)
    plot_seasonal(ax_gpp_seasonal, seasonal_mod_gpp, daily_stats_mod_gpp, is_obs=False)
    ax_gpp_diurnal = ax_gpp_seasonal.twiny()
    ax_gpp_diurnal.set_xlim(0, 23)
    plot_diurnal(ax_gpp_diurnal, diurnal_obs_gpp, hourly_stats_obs_gpp, diurnal_mod_gpp, hourly_stats_mod_gpp)
    
    corr_gpp = plot_scatter(ax_gpp_scatter, obs_gpp1, mod_psn1, 'GPP', 'PSN', show_legend=False)
    ax_gpp_ts.set_title('(g) GPP Time Series')
    ax_gpp_seasonal.set_title('(h) GPP Seasonal+Diurnal (’02-’07)')
    ax_gpp_scatter.set_title(f'(i) GPP Scatter\nCorrelation (’02-’07): {corr_gpp:.3f}')
    ax_gpp_scatter.set_xlabel('Observed GPP (μmol/m²/s)')
    ax_gpp_scatter.set_ylabel('Modeled PSN (μmol/m²/s)')
    
    plt.tight_layout()
    # plt.show()
    plt.show(block=False)  # non-blocking display



变量名称及含义可以参考 FLUXNET2015 变量快速入门指南（Variables Quick Start Guide）：https://fluxnet.org/data/fluxnet2015-dataset/variables-quick-start-guide

保存在ipynb的dpi=60即可，节省空间。最后做出来的海报的dpi = 60。

In [17]:
def main():
    """Load the flux observations and model output, align them and plot.

    Reads the observation and model files from the hard-coded relative
    paths below, restricts the observations to the time span covered by the
    model, and passes everything to ``create_combined_visualization``.
    """
    # Constants
    SPLIT_INDEX = 24 * 365 * 5  # 5 years of hourly data
    # NOTE(review): plot titles/legend elsewhere in this notebook read
    # "’02-’07" and "6-yr Avg" — confirm this 5-year split is the intended one.
    figure_dpi = 300
    savefig_dpi = 300

    FLUX_FILE = "../obs/US-Syv_2002010106_2009010105_hur_Flux.nc"
    MOD_FILE = "../US-Syv_01/2002010107.LDASOUT_DOMAIN1"

    def _clip_to_model_period(df_obs, df_mod):
        """Keep only observation rows inside [first, last] model timestamp."""
        # The frames are later split by row position, so observations that
        # start before the model must be dropped as well; otherwise the same
        # split position falls on different timestamps in the two frames.
        in_model_period = ((df_obs.index >= df_mod.index.min())
                           & (df_obs.index <= df_mod.index.max()))
        return df_obs[in_model_period]

    # Process all data
    df_obs_lh = process_latent_heat_data(FLUX_FILE)
    df_mod_lh, df_mod_sh, df_mod_psn = process_model_data(MOD_FILE)
    df_obs_sh = process_sensible_heat_data(FLUX_FILE)
    df_obs_gpp = process_gpp_data(FLUX_FILE)

    # Limit observations to model time period (both ends)
    df_obs_lh = _clip_to_model_period(df_obs_lh, df_mod_lh)
    df_obs_sh = _clip_to_model_period(df_obs_sh, df_mod_sh)
    df_obs_gpp = _clip_to_model_period(df_obs_gpp, df_mod_psn)

    # Create visualization
    create_combined_visualization(
        df_obs_lh, df_mod_lh,
        df_obs_sh, df_mod_sh,
        df_obs_gpp, df_mod_psn,
        SPLIT_INDEX, figure_dpi, savefig_dpi
    )

In [18]:
# Run the analysis only when this code is executed as the top-level module
# (for example as a notebook cell or script), not when it is imported.
if __name__ == "__main__":
    main()    

AttributeError: 'Context' object has no attribute 'pattern'

In [None]:

# Method 1: If using pip
# This will output all installed packages with their exact versions
# !pip freeze > requirements.txt
