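This Jupyter Notebook loads CMIP6 climate projection data for the grid cell nearest Los Angeles and analyses temperature extremes (monthly maxima, annual maximum and 95th-percentile temperatures, and the number of months per year that exceed the historical 90th-percentile threshold), producing visualizations to compare trends across historical and future climate scenarios (SSP245 and SSP585).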

In [8]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import os

# Analysis configuration
# Climate models covered by the analysis; append further model names here if available.
MODELS = ["CESM2-WACCM"]

# Analysis windows expressed as date-string slices (start, end).
HISTORICAL_PERIOD, MID_FUTURE, FAR_FUTURE = (
    slice("1985-01-01", "2014-12-31"),
    slice("2041-01-01", "2070-12-31"),
    slice("2071-01-01", "2100-12-31"),
)

# Colour assigned to each scenario label in the plots.
scenario_colors = dict(Historical='black', SSP245='royalblue', SSP585='firebrick')

# Figures are saved under this directory, so make sure it is present.
os.makedirs('figures', exist_ok=True)

def extract_la_data(file_path, target_lat=34.1, target_lon=-118.3):
    """Extract the temperature series of one grid cell from a CMIP6 netCDF file.

    The grid point nearest to (target_lat, target_lon) is selected; the
    defaults are approximate coordinates for Los Angeles.

    Parameters
    ----------
    file_path : str or path-like
        netCDF file providing a ``tas`` variable on ``lat``/``lon`` coordinates.
    target_lat, target_lon : float
        Location of interest in degrees. Longitude is expected in the
        -180..180 convention (the dataset is converted to it when needed).

    Returns
    -------
    xarray.DataArray
        ``tas`` at the nearest grid point with 273.15 subtracted
        (Kelvin to Celsius), fully loaded into memory.
    """
    # Always decode times to cftime objects so every file yields the same time type.
    time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)

    # The context manager guarantees the file handle is released, even on error.
    with xr.open_dataset(file_path, decode_times=time_coder) as raw:
        ds = raw

        # Convert longitude from 0..360 to -180..180 if needed
        if float(ds.lon.max()) > 180:
            ds = ds.assign_coords(lon=(((ds.lon + 180) % 360) - 180))
            ds = ds.sortby('lon')

        # Find nearest grid point
        lat_idx = int(np.abs(ds.lat.values - target_lat).argmin())
        lon_idx = int(np.abs(ds.lon.values - target_lon).argmin())
        la_lat = ds.lat.values[lat_idx]
        la_lon = ds.lon.values[lon_idx]

        print(f"Using coordinates: lat={la_lat}, lon={la_lon}")

        # Positional selection avoids relying on exact float-label matching.
        # The series is loaded before the file closes so the returned array
        # never refers to a closed dataset.
        la_temp = (ds['tas'].isel(lat=lat_idx, lon=lon_idx) - 273.15).load()

    return la_temp

def _scenario_frame(series, scenario):
    """Turn one temperature DataArray into a DataFrame tagged with scenario, year and month."""
    frame = series.to_dataframe().reset_index()
    frame['scenario'] = scenario
    # Attribute access works for cftime objects as well as pandas timestamps
    frame['year'] = frame['time'].apply(lambda stamp: stamp.year)
    frame['month'] = frame['time'].apply(lambda stamp: stamp.month)
    return frame


def _split_future_periods(scenario_df):
    """Return (mid-century, late-century) copies of a scenario frame, each with a 'period' label."""
    mid = scenario_df[scenario_df['year'].between(2041, 2070)].copy()
    far = scenario_df[scenario_df['year'].between(2071, 2100)].copy()
    mid['period'] = 'Mid-Century (2041-2070)'
    far['period'] = 'Late-Century (2071-2100)'
    return mid, far


def _plot_metric_with_trends(metrics_df, value_col, trend_label, title, ylabel):
    """Scatter one per-year metric by scenario on the current axes and overlay linear trend lines."""
    sns.scatterplot(data=metrics_df, x='year', y=value_col, hue='scenario',
                    palette=scenario_colors, alpha=0.6)

    for scenario in ['Historical', 'SSP245', 'SSP585']:
        scenario_data = metrics_df[metrics_df['scenario'] == scenario]
        # A linear fit needs at least two points
        if len(scenario_data) < 2:
            continue

        years = scenario_data['year']
        coeffs = np.polyfit(years, scenario_data[value_col], 1)
        trend = np.poly1d(coeffs)
        slope = coeffs[0]

        plt.plot(years, trend(years), linestyle='--', color=scenario_colors[scenario],
                 label=trend_label.format(scenario=scenario, slope=slope, per_decade=slope * 10))

    plt.title(title, fontsize=16)
    plt.xlabel('Year', fontsize=14)
    plt.ylabel(ylabel, fontsize=14)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.legend(fontsize=10)


def analyze_extreme_temps(historical, ssp245, ssp585):
    """Analyze and visualize extreme temperature metrics.

    Three figures are written to the 'figures' directory:

    * ``la_monthly_max_temps.png`` - maximum temperature per calendar month
      for the historical period and the mid-/late-century windows of each
      future scenario.
    * ``la_annual_extreme_temps.png`` - annual maximum and annual 95th
      percentile per scenario, each with a linear trend line.
    * ``la_extreme_temp_months.png`` - number of months per year above the
      historical 90th percentile of the same calendar month.

    Parameters
    ----------
    historical, ssp245, ssp585 : xarray.DataArray
        Temperature series with a ``time`` coordinate; each must convert to a
        DataFrame with a ``tas`` column.
    """
    # Create tidy dataframes from the xarray objects
    hist_df = _scenario_frame(historical, 'Historical')
    ssp245_df = _scenario_frame(ssp245, 'SSP245')
    ssp585_df = _scenario_frame(ssp585, 'SSP585')
    labelled_frames = [(hist_df, 'Historical'), (ssp245_df, 'SSP245'), (ssp585_df, 'SSP585')]

    # Split each future scenario into mid and far future periods.
    # Each scenario is split from its OWN dataframe; reusing a leftover loop
    # variable here would silently duplicate SSP585 and drop SSP245.
    ssp245_mid, ssp245_far = _split_future_periods(ssp245_df)
    ssp585_mid, ssp585_far = _split_future_periods(ssp585_df)
    hist_df['period'] = 'Historical (1985-2014)'

    # Combine for comparative plots
    combined_df = pd.concat([hist_df, ssp245_mid, ssp245_far, ssp585_mid, ssp585_far],
                            ignore_index=True)

    # Plot 1: Monthly temperature extremes comparison
    plt.figure(figsize=(14, 8))

    # Monthly statistics for every scenario/period combination present in the data;
    # sort=False keeps the order of first appearance (historical, mid, late).
    monthly_extremes_df = (
        combined_df.groupby(['period', 'scenario', 'month'], sort=False)['tas']
        .agg(['max', 'mean', 'std'])
        .reset_index()
    )

    # Colour encodes the scenario and the marker encodes the period, so all
    # five scenario/period lines stay distinguishable.
    g = sns.lineplot(data=monthly_extremes_df, x='month', y='max', hue='scenario', style='period',
                     markers=True, dashes=False, linewidth=2.5, palette=scenario_colors)

    handles, labels = g.get_legend_handles_labels()
    g.legend(handles=handles, labels=labels, title='Scenario', fontsize=12)

    plt.title('Maximum Monthly Temperatures in Los Angeles: Historical vs Future Projections', fontsize=16)
    plt.xlabel('Month', fontsize=14)
    plt.ylabel('Maximum Temperature (°C)', fontsize=14)
    plt.xticks(range(1, 13), ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
    plt.grid(True, linestyle='--', alpha=0.7)

    plt.tight_layout()
    plt.savefig('figures/la_monthly_max_temps.png', dpi=300, bbox_inches='tight')
    plt.close()

    # Plot 2: Annual extreme temperature metrics over time
    plt.figure(figsize=(14, 10))

    # Annual maximum and 95th percentile temperatures per scenario
    annual_metrics = []
    for frame, scenario in labelled_frames:
        annual_stats = (
            frame.groupby('year')['tas']
            .agg(annual_max='max', annual_p95=lambda temps: np.percentile(temps, 95))
            .reset_index()
        )
        annual_stats['scenario'] = scenario
        annual_metrics.append(annual_stats)
    annual_metrics_df = pd.concat(annual_metrics, ignore_index=True)

    temperature_trend_label = "{scenario} Trend: {slope:.3f}°C/year ({per_decade:.2f}°C/decade)"

    plt.subplot(2, 1, 1)
    _plot_metric_with_trends(
        annual_metrics_df, 'annual_max', temperature_trend_label,
        'Annual Maximum Temperatures in Los Angeles (1985-2100)',
        'Maximum Temperature (°C)',
    )

    plt.subplot(2, 1, 2)
    _plot_metric_with_trends(
        annual_metrics_df, 'annual_p95', temperature_trend_label,
        'Annual 95th Percentile Temperatures in Los Angeles (1985-2100)',
        '95th Percentile Temperature (°C)',
    )

    plt.tight_layout()
    plt.savefig('figures/la_annual_extreme_temps.png', dpi=300, bbox_inches='tight')
    plt.close()

    # Plot 3: simplified "extreme temperature events" analysis.
    # An extreme event is a month whose temperature exceeds the historical
    # 90th percentile for that calendar month.
    plt.figure(figsize=(12, 8))

    # Monthly thresholds based on the historical period
    monthly_thresholds = hist_df.groupby('month')['tas'].quantile(0.9).reset_index()
    monthly_thresholds.columns = ['month', 'threshold']

    # Flag extreme months and count them per year for every scenario
    extreme_counts = []
    for frame, scenario in labelled_frames:
        with_threshold = pd.merge(frame, monthly_thresholds, on='month')
        with_threshold['extreme'] = with_threshold['tas'] > with_threshold['threshold']
        yearly_counts = with_threshold.groupby('year')['extreme'].sum().reset_index()
        yearly_counts['scenario'] = scenario
        extreme_counts.append(yearly_counts)
    all_extremes = pd.concat(extreme_counts, ignore_index=True)

    _plot_metric_with_trends(
        all_extremes, 'extreme',
        "{scenario} Trend: {slope:.3f} events/year ({per_decade:.1f}/decade)",
        'Number of Extreme Temperature Months per Year in Los Angeles',
        'Number of Extreme Temperature Months',
    )

    plt.tight_layout()
    plt.savefig('figures/la_extreme_temp_months.png', dpi=300, bbox_inches='tight')
    plt.close()

    print("Analysis complete! Visualizations have been saved to the 'figures' directory.")

# Script entry point
def main():
    """Load the three CMIP6 temperature series, trim them to the analysis windows and run the analysis."""
    print("Loading CMIP6 temperature datasets...")

    # Dataset locations (update paths to your file locations)
    dataset_paths = [
        "datasets/cmip6_tas_historical_CESM2-WACCM.nc",
        "datasets/cmip6_tas_ssp245_CESM2-WACCM.nc",
        "datasets/cmip6_tas_ssp585_CESM2-WACCM.nc",
    ]
    raw_hist, raw_ssp245, raw_ssp585 = [extract_la_data(path) for path in dataset_paths]

    # Both future scenarios are trimmed to the same projection window
    projection_window = slice("2015-01-01", "2100-12-31")
    hist_temp = raw_hist.sel(time=HISTORICAL_PERIOD)
    ssp245_temp = raw_ssp245.sel(time=projection_window)
    ssp585_temp = raw_ssp585.sel(time=projection_window)

    # Produce the figures
    analyze_extreme_temps(hist_temp, ssp245_temp, ssp585_temp)


if __name__ == "__main__":
    main()

Loading CMIP6 temperature datasets...
Using coordinates: lat=34.3979057591623, lon=-118.75
Using coordinates: lat=34.3979057591623, lon=-118.75
Using coordinates: lat=34.3979057591623, lon=-118.75
Analysis complete! Visualizations have been saved to the 'figures' directory.
