# Heat Event Identification

## Set Up

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns
from scipy import stats
import sys
import os

# Make the project's `src` directory importable so `notebook_utils` resolves.
# Assumes the kernel's working directory is the notebook folder, so the
# project root is the parent of the current working directory.
curr_dir = os.getcwd()
proj_dir = os.path.dirname(curr_dir)
src_path = os.path.join(proj_dir, 'src')
# Guard the append so re-running this cell does not pile up duplicate entries.
if src_path not in sys.path:
    sys.path.append(src_path)

from notebook_utils.preprocessing import *
from notebook_utils.heat_identification import *

# Combine the cleaned GHCN files from the processed-data folder into one dataframe
ghcn_cleaned_dir = '../data/processed/ghcn_cleaned'
CA_stations_dfs = combine_files_to_dfs(ghcn_cleaned_dir)

Processed file: CA_2003_clean.csv
Processed file: CA_2004_clean.csv
Processed file: CA_2005_clean.csv
Processed file: CA_2006_clean.csv
Processed file: CA_2007_clean.csv
Processed file: CA_2008_clean.csv
Processed file: CA_2009_clean.csv
Processed file: CA_2010_clean.csv
Processed file: CA_2011_clean.csv
Processed file: CA_2012_clean.csv
Processed file: CA_2013_clean.csv
Processed file: CA_2014_clean.csv
Processed file: CA_2015_clean.csv
Processed file: CA_2016_clean.csv
Processed file: CA_2017_clean.csv
Processed file: CA_2018_clean.csv
Processed file: CA_2019_clean.csv
Processed file: CA_2020_clean.csv
Processed file: CA_2021_clean.csv
Processed file: CA_2022_clean.csv
Processed file: CA_2023_clean.csv


# Heat Event Definition

1. CTX95pct. The threshold is the calendar day 95th percentile of Tmax based on a 15-day window centered on a given day; that is, there is a different percentile threshold for each day of the year to account for the seasonal cycle.

Source for heat event and heat metric definitions: Hulley, G.C., Dousset, B. and Kahn, B.H., 2020. Rising trends in heatwave metrics across southern California. Earth's Future, 8(7), p.e2020EF001480.

### Extract Daily Maximum Temperature (Tmax)

In [2]:
# Parse the observation timestamps and derive the calendar date of each reading
CA_stations_dfs['datetime'] = pd.to_datetime(CA_stations_dfs['datetime'])
CA_stations_dfs['Date'] = CA_stations_dfs['datetime'].dt.date

# Daily maximum temperature (Tmax) for every station and calendar date
station_day_keys = ['Station_name', 'Latitude', 'Longitude', 'Date']
daily_temp = (
    CA_stations_dfs
    .groupby(station_day_keys)
    .agg(Tmax=('Temperature', 'max'))
    .reset_index()
)

# Identify Heat Events

### Calculate 95th Percentile of Tmax Based on 15-Day Window

In [3]:
# Calculate the 95th-percentile threshold of daily maximum temperature (CTX95pct)
# separately for each station.
# The per-station results are written back by position, so the rows must be
# ordered exactly as groupby('Station_name') emits them: by station name, and
# chronologically within each station. Sort explicitly instead of relying on
# the row order left behind by an earlier cell.
daily_temp = daily_temp.sort_values(['Station_name', 'Date']).reset_index(drop=True)
# NOTE(review): the threshold is described as a calendar-day climatology — confirm
# that calc_rolling_percentile implements that and not a running window over the series.
daily_temp['CTX95pct'] = daily_temp.groupby('Station_name')['Tmax'].apply(calc_rolling_percentile).reset_index(drop=True)

### Find Heat Events

In [4]:
# Flag days whose Tmax is strictly above that day's CTX95pct threshold.
# A comparison against a missing (NaN) threshold evaluates to False.
exceeds_threshold = daily_temp['Tmax'].gt(daily_temp['CTX95pct'])
daily_temp['heat_event'] = exceeds_threshold

In [5]:
# Preview the first rows; the leading days shown have no CTX95pct value (NaN) and are not flagged as heat events.
daily_temp.head()

Unnamed: 0,Station_name,Latitude,Longitude,Date,Tmax,CTX95pct,heat_event
0,ALTURAS MUNI AP,41.4836,-120.5614,2003-01-01,13.4,,False
1,ALTURAS MUNI AP,41.4836,-120.5614,2003-01-02,13.7,,False
2,ALTURAS MUNI AP,41.4836,-120.5614,2003-01-03,15.3,,False
3,ALTURAS MUNI AP,41.4836,-120.5614,2003-01-04,15.8,,False
4,ALTURAS MUNI AP,41.4836,-120.5614,2003-01-05,13.7,,False


# Heat Event Metrics

* Frequency: number of heatwaves per year within detection window.
* Duration: total number of contiguous days from start to end of heatwave event.
* Intensity: maximum temperature during a heatwave event minus the 95th percentile climatology of temperatures for a 15-day window centered on the heatwave event.
(Hulley et al., 2020)

### Identify Heat Event Groups

In [6]:
# Attach a heat_event_group id to the flagged days using the identify_heat_groups helper.
# NOTE(review): the displayed result contains only heat_event == True rows — confirm how the
# helper decides group boundaries and whether it enforces a minimum run length.
heat_groups_df = identify_heat_groups(daily_temp)
heat_groups_df

Unnamed: 0,Station_name,Latitude,Longitude,Date,Tmax,CTX95pct,heat_event,heat_event_group
0,ALTURAS MUNI AP,41.4836,-120.5614,2003-05-10,18.0,17.37,True,1
1,ALTURAS MUNI AP,41.4836,-120.5614,2003-05-11,19.5,18.45,True,1
2,ALTURAS MUNI AP,41.4836,-120.5614,2003-05-12,22.4,20.37,True,1
3,ALTURAS MUNI AP,41.4836,-120.5614,2003-05-13,23.0,22.58,True,1
4,ALTURAS MUNI AP,41.4836,-120.5614,2003-05-21,26.0,24.67,True,2
...,...,...,...,...,...,...,...,...
12471,YOSEMITE VILLAGE 12 W,37.7592,-119.8208,2023-03-12,15.4,15.05,True,3699
12472,YOSEMITE VILLAGE 12 W,37.7592,-119.8208,2023-03-13,17.0,15.88,True,3699
12473,YOSEMITE VILLAGE 12 W,37.7592,-119.8208,2023-04-08,18.0,17.58,True,3700
12474,YOSEMITE VILLAGE 12 W,37.7592,-119.8208,2023-04-09,21.6,19.08,True,3700


### Calculate Heat Event Metrics

In [7]:
# Summarise the heat event groups: the result has one row per event with its start/end dates,
# Duration, Intensity and the Frequency for that station and year.
heat_event_metrics = calc_heat_event_metrics(heat_groups_df)
heat_event_metrics

Unnamed: 0,Station_name,Year,heat_event_group,Start_date,End_date,Duration,Intensity,Frequency
0,ALTURAS MUNI AP,2003,1,2003-05-10,2003-05-13,4,0.42,3
1,ALTURAS MUNI AP,2003,2,2003-05-21,2003-05-23,3,1.40,3
2,ALTURAS MUNI AP,2003,3,2003-07-17,2003-07-19,3,0.70,3
3,ALTURAS MUNI AP,2004,4,2004-02-08,2004-02-12,5,0.63,4
4,ALTURAS MUNI AP,2004,5,2004-03-06,2004-03-08,3,0.98,4
...,...,...,...,...,...,...,...,...
3701,YOSEMITE VILLAGE 12 W,2022,3696,2022-02-06,2022-02-10,5,0.49,5
3702,YOSEMITE VILLAGE 12 W,2022,3697,2022-09-03,2022-09-05,3,1.19,5
3703,YOSEMITE VILLAGE 12 W,2022,3698,2022-12-23,2022-12-25,3,0.42,5
3704,YOSEMITE VILLAGE 12 W,2023,3699,2023-03-11,2023-03-13,3,1.12,2


In [8]:
# Distribution of the Frequency column. Frequency is repeated on every event row of a
# station-year, so these statistics are weighted by event count rather than one value per station-year.
heat_event_metrics['Frequency'].describe()

count    3706.000000
mean        3.282245
std         1.534205
min         1.000000
25%         2.000000
50%         3.000000
75%         4.000000
max         8.000000
Name: Frequency, dtype: float64

# Visualize Heat Events