<a href="https://colab.research.google.com/github/nicolli-decastro/portifolio/blob/main/associating_one_lighting_strike_per_fire.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 1. Importing Libraries and Files

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp
from google.colab import files

In [None]:
# Mount Google Drive when running on Colab; otherwise continue with local paths.
try:
    from google.colab import drive
except ImportError:
    # google.colab is only importable inside a Colab runtime.
    COLAB = False
    print("Note: not using Google CoLab")
else:
    # A failed mount is allowed to propagate rather than being silently
    # reported as "not using Colab" by a bare except.
    drive.mount('/content/drive', force_remount=True)
    COLAB = True
    print("Note: using Google Colab")

Mounted at /content/drive
Note: using Google CoLab
Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


In [None]:
# Location of the 2022 lightning / fire / MRMS table.
# Alternate copy on Drive: '/content/drive/MyDrive/FNN/2022_with_fire_mrms_v7 (1).csv'
file = '/content/2022_with_fire_mrms_v7 (1) (1).csv'
df_file = pd.read_csv(file)

# Preview the first five rows.
df_file.head(5)

Unnamed: 0,idx,dttime_utc,ltg_lat,ltg_lon,polarity,ell_smajor,ell_sminor,ell_angle,striketype,tmpc,...,precip24h_ltg,precip24h+1h_ltg,precip24h+2h_ltg,precip24h_day_ltg,precip24h-1d_ltg,precip24h_day_fire,precip24h_fire,precip24h+1h_fire,precip24h+2h_fire,precip24h-1d_fire
0,48836832,2022-06-23 21:58:46.653697968,29.005194,-81.013071,-7700,140,52,114,G,32.504389,...,0.0,0.047244,0.291339,0.92126,0.0,0.377953,1.137795,1.137795,1.137795,0.574803
1,42993303,2022-06-14 23:03:05.751105309,29.344263,-81.542652,6951,178,55,109,G,28.95472,...,0.614173,1.653543,1.653543,0.0,0.0,0.0,0.082677,0.102362,0.102362,0.0
2,22735983,2022-05-04 19:41:02.999083757,26.35911,-80.416652,-15985,163,73,121,G,29.274231,...,0.0,0.011811,0.011811,0.051181,0.0,0.007874,0.0,0.0,0.0,0.0
3,22087195,2022-05-02 20:48:02.668427944,29.715181,-83.139749,-20308,123,55,122,G,28.880819,...,0.0,0.440945,0.590551,0.0,0.019685,0.0,0.0,0.0,0.0,0.035433
4,48847410,2022-06-23 22:23:40.393073797,28.98171,-81.026099,-20876,133,58,117,G,32.475239,...,0.019685,0.562992,0.574803,0.897638,0.0,0.681102,1.074803,1.681102,2.0,0.0


#### Column Names

In [None]:
# List every column name, then display the fire start-time column
# (object dtype in the recorded output, i.e. not yet parsed as datetimes).
print(df_file.columns.values)
df_file['f_start']

['idx' 'dttime_utc' 'ltg_lat' 'ltg_lon' 'polarity' 'ell_smajor'
 'ell_sminor' 'ell_angle' 'striketype' 'tmpc' 'tmpc-1h' 'tmpc-2h'
 'tmpc-3h' 'tmpc-4h' 'relh' 'relh-1h' 'relh-2h' 'relh-3h' 'relh-4h' 'sknt'
 'sknt-1h' 'sknt-2h' 'sknt-3h' 'sknt-4h' 'p01i' 'p01i-1h' 'p01i-2h'
 'p01i-3h' 'p01i-4h' 'p_obs' 'p_obs-1d' 'p_obs-2d' 'p_obs-3d' 'p_obs-4d'
 'p_obs+1d' 'p_obs+2d' 'p_obsS14d' 'p_nor' 'p_nor-1d' 'p_nor-2d'
 'p_nor-3d' 'p_nor-4d' 'p_nor+1d' 'p_nor+2d' 'p_norM14d' 'landcover' 'EVI'
 'ERC' 'BI' 'SC' 'IC' 'BIC' 'DSR' 'DC' 'DMC' 'dfmc-1h' 'dfmc-10h'
 'dfmc-100h' 'dfmc-1000h' 'RHMax' 'RHMin' 'KBDI' 'fid' 'county' 'funit'
 'region' 'f_name' 'icode' 'f_lat' 'f_lon' 'f_area' 'size_class'
 'cause_cat' 'cause_type' 'f_start' 'cont_time' 'precip24h_ltg'
 'precip24h+1h_ltg' 'precip24h+2h_ltg' 'precip24h_day_ltg'
 'precip24h-1d_ltg' 'precip24h_day_fire' 'precip24h_fire'
 'precip24h+1h_fire' 'precip24h+2h_fire' 'precip24h-1d_fire']


0        2022.06.25 4:31
1       2022.06.19 19:29
2       2022.05.04 23:08
3       2022.05.03 23:39
4       2022.06.24 19:05
              ...       
5832    2022.07.25 20:00
5833     2022.03.18 0:16
5834     2022.10.15 1:37
5835    2022.04.06 17:49
5836    2022.01.11 19:47
Name: f_start, Length: 5837, dtype: object

#### Finding Duplicate Lightning-Strike / Fire Pairs

In [None]:
# Flag rows whose (idx, f_name) pair occurs more than once.
# keep=False marks every member of a repeated pair, not just the later ones.
is_repeated_pair = df_file.duplicated(subset=['idx', 'f_name'], keep=False)
duplicates = df_file.loc[is_repeated_pair]
print(len(duplicates))

# Number of flagged rows for each lightning idx
num_duplicates = duplicates.groupby('idx')['f_name'].count()
print(len(num_duplicates))

duplicates.head()

0
0


Unnamed: 0,idx,dttime_utc,ltg_lat,ltg_lon,polarity,ell_smajor,ell_sminor,ell_angle,striketype,tmpc,...,precip24h_ltg,precip24h+1h_ltg,precip24h+2h_ltg,precip24h_day_ltg,precip24h-1d_ltg,precip24h_day_fire,precip24h_fire,precip24h+1h_fire,precip24h+2h_fire,precip24h-1d_fire


In [None]:
# Per-idx count of duplicated rows (an empty Series when no duplicates exist).
print(num_duplicates)

Series([], Name: f_name, dtype: int64)


In [None]:
# Lightning idx values whose duplicated rows number more than two
idx_more_than_2 = num_duplicates.loc[num_duplicates.gt(2)]
print(idx_more_than_2)

Series([], Name: f_name, dtype: int64)


## 2. Dropping Duplicates

In [None]:
# Remove rows that are exact copies of an earlier row (all columns compared).
df_fires = df_file.drop_duplicates(keep='first')
print(df_fires.shape[0])
df_fires.head()

5837


Unnamed: 0,idx,dttime_utc,ltg_lat,ltg_lon,polarity,ell_smajor,ell_sminor,ell_angle,striketype,tmpc,...,precip24h_ltg,precip24h+1h_ltg,precip24h+2h_ltg,precip24h_day_ltg,precip24h-1d_ltg,precip24h_day_fire,precip24h_fire,precip24h+1h_fire,precip24h+2h_fire,precip24h-1d_fire
0,48836832,2022-06-23 21:58:46.653697968,29.005194,-81.013071,-7700,140,52,114,G,32.504389,...,0.0,0.047244,0.291339,0.92126,0.0,0.377953,1.137795,1.137795,1.137795,0.574803
1,42993303,2022-06-14 23:03:05.751105309,29.344263,-81.542652,6951,178,55,109,G,28.95472,...,0.614173,1.653543,1.653543,0.0,0.0,0.0,0.082677,0.102362,0.102362,0.0
2,22735983,2022-05-04 19:41:02.999083757,26.35911,-80.416652,-15985,163,73,121,G,29.274231,...,0.0,0.011811,0.011811,0.051181,0.0,0.007874,0.0,0.0,0.0,0.0
3,22087195,2022-05-02 20:48:02.668427944,29.715181,-83.139749,-20308,123,55,122,G,28.880819,...,0.0,0.440945,0.590551,0.0,0.019685,0.0,0.0,0.0,0.0,0.035433
4,48847410,2022-06-23 22:23:40.393073797,28.98171,-81.026099,-20876,133,58,117,G,32.475239,...,0.019685,0.562992,0.574803,0.897638,0.0,0.681102,1.074803,1.681102,2.0,0.0


## 3. Converting Precipitation Values from Inches to mm

In [None]:
# Work on a copy so df_fires keeps the original inch values.
df_fires_mm = df_fires.copy()

# Precipitation columns (all recorded in inches)
columns_to_convert = [
    'precip24h_ltg', 'precip24h+1h_ltg', 'precip24h+2h_ltg', 'precip24h_day_ltg',
    'precip24h-1d_ltg', 'precip24h_day_fire', 'precip24h_fire',
    'precip24h+1h_fire', 'precip24h+2h_fire', 'precip24h-1d_fire'
]

# inches -> millimetres, applied to every listed column in one operation
MM_PER_INCH = 25.4
df_fires_mm[columns_to_convert] = df_fires_mm[columns_to_convert] * MM_PER_INCH

df_fires_mm.head()

Unnamed: 0,idx,dttime_utc,ltg_lat,ltg_lon,polarity,ell_smajor,ell_sminor,ell_angle,striketype,tmpc,...,precip24h_ltg,precip24h+1h_ltg,precip24h+2h_ltg,precip24h_day_ltg,precip24h-1d_ltg,precip24h_day_fire,precip24h_fire,precip24h+1h_fire,precip24h+2h_fire,precip24h-1d_fire
0,48836832,2022-06-23 21:58:46.653697968,29.005194,-81.013071,-7700,140,52,114,G,32.504389,...,0.0,1.199998,7.400011,23.400004,0.0,9.600006,28.899993,28.899993,28.899993,14.599996
1,42993303,2022-06-14 23:03:05.751105309,29.344263,-81.542652,6951,178,55,109,G,28.95472,...,15.599994,41.999992,41.999992,0.0,0.0,0.0,2.099996,2.599995,2.599995,0.0
2,22735983,2022-05-04 19:41:02.999083757,26.35911,-80.416652,-15985,163,73,121,G,29.274231,...,0.0,0.299999,0.299999,1.299997,0.0,0.2,0.0,0.0,0.0,0.0
3,22087195,2022-05-02 20:48:02.668427944,29.715181,-83.139749,-20308,123,55,122,G,28.880819,...,0.0,11.200003,14.999995,0.0,0.499999,0.0,0.0,0.0,0.0,0.899998
4,48847410,2022-06-23 22:23:40.393073797,28.98171,-81.026099,-20876,133,58,117,G,32.475239,...,0.499999,14.299997,14.599996,22.800005,0.0,17.299991,27.299996,42.699991,50.8,0.0


In [None]:
# Dtype of the column index itself (not of the data held in the columns).
df_fires_mm.columns.dtype

dtype('O')

# 4. Matching Fires with One Lightning Strike

In [None]:
# Fire names, one per row of df_fires_mm (names repeat across rows).
print(df_fires_mm['f_name'].values)

['Bomb Crater (64)' 'LOGGING TRAIL (64)' 'L39(50)' ...
 'Rattlesnake Rd (22)' '30th Ave. S.E.' 'PANDORA (53)']


In [None]:
# Parse the lightning timestamps so they can later be subtracted from the fire start times.
df_fires_mm['dttime_utc'] = pd.to_datetime(df_fires_mm['dttime_utc'])

## 4.1 Haversine Distance Function

In [None]:
def haversine_distance(lat1, lon1, lat2, lon2, radius=6371):
    """Great-circle (haversine) distance between two points on a sphere.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point(s), in decimal degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point(s), in decimal degrees.
        Scalars, lists, NumPy arrays and pandas Series are accepted and
        combined element-wise following NumPy broadcasting.
    radius : float, default 6371
        Sphere radius; the result is expressed in the same unit
        (the default is the Earth's radius in kilometres).

    Returns
    -------
    float or array-like
        Distance(s) along the sphere's surface.
    """
    # Converting each argument separately lets plain Python lists through
    # (list - list is undefined) while keeping Series/ndarray inputs intact.
    lat1_rad, lon1_rad, lat2_rad, lon2_rad = (
        np.radians(angle) for angle in (lat1, lon1, lat2, lon2)
    )
    dlat = lat2_rad - lat1_rad
    dlon = lon2_rad - lon1_rad

    sin_half_dlat = np.sin(dlat / 2)
    sin_half_dlon = np.sin(dlon / 2)
    a = (sin_half_dlat * sin_half_dlat +
         np.cos(lat1_rad) * np.cos(lat2_rad) * sin_half_dlon * sin_half_dlon)

    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) return NaN.
    a = np.clip(a, 0.0, 1.0)

    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return radius * c


## 4.2 Calculating Distances Between Lightning Strike and Fire

In [None]:
# Coordinates as (n_rows, 2) arrays: column 0 = latitude, column 1 = longitude
fire_coords = df_fires_mm[['f_lat', 'f_lon']].values
lightning_coords = df_fires_mm[['ltg_lat', 'ltg_lon']].values

fire_lat, fire_lon = fire_coords[:, 0], fire_coords[:, 1]
strike_lat, strike_lon = lightning_coords[:, 0], lightning_coords[:, 1]

# Row-wise distance between each fire and the strike listed on the same row
distances = haversine_distance(fire_lat, fire_lon, strike_lat, strike_lon)

# Store the result alongside the original columns
df_fires_mm['distance'] = distances

print(df_fires_mm[['ltg_lat','ltg_lon','f_lat','f_lon','distance']])

        ltg_lat    ltg_lon      f_lat      f_lon  distance
0     29.005194 -81.013071  28.985372 -81.022072  2.371575
1     29.344263 -81.542652  29.325328 -81.532217  2.335893
2     26.359110 -80.416652  26.361389 -80.404167  1.269497
3     29.715181 -83.139749  29.718056 -83.140278  0.323690
4     28.981710 -81.026099  28.976667 -81.006528  1.984627
...         ...        ...        ...        ...       ...
5832  27.139544 -80.864259  27.138328 -80.870125  0.596003
5833  27.275933 -82.426401  27.274722 -82.414722  1.162059
5834  26.845934 -81.537048  26.849822 -81.540381  0.544276
5835  26.197760 -81.508234  26.189308 -81.492533  1.826821
5836  27.807714 -81.398115  27.795278 -81.381111  2.170138

[5837 rows x 5 columns]


## 4.3 Filtering for Distances <= 2km

In [None]:
# Largest fire-to-strike separation (km) for a strike to count as a candidate.
MAX_DISTANCE_KM = 2

# Vectorised selection instead of iterating row by row and parsing one
# timestamp per iteration. Rows with a NaN distance fail the comparison
# and are left out, as before.
nearby = df_fires_mm.loc[df_fires_mm['distance'] <= MAX_DISTANCE_KM]

df_distances = (
    nearby[['f_name', 'idx', 'distance']]
    .rename(columns={'idx': 'lightning_idx'})
    .assign(
        # Fire start minus strike time: positive when the strike came first.
        time_difference=pd.to_datetime(nearby['f_start']) - pd.to_datetime(nearby['dttime_utc'])
    )
    .reset_index(drop=True)  # fresh 0..n-1 index, as when built from a list of records
)

print(len(df_distances))


4528


In [None]:
# Total strike/fire rows before distance filtering, for comparison with len(df_distances).
print(len(df_fires_mm))

5837


In [None]:
# Number of distinct fire names in the full table.
fire_names_unique = df_fires_mm['f_name'].unique()
print(len(fire_names_unique))

418


In [None]:
# Number of distinct fire names that keep at least one strike after the distance filter.
fire_names_dist = df_distances['f_name'].unique()
print(len(fire_names_dist))

410


## 4.4 Checking Fires with No Lightning Strike Within 2 km

In [None]:
# Same 2 km threshold used for the <= 2 km selection in section 4.3.
MAX_DISTANCE_KM = 2

# Vectorised selection of the strike/fire rows that lie beyond the threshold
# (replaces the row-by-row loop). Rows with a NaN distance fail the comparison
# and are left out, as before.
too_far = df_fires_mm.loc[df_fires_mm['distance'] > MAX_DISTANCE_KM]

df_excluded_fires = (
    too_far[['f_name', 'idx', 'distance']]
    .rename(columns={'idx': 'lightning_idx'})
    .assign(
        # Fire start minus strike time: positive when the strike came first.
        time_difference=pd.to_datetime(too_far['f_start']) - pd.to_datetime(too_far['dttime_utc'])
    )
    .reset_index(drop=True)
)

print(len(df_excluded_fires))

1309


In [None]:
# Distinct fire names having at least one strike farther than 2 km
# (a fire can also have closer strikes, so this is not the set of unmatched fires).
fire_names = df_excluded_fires['f_name'].unique()
print(len(fire_names))

221


## 4.5 Calculating and Categorizing Lag Day

In [None]:
# Whole days elapsed between the strike and the fire start
df_distances['lag_day'] = df_distances['time_difference'].dt.days

# A lag of 0 or 1 day counts as promptly detected; every other value is a holdover.
is_prompt = df_distances['lag_day'].isin([0, 1])
df_distances['type'] = np.where(is_prompt, 'promptly-detected', 'holdover')

df_distances.head()

Unnamed: 0,f_name,lightning_idx,distance,time_difference,lag_day,type
0,L39(50),22735983,1.269497,0 days 03:26:57.000916243,0,promptly-detected
1,Anderson Grade (15),22087195,0.32369,1 days 02:50:57.331572056,1,promptly-detected
2,CHAMPIONS ROAD (64),48847410,1.984627,0 days 20:41:19.606926203,0,promptly-detected
3,Landfill(026),15877204,1.486524,1 days 07:08:06.068401098,1,promptly-detected
4,719 HWY 100 (54),41312114,1.651014,0 days 00:14:46.923069,0,promptly-detected


## 4.6 Matching Fires with Smallest Time Difference from Lightning Strike

In [None]:
# Longest accepted delay (whole days) between a strike and the fire it ignited.
MAX_LAG_DAYS = 5

# Keep strikes that happened 0..MAX_LAG_DAYS days BEFORE the fire start.
# A bare `lag_day <= 5` also admits negative lags (strike after the fire
# started), and idxmin on the signed difference would then prefer the most
# negative one instead of the closest preceding strike.
# NOTE(review): assumes f_start and dttime_utc share a time zone — confirm.
is_candidate = df_distances['lag_day'].between(0, MAX_LAG_DAYS)
filtered_df = df_distances[is_candidate]

# For each fire, keep the candidate strike with the smallest time difference
closest_rows = filtered_df.groupby('f_name')['time_difference'].idxmin()
df_matched = filtered_df.loc[closest_rows]
df_matched.head()

Unnamed: 0,f_name,lightning_idx,distance,time_difference,lag_day,type
3972,100th Lane Fire (50),47996127,0.327154,0 days 00:45:37.587029457,0,promptly-detected
610,104TH ST,27998399,0.402048,0 days 00:13:42.313968658,0,promptly-detected
2094,107TH TERR (01),36709792,0.569952,1 days 05:10:32.172584772,1,promptly-detected
2454,118 RD (19-T6) 0381,26389121,1.969164,1 days 05:56:08.640345097,1,promptly-detected
1906,128th Street 47),35551006,1.109048,2 days 03:17:41.913389921,2,holdover


In [None]:
# Strikes whose lag exceeds the 5-day window
exceeds_window = df_distances['lag_day'].gt(5)
lag_day_more_5 = df_distances.loc[exceeds_window]
lag_day_more_5.head()

Unnamed: 0,f_name,lightning_idx,distance,time_difference,lag_day,type


## 4.7 Filtering Original Dataframe 'df_fires_mm' with Matched Lightning Strikes

In [None]:
# Columns carried over from the matching step
match_cols = ['lightning_idx', 'f_name', 'time_difference', 'lag_day', 'type']

# The inner join keeps only the (strike, fire) rows selected in df_matched.
# After the join lightning_idx merely repeats idx, so it is dropped.
df_matched_fires = (
    df_fires_mm
    .merge(
        df_matched[match_cols],
        how='inner',
        left_on=['idx', 'f_name'],
        right_on=['lightning_idx', 'f_name'],
    )
    .drop(columns=['lightning_idx'])
)

print(len(df_matched_fires))
df_matched_fires.head()


410


Unnamed: 0,idx,dttime_utc,ltg_lat,ltg_lon,polarity,ell_smajor,ell_sminor,ell_angle,striketype,tmpc,...,precip24h-1d_ltg,precip24h_day_fire,precip24h_fire,precip24h+1h_fire,precip24h+2h_fire,precip24h-1d_fire,distance,time_difference,lag_day,type
0,22735983,2022-05-04 19:41:02.999083757,26.35911,-80.416652,-15985,163,73,121,G,29.274231,...,0.0,0.2,0.0,0.0,0.0,0.0,1.269497,0 days 03:26:57.000916243,0,promptly-detected
1,22087195,2022-05-02 20:48:02.668427944,29.715181,-83.139749,-20308,123,55,122,G,28.880819,...,0.499999,0.0,0.0,0.0,0.0,0.899998,0.32369,1 days 02:50:57.331572056,1,promptly-detected
2,15877204,2022-04-04 20:31:53.931598902,26.581265,-81.532187,-1874,232,100,117,G,27.151417,...,3.799992,0.0,0.0,0.0,0.0,122.400009,1.486524,1 days 07:08:06.068401098,1,promptly-detected
3,41312114,2022-06-10 19:09:13.076931000,29.726376,-81.83046,-1893,314,131,118,G,29.027579,...,0.1,0.1,0.0,5.499989,5.499989,0.0,1.651014,0 days 00:14:46.923069,0,promptly-detected
4,70075096,2022-08-02 19:19:56.876224995,29.015644,-80.992914,-37706,116,52,120,G,33.091169,...,0.0,0.0,5.599989,5.799988,5.799988,0.0,1.245368,0 days 01:18:03.123775005,0,promptly-detected


## 4.8 Checking which Fires are in 'df_excluded_fires' but not in 'df_matched_fires'

In [None]:
# Unique fire names on each side
excluded_fires_set = set(df_excluded_fires['f_name'])
matched_fires_set = set(df_matched_fires['f_name'])

# Names present among the > 2 km rows but absent from the matched fires
fires_not_in_matched = excluded_fires_set.difference(matched_fires_set)

print(fires_not_in_matched)

{'PITTMAN FIRE (64)', 'LOGGING TRAIL (64)', '201 SOUTH (55)', 'Cactus Hill (28)', 'West Josephine  (28)', 'OAK (22)', 'SW 153RD TER (38)', '4 post  (47)'}


## 4.9 Checking if 'fires_not_in_matched' have distance > 2km

In [None]:
# Fires to check, taken from the set difference computed in section 4.8
# rather than from a hand-copied list that goes stale when the data changes.
fire_names_to_check = set(fires_not_in_matched)

# df_distances holds only strikes within the 2 km threshold (inclusive, as in
# section 4.3), so any row found here would mean one of these fires does have
# a nearby strike. A separate name avoids overwriting the lag-filtered
# `filtered_df` from section 4.6.
nearby_rows = df_distances[
    df_distances['f_name'].isin(fire_names_to_check) & (df_distances['distance'] <= 2)
]
fires_with_nearby_lightning = nearby_rows['f_name'].unique()

print(fires_with_nearby_lightning)


[]


## 4.10 Checking for Fires with More than One Lightning Strike in 'df_matched_fires'

In [None]:
# keep=False marks every row of a repeated fire name, not only the later occurrences
repeated_fire = df_matched_fires['f_name'].duplicated(keep=False)
duplicates = df_matched_fires.loc[repeated_fire]
duplicates.head()

Unnamed: 0,idx,dttime_utc,ltg_lat,ltg_lon,polarity,ell_smajor,ell_sminor,ell_angle,striketype,tmpc,...,precip24h-1d_ltg,precip24h_day_fire,precip24h_fire,precip24h+1h_fire,precip24h+2h_fire,precip24h-1d_fire,distance,time_difference,lag_day,type


## 4.11 Checking for Null Values in the Calculated 'distance', 'time_difference', 'lag_day', and 'type' Columns

In [None]:
# Rows where any of the derived columns is missing
derived_cols = ['distance', 'time_difference', 'lag_day', 'type']
has_null = df_matched_fires[derived_cols].isna().any(axis=1)
null_rows = df_matched_fires.loc[has_null]
null_rows.head()

Unnamed: 0,idx,dttime_utc,ltg_lat,ltg_lon,polarity,ell_smajor,ell_sminor,ell_angle,striketype,tmpc,...,precip24h-1d_ltg,precip24h_day_fire,precip24h_fire,precip24h+1h_fire,precip24h+2h_fire,precip24h-1d_fire,distance,time_difference,lag_day,type


In [None]:
# Count of matched rows with a missing derived value.
print(len(null_rows))

0


# 5. Plotting for Fire Class

In [None]:
# Size-class labels present among the matched fires, followed by all column names.
print(df_matched_fires['size_class'].unique())
print(df_matched_fires.columns.values)

['Class 4 (> 500 ac)' 'Class 2 (6-50 ac)' 'Class 1 (0-5 ac)'
 'Class 3 (51-500 ac)']
['idx' 'dttime_utc' 'ltg_lat' 'ltg_lon' 'polarity' 'ell_smajor'
 'ell_sminor' 'ell_angle' 'striketype' 'tmpc' 'tmpc-1h' 'tmpc-2h'
 'tmpc-3h' 'tmpc-4h' 'relh' 'relh-1h' 'relh-2h' 'relh-3h' 'relh-4h' 'sknt'
 'sknt-1h' 'sknt-2h' 'sknt-3h' 'sknt-4h' 'p01i' 'p01i-1h' 'p01i-2h'
 'p01i-3h' 'p01i-4h' 'p_obs' 'p_obs-1d' 'p_obs-2d' 'p_obs-3d' 'p_obs-4d'
 'p_obs+1d' 'p_obs+2d' 'p_obsS14d' 'p_nor' 'p_nor-1d' 'p_nor-2d'
 'p_nor-3d' 'p_nor-4d' 'p_nor+1d' 'p_nor+2d' 'p_norM14d' 'landcover' 'EVI'
 'ERC' 'BI' 'SC' 'IC' 'BIC' 'DSR' 'DC' 'DMC' 'dfmc-1h' 'dfmc-10h'
 'dfmc-100h' 'dfmc-1000h' 'RHMax' 'RHMin' 'KBDI' 'fid' 'county' 'funit'
 'region' 'f_name' 'icode' 'f_lat' 'f_lon' 'f_area' 'size_class'
 'cause_cat' 'cause_type' 'f_start' 'cont_time' 'precip24h_ltg'
 'precip24h+1h_ltg' 'precip24h+2h_ltg' 'precip24h_day_ltg'
 'precip24h-1d_ltg' 'precip24h_day_fire' 'precip24h_fire'
 'precip24h+1h_fire' 'precip24h+2h_fire' 'pre

## 5.1 Plots for Each Fire Class

In [None]:
# Lightning-side precipitation columns to compare, and the size classes in ascending order
columns_to_plot = ['precip24h-1d_ltg', 'precip24h_ltg', 'precip24h+1h_ltg', 'precip24h+2h_ltg', 'precip24h_day_ltg']
fire_classes = ['Class 1 (0-5 ac)', 'Class 2 (6-50 ac)', 'Class 3 (51-500 ac)', 'Class 4 (> 500 ac)']

# One box-plot figure per size class
for fire_class in fire_classes:
    in_class = df_matched_fires['size_class'] == fire_class

    # Rows of this class with every plotted column present
    df_filtered = df_matched_fires.loc[in_class, columns_to_plot].dropna()

    # Long format: one (column name, value) pair per row
    df_melt = df_filtered.melt(var_name="Columns", value_name="Precipitation (mm)")

    fig = px.box(
        df_melt,
        x="Columns",
        y="Precipitation (mm)",
        title=f"Precipitation Distributions for {fire_class}",
        labels={"Columns": ""},
    )
    fig.update_layout(showlegend=False)
    fig.update_traces(marker_color='blue')
    fig.show()

## 5.2 Combining Fire Classes into One Plot

In [None]:
# Columns to create boxplots from
columns_to_plot = ['precip24h-1d_ltg', 'precip24h_ltg', 'precip24h+1h_ltg', 'precip24h+2h_ltg', 'precip24h_day_ltg']

# Fire classes in ascending size order (also fixes the legend order)
fire_classes = ['Class 1 (0-5 ac)', 'Class 2 (6-50 ac)', 'Class 3 (51-500 ac)', 'Class 4 (> 500 ac)']

# Long format: one row per (fire, measurement) pair
df_melted = df_matched_fires.melt(id_vars=['size_class'], value_vars=columns_to_plot,
                         var_name='Measurement', value_name='Precipitation (mm)')
# Preview only; printing the whole frame floods the cell output.
print(df_melted.head())

color_map = {
    'Class 1 (0-5 ac)': 'blue',
    'Class 2 (6-50 ac)': 'red',
    'Class 3 (51-500 ac)': 'green',
    'Class 4 (> 500 ac)': 'yellow'
}

# Box plot coloured by fire class. The colours are passed to Plotly Express
# through color_discrete_map: px writes an explicit colour on every trace, so
# changing layout.colorway afterwards does not recolour them.
fig = px.box(df_melted, x='Measurement', y='Precipitation (mm)', color='size_class',
             title="Precipitation Distributions Across Fire Classes",
             labels={"Measurement": ""},
             category_orders={"Measurement": columns_to_plot, "size_class": fire_classes},
             color_discrete_map=color_map)

fig.update_traces(marker=dict(line=dict(width=2)))
fig.update_layout(showlegend=True)

# Display the plot
fig.show()


              size_class        Measurement  Precipitation (mm)
0     Class 4 (> 500 ac)   precip24h-1d_ltg            0.000000
1      Class 2 (6-50 ac)   precip24h-1d_ltg            0.499999
2       Class 1 (0-5 ac)   precip24h-1d_ltg            3.799992
3       Class 1 (0-5 ac)   precip24h-1d_ltg            0.100000
4       Class 1 (0-5 ac)   precip24h-1d_ltg            0.000000
...                  ...                ...                 ...
2045    Class 1 (0-5 ac)  precip24h_day_ltg            0.000000
2046    Class 1 (0-5 ac)  precip24h_day_ltg            0.000000
2047    Class 1 (0-5 ac)  precip24h_day_ltg            0.000000
2048    Class 1 (0-5 ac)  precip24h_day_ltg            1.799996
2049    Class 1 (0-5 ac)  precip24h_day_ltg           27.599996

[2050 rows x 3 columns]


# 6. Precipitation for All Fire Classes

In [None]:
# Precipitation columns shown in the plot
precip_cols = ['precip24h-1d_ltg','precip24h_ltg', 'precip24h+1h_ltg', 'precip24h+2h_ltg', 'precip24h_day_ltg']

# Keep only rows where all of those columns are present
df_clean = df_matched_fires[precip_cols].dropna()

# Long format expected by Plotly Express: one (column name, value) pair per row
df_melt = df_clean.melt(var_name="Columns", value_name="Precipitation (mm)")

# One box per precipitation column, all fire classes pooled
fig = px.box(
    df_melt,
    x="Columns",
    y="Precipitation (mm)",
    title="Precipitation Distributions for All Fire Classes",
    labels={"Columns": ""},
)
fig.update_layout(showlegend=False)      # no legend needed for a single series
fig.update_traces(marker_color='blue')   # box colour
fig.show()

# 7. Statistical Values

## 7.1 Precipitation Median Values Table

In [None]:
# Median precipitation values for each Fire Class.
# columns_to_plot is the list of *_ltg precipitation columns defined in section 5;
# values are in mm because df_matched_fires derives from the converted df_fires_mm.
medians = df_matched_fires.groupby('size_class')[columns_to_plot].median()

medians

Unnamed: 0_level_0,precip24h-1d_ltg,precip24h_ltg,precip24h+1h_ltg,precip24h+2h_ltg,precip24h_day_ltg
size_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Class 1 (0-5 ac),0.0,0.799998,6.099988,8.800008,1.749996
Class 2 (6-50 ac),0.0,0.799998,6.500012,8.400009,0.999998
Class 3 (51-500 ac),0.0,0.499999,1.699997,3.499993,0.399999
Class 4 (> 500 ac),0.0,0.0,0.549999,1.349997,0.649999


## 7.2 Number of Fires in the Data

In [None]:
# Fires per size class among the matched fires (each fire name counted once)
unique_matched_fires = df_matched_fires.drop_duplicates(subset='f_name')
count_classes = unique_matched_fires.groupby('size_class').size()
print(count_classes)
print(count_classes.sum())

size_class
Class 1 (0-5 ac)       240
Class 2 (6-50 ac)      127
Class 3 (51-500 ac)     31
Class 4 (> 500 ac)      12
dtype: int64
410


In [None]:
# Count of number of fires in df_fires_mm (all fires, before matching) for each Fire Class
count_classes = df_fires_mm.drop_duplicates(subset='f_name').groupby('size_class').size()
print(count_classes)
print(sum(count_classes))

size_class
Class 1 (0-5 ac)       245
Class 2 (6-50 ac)      129
Class 3 (51-500 ac)     32
Class 4 (> 500 ac)      12
dtype: int64
418


In [None]:
# Fires per size class in df_fires (each fire name counted once)
unique_fires = df_fires.drop_duplicates(subset='f_name')
count_classes = unique_fires.groupby('size_class').size()
print(count_classes)
print(count_classes.sum())

size_class
Class 1 (0-5 ac)       245
Class 2 (6-50 ac)      129
Class 3 (51-500 ac)     32
Class 4 (> 500 ac)      12
dtype: int64
418


# 8. Distribution of the Number of Lightning Strikes per Fire Class

In [None]:
# Strikes associated with each fire, per fire class.
# The counts come from df_fires_mm (every strike/fire row): df_matched_fires
# keeps exactly one strike per fire, so counting there gives 1 for every fire.
dfs = {
    fire_class: (
        df_fires_mm[df_fires_mm['size_class'] == fire_class]
        .groupby('f_name')['idx']
        .count()
    )
    for fire_class in fire_classes
}

# One histogram trace per class on a shared figure
fig = go.Figure()

for fire_class in fire_classes:
    fig.add_trace(go.Histogram(x=dfs[fire_class], name=fire_class))

# x holds the per-fire strike count; bar height is the number of fires with that count
fig.update_layout(barmode='group', xaxis_title='Number of Lightning Strikes per Fire',
                  yaxis_title='Number of Fires',
                  title='Distribution of Number of Lightning Strikes per Fire',
                  bargap=0.1)

# Make the histograms slightly transparent for better visualization
fig.update_traces(opacity=0.7)

# Display the plot
fig.show()

# 9. Lightning Strike Before 22

## 9.1 Distribution of the Hour of Day of Lightning Strikes

In [None]:
# 'hour' is otherwise first created in section 10, so on a fresh kernel this
# cell failed with a missing-column error; derive it here, before its first use.
df_matched_fires['hour'] = pd.to_datetime(df_matched_fires['dttime_utc']).dt.hour

fig = px.histogram(df_matched_fires, x='hour', nbins=24, title='Number of Lightning Strikes by Hour of the Day',
                   labels={'hour': 'Hour of the Day'})

fig.update_layout(barmode='group',
                  yaxis_title='Number of Lightning Strikes',
                  bargap=0.1)

# One tick per hour
fig.update_xaxes(tickvals=list(range(0, 24, 1)))
fig.update_traces(opacity=0.7)

fig.show()

## 9.2 Hour of Day of Lightning Strikes by Fire Class

In [None]:
# Size classes in ascending order, used to order the grouped bars and the legend
fire_classes = ['Class 1 (0-5 ac)', 'Class 2 (6-50 ac)', 'Class 3 (51-500 ac)', 'Class 4 (> 500 ac)']

# Histogram of strike hour, one bar group per size class
fig = px.histogram(df_matched_fires, x='hour', color='size_class', nbins=24,
                   title='Number of Lightning Strikes by Hour of the Day (Grouped by Fire Size)',
                   labels={'hour': 'Hour of the Day'},
                   barmode='group',
                   category_orders={"size_class": fire_classes})

# Axis label previously read "Lighting"
fig.update_layout(yaxis_title='Number of Lightning Strikes',
                  bargap=0.1)

# Tick every second hour to keep the grouped axis readable
fig.update_xaxes(tickvals=list(range(0, 24, 2)))

# Improve clarity with opacity
fig.update_traces(opacity=0.6)

fig.show()


## 9.3 Median Precipitation on the Day of the Lightning Strike, by Hour of Strike

In [None]:
# Group by 'hour' and compute the median of 'precip24h_day_ltg'.
# (The aggregation was .mean() although the variable, title and axis label
# all say median.)
medians_by_hour = df_matched_fires.groupby('hour')['precip24h_day_ltg'].median().reset_index()

# Plot the results
fig = px.bar(medians_by_hour, x='hour', y='precip24h_day_ltg',
             title='Median Precipitation on the Day of Lightning by Hour of the Day',
             labels={'precip24h_day_ltg': 'Median Precipitation (mm)', 'hour': 'Hour of the Day That Lightning Struck'})

fig.update_layout(bargap=0.1)
fig.update_xaxes(tickvals=list(range(0, 24)))

fig.update_traces(opacity=0.7)

fig.show()

This bar chart shows the median precipitation on the day of the lightning strike. The x-axis is the hour of the day at which the lightning struck, and the height of each bar is the median same-day precipitation for strikes that occurred at that hour.

## 9.4 Mean Precipitation from 24 h Before to 1 h After the Lightning Strike, by Hour of Strike



In [None]:
# Group by 'hour' and compute the mean for 'precip24h+1h_ltg'
means_by_hour = df_matched_fires.groupby('hour')['precip24h+1h_ltg'].mean().reset_index()

# Plot the results. The title now says "Mean", matching the aggregation and
# the axis label (it previously said "Median").
fig = px.bar(means_by_hour, x='hour', y='precip24h+1h_ltg',
             title='Mean Precipitation (24h Before to 1h After Lightning) by Hour of the Day',
             labels={'precip24h+1h_ltg': 'Mean Precipitation (mm)', 'hour': 'Hour of the Day That Lightning Struck'})

fig.update_layout(bargap=0.1)
fig.update_xaxes(tickvals=list(range(0, 24)))

fig.update_traces(opacity=0.7)

fig.show()

This bar chart shows the mean precipitation accumulated from 24 h before to 1 h after the lightning strike. The x-axis is the hour of the day at which the lightning struck, and the height of each bar is the mean precipitation over that window for strikes that occurred at that hour.

### FIX PLOT

In [None]:
# Median of 'precip24h_day_ltg' for every (hour, size class) combination
medians_by_hour_class = (
    df_matched_fires
    .groupby(['hour', 'size_class'])['precip24h_day_ltg']
    .median()
    .reset_index()
)

# Grouped bars: one bar per size class at each hour
fig = px.bar(
    medians_by_hour_class,
    x='hour',
    y='precip24h_day_ltg',
    color='size_class',
    barmode='group',
    title='Median Precipitation on the Day of Lightning by Hour of the Day (Grouped by Fire Size)',
    labels={'precip24h_day_ltg': 'Median Precipitation (mm)', 'hour': 'Hour of the Day'},
)
fig.update_layout(bargap=0.1)
fig.update_xaxes(tickvals=list(range(0, 24)))
fig.update_traces(opacity=0.7)
fig.show()

In [None]:
# Mean of 'precip24h_day_ltg' for every (hour, size class) combination
mean_by_hour_class = (
    df_matched_fires
    .groupby(['hour', 'size_class'])['precip24h_day_ltg']
    .mean()
    .reset_index()
)

# Grouped bars: one bar per size class at each hour
fig = px.bar(
    mean_by_hour_class,
    x='hour',
    y='precip24h_day_ltg',
    color='size_class',
    barmode='group',
    title='Mean Precipitation on the Day of Lightning by Hour of the Day (Grouped by Fire Size)',
    labels={'precip24h_day_ltg': 'Mean Precipitation (mm)', 'hour': 'Hour of the Day'},
)
fig.update_layout(bargap=0.1)
fig.update_xaxes(tickvals=list(range(0, 24)))
fig.update_traces(opacity=0.7)
fig.show()

In [None]:
# Group by 'hour' and 'size_class' and compute the median for 'precip24h+1h_ltg'
medians_by_hour_class = df_matched_fires.groupby(['hour', 'size_class'])['precip24h+1h_ltg'].median().reset_index()

# Plot the results. The title names the 24h+1h window actually plotted; it
# previously repeated the "on the Day of Lightning" title of the
# precip24h_day_ltg chart.
fig = px.bar(medians_by_hour_class, x='hour', y='precip24h+1h_ltg', color='size_class',
             title='Median Precipitation (24h Before to 1h After Lightning) by Hour of the Day (Grouped by Fire Size)',
             labels={'precip24h+1h_ltg': 'Median Precipitation (mm)', 'hour': 'Hour of the Day'},
             barmode='group')

fig.update_layout(bargap=0.1)
fig.update_xaxes(tickvals=list(range(0, 24)))

fig.update_traces(opacity=0.7)

fig.show()

# 10. Heatmap of the Number of Lightning Strikes per Hour

## 10.1 Number of Lightning Strikes per Hour for precip24h_day

In [None]:
# Hour of day (from dttime_utc) at which each matched strike occurred
df_matched_fires['hour'] = pd.to_datetime(df_matched_fires['dttime_utc']).dt.hour

# Column shown in this heatmap. The bin edges AND the binned values must come
# from the same column; previously the edges were derived from
# 'precip24h_day_ltg' while 'precip24h+1h_ltg' was the column being binned.
precip_col = 'precip24h_day_ltg'
n_bins = 5

# Equal-width bin edges spanning the observed range; linspace makes the last
# edge exactly equal to the maximum.
min_precip = df_matched_fires[precip_col].min()
max_precip = df_matched_fires[precip_col].max()
bin_edges = np.linspace(min_precip, max_precip, n_bins + 1)
labels = [f"{round(float(bin_edges[i]), 0)}-{round(float(bin_edges[i+1]), 0)}" for i in range(n_bins)]

# Right-closed bins with include_lowest=True keep both the minimum and the
# maximum; with right=False the row(s) holding the maximum fell outside every bin.
df_matched_fires['precip_bins'] = pd.cut(df_matched_fires[precip_col], bins=bin_edges,
                                         labels=labels, include_lowest=True)

# Strike counts per (hour, precipitation bin); observed=False keeps empty bins
# regardless of the pandas default.
pivot_table = df_matched_fires.groupby(['hour', 'precip_bins'], observed=False).size().unstack(fill_value=0)

# Make sure all 24 hours are present (missing hours become zero), then put
# precipitation bins on the rows and hours on the columns.
all_hours = list(range(24))
pivot_table = pivot_table.reindex(all_hours).fillna(0).astype(int).transpose()

# Reverse order of rows in pivot_table
pivot_table = pivot_table.iloc[::-1]

# Log scale for the colours; log1p(x) = log(x + 1) avoids log(0)
log_pivot_table = np.log1p(pivot_table)

# Closest hundred at or above the max count
rounded_max_value = int(np.ceil(pivot_table.values.max() / 100.0) * 100)

# Colorbar ticks are placed at log positions but labelled with raw counts
tick_values = list(range(0, rounded_max_value + 1, 10))
log_tick_values = [np.log1p(val) for val in tick_values]

# Create the heatmap
fig = go.Figure(go.Heatmap(
    z=log_pivot_table.values,
    x=log_pivot_table.columns,
    y=log_pivot_table.index,
    customdata=pivot_table.values,  # raw counts for the hover text
    colorscale="Blues",
    zmin=0,
    zmax=np.log1p(pivot_table.values.max()),
    hovertemplate='%{customdata} strikes<br>Hour: %{x}<br>Precipitation: %{y}<extra></extra>',
    colorbar=dict(
        title='Number of Strikes',
        tickvals=log_tick_values,
        ticktext=tick_values,
        tickmode='array'
    )
))

# Adjust the x-axis ticks to show every hour
fig.update_xaxes(tickvals=list(range(24)), ticktext=[str(i) for i in range(24)])

# Increase the gap between cells for better visualization
fig.update_traces(xgap=1, ygap=1)

# Reverse y-axis order
fig.update_layout(yaxis_autorange="reversed",
                  title="Number of Lightning Strikes by Hour of the Day and Precipitation Range for precip24h_day_ltg")

fig.show()


## 10.2 Number of Lightning Strikes per Hour for precip24h+1h

In [None]:
# Hour of day (from dttime_utc) at which each matched strike occurred
df_matched_fires['hour'] = pd.to_datetime(df_matched_fires['dttime_utc']).dt.hour

# Column shown in this heatmap
precip_col = 'precip24h+1h_ltg'
n_bins = 5

# Equal-width bin edges spanning the observed range; linspace makes the last
# edge exactly equal to the maximum.
min_precip = df_matched_fires[precip_col].min()
max_precip = df_matched_fires[precip_col].max()
bin_edges = np.linspace(min_precip, max_precip, n_bins + 1)
# Both ends of a label use the same rounding (the lower end used round(x, ),
# which yields an int, while the upper end was a float).
labels = [f"{round(float(bin_edges[i]), 0)}-{round(float(bin_edges[i+1]), 0)}" for i in range(n_bins)]

# Right-closed bins with include_lowest=True keep both the minimum and the
# maximum; with right=False the row(s) holding the maximum fell outside every bin.
df_matched_fires['precip_bins'] = pd.cut(df_matched_fires[precip_col], bins=bin_edges,
                                         labels=labels, include_lowest=True)

# Strike counts per (hour, precipitation bin); observed=False keeps empty bins
# regardless of the pandas default.
pivot_table = df_matched_fires.groupby(['hour', 'precip_bins'], observed=False).size().unstack(fill_value=0)

# Make sure all 24 hours are present (missing hours become zero), then put
# precipitation bins on the rows and hours on the columns.
all_hours = list(range(24))
pivot_table = pivot_table.reindex(all_hours).fillna(0).astype(int).transpose()

# Reverse order of rows in pivot_table
pivot_table = pivot_table.iloc[::-1]

# Log scale for the colours; log1p(x) = log(x + 1) avoids log(0)
log_pivot_table = np.log1p(pivot_table)

# Closest hundred at or above the max count
rounded_max_value = int(np.ceil(pivot_table.values.max() / 100.0) * 100)

# Colorbar ticks are placed at log positions but labelled with raw counts
tick_values = list(range(0, rounded_max_value + 1, 10))
log_tick_values = [np.log1p(val) for val in tick_values]

# Create the heatmap
fig = go.Figure(go.Heatmap(
    z=log_pivot_table.values,
    x=log_pivot_table.columns,
    y=log_pivot_table.index,
    customdata=pivot_table.values,  # raw counts for the hover text
    colorscale="Blues",
    zmin=0,
    zmax=np.log1p(pivot_table.values.max()),
    hovertemplate='%{customdata} strikes<br>Hour: %{x}<br>Precipitation: %{y}<extra></extra>',
    colorbar=dict(
        title='Number of Strikes',
        tickvals=log_tick_values,
        ticktext=tick_values,
        tickmode='array'
    )
))

# Adjust the x-axis ticks to show every hour
fig.update_xaxes(tickvals=list(range(24)), ticktext=[str(i) for i in range(24)])

# Increase the gap between cells for better visualization
fig.update_traces(xgap=1, ygap=1)

# Reverse y-axis order
fig.update_layout(yaxis_autorange="reversed",
                  title="Number of Lightning Strikes by Hour of the Day and Precipitation Range for precip24h+1h_ltg")

fig.show()


In [None]:
# NOTE(review): the recorded output ([0, 164.5, 329.0, ...]) contains non-integer
# values, which the integer range() that builds tick_values cannot produce;
# the output appears stale — re-run to refresh.
print(tick_values)

[0, 164.5, 329.0, 493.5, 658]


In [None]:
# Inspect the binned strike counts (precipitation bins as rows, hours as columns).
pivot_table.head()

hour,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
precip_bins,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
103.12-128.9,30,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,1,19,4,0
77.34-103.12,14,0,0,0,0,0,0,0,0,0,...,0,0,8,0,0,0,1,21,2,38
51.56-77.34,30,1,4,1,0,0,0,0,0,0,...,0,0,11,4,10,32,29,54,11,34
25.78-51.56,197,54,7,2,0,2,0,2,0,27,...,0,1,22,55,179,162,233,379,290,127
0.0-25.78,210,61,46,20,1,3,0,5,0,3,...,14,36,181,189,277,658,535,551,565,350


## 10.3 Heatmap for Fires in Class 1 and 2

In [None]:
# Rebinds the notebook-level fire_classes list (all four classes in earlier
# sections) to just the two smallest size classes.
fire_classes = ['Class 1 (0-5 ac)', 'Class 2 (6-50 ac)']

def plot_for_class(fire_class):
    # Filter the dataframe for the specific fire class
    df_filtered = df_fires_mm[df_fires_mm['size_class'] == fire_class].copy()

    df_filtered['hour'] = pd.to_datetime(df_filtered['dttime_utc']).dt.hour
    print(f"\nData for {fire_class}:")
    print(df_filtered['size_class'].head())
    # print(df_filtered['hour'].head())

    # Calculate the bin edges based on the range of the 'precip24h+1h_ltg' and create 5 bins
    min_precip = df_filtered['precip24h+1h_ltg'].min()
    max_precip = df_filtered['precip24h+1h_ltg'].max()
    bin_size = (max_precip - min_precip) / 5
    bin_edges = [min_precip + i*bin_size for i in range(6)]
    labels = [f"{round(bin_edges[i], 2)}-{round(bin_edges[i+1], 0)}" for i in range(5)]

    # Bin 'precip24h+1h_ltg' based on these bin edges
    df_filtered['precip_bins'] = pd.cut(df_filtered['precip24h+1h_ltg'], bins=bin_edges, labels=labels, right=False)

    # Create the pivot table (table-like structure)
    pivot_table = df_filtered.groupby(['hour', 'precip_bins']).size().unstack(fill_value=0)

    # Reindex columns to ensure all hours are represented, filling missing hours with zeros
    all_hours = list(range(24))
    pivot_table = pivot_table.reindex(all_hours).fillna(0).astype(int).transpose()

    # Reverse order of rows in pivot_table
    pivot_table = pivot_table.iloc[::-1]
    pivot_table.head()

    # Convert pivot table data to logarithmic scale (with an offset to avoid log(0))
    log_pivot_table = np.log(pivot_table + 1)

    # Closest hundredth greater than the max value
    rounded_max_value = int(np.ceil(pivot_table.values.max() / 100.0) * 100)

    # Defining the tick values for the colorbar based on the rounded max value
    tick_values = list(range(0, rounded_max_value + 1, 70))
    log_tick_values = [np.log(val + 1) for val in tick_values]

# Create the heatmap
    fig = go.Figure(go.Heatmap(
        z=log_pivot_table.values,
        x=log_pivot_table.columns,
        y=log_pivot_table.index,
        customdata=pivot_table.values,
        colorscale="Blues",
        zmin=0,
        zmax=np.log(pivot_table.values.max() + 1),
        hovertemplate='%{customdata} strikes<br>Hour: %{x}<br>Precipitation: %{y}<extra></extra>',
        colorbar=dict(
            title='Number of Strikes',
            tickvals=log_tick_values,
            ticktext=tick_values,
            tickmode='array'),
            text=pivot_table.values, # this line adds the text on each cell
            hoverinfo="text"
    ))

    # Adjust the x-axis ticks to show every hour
    fig.update_xaxes(tickvals=list(range(24)), ticktext=[str(i) for i in range(24)])

    # Increase the gap between cells for better visualization
    fig.update_traces(xgap=1, ygap=1)

    # Reverse y-axis order
    fig.update_layout(yaxis_autorange="reversed",
                          title=f"Number of Lightning Strikes by Hour for {fire_class} and Precipitation Range for precip24+1h_ltg")

    fig.show()

# Now, call the function for each fire class
for fire_class in fire_classes:
    plot_for_class(fire_class)



Data for Class 1 (0-5 ac):
0    Class 1 (0-5 ac)
1    Class 1 (0-5 ac)
4    Class 1 (0-5 ac)
5    Class 1 (0-5 ac)
6    Class 1 (0-5 ac)
Name: size_class, dtype: object



Data for Class 2 (6-50 ac):
3     Class 2 (6-50 ac)
8     Class 2 (6-50 ac)
11    Class 2 (6-50 ac)
27    Class 2 (6-50 ac)
28    Class 2 (6-50 ac)
Name: size_class, dtype: object


In [None]:
# Largest value found in each of the columns listed in `columns_to_plot`
df_matched_fires[columns_to_plot].max()

precip24h-1d_ltg      46.100009
precip24h_ltg         60.900005
precip24h+1h_ltg      95.300013
precip24h+2h_ltg     139.600000
precip24h_day_ltg     73.600005
dtype: float64

## 10.4 Heatmap for Fires in Class 3 and 4

In [None]:
fire_classes = ['Class 3 (51-500 ac)', 'Class 4 (> 500 ac)']

# NOTE: this cell redefines `plot_for_class` from section 10.3 and replaces it
# for the rest of the session. The colorbar tick spacing used here is 5 counts
# instead of 70; it is exposed as `tick_step` so the value is no longer buried
# in the body.
def plot_for_class(fire_class, tick_step=5, precip_col='precip24h+1h_ltg', n_bins=5):
    """Show an hour-by-precipitation heatmap of strike counts for one fire size class.

    Reads the notebook-level ``df_fires_mm`` frame, keeps the rows whose
    ``size_class`` equals ``fire_class``, splits ``precip_col`` into ``n_bins``
    equal-width ranges and counts rows per (hour of ``dttime_utc``, range).
    Cell colours follow log(count + 1); the hover text and the colorbar labels
    show the raw counts.

    Parameters
    ----------
    fire_class : str
        Value of ``df_fires_mm['size_class']`` to plot.
    tick_step : int, default 5
        Spacing, in raw counts, between colorbar ticks. Must be positive.
    precip_col : str, default 'precip24h+1h_ltg'
        Column of ``df_fires_mm`` that is binned on the y-axis.
    n_bins : int, default 5
        Number of equal-width precipitation bins.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``. When the selected rows do
        not span a positive range of ``precip_col`` (no rows, all NaN, or a
        single distinct value) a message is printed and nothing is plotted.
    """
    # Filter the dataframe for the specific fire class
    df_filtered = df_fires_mm[df_fires_mm['size_class'] == fire_class].copy()

    df_filtered['hour'] = pd.to_datetime(df_filtered['dttime_utc']).dt.hour
    print(f"\nData for {fire_class}:")
    print(df_filtered['size_class'].head())

    min_precip = df_filtered[precip_col].min()
    max_precip = df_filtered[precip_col].max()

    # Equal-width bins need a strictly positive range. The comparison is also
    # False when min/max are NaN, which covers an empty or all-NaN selection.
    if not max_precip > min_precip:
        print(f"Skipping {fire_class}: '{precip_col}' has no range to split into bins.")
        return

    # Equal-width bin edges; both ends of every label use the same precision so
    # the upper bound of one label matches the lower bound of the next.
    bin_size = (max_precip - min_precip) / n_bins
    bin_edges = [min_precip + i * bin_size for i in range(n_bins + 1)]
    labels = [f"{round(bin_edges[i], 2)}-{round(bin_edges[i + 1], 2)}" for i in range(n_bins)]

    # right=False makes every bin half-open, [low, high). With the top edge equal
    # to the maximum, the wettest rows would be left unbinned (NaN) and silently
    # dropped from the counts, so push the top edge just past the maximum.
    cut_edges = bin_edges[:-1] + [np.nextafter(max(bin_edges[-1], max_precip), np.inf)]
    df_filtered['precip_bins'] = pd.cut(df_filtered[precip_col], bins=cut_edges, labels=labels, right=False)

    # Count rows per (hour, bin). observed=False is spelled out so that empty
    # bins keep a zero row regardless of the pandas default for categoricals.
    pivot_table = df_filtered.groupby(['hour', 'precip_bins'], observed=False).size().unstack(fill_value=0)

    # Make sure all 24 hours are present (missing hours become zero), then put
    # the bins on the rows and the hours on the columns.
    all_hours = list(range(24))
    pivot_table = pivot_table.reindex(all_hours).fillna(0).astype(int).transpose()

    # Reverse order of rows in pivot_table
    pivot_table = pivot_table.iloc[::-1]

    # Convert pivot table data to logarithmic scale (with an offset to avoid log(0))
    log_pivot_table = np.log(pivot_table + 1)

    # Round the largest count up to the next multiple of 100
    rounded_max_value = int(np.ceil(pivot_table.values.max() / 100.0) * 100)

    # Colorbar ticks: positioned on the log scale, labelled with the raw counts
    tick_values = list(range(0, rounded_max_value + 1, tick_step))
    log_tick_values = [np.log(val + 1) for val in tick_values]

    # Create the heatmap
    fig = go.Figure(go.Heatmap(
        z=log_pivot_table.values,
        x=log_pivot_table.columns,
        y=log_pivot_table.index,
        customdata=pivot_table.values,
        # Raw counts attached to every cell. NOTE(review): plotly appears to draw
        # these on the cells only when `texttemplate` is also set - confirm before
        # relying on on-cell labels.
        text=pivot_table.values,
        colorscale="Blues",
        zmin=0,
        zmax=np.log(pivot_table.values.max() + 1),
        # The hover box is fully defined here; a separate `hoverinfo` setting is
        # not needed because `hovertemplate` takes precedence over it.
        hovertemplate='%{customdata} strikes<br>Hour: %{x}<br>Precipitation: %{y}<extra></extra>',
        colorbar=dict(
            title='Number of Strikes',
            tickvals=log_tick_values,
            ticktext=tick_values,
            tickmode='array')
    ))

    # Adjust the x-axis ticks to show every hour
    fig.update_xaxes(tickvals=all_hours, ticktext=[str(i) for i in all_hours])

    # Increase the gap between cells for better visualization
    fig.update_traces(xgap=1, ygap=1)

    # Reverse y-axis order
    fig.update_layout(yaxis_autorange="reversed",
                      title=f"Number of Lightning Strikes by Hour for {fire_class} and Precipitation Range for {precip_col}")

    fig.show()

# Draw one heatmap per fire class listed above
for fire_class in fire_classes:
    plot_for_class(fire_class)



Data for Class 3 (51-500 ac):
16    Class 3 (51-500 ac)
17    Class 3 (51-500 ac)
18    Class 3 (51-500 ac)
19    Class 3 (51-500 ac)
20    Class 3 (51-500 ac)
Name: size_class, dtype: object



Data for Class 4 (> 500 ac):
2       Class 4 (> 500 ac)
1013    Class 4 (> 500 ac)
1014    Class 4 (> 500 ac)
1015    Class 4 (> 500 ac)
1016    Class 4 (> 500 ac)
Name: size_class, dtype: object


# 11. Stacked Bar Chart of Lightning Strikes and Precipitation Levels per Hour of the Day

## 11.1 Calculating Precipitation Bins

In [None]:
# Split the observed range of 'precip24h+1h_ltg' into 10 equal-width bins
# (the stacked bar chart further down defines one colour per bin).
n_precip_bins = 10
min_precip = df_matched_fires['precip24h+1h_ltg'].min()
max_precip = df_matched_fires['precip24h+1h_ltg'].max()
bin_size = (max_precip - min_precip) / n_precip_bins
bin_edges = [min_precip + i*bin_size for i in range(n_precip_bins + 1)]
labels = [f"{round(bin_edges[i],0)}-{round(bin_edges[i+1], 0)}" for i in range(n_precip_bins)]

# Bin 'precip24h+1h_ltg' into half-open [low, high) intervals. With the top edge
# equal to the maximum, the maximum row would get no bin (NaN), so the cut uses a
# copy of the edges whose last value sits just above the maximum. `bin_edges`
# itself is left untouched for the cells that follow.
closed_top_edges = bin_edges[:-1] + [np.nextafter(max(bin_edges[-1], max_precip), np.inf)]
df_matched_fires['precip_bins'] = pd.cut(df_matched_fires['precip24h+1h_ltg'], bins=closed_top_edges, labels=labels, right=False)


In [None]:
# Widen the two outer edges slightly so that, with right-closed bins, both the
# minimum and the maximum value fall inside a bin.
# The nudge is applied to a copy: changing `bin_edges` in place would widen the
# outer bins by another 0.001 every time this cell is re-run.
adjusted_bin_edges = list(bin_edges)
adjusted_bin_edges[0] = adjusted_bin_edges[0] - 0.001    # first edge slightly to the left
adjusted_bin_edges[-1] = adjusted_bin_edges[-1] + 0.001  # last edge slightly to the right

# Bin 'precip24h+1h_ltg' based on the adjusted edges (intervals closed on the right)
df_matched_fires['precip_bins'] = pd.cut(df_matched_fires['precip24h+1h_ltg'], bins=adjusted_bin_edges, labels=labels, right=True)


## 11.2 Checking for NaN Values

In [None]:
# Number of missing values in each of the two columns the chart is grouped by
for checked_column in ('hour', 'precip_bins'):
    print(df_matched_fires[checked_column].isnull().sum())

0
0


In [None]:
# Hour of the day (0-23) taken from the strike timestamp column
df_matched_fires['hour'] = pd.to_datetime(df_matched_fires['dttime_utc']).dt.hour

# 1. Count strikes per (hour, precipitation bin). observed=False is spelled out so
#    that bins without any strike still get a zero column, independent of the
#    pandas default for categorical group keys.
pivot_table = df_matched_fires.groupby(['hour', 'precip_bins'], observed=False).size().unstack(fill_value=0)

# 2. Order the bin columns by the numeric lower bound encoded in their label
pivot_table = pivot_table[sorted(pivot_table.columns, key=lambda x: float(x.split('-')[0]))]

# 3. One colour per bin: blues for the lower bins, reds for the upper bins
custom_color_scale = [
    '#D2E4F4', # Lighter blue
    '#A6C9E2',
    '#79AED0',
    '#4D93BE',
    '#24679B', # Darker blue
    '#FAD2D3', # Lighter red
    '#F79A9D',
    '#F26268',
    '#EB2B35',
    '#D00000'  # Darker red
]

# 4. Plot one stacked bar trace per precipitation bin
fig = go.Figure()

for idx, column in enumerate(pivot_table.columns):
    fig.add_trace(
        go.Bar(x=pivot_table.index,
               y=pivot_table[column],
               name=column,
               # Wrap around the palette so a bin count larger than the palette
               # reuses colours instead of raising IndexError.
               marker_color=custom_color_scale[idx % len(custom_color_scale)])
    )

fig.update_layout(
    title='Number of Lightning Strikes by Hour of the Day and Precipitation Range',
    xaxis_title='Hour of the Day',
    yaxis_title='Number of Lightning Strikes',
    barmode='stack',
    bargap=0.1,
    height=600  # adjust this value as per your preference
)

# One x-axis tick per hour, and a hover box showing the count and the hour
fig.update_xaxes(tickvals=list(range(0, 24)))
fig.update_traces(hovertemplate="Number of Lightning Strikes: %{y}<br>Hour: %{x}<extra></extra>")


fig.show()

**Height of Each Stacked Segment:** Number of lightning strikes in that precipitation range<br>
**Color of Each Stacked Segment:** Precipitation level<br>
**Total Height of the Bar:** Total number of lightning strikes in that hour

# 12. Downloading Matched Lightning and Fire DataFrame

In [None]:
# Write the matched lightning/fire frame to a CSV file in the working directory,
# then hand that file to the Colab `files` helper for download.
matched_csv_name = '2022_with_fire_mrms_v7_MATCHED_LIGHTNING.csv'
df_matched_fires.to_csv(matched_csv_name)
files.download(matched_csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>