# Imports and installations

In [None]:
!pip install cartopy

In [None]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import cartopy.feature as cf
import cartopy.crs as ccrs
import cartopy.mpl.ticker as cticker

from cartopy.util import add_cyclic_point
from matplotlib import animation
from tqdm.auto import tqdm
from datetime import datetime,timedelta

In [None]:
import  xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime,timedelta
import warnings
import math
from tqdm.auto import tqdm
from tqdm.auto import tqdm


In [None]:
# Mount Google Drive (Colab-only API) so that the /content/drive/... paths
# used by the cells below can be read and written.
from google.colab import drive
drive.mount('/content/drive')

# Processing data for the plot

## Daily maximums


Here we are using ERA5 data to calculate our **daily maximum t2m**.

In [None]:

# This is a very time consuming process.
# Builds the daily-maximum 2 m temperature (deg C) for the study box from the
# per-file ERA5 NetCDFs and writes the concatenated result to max_t2m_no_mask.nc.
import os
from tqdm.auto import tqdm
directory_path='/content/drive/MyDrive/AP_HW/ERA5 Data/t2m_d2m_sp'

# Study-area bounding box. It is loop-invariant, so it is built once here.
# The latitude slice is written north -> south; presumably the files store
# latitude in descending order -- TODO confirm.
lat_range=slice(26.5,20.75)
lon_range=slice(88,92.5)

t2m_list=[]
for filename in tqdm(sorted(os.listdir(directory_path))):
  if not filename.endswith('.nc'):
    continue
  filepath=os.path.join(directory_path,filename)
  # The context manager closes the file handle as soon as this file's daily
  # maxima are in memory, instead of leaving one open handle per input file.
  with xr.open_dataset(filepath) as ds:
    ds_t2m=ds.t2m.sel(latitude=lat_range,longitude=lon_range)
    # Move every value 6 steps later along `time` (the first 6 steps of each
    # file become NaN, the last 6 are dropped).
    # NOTE(review): presumably hourly data being relabelled UTC -> UTC+6 -- confirm.
    ds_t2m_bdt=ds_t2m.shift(time=6)
    ds_t2m_C=ds_t2m_bdt-273.15
    # Re-attach the source attributes after the subtraction, then fix the unit.
    ds_t2m_C=ds_t2m_C.assign_attrs(ds_t2m.attrs)
    ds_t2m_C.attrs['units']= '° C'
    # keep_attrs=True carries the attributes set above through the reduction;
    # .load() guarantees the values are in memory before the file is closed.
    tmax_ds_daily=ds_t2m_C.resample(time='D').max(keep_attrs=True).load()
  t2m_list.append(tmax_ds_daily)

# Fail with a clear message instead of xr.concat's error on an empty list.
if not t2m_list:
  raise FileNotFoundError(f'No .nc files found in {directory_path}')

max_t2m_no_mask=xr.concat(t2m_list,dim='time')
max_t2m_no_mask.to_netcdf('/content/drive/MyDrive/AP_HW/Scripts-ll/HW_dates/data/max_t2m_no_mask.nc')

Here we use the equivalent potential temperature (EPT) data calculated in `EPT calculation.ipynb`.

In [None]:
# Study-area bounding box (same values as used for t2m).
lat_range,lon_range=slice(26.5,20.75),slice(88,92.5)

# Open all EPT files as one dataset and cut out the study box.
ds=xr.open_mfdataset('/content/drive/MyDrive/AP_HW/ERA5 Data/EPT/*.nc')
ds_ept=ds['ept'].sel(longitude=lon_range,latitude=lat_range)

# Move every value 6 steps later along `time`, as is done for t2m.
ds_ept_bdt=ds_ept.shift(time=6)

# Note: despite the file name, no daily maximum is taken in this cell;
# the shifted field is written as-is.
ds_ept_bdt.to_netcdf('/content/drive/MyDrive/AP_HW/Scripts-ll/HW_dates/data/max_ept_no_mask.nc')

Here we use the wet-bulb temperature (WBT) data calculated in `WBT calculation.ipynb`.

In [None]:
# Study-area bounding box (same values as used for t2m and ept).
lat_range,lon_range=slice(26.5,20.75),slice(88,92.5)

# Open all WBT files as one dataset and cut out the study box.
wbts=xr.open_mfdataset('/content/drive/MyDrive/AP_HW/Scripts-ll/WBT/wbt_files/*.nc')
wbt_ds=wbts['wbt'].sel(longitude=lon_range,latitude=lat_range)

# NOTE(review): unlike t2m and ept, no shift(time=6) is applied here --
# confirm the WBT files are already on the shifted time axis.
wbt_ds.to_netcdf('/content/drive/MyDrive/AP_HW/Scripts-ll/HW_dates/data/max_wbt_no_mask.nc')

In [None]:
# Opening the saved files for t2m, ept, wbt.
# These are the three *_no_mask.nc files written by the processing cells above;
# reading them back lets the rest of the notebook run without redoing that work.

ds_ept =xr.open_dataset('/content/drive/MyDrive/AP_HW/Scripts-ll/HW_dates/data/max_ept_no_mask.nc')
ds_t2m =xr.open_dataset('/content/drive/MyDrive/AP_HW/Scripts-ll/HW_dates/data/max_t2m_no_mask.nc')
ds_wbt =xr.open_dataset('/content/drive/MyDrive/AP_HW/Scripts-ll/HW_dates/data/max_wbt_no_mask.nc')
# Display the WBT dataset as a quick visual check.
ds_wbt

In [None]:
# converting from K to C
# ept and wbt have 273.15 subtracted here (presumably stored in kelvin -- confirm).
# t2m is only copied: 273.15 was already subtracted when max_t2m_no_mask.nc was built.

ds_ept_degC=ds_ept-273.15
ds_t2m_degC=ds_t2m.copy()
ds_wbt_degC=ds_wbt-273.15
ds_wbt_degC

In [None]:
# Making daily data
# Reduce each variable to its maximum per calendar day of the `time` axis.
# (t2m was already reduced to daily maxima before it was saved, so its
# resample here operates on daily values.)

ds_ept_daily_max=ds_ept_degC.ept.resample(time='D').max()
ds_t2m_daily_max=ds_t2m_degC.t2m.resample(time='D').max()
ds_wbt_daily_max=ds_wbt_degC.wbt.resample(time='D').max()
ds_wbt_daily_max

In [None]:
# Taking Spatial Average for BD

def _bd_weighted_mean(daily_max):
  """Collapse latitude/longitude into a cos(latitude)-weighted mean per time step."""
  lat_weights=np.cos(np.deg2rad(daily_max.latitude))
  lat_weights.name='weights'
  return daily_max.weighted(lat_weights).mean(['longitude','latitude'])

BD_mean_max_ept=_bd_weighted_mean(ds_ept_daily_max)
BD_mean_max_t2m=_bd_weighted_mean(ds_t2m_daily_max)
BD_mean_max_wbt=_bd_weighted_mean(ds_wbt_daily_max)
BD_mean_max_wbt

In [None]:
# Taking data for premonsoon and monsoon

# March-May (pre-monsoon) plus June-September (monsoon).
season_months=[3,4,5,6,7,8,9]

BD_mean_max_ept=BD_mean_max_ept.sel(time=BD_mean_max_ept.time.dt.month.isin(season_months))
BD_mean_max_t2m=BD_mean_max_t2m.sel(time=BD_mean_max_t2m.time.dt.month.isin(season_months))
BD_mean_max_wbt=BD_mean_max_wbt.sel(time=BD_mean_max_wbt.time.dt.month.isin(season_months))

In [None]:
# ds_t2m_daily_max

In [None]:
# Converting to Dataframe
# reset_index() turns the index produced by to_dataframe() into ordinary
# columns, which the merge in a later cell relies on.
max_ept_df=BD_mean_max_ept.to_dataframe().reset_index()
max_t2m_df=BD_mean_max_t2m.to_dataframe().reset_index()
max_wbt_df=BD_mean_max_wbt.to_dataframe().reset_index()


In [None]:
# Display the WBT frame to check its columns before merging.
max_wbt_df

In [None]:
# Merging the dataframes for t2m,ept,wbt.
# No `on=` is given, so pandas joins on every column name the frames share
# (inner join by default).
# NOTE(review): presumably `time` is the only shared column -- confirm,
# otherwise pass on='time' explicitly.
df = pd.merge(max_ept_df,max_t2m_df)
df=pd.merge(df,max_wbt_df)
df

In [None]:
# Saving the file to use it later for plotting.
# index=False keeps the row index out of the CSV.
df.to_csv('/content/drive/MyDrive/AP_HW/Scripts-ll/Trends/data/ept_t2m_wbt_df.csv',index=False)

# Plot

In [None]:
# Loading the file for plotting
# `time` is not parsed here; it is converted with pd.to_datetime in the
# cleaning cell below.
df=pd.read_csv('/content/drive/MyDrive/AP_HW/Scripts-ll/Trends/data/ept_t2m_wbt_df.csv')
df

In [None]:
# Show floats with two decimals in every DataFrame/Series displayed from here on.
pd.set_option('display.float_format', '{:.2f}'.format)

In [None]:
#cleaning the dataframe and separating it by season
# .assign() returns a new frame with `time` parsed to datetime. Writing the
# column into the result of dropna() instead is the pattern pandas flags with
# SettingWithCopyWarning (hidden in this notebook by the global warnings filter).
clean_df=df.dropna().assign(time=lambda frame: pd.to_datetime(frame['time']))

# Month number of every row, computed once and shared by both season masks.
month=clean_df['time'].dt.month
df_mam = clean_df[month.between(3, 5)]    # March-May (bounds inclusive)
df_jjas = clean_df[month.between(6, 9)]   # June-September (bounds inclusive)

In [None]:
# Summary statistics of pre-monsoon (MAM) ept, including the 95th percentile.
df_mam['ept'].describe(percentiles=[0.95])


In [None]:
# Summary statistics of pre-monsoon (MAM) t2m, including the 95th percentile.
df_mam['t2m'].describe(percentiles=[0.95])


In [None]:
# Summary statistics of pre-monsoon (MAM) wbt, including the 95th percentile.
df_mam['wbt'].describe(percentiles=[0.95])


In [None]:
# Functions used for plotting.

from scipy.stats import gaussian_kde

# Generate a common x-axis range based on two temperature arrays
def x_vals_range(temp_values_1,temp_values_2):
  """Build one evaluation grid (step 0.1) that spans both samples.

  The grid runs from the floor of the smaller minimum to the ceil of the
  larger maximum, so both KDE curves can be evaluated on identical x values.

  Parameters: two array-likes exposing `.min()` / `.max()`.
  Returns: 1-D NumPy array of grid points.
  """
  lower=min(np.floor(temp_values_1.min()),np.floor(temp_values_2.min()))
  upper=max(np.ceil(temp_values_1.max()),np.ceil(temp_values_2.max()))
  # The +0.01 pad keeps `upper` itself on the grid (np.arange excludes its stop).
  return np.arange(lower, upper + 0.01, 0.1)

# Compute Gaussian KDEs for two datasets and return evaluated values
def calculate_kde(temp_values_1,temp_values_2):
  """Evaluate a Gaussian KDE of each sample on a grid shared by both.

  Returns a tuple `(density_1, density_2, x_vals)`, where `x_vals` comes from
  `x_vals_range` and each density array is evaluated on it.
  """
  x_vals=x_vals_range(temp_values_1,temp_values_2)
  y_kde_1,y_kde_2=(gaussian_kde(sample)(x_vals) for sample in (temp_values_1,temp_values_2))
  return y_kde_1,y_kde_2,x_vals

# Calculate 25th, 50th, and 75th percentiles of a dataset
def calculate_quartiles(data):
    """Return the 25th, 50th and 75th percentiles of `data` as a NumPy array."""
    quartile_levels = [25, 50, 75]
    return np.percentile(data, quartile_levels)

# Compute the percentage increase from value1 to value2
def percentage_increase(value1,value2):
  """Return the change from `value1` to `value2` as a percentage of `value1`.

  Works element-wise when NumPy arrays are passed.
  """
  return ((value2-value1)/value1)*100

# Calculate percentage increase in area under KDE curves for a specific region
def increase_in_filled_region(y_kde_1,y_kde_2,x_vals,condition):
  """Percentage change in area under two curves over a masked part of the grid.

  Parameters
  ----------
  y_kde_1, y_kde_2 : NumPy arrays of curve values on `x_vals`.
  x_vals : NumPy array of grid points.
  condition : boolean mask (same length) selecting the region to integrate.

  Returns
  -------
  The change from the area under `y_kde_1` to the area under `y_kde_2`,
  as a percentage of the former (trapezoidal integration).
  """
  # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
  # use whichever the installed NumPy provides.
  integrate=getattr(np,'trapezoid',None) or np.trapz
  area_1=integrate(y_kde_1[condition], x_vals[condition])
  area_2=integrate(y_kde_2[condition], x_vals[condition])
  return ((area_2-area_1)/area_1)*100


In [None]:
#@title Table_1
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Inclusive year ranges of the two periods being compared. The printed labels
# are derived from these, so label and filter cannot drift apart (the labels
# used to say 2009-2024 while the filter selected 2010-2024).
EARLY_YEARS = (1971, 1985)
LATE_YEARS = (2010, 2024)


def _select_years(season_df, years):
  """Return the rows of `season_df` whose `time` year lies in the inclusive range `years`."""
  year = season_df['time'].dt.year
  return season_df[(year >= years[0]) & (year <= years[1])]


def _print_quartile_summary(variable, season, quartiles_1, quartiles_2, increase):
  """Print the quartiles of both periods and their percentage change for one variable."""
  head = f'{variable} Percentile {season}'
  for years, q in ((EARLY_YEARS, quartiles_1), (LATE_YEARS, quartiles_2)):
    print(f'{head} ({years[0]}-{years[1]}):', f'25th:{q[0]:.2f}, 50th:{q[1]:.2f}, 75th:{q[2]:.2f}')
  # ':+.2f' prints the real sign, so a decrease shows as '-x' rather than '+-x'.
  print(f'{head} Increase   :', f'25th:{increase[0]:+.2f}%, 50th:{increase[1]:+.2f}%, 75th:{increase[2]:+.2f}%')


# The KDE curves are not needed for this table; the figure cell evaluates them itself.

# First row: MAM data
df_1 = _select_years(df_mam, EARLY_YEARS)
df_2 = _select_years(df_mam, LATE_YEARS)

# quartiles (these names are read by the cell that builds the Table_1 DataFrame)
t2m_mam_quartiles_1 = calculate_quartiles(df_1.t2m)
t2m_mam_quartiles_2 = calculate_quartiles(df_2.t2m)
ept_mam_quartiles_1 = calculate_quartiles(df_1.ept)
ept_mam_quartiles_2 = calculate_quartiles(df_2.ept)
wbt_mam_quartiles_1 = calculate_quartiles(df_1.wbt)
wbt_mam_quartiles_2 = calculate_quartiles(df_2.wbt)
increase_mam_t2m=percentage_increase(t2m_mam_quartiles_1,t2m_mam_quartiles_2)
increase_mam_ept=percentage_increase(ept_mam_quartiles_1,ept_mam_quartiles_2)
increase_mam_wbt=percentage_increase(wbt_mam_quartiles_1,wbt_mam_quartiles_2)

_print_quartile_summary('T2M', 'premonsoon', t2m_mam_quartiles_1, t2m_mam_quartiles_2, increase_mam_t2m)
_print_quartile_summary('EPT', 'premonsoon', ept_mam_quartiles_1, ept_mam_quartiles_2, increase_mam_ept)
_print_quartile_summary('WBT', 'premonsoon', wbt_mam_quartiles_1, wbt_mam_quartiles_2, increase_mam_wbt)

# Second row: JJAS data
df_1 = _select_years(df_jjas, EARLY_YEARS)
df_2 = _select_years(df_jjas, LATE_YEARS)

# quartiles
t2m_jjas_quartiles_1 = calculate_quartiles(df_1.t2m)
t2m_jjas_quartiles_2 = calculate_quartiles(df_2.t2m)
ept_jjas_quartiles_1 = calculate_quartiles(df_1.ept)
ept_jjas_quartiles_2 = calculate_quartiles(df_2.ept)
wbt_jjas_quartiles_1 = calculate_quartiles(df_1.wbt)
wbt_jjas_quartiles_2 = calculate_quartiles(df_2.wbt)
increase_jjas_t2m=percentage_increase(t2m_jjas_quartiles_1,t2m_jjas_quartiles_2)
increase_jjas_ept=percentage_increase(ept_jjas_quartiles_1,ept_jjas_quartiles_2)
increase_jjas_wbt=percentage_increase(wbt_jjas_quartiles_1,wbt_jjas_quartiles_2)

_print_quartile_summary('T2M', 'monsoon', t2m_jjas_quartiles_1, t2m_jjas_quartiles_2, increase_jjas_t2m)
_print_quartile_summary('EPT', 'monsoon', ept_jjas_quartiles_1, ept_jjas_quartiles_2, increase_jjas_ept)
_print_quartile_summary('WBT', 'monsoon', wbt_jjas_quartiles_1, wbt_jjas_quartiles_2, increase_jjas_wbt)



In [None]:
import pandas as pd

# Build data manually using your calculated quartiles & percentage increases
# Every value column holds the pre-monsoon Q1-Q3 followed by the monsoon Q1-Q3
# (each quartile/increase array has exactly three entries).
# The later period is labelled 2010–2024 to match the year filter that
# produced the *_quartiles_2 arrays (it was previously mislabelled 2009–2024).

table_data = {
    "Season": ["Pre-monsoon"]*3 + ["Monsoon"]*3,
    "Quartiles": ["Q1", "Q2", "Q3", "Q1", "Q2", "Q3"],

    "T2M (1971–1985)": [*t2m_mam_quartiles_1, *t2m_jjas_quartiles_1],
    "T2M (2010–2024)": [*t2m_mam_quartiles_2, *t2m_jjas_quartiles_2],
    "T2M Increase (%)": [*increase_mam_t2m, *increase_jjas_t2m],

    "EPT (1971–1985)": [*ept_mam_quartiles_1, *ept_jjas_quartiles_1],
    "EPT (2010–2024)": [*ept_mam_quartiles_2, *ept_jjas_quartiles_2],
    "EPT Increase (%)": [*increase_mam_ept, *increase_jjas_ept],

    "WBT (1971–1985)": [*wbt_mam_quartiles_1, *wbt_jjas_quartiles_1],
    "WBT (2010–2024)": [*wbt_mam_quartiles_2, *wbt_jjas_quartiles_2],
    "WBT Increase (%)": [*increase_mam_wbt, *increase_jjas_wbt],
}

# Create DataFrame
df_table = pd.DataFrame(table_data)

# Round values
df_table = df_table.round(2)

# Display in notebook
import IPython.display as disp
disp.display(df_table)

# Export if needed
df_table.to_csv("/content/drive/MyDrive/AP_HW/Scripts-ll/Manuscript codes/Fig_3/Fig_3_data/Table_1.csv", index=False)



In [None]:
# Display the merged frame loaded from the CSV as a visual check before plotting.
df

Note: the figure below uses the unmasked (`*_no_mask`) data.

In [None]:
#@title figure

# Now we are  plotting the PDFs  for the data above.
# One panel per (variable, season): rows are t2m / ept / wbt, columns are
# pre-monsoon (MAM) / monsoon (JJAS). Each panel shows the KDE of the early and
# late period and shades the part of both curves above the 95th percentile of
# the full seasonal record.

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Inclusive year ranges of the two periods compared in every panel.
EARLY_YEARS = (1971, 1985)
LATE_YEARS = (2010, 2024)
# Percentile of the full seasonal record above which the tail is shaded.
TAIL_PERCENTILE = 95

X_LABELS = {
  't2m': '2m Temperature(°C)',
  'ept': 'Equivalent Potential Temperature(°C)',
  'wbt': 'Wet Bulb Temperature(°C)',
}

# Hand-tuned layout of each panel, listed in flattened-axes order
# (index 0..5 = t2m MAM, t2m JJAS, ept MAM, ept JJAS, wbt MAM, wbt JJAS).
# `xlim` is None where the x-limits are left to matplotlib.
PANEL_SPECS = [
  dict(column='t2m', annotate_xy=(38, 0.014),
       arrow=dict(x=36, y=0.016, dx=1.5, width=.002, head_width=0.008, head_length=0.4),
       xticks=np.arange(20, 45+1, 5), xlim=None,
       yticks=np.arange(0, 0.25 + 0.01, 0.05), ylim=(0, 0.25)),
  dict(column='t2m', annotate_xy=(35.4, 0.046),
       arrow=dict(x=33.6, y=0.05, dx=1.3, width=.0035, head_width=0.0125, head_length=0.4),
       xticks=np.arange(20, 42+1, 5), xlim=(20, 42),
       yticks=np.arange(0, 0.45 + 0.01, 0.05), ylim=(0, 0.45)),
  dict(column='ept', annotate_xy=(110, 0.0078),
       arrow=dict(x=104.3, y=0.0085, dx=4.3, width=.0003, head_width=0.0018, head_length=1.1),
       xticks=np.arange(35, 125+1, 15), xlim=(32, 135),
       yticks=np.arange(0, 0.045 + 0.001, 0.01), ylim=(0, 0.045)),
  dict(column='ept', annotate_xy=(108.85, 0.02),
       arrow=dict(x=104.8, y=0.0215, dx=3, width=.0012, head_width=0.0055, head_length=0.9),
       xticks=np.arange(75, 130+1, 10), xlim=None,
       yticks=np.arange(0, 0.18 + 0.01, 0.03), ylim=(0, 0.17)),
  dict(column='wbt', annotate_xy=(30.2, 0.062),
       arrow=dict(x=28.25, y=0.065, dx=1.5, width=.002, head_width=0.008, head_length=0.4),
       xticks=np.arange(15, 35+1, 5), xlim=(11, 35),
       yticks=np.arange(0, 0.25 + 0.01, 0.05), ylim=(0, 0.25)),
  dict(column='wbt', annotate_xy=(29.2, 0.1),
       arrow=dict(x=28.4, y=0.115, dx=0.6, width=.008, head_width=0.035, head_length=0.15),
       xticks=np.arange(22, 32+1, 2), xlim=None,
       yticks=np.arange(0, 1 + 0.01, 0.1), ylim=(0, 1)),
]


def _select_years(season_df, years):
  """Return the rows of `season_df` whose `time` year lies in the inclusive range `years`."""
  year = season_df['time'].dt.year
  return season_df[(year >= years[0]) & (year <= years[1])]


def _draw_pdf_panel(ax, season_df, season_title, spec):
  """Draw one KDE comparison panel on `ax`.

  ax           : matplotlib Axes to draw on.
  season_df    : frame of one season with a datetime `time` column and the
                 column named by spec['column'].
  season_title : panel title ('Pre-monsoon' / 'Monsoon').
  spec         : one entry of PANEL_SPECS (data column plus layout numbers).
  """
  column = spec['column']
  early = _select_years(season_df, EARLY_YEARS)
  late = _select_years(season_df, LATE_YEARS)

  # Evaluate KDE
  kde_early, kde_late, x_vals = calculate_kde(early[column], late[column])

  # Tail mask: computed once and shared by both fills and by the area statistic.
  in_tail = x_vals > np.nanpercentile(season_df[column], TAIL_PERCENTILE)

  ax.plot(x_vals, kde_early, label=f'{column} ({EARLY_YEARS[0]}-{EARLY_YEARS[1]})', color='blue', linestyle='--')
  ax.plot(x_vals, kde_late, label=f'{column} ({LATE_YEARS[0]}-{LATE_YEARS[1]})', color='red')
  ax.fill_between(x_vals, kde_late, where=in_tail, color='red', alpha=0.5)
  ax.fill_between(x_vals, kde_early, where=in_tail, color='blue', alpha=0.8)

  # Percentage change of the shaded tail area; ':+.2f' prints the real sign
  # so a decrease shows as '-x%' rather than '+-x%'.
  increased_area = increase_in_filled_region(kde_early, kde_late, x_vals, in_tail)
  ax.annotate(f'{increased_area:+.2f}%', xy=spec['annotate_xy'], fontsize=9, color='red')
  ax.arrow(dy=0, color='red', overhang=0.1, **spec['arrow'])

  # Ticks first, then limits, so explicit limits win over tick-driven autoscaling.
  ax.set_xticks(spec['xticks'])
  if spec['xlim'] is not None:
    ax.set_xlim(*spec['xlim'])
  ax.set_yticks(spec['yticks'])
  ax.set_ylim(*spec['ylim'])

  # Labels and titles
  # NOTE(review): the y values are KDE densities, while the label reads
  # 'probability of occurrence' -- confirm the intended wording.
  ax.set_xlabel(X_LABELS[column])
  ax.set_ylabel('probability of occurrence')
  ax.set_title(season_title, fontsize=10)

  # Add legends
  ax.legend(loc='right', bbox_to_anchor=(1, 0.92), ncol=2)


fig, axs = plt.subplots(3, 2, figsize=(15, 12))
axs=axs.flatten()

# Even flattened index = left column (MAM), odd = right column (JJAS).
seasons = [(df_mam, 'Pre-monsoon'), (df_jjas, 'Monsoon')]
for panel_index, spec in enumerate(PANEL_SPECS):
  season_df, season_title = seasons[panel_index % 2]
  _draw_pdf_panel(axs[panel_index], season_df, season_title, spec)

plt.subplots_adjust( wspace=0.2,hspace=0.28,top=0.9)
plt.suptitle('Probability Density Functions',y=0.94,fontsize=12)

# Save the figure (before plt.show(), so the file never depends on how the
# active backend treats a figure once it has been shown)
fig.savefig("/content/drive/MyDrive/AP_HW/Scripts-ll/Manuscript codes/All_Figures/Fig_3_probability_density_functions.jpg",
            dpi=300, format="jpg", bbox_inches="tight")

plt.show()