In [4]:
import pandas as pd
### use xarray for extracting temperature data from .nc files
import xarray as xr 
import numpy as np
import geopandas as gpd
import datetime
import os
import metpy.calc as mpcalc
from metpy.units import units
import rioxarray
import zipfile
import os

# Folder that receives the unzipped ERA5-Land NetCDF files (one per year).
extract_dir = '/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/dewpoint/india/extracted'

# Boundary shapefile; later cells read its 'adm1', 'adm2' and geometry columns.
boundary_shp_path = '/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/adm0_shp/india/final_clean_shp.shp'
gdf = gpd.read_file(boundary_shp_path)

In [7]:

# Unpack each yearly ERA5-Land archive into `extract_dir` and give the NetCDF
# member a predictable name: era5_land_dewpoint_<year>.nc.
os.makedirs(extract_dir, exist_ok=True)

for year in ['2018', '2019', '2020', '2021', '2022']:
    zip_path = f'/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/dewpoint/india/unextracted/era5_land_dewpoint_{year}.netcdf.zip'
    target_path = os.path.join(extract_dir, f'era5_land_dewpoint_{year}.nc')

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        nc_members = [name for name in zip_ref.namelist() if name.endswith('.nc')]

        # Every .nc member is renamed to the same target, so more than one
        # would silently overwrite the others, and none at all would leave a
        # stale file from an earlier run in place. Fail loudly instead.
        if len(nc_members) != 1:
            raise ValueError(
                f'{zip_path}: expected exactly one .nc member, found {nc_members}'
            )

        zip_ref.extractall(extract_dir)

    # os.replace overwrites an existing target on every platform, which keeps
    # this cell safe to re-run.
    os.replace(os.path.join(extract_dir, nc_members[0]), target_path)
           



In [8]:
# Build a daily-maximum dataset for each year, then stitch the years together
# into `concat_daily_max`.
data_arrays = []
for year in ['2018', '2019', '2020', '2021', '2022']:

    nc_file = os.path.join(extract_dir, f'era5_land_dewpoint_{year}.nc')
    ds = xr.open_dataset(nc_file)

    # Order chronologically and use the conventional coordinate name 'time'.
    ds = ds.sortby('valid_time')
    ds = ds.rename({'valid_time': 'time'})

    # Shift timestamps by +05:30 so the daily bins below follow the Indian
    # Standard Time calendar day (presumably the source timestamps are UTC;
    # verify against the download).
    ds['time'] = ds['time'] + pd.Timedelta(hours=5, minutes=30)

    # Convert from Kelvin to Celsius
    ds['t2m'] = ds['t2m'] - 273.15
    ds['d2m'] = ds['d2m'] - 273.15

    # Relative humidity from temperature and dewpoint, scaled to percent.
    ds['rh'] = mpcalc.relative_humidity_from_dewpoint(ds['t2m'] * units.degC, ds['d2m'] * units.degC)
    ds['rh'] = ds['rh'] * 100

    # Stored under 'thi', but the quantity computed is MetPy's heat_index.
    # NOTE(review): confirm that heat index (not a temperature-humidity index
    # formula) is the intended metric.
    ds['thi'] = mpcalc.heat_index(ds['t2m'] * units.degC, ds['rh'] * units.percent, mask_undefined=False)

    # Per-day maximum of every variable in the dataset.
    daily_max_thi = ds.resample(time='1D').max()

    data_arrays.append(daily_max_thi)

    print('appended')


concat_daily_max = xr.concat(data_arrays, dim='time')

# Because of the +05:30 shift, the last hours of each yearly file land on
# 1 January of the following year, so that date appears twice after the concat
# (once per neighbouring file, each a partial-day maximum). Taking the max over
# identical dates merges them into the true daily maximum and leaves a unique
# time axis; downstream per-date means would otherwise average the two halves.
# The very first and very last day remain partial days.
concat_daily_max = concat_daily_max.groupby('time').max(keep_attrs=True)


appended
appended
appended
appended
appended


In [9]:
# Cache the concatenated daily maxima on disk. The file goes into the `array/`
# subfolder because that is the path the following cell re-opens; writing
# anywhere else means a fresh run would reload a stale or missing file.
thi_array_path = '/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/THI/india/array/concat_array.nc'
os.makedirs(os.path.dirname(thi_array_path), exist_ok=True)
concat_daily_max.to_netcdf(thi_array_path)

In [11]:
# Re-open the cached daily-maximum dataset from disk.
thi_array_path = '/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/THI/india/array/concat_array.nc'
concat_daily_max = xr.open_dataset(thi_array_path)

# Attach CRS 4326 to the dataset in place; the district loop below clips it
# with rioxarray.
concat_daily_max.rio.write_crs(4326, inplace=True)

In [12]:

# For every district polygon, clip the daily-maximum grid to it and average
# the covered grid cells into one value per day.
#   data_dic          -> '<adm1>_<adm2>' : per-day DataFrame for that district
#   skipped_districts -> (adm1, adm2, error) for districts that could not be
#                        processed, so gaps in the output are visible
data_dic = {}
skipped_districts = []

for idx, row in gdf.iterrows():

    adm2 = row['adm2']
    adm1 = row['adm1']

    try:
        # all_touched=True keeps every cell the polygon touches, not only
        # cells whose centre lies inside it.
        clipped_thi = concat_daily_max.rio.clip([row.geometry], all_touched=True, crs=gdf.crs)

        # Long format: one row per (time, grid cell).
        clipped_thi_df = clipped_thi.to_dataframe().reset_index()

        # Unweighted mean across the district's grid cells for each day.
        clipped_thi_df = clipped_thi_df.groupby('time').mean(numeric_only=True).reset_index()

        clipped_thi_df['adm2'] = adm2
        clipped_thi_df['adm1'] = adm1

        data_dic[f'{adm1}_{adm2}'] = clipped_thi_df

        print(f'{adm1}_{adm2} appended')

    except Exception as exc:
        # Stay best-effort (one bad polygon must not abort the run), but never
        # silently: record and report what was skipped. Catching Exception
        # rather than using a bare except lets Ctrl-C interrupt the loop.
        skipped_districts.append((adm1, adm2, repr(exc)))
        print(f'{adm1}_{adm2} skipped: {exc!r}')

    


Jammu and Kashmir_Kupwara appended
Jammu and Kashmir_Bandipore appended
Jammu and Kashmir_Baramula appended
Jammu and Kashmir_Ganderbal appended
Jammu and Kashmir_Anantnag appended
Jammu and Kashmir_Kishtwar appended
Jammu and Kashmir_Srinagar appended
Jammu and Kashmir_Badgam appended
Jammu and Kashmir_Pulwama appended
Jammu and Kashmir_Punch appended
Jammu and Kashmir_Shupiyan appended
Jammu and Kashmir_Kulgam appended
Jammu and Kashmir_Rajouri appended
Jammu and Kashmir_Ramban appended
Jammu and Kashmir_Reasi appended
Jammu and Kashmir_Doda appended
Jammu and Kashmir_Jammu appended
Jammu and Kashmir_Kathua appended
Jammu and Kashmir_Udhampur appended
Jammu and Kashmir_Samba appended
Jammu and Kashmir_Muzaffarabad appended
Jammu and Kashmir_Mirpur appended
Himachal Pradesh_Lahul & Spiti appended
Himachal Pradesh_Chamba appended
Himachal Pradesh_Kangra appended
Himachal Pradesh_Kullu appended
Himachal Pradesh_Mandi appended
Himachal Pradesh_Kinnaur appended
Himachal Pradesh_Una append

In [13]:
# Stack the per-district daily frames into one long table with a fresh
# 0..n-1 index.
district_frames = data_dic.values()
final_concat = pd.concat(district_frames, ignore_index=True)

In [14]:
# Drop the 'latitude' and 'longitude' columns from the combined table.
# Reassigning (no inplace) with errors='ignore' makes the cell safe to re-run:
# a second execution is a no-op instead of raising KeyError.
final_concat = final_concat.drop(columns=['latitude', 'longitude'], errors='ignore')

In [15]:
import seaborn as sns

In [16]:
# Sanity check: how many distinct adm1 values made it into the table
# (a missing value, if present, counts as one).
final_concat['adm1'].nunique(dropna=False)

37

In [17]:
# Write the district-level daily table to CSV (the row index is written too).
daily_max_csv_path = '/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/THI/india/daily_max_THI.csv'
final_concat.to_csv(daily_max_csv_path)