In [1]:
import pandas as pd
### use xarray for extracting temperature data from .nc files
import xarray as xr 
import numpy as np
import geopandas as gpd
import datetime
import os
import metpy.calc as mpcalc
from metpy.units import units
import rioxarray
import zipfile
import os

# Directory that the unzipped NetCDF files are extracted into (and later read back from).
# NOTE(review): absolute, machine-specific path — the notebook will not run elsewhere as-is.
extract_dir = '/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/dewpoint/brazil/extracted'
# Polygon layer loaded with geopandas; later cells read its 'adm1' / 'adm2' columns,
# its geometries and its CRS when clipping the gridded data.
gdf = gpd.read_file('/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/adm0_shp/brazil/clean_shp.shp')

In [3]:

# Create the extraction target if it is not there yet
os.makedirs(extract_dir, exist_ok=True)

# Unpack each yearly archive from the `11_12` folder (presumably November-December —
# confirm) and give its NetCDF payload a year-stamped file name.
for year in ['2017', '2018', '2019', '2020', '2021']:
    zip_path = f'/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/dewpoint/brazil/unextracted/11_12/era5_land_dewpoint_{year}.netcdf.zip'
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(extract_dir)
        nc_members = [member for member in archive.namelist() if member.endswith('.nc')]
        # Every .nc member is moved onto the same per-year target name, so an archive
        # holding more than one .nc file would keep only the last one.
        for member in nc_members:
            os.rename(
                os.path.join(extract_dir, member),
                os.path.join(extract_dir, f'era5_land_dewpoint_11_12_{year}.nc'),
            )
           



In [4]:

# Create the extraction target if it is not there yet
os.makedirs(extract_dir, exist_ok=True)

# Unpack each yearly archive from the `01_03` folder (presumably January-March —
# confirm) and give its NetCDF payload a year-stamped file name.
for year in ['2018', '2019', '2020', '2021', '2022']:
    zip_path = f'/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/dewpoint/brazil/unextracted/01_03/era5_land_dewpoint_{year}.netcdf.zip'
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(extract_dir)
        nc_members = [member for member in archive.namelist() if member.endswith('.nc')]
        # Every .nc member is moved onto the same per-year target name, so an archive
        # holding more than one .nc file would keep only the last one.
        for member in nc_members:
            os.rename(
                os.path.join(extract_dir, member),
                os.path.join(extract_dir, f'era5_land_dewpoint_01_03_{year}.nc'),
            )
           



In [6]:

# Create the extraction target if it is not there yet
os.makedirs(extract_dir, exist_ok=True)

# Unpack the supplementary `_xtra` archives stored in the `01_03` folder and give each
# NetCDF payload a year-stamped `_xtra` file name.
for year in ['2018', '2019', '2020', '2021', '2022']:
    zip_path = f'/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/dewpoint/brazil/unextracted/01_03/era5_land_dewpoint_{year}_xtra.netcdf.zip'
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(extract_dir)
        nc_members = [member for member in archive.namelist() if member.endswith('.nc')]
        # Every .nc member is moved onto the same per-year target name, so an archive
        # holding more than one .nc file would keep only the last one.
        for member in nc_members:
            os.rename(
                os.path.join(extract_dir, member),
                os.path.join(extract_dir, f'era5_land_dewpoint_01_03_{year}_xtra.nc'),
            )
           



In [None]:
# Assemble one dataset per season (the `11_12` file of `year` plus the two `01_03`
# files of the following year), derive relative humidity and MetPy's heat index,
# and keep the daily maximum of every variable at each grid cell.
data_arrays = []
for year in ['2017', '2018', '2019', '2020', '2021']:
    following_year = int(year) + 1

    season_files = [
        f'era5_land_dewpoint_11_12_{year}.nc',
        f'era5_land_dewpoint_01_03_{following_year}.nc',
        f'era5_land_dewpoint_01_03_{following_year}_xtra.nc',
    ]
    season_parts = [xr.open_dataset(os.path.join(extract_dir, name)) for name in season_files]

    # Join the pieces along the time axis, order them chronologically and
    # rename the axis from 'valid_time' to 'time'.
    season = (
        xr.concat(season_parts, dim='valid_time')
        .sortby('valid_time')
        .rename({'valid_time': 'time'})
    )

    # Shift every timestamp back by three hours (intended as Brasilia local time).
    # NOTE(review): this assumes the source timestamps are UTC — confirm.
    season['time'] = season['time'] - pd.Timedelta(hours=3)

    # Kelvin -> Celsius for air temperature and dewpoint
    for var_name in ('t2m', 'd2m'):
        season[var_name] = season[var_name] - 273.15

    # Relative humidity from temperature and dewpoint, then scaled by 100 to a percentage
    season['rh'] = mpcalc.relative_humidity_from_dewpoint(season['t2m'] * units.degC, season['d2m'] * units.degC)
    season['rh'] = season['rh'] * 100

    # Stored as 'thi', but computed with MetPy's heat_index function.
    # NOTE(review): heat index and temperature-humidity index are different indices, and
    # the units heat_index returns may not be degC — confirm both before interpreting values.
    season['thi'] = mpcalc.heat_index(season['t2m'] * units.degC, season['rh'] * units.percent, mask_undefined=False)

    # Daily maximum at each lat/lon position (applied to every variable in the dataset)
    data_arrays.append(season.resample(time='1D').max())

    print('appended')


# Stack all seasons along time and persist the result
concat_daily_max = xr.concat(data_arrays, dim='time')
concat_daily_max.to_netcdf('/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/THI/brazil/array/concat_array.nc')

In [22]:
# Write the combined daily-maximum dataset to its NetCDF file on disk
concat_array_path = '/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/THI/brazil/array/concat_array.nc'
concat_daily_max.to_netcdf(concat_array_path)

In [23]:
# Reload the saved daily-maximum dataset from disk
concat_array_path = '/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/THI/brazil/array/concat_array.nc'
concat_daily_max = xr.open_dataset(concat_array_path)
# Attach EPSG:4326 as the CRS so rioxarray can clip by geometry; kept as the last
# expression so the notebook still displays the dataset.
concat_daily_max.rio.write_crs(4326, inplace=True)

In [24]:
# Find all grid cells that intersect each geometry, then take the mean of the daily
# maximum values across those cells, giving one time series per administrative unit.
data_dic = {}
# Regions whose processing failed, keyed like data_dic, so dropped units can be reviewed
# afterwards instead of disappearing silently.
clip_failures = {}

for _, row in gdf.iterrows():

    adm2 = row['adm2']
    adm1 = row['adm1']
    region_key = f'{adm1}_{adm2}'

    try:
        # Clip the gridded dataset to this geometry; all_touched=True keeps every cell
        # the polygon touches, not only cells whose centre falls inside it.
        clipped_thi = concat_daily_max.rio.clip([row.geometry], all_touched=True, crs=gdf.crs)

        # Flatten the clipped grid to one row per (time, latitude, longitude)
        clipped_thi_df = clipped_thi.to_dataframe().reset_index()

        # The dataset already holds each cell's daily maximum; here we average those
        # maxima over all cells that intersect the geometry, per day.
        clipped_thi_df = clipped_thi_df.groupby('time').mean(numeric_only=True).reset_index()

        clipped_thi_df['adm2'] = adm2
        clipped_thi_df['adm1'] = adm1

        data_dic[region_key] = clipped_thi_df

        print(f'{region_key} appended')

    except Exception as exc:
        # Still best-effort (a failing region does not stop the loop), but the failure is
        # recorded and reported, and KeyboardInterrupt / SystemExit are no longer swallowed.
        clip_failures[region_key] = exc
        print(f'{region_key} skipped: {exc!r}')
        continue

    


Rondonia_Alta Floresta D'Oeste appended
Rondonia_Ariquemes appended
Rondonia_Cabixi appended
Rondonia_Cacoal appended
Rondonia_Cerejeiras appended
Rondonia_Colorado do Oeste appended
Rondonia_Corumbiara appended
Rondonia_Costa Marques appended
Rondonia_Espigao D'Oeste appended
Rondonia_Guajara-Mirim appended
Rondonia_Jaru appended
Rondonia_Ji-Parana appended
Rondonia_Machadinho D'Oeste appended
Rondonia_Nova Brasilandia D'Oeste appended
Rondonia_Ouro Preto do Oeste appended
Rondonia_Pimenta Bueno appended
Rondonia_Porto Velho appended
Rondonia_Presidente Medici appended
Rondonia_Rio Crespo appended
Rondonia_Rolim de Moura appended
Rondonia_Santa Luzia D'Oeste appended
Rondonia_Vilhena appended
Rondonia_Sao Miguel do Guapore appended
Rondonia_Nova Mamore appended
Rondonia_Alvorada D'Oeste appended
Rondonia_Alto Alegre dos Parecis appended
Rondonia_Alto Paraiso appended
Rondonia_Buritis appended
Rondonia_Novo Horizonte do Oeste appended
Rondonia_Cacaulandia appended
Rondonia_Campo Novo d

In [25]:
# Stack the per-region frames into one long table with a fresh 0..n-1 index
region_frames = list(data_dic.values())
final_concat = pd.concat(region_frames, ignore_index=True)

In [26]:
# Remove the latitude/longitude columns; after the per-day averaging they only hold
# the mean of the cell coordinates. Rebinding instead of mutating in place, together
# with errors='ignore', keeps this cell safe to re-run once the columns are gone.
final_concat = final_concat.drop(columns=['latitude', 'longitude'], errors='ignore')

In [27]:
# Export the per-region daily table as CSV; to_csv defaults are used, so the row
# index is written as well.
daily_max_csv_path = '/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/THI/brazil/daily_max_THI.csv'
final_concat.to_csv(daily_max_csv_path)