In [13]:
import pandas as pd
### use xarray for extracting temperature data from .nc files
import xarray as xr 
import numpy as np
import geopandas as gpd
import datetime
import os
import metpy.calc as mpcalc
from metpy.units import units
import rioxarray
import zipfile
import os
# Directory that receives the unzipped ERA5-Land NetCDF files (created on demand
# by the extraction cell below, and read again by the processing cell).
extract_dir = '/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/dewpoint/turkey/extracted'
# Boundary polygons used later to clip the gridded data; the clipping loop reads
# the 'adm1_tr' / 'adm2_tr' columns and the geometry of every row.
# NOTE(review): absolute, user-specific paths — the notebook only runs on this machine.
gdf = gpd.read_file('/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/adm0_shp/turkey/clean_shp.shp')

In [14]:

# Ensure the directory exists
os.makedirs(extract_dir, exist_ok=True)


def _extract_and_rename(zip_path, dest_dir, target_name):
    """Extract ``zip_path`` into ``dest_dir`` and rename its NetCDF member.

    Parameters
    ----------
    zip_path : str
        Path of the ``.zip`` archive to unpack.
    dest_dir : str
        Directory the archive is extracted into.
    target_name : str
        File name (inside ``dest_dir``) that the extracted ``.nc`` member is
        renamed to.

    Returns
    -------
    list of str
        Archive member names ending in ``.nc`` (empty if there were none, in
        which case nothing is renamed).

    Raises
    ------
    FileNotFoundError, zipfile.BadZipFile
        Propagated from ``zipfile.ZipFile`` when the archive is missing or invalid.
    """
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(dest_dir)
        nc_members = [name for name in zip_ref.namelist() if name.endswith('.nc')]

    # Every .nc member is moved to the same target, so with several members only
    # the last one survives. Keep that behaviour but make it visible.
    if len(nc_members) > 1:
        print(f'warning: {zip_path} contains {len(nc_members)} .nc files; '
              f'only the last one is kept as {target_name}')

    new_file_path = os.path.join(dest_dir, target_name)
    for file_name in nc_members:
        # os.replace overwrites an existing target on every platform, so the
        # cell can be re-run without failing on already-extracted files.
        os.replace(os.path.join(dest_dir, file_name), new_file_path)
    return nc_members


# Each year comes as two archives: the main download ('') and the
# leftover-months download ('_xtra'). Both are unpacked and renamed the same way.
for year in ['2018', '2019', '2020', '2021', '2022']:
    for suffix in ['', '_xtra']:
        stem = f'era5_land_dewpoint_us_{year}{suffix}'
        _extract_and_rename(
            f'/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/dewpoint/turkey/unextracted/{stem}.netcdf.zip',
            extract_dir,
            f'{stem}.nc',
        )



In [15]:
# Hours added to the dataset timestamps before daily aggregation.
# NOTE(review): presumably converts ERA5 UTC timestamps to Turkey local time
# (UTC+3); an earlier comment mentioned 'America/Denver', which does not match a
# +3 h offset — confirm the intended timezone.
UTC_OFFSET_HOURS = 3

data_arrays = []
for year in ['2018', '2019', '2020', '2021', '2022']:

    # Each year is split across a main file and an "_xtra" (leftover months) file.
    nc_file = os.path.join(extract_dir, f'era5_land_dewpoint_us_{year}.nc')
    ds_main = xr.open_dataset(nc_file)

    nc_file_xtra = os.path.join(extract_dir, f'era5_land_dewpoint_us_{year}_xtra.nc')
    ds_xtra = xr.open_dataset(nc_file_xtra)

    # Stitch the two pieces together and restore chronological order.
    ds = xr.concat([ds_main, ds_xtra], dim='valid_time')
    ds = ds.sortby('valid_time')

    # Rename 'valid_time' to 'time'
    ds = ds.rename({'valid_time': 'time'})

    # Shift the timestamps so the daily bins below follow local calendar days.
    ds['time'] = ds['time'] + pd.Timedelta(hours=UTC_OFFSET_HOURS)

    # Convert from Kelvin to Celsius
    ds['t2m'] = ds['t2m'] - 273.15
    ds['d2m'] = ds['d2m'] - 273.15

    # Calculate relative humidity (fraction), then express it in percent
    ds['rh'] = mpcalc.relative_humidity_from_dewpoint(ds['t2m'] * units.degC, ds['d2m'] * units.degC)
    ds['rh'] = ds['rh'] * 100

    # Stored as 'thi' because the saved files and later cells use that name.
    # NOTE(review): mpcalc.heat_index is MetPy's heat-index calculation, which may
    # not be the same quantity as a temperature-humidity index — confirm this is
    # the intended metric. mask_undefined=False keeps values that MetPy would
    # otherwise mask.
    ds['thi'] = mpcalc.heat_index(ds['t2m'] * units.degC, ds['rh'] * units.percent, mask_undefined=False)

    # Daily maximum of every variable in the dataset (t2m, d2m, rh, thi).
    daily_max_thi = ds.resample(time='1D').max()

    data_arrays.append(daily_max_thi)

    print('appended')

concat_daily_max = xr.concat(data_arrays, dim='time')

# Because of the hour shift, the final hours of one year's files land on 1 January
# of the next year, so that date can appear twice after concatenation: once as a
# partial day from the earlier year and once from the later year. Collapse
# duplicate day labels with max() so each date occurs once and still holds the
# true daily maximum. This is a no-op when the dates are already unique.
# NOTE(review): the day after the last file's final timestamp may still be a
# partial day (only the spilled-over hours) — consider dropping it downstream.
concat_daily_max = concat_daily_max.groupby('time').max()


appended
appended
appended
appended
appended


In [18]:
# Save the combined daily-maximum dataset to NetCDF; the next cell reloads it
# from this same path.
concat_daily_max.to_netcdf('/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/THI/turkey/array/concat_array.nc')

In [19]:
# Reload the saved daily-maximum dataset from disk (rebinds `concat_daily_max`).
concat_daily_max = xr.open_dataset('/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/THI/turkey/array/concat_array.nc')
# Attach CRS code 4326 to the dataset in place so the rioxarray clip in the
# region loop can relate the grid to the polygon geometries. As the last
# expression of the cell, the returned dataset is also displayed.
concat_daily_max.rio.write_crs(4326, inplace=True)

In [20]:
# Quick look at the mean of the 'longitude' coordinate (shown as the cell output).
concat_daily_max['longitude'].mean()

In [21]:

# Per-region daily time series, keyed by '<adm1>_<adm2>'.
data_dic = {}
# (region key, error repr) for every region that could not be processed, so
# skipped regions are visible instead of silently missing from the output.
failed_regions = []

for idx, row in gdf.iterrows():

    adm2 = row['adm2_tr']
    adm1 = row['adm1_tr']
    region_key = f'{adm1}_{adm2}'

    try:
        # Clip the gridded dataset to this region's polygon; all_touched=True keeps
        # every grid cell the polygon touches, not only cells whose centre is inside.
        clipped_thi = concat_daily_max.rio.clip([row.geometry], all_touched=True, crs=gdf.crs)

        # Flatten the clipped grid to one row per (time, grid cell).
        clipped_thi_df = clipped_thi.to_dataframe().reset_index()

        # Average all grid cells of the region for each day.
        clipped_thi_df = clipped_thi_df.groupby('time').mean(numeric_only=True).reset_index()

        # Label the rows with the region they belong to.
        clipped_thi_df['adm2'] = adm2
        clipped_thi_df['adm1'] = adm1

        data_dic[region_key] = clipped_thi_df

        print(f'{adm1}_{adm2} appended')

    except Exception as exc:
        # Stay best-effort (one bad region must not abort the whole run), but
        # catch Exception rather than everything so Ctrl-C still interrupts, and
        # keep a record of what was skipped and why.
        failed_regions.append((region_key, repr(exc)))
        print(f'{region_key} skipped: {exc!r}')
        continue

if failed_regions:
    print(f'{len(failed_regions)} region(s) skipped; see failed_regions for details')

    


İSTANBUL_ŞİŞLİ appended
İZMİR_ÇİĞLİ appended
İSTANBUL_ŞİLE appended
GÜMÜŞHANE_ŞİRAN appended
SİİRT_ŞİRVAN appended
ÇANKIRI_ŞABANÖZÜ appended
GAZİANTEP_ŞAHİNBEY appended
TRABZON_ŞALPAZARI appended
KÜTAHYA_ŞAPHANE appended
ISPARTA_ŞARKİKARAAĞAÇ appended
TEKİRDAĞ_ŞARKÖY appended
ARTVİN_ŞAVŞAT appended
ANTALYA_İBRADI appended
ŞIRNAK_İDİL appended
GİRESUN_ŞEBİNKARAHİSAR appended
YOZGAT_ŞEFAATLİ appended
GAZİANTEP_ŞEHİTKAMİL appended
MANİSA_ŞEHZADELER appended
KIRŞEHİR_ÇİÇEKDAĞI appended
HAKKARİ_ŞEMDİNLİ appended
ERZURUM_ŞENKAYA appended
KASTAMONU_ŞENPAZAR appended
ANKARA_ŞEREFLİKOÇHİSAR appended
ESKİŞEHIR_ÇİFTELER appended
NİĞDE_ÇİFTLİK appended
YALOVA_ÇİFTLİKKÖY appended
AFYONKARAHİSAR_İHSANİYE appended
KASTAMONU_İHSANGAZİ appended
ŞIRNAK_ŞIRNAK appended
ORDU_İKİZCE appended
RİZE_İKİZDERE appended
ERZİNCAN_İLİÇ appended
DÜZCE_ÇİLİMLİ appended
SAMSUN_İLKADIM appended
ADANA_İMAMOĞLU appended
SİVAS_İMRANLI appended
AYDIN_İNCİRLİOVA appended
KAYSERİ_İNCESU appended
AYDIN_ÇİNE appended
KASTAMON

In [26]:
# Stack the per-region frames collected in `data_dic` into one long table;
# ignore_index=True discards the per-frame indexes in favour of a fresh one.
final_concat = pd.concat(data_dic.values(), ignore_index=True)

In [27]:
# Drop the averaged grid coordinates; after the per-region mean they no longer
# identify a location. Rebinding (instead of inplace=True) together with
# errors='ignore' keeps this cell re-runnable: running it a second time no longer
# raises KeyError because the columns are already gone.
final_concat = final_concat.drop(columns=['latitude', 'longitude'], errors='ignore')

In [44]:
# Export the per-region daily table to CSV.
# NOTE(review): index=False is not passed, so pandas' default also writes the
# row index as a leading unnamed column — confirm consumers of this file expect it.
final_concat.to_csv('/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/THI/turkey/daily_max_THI.csv')

In [35]:
# Reload the exported CSV into `df`.
# NOTE(review): with these default arguments the saved row index should come back
# as an extra 'Unnamed: 0' column and 'time' as plain strings (no parse_dates) —
# confirm that is what later cells expect.
df = pd.read_csv('/Users/shivyucel/Documents/projects/DPhil/Code_Data/data/THI/turkey/daily_max_THI.csv')