In [None]:
## packages
from calendar import monthrange
import cartopy.crs as ccrs
from datetime import datetime, timedelta
import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import xarray as xr
import shutil
import warnings
warnings.filterwarnings("ignore")


# 1) FUNCTIONS

In [None]:
def select_netCDF(path, date):
    """
    Given a date, selects the corresponding NetCDF file ending with *YYYYMM.nc.
    """
    year, month, _ = date.split('-')  # Extract year and month from the date string

    # Search for files ending with *YYYYMM.nc
    files = [
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(path)
        for filename in filenames if filename.endswith(f'{year}{month}.nc')
    ]
    if not files:  # If the list is empty, return None or handle the error appropriately
        return None
    return files[0]


def calculate_square_bounds(lon, lat, side_km):
    """
    Calculate the bounds of a square centered at a given lon/lat.
    
    Parameters:
        lon (float): Longitude of the center.
        lat (float): Latitude of the center.
        side_length_km (float): Side length of the square in kilometers.
    
    Returns:
        dict: Dictionary with the extreme coordinates of the square.
    """
    earth_circ_km = 40075 #km
    km_per_degree =  earth_circ_km/360  # this gets how much is a degree in the equator


    # Calculate how much our sqaure will be 
    half_side_lat_degree = (side_km / 2) / (km_per_degree) 
    half_side_lon_degree = (side_km / 2) / (km_per_degree * np.cos(np.deg2rad(lat))) # this accounts for the fact that 
                                                                                     # meridians are closer together as 
                                                                                     # you approach the poles
    
    # Calculate bounds
    lat_min = lat - half_side_lat_degree
    lat_max = lat + half_side_lat_degree
    lon_min = lon - half_side_lon_degree
    lon_max = lon + half_side_lon_degree

    # Return the bounds as a tuple
    return lat_min, lat_max, lon_min, lon_max


def select_area_climex2(u_v, lat_min, lat_max, lon_min, lon_max):
    
    # load the rotated coordinates system
    pole_longitude = u_v.rotated_pole.grid_north_pole_longitude
    pole_latitude = u_v.rotated_pole.grid_north_pole_latitude
    rotated_pole = ccrs.RotatedPole(pole_longitude=pole_longitude, pole_latitude=pole_latitude)

    # transform the lon/lat min/max into the rotated coordinates
    geodetic = ccrs.PlateCarree()

    # Convert geographic lat/lon to rotated coordinates
    rlon_min, rlat_min = rotated_pole.transform_point(lon_min, lat_min, geodetic)
    rlon_max, rlat_max = rotated_pole.transform_point(lon_max, lat_max, geodetic)


    # Select the area
    region = u_v.sel(rlon=slice(rlon_min, rlon_max), rlat=slice(rlat_min, rlat_max))

    return region


def create_wind_dataframe(fire_date, region, lon, lat, climate_data, duration=24, step=3):
    """
    Create a DataFrame with average wind data for a specified duration with steps of 3 hours 
    from the CLIMEX dataset, using the rotated coordinate system.

    Parameters:
    - fire_date (str): Start date of the fire in 'YYYY-MM-DD' format.
    - region (xarray.Dataset): CLIMEX dataset containing u_v data in rotated coordinates.
    - lon (float): Longitude of the fire location.
    - lat (float): Latitude of the fire location.
    - duration (int): Total duration in hours to consider (default: 24 h).
    - step (int): Time step interval in hours (default: 3).
    
    Returns:
    - pd.DataFrame: DataFrame with columns ['date', 'wind_speed', 'wind_direction', 'rlon', 'rlat'].
    """
    time_steps = int(duration / step)

    # Initialize the DataFrame
    df = pd.DataFrame(columns=['date', 'wind_speed', 'wind_direction', 'lon', 'lat'])


    for i in range(time_steps):
            # Calculate the time_step
            if i == 0:
                time_step = datetime.strptime(fire_date, '%Y-%m-%d') + timedelta(hours=12)
            else:
                time_step = datetime.strptime(fire_date, '%Y-%m-%d') + timedelta(hours=12) + i * timedelta(hours=step)

            
            # extract u and v for the selected time
            u = region['uas'].sel(time = np.datetime64(time_step))
            v = region['vas'].sel(time = np.datetime64(time_step))
            
            # do the mean over the coordinates
            if climate_data == 'climex2':
                u_mean = u.mean(dim=['rlon', 'rlat'], skipna=True).values
                v_mean = v.mean(dim=['rlon', 'rlat'], skipna=True).values
            
            elif climate_data == 'era5-land':
                u_mean = u.mean(dim=['lon', 'lat'], skipna=True).values
                v_mean = v.mean(dim=['lon', 'lat'], skipna=True).values



            # calculate the wind speed magnitude and direction
            wind_speed = np.sqrt(u_mean**2 + v_mean**2)
            wind_direction = (180 / np.pi) * np.arctan2(-v_mean, -u_mean) % 360

            if wind_speed == np.nan:
                print(f"{lat} N, {lon} E")
                print(time_step)

                wind_speed = 0
            if wind_direction == np.nan:
                wind_direction = 0


            # Format the date as 'YYYY-MM-DD_HH:mm:ssZ'
            formatted_date = time_step.strftime('%Y-%m-%d_%H:%M:%SZ')
            
            # Create a new row for this time step
            new_row = pd.DataFrame({
                'date': [formatted_date],
                'wind_speed': [wind_speed],
                'wind_direction': [wind_direction],
                'lon': [lon],
                'lat': [lat]
            })

            # Append the new row to the DataFrame
            df = pd.concat([df, new_row], ignore_index=True)
    
    return df

# 2) LOAD IGNITIONS

# 3) SELEC THE CLIMATE DATA

In [3]:
region = 'south-central' # 'north' 
save_path = fr"C:\Users\jsoma\Desktop\fires\{region}"

if region == 'south-central':
    pathFires = r'O:\Climate-and-Energy-Policy\CERM\Projects\Wildfire\Data\ML-data-Med-Central-Europe-gridded\results\ignition-selection'
elif region == 'north':
    pathFires = r'O:\Climate-and-Energy-Policy\CERM\Projects\Wildfire\Data\ML-data-Northern-Europe-gridded\results\ignition-selection'

In [4]:
climate_data = 'era5-land' # era5-land
scenario = 'etp' # eop



if climate_data == 'climex2':
    if scenario == 'etp':
        # directory to find the  climate data
        directory = r'O:\Public\sharing-4270-CERM\VLYMI\CLIMEX2\GlobusDownload\ClimExII_4_Friends\ClimExII_4_Friends\etp'
        folder_path = save_path + '/' + scenario
        print(folder_path)
    elif scenario == 'eop':
        # directory to find the  climate data
        directory = r'O:\Public\sharing-4270-CERM\VLYMI\CLIMEX2\GlobusDownload\ClimExII_4_Friends\ClimExII_4_Friends\eop'
        folder_path = save_path + '/' + scenario
        print(folder_path)

if climate_data == 'era5-land':
    # get the ignition files
    fires_files = pathFires + '/fire-season-ignition-points-2008-2023thresh0.7.csv'
    fires = pd.read_csv(fires_files)

    # directory to find the  climate data
    directory = r'O:\Climate-and-Energy-Policy\CERM\Projects\Wildfire\Data\ERA5-land\unprocess\wind'
    folder_path = save_path + '/' + climate_data
    print(folder_path)


C:\Users\jsoma\Desktop\fires\south-central/era5-land


In [5]:
fires

Unnamed: 0.1,Unnamed: 0,date,lon,lat
0,0,2008-06-01,14.491667,37.301251
1,1,2008-06-01,21.517020,39.343172
2,2,2008-06-01,23.160206,40.615252
3,3,2008-06-01,20.061604,42.023694
4,4,2008-06-01,9.012799,39.955603
...,...,...,...,...
22533,22533,2023-10-29,24.868278,35.158036
22534,22534,2023-10-30,14.220456,37.520762
22535,22535,2023-10-30,21.689154,41.781243
22536,22536,2023-10-31,13.705589,37.472079


# 4 ) LOOP OVER IGNITIONS 

In [None]:
for index, row in fires.iterrows():
    
    # stuore the date, lon and lat
    date = row['date']
    lon = row['lon']
    lat = row['lat']

    # Create a folder ----------------------------------------
    folder_name =  folder_path + f"\\{date}_{lon:.2f}E_{lat:.2f}N"
    os.makedirs(folder_name, exist_ok=True)

    # --------------------------------------------------------


    # 1) select the time for the netcdf/open the files
    fire_date = date
    year, month, day = date.split('-')

    last_day = monthrange(int(year), int(month))[1]
    last_day = str(last_day)



    if day != last_day:
        if climate_data == 'climex2':
            # Get files name -----------------
            ufile= f'{directory}//{year}//uas_{scenario}_{year}{month}_se.nc'
            vfile= f'{directory}//{year}//vas_{scenario}_{year}{month}_se.nc'

            
            # Get thet netcdf and combine them
            u = xr.open_dataset(ufile)
            v = xr.open_dataset(vfile)
            u_v = xr.merge([u,v])

        elif climate_data == 'era5-land':
            # Get files name -----------------
            file_start_fire = select_netCDF(directory, fire_date)

            # Get thet netcdf ----------------
            u_v = xr.open_dataset(file_start_fire)
            u_v = u_v.rename({'valid_time': 'time', 
                            'latitude':'lat', 
                            'longitude': 'lon', 
                            'u10': 'uas', 
                            'v10': 'vas'})

        # 2 ) Select fire date
        date_ini = date
        date_end = (datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
        u_v = u_v.sel(time=slice(date_ini, date_end))

        # 3) Select the area for our simulation 
        side_length_km = 30  
        lat_min, lat_max, lon_min, lon_max = calculate_square_bounds(lon, lat, side_length_km)

        # Extract region
        region = (select_area_climex2(u_v, lat_min, lat_max, lon_min, lon_max) 
                if climate_data == 'climex2' 
                else u_v.sel(lat=slice(lat_max, lat_min), lon=slice(lon_min, lon_max)))

        # 4) Calculate the mean wind profile
        wind_df = create_wind_dataframe(date, region, lon, lat, climate_data)

        # Save the wind data in the folder
        csv_path = os.path.join(folder_name, 'wind_data.csv')
        wind_df.to_csv(csv_path, index=False)

    else:
        next_month = f"{int(month) + 1:02d}" if month != "12" else "01"

        if climate_data == 'climex2':
            # Get files name -----------------
            ufile_current= f'{directory}//{year}//uas_{scenario}_{year}{month}_se.nc'
            vfile_current= f'{directory}//{year}//vas_{scenario}_{year}{month}_se.nc'

            ufile_next= f'{directory}//{year}//uas_{scenario}_{year}{next_month}_se.nc'
            vfile_next= f'{directory}//{year}//vas_{scenario}_{year}{next_month}_se.nc'
            
            # Get thet netcdf and combine them
            u_current = xr.open_dataset(ufile_current)
            v_current = xr.open_dataset(vfile_current)
            
            u_next = xr.open_dataset(ufile_next)
            v_next = xr.open_dataset(vfile_next)

            u_v_current = xr.merge([u_current,v_current])
            u_v_next = xr.merge([u_next,v_next])
            u_v = xr.concat([u_v_current, u_v_next], dim='time')
       



01
01
01
01
01
02
02
02
02
02
03
03
03
04
04
05
05
06
06
07
07
07
08
08
08
09
09
09
10
10
10
11
11
11
11
12
12
12
12
13
13
13
13
14
14
14
14
14
15
15
15
15
16
16
16
17
17
17
17
17
17
18
18
18
18
18
18
18
19
19
19
19
19
19
19
20
20
20
20
20
20
20
20
21
21
21
21
21
21
21
21
21
21
22
22
22
22
22
22
22
22
22
22
22
23
23
23
23
23
23
23
23
23
23
23
24
24
24
24
24
24
24
24
24
24
24
24
25
25
25
25
25
25
25
25
25
25
25
25
25
25
26
26
26
26
26
26
26
26
26
26
26
26
26
26
26
27
27
27
27
27
27
27
27
27
27
27
27
27
28
28
28
28
28
28
28
28
28
28
28
28
29
29
29
29
29
29
29
29
29
29
29
29
30
30
30
30
30
30
30
30
30
30
30
30
01
01
01
01
01
01
01
01
01
01
01
01
02
02
02
02
02
02
02
02
02
02
03
03
03
03
03
03
03
03
03
04
04
04
04
04
04
04
04
04
05
05
05
05
05
05
05
05
05
05
06
06
06
06
06
06
06
06
06
06
07
07
07
07
07
07
07
07
07
07
07
08
08
08
08
08
08
08
08
08
08
09
09
09
09
09
09
09
09
09
09
09
10
10
10
10
10
10
10
10
10
10
10
10
11
11
11
11
11
11
11
11
11
11
11
12
12
12
12
12
12
12
12
12
12
12
12
13
1

In [12]:
# Convert date to datetime and add one day
next_date = (datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
next_date

'2023-11-01'

In [17]:
month = '06'
next_month = f"{int(month) + 1:02d}" if month != "12" else "01"
next_month

'07'

In [18]:
# Get files name -----------------
            ufile_current= f'{directory}//{year}//uas_{scenario}_{year}{month}_se.nc'
            vfile_current= f'{directory}//{year}//vas_{scenario}_{year}{month}_se.nc'

            ufile_next= f'{directory}//{year}//uas_{scenario}_{year}{next_month}_se.nc'
            vfile_next= f'{directory}//{year}//vas_{scenario}_{year}{next_month}_se.nc'
            
            # Get thet netcdf and combine them
            u_current = xr.open_dataset(ufile_current)
            v_current = xr.open_dataset(vfile_current)
            
            u_next = xr.open_dataset(ufile_next)
            v_next = xr.open_dataset(vfile_next)

            u_v_current = xr.merge([u_current,v_current])
            u_v_next = xr.merge([u_next,v_next])
            u_v = xr.concat([u_v_current, u_v_next], dim='time')

IndentationError: unexpected indent (812801737.py, line 2)