In [1]:
import pandas as pd
import cdsapi
import os
import concurrent.futures
import nest_asyncio
import os
import xarray as xr
nest_asyncio.apply()

## This function checks if the file is already present in the directory or not
def isFilePresent(file_name):
    """Tell whether ``file_name`` is an existing file, logging the outcome.

    A one-line status message is printed in both cases so that a long
    download run shows which targets were skipped and which are still missing.

    Parameters
    ----------
    file_name : str
        Path to check. It is concatenated into the status message, so it
        must be a string.

    Returns
    -------
    bool
        The result of ``os.path.isfile(file_name)``.
    """
    exists = os.path.isfile(file_name)
    # Message text is kept exactly as emitted by earlier runs of the notebook.
    prefix = 'Alreaday present : ' if exists else 'file absent : '
    print(prefix + file_name)
    return exists


In [2]:
## This function downloads the climate record using the CDS API, based on the date, the area coordinates (north, west, south, east), and whether the fire occurred during the day or at night
def download_climate_data_xarray_file_only(file_parameters):
    """Download the ERA5 single-level weather snapshot for one fire record.

    The result is stored as
    ``../dataset/nc_files/<year>/weather-<longitude>-<latitude>-<date>-<D|N>.nc``.
    If that file already exists nothing is downloaded, which lets an
    interrupted batch be restarted without repeating finished requests.

    Parameters
    ----------
    file_parameters : dict
        Must contain the keys

        * ``"date"``          -- string in ``YYYY-MM-DD`` form,
        * ``"latitude"``      -- used only in the output file name,
        * ``"longitude"``     -- used only in the output file name,
        * ``"area"``          -- ``[north, west, south, east]`` bounding box
          passed to the CDS request unchanged,
        * ``"day_night_ind"`` -- ``"N"`` selects 22:00, anything else 12:00.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If a required key is missing from ``file_parameters``.
    IndexError
        If ``date`` does not contain at least three ``-``-separated parts.
    Exception
        Whatever ``cdsapi`` raises when the request or the download fails.
    """
    date = file_parameters["date"]
    latitude = file_parameters["latitude"]
    longitude = file_parameters["longitude"]
    area = file_parameters["area"]
    day_night_ind = file_parameters["day_night_ind"]

    date_parts = date.split("-")
    year, month, day = date_parts[0], date_parts[1], date_parts[2]

    file_name = '../dataset/nc_files/'+year+'/weather-'+str(longitude)+'-'+str(latitude)+'-'+date+'-'+day_night_ind+'.nc'

    ## This function is invoked once per fire record of a year and the batch can be interrupted.
    ## When the batch is restarted, files that were already downloaded are skipped here.
    if isFilePresent(file_name):
        return

    ## Day records are sampled at 12:00, night records at 22:00.
    ## NOTE(review): ERA5 'time' values are UTC -- confirm these hours are the intended ones for the fire locations.
    time = '22:00' if day_night_ind == "N" else '12:00'

    request = {
        'product_type': 'reanalysis',
        'format': 'netcdf',
        'variable': [
            '10m_u_component_of_wind', '10m_v_component_of_wind', '2m_temperature',
            'soil_temperature_level_1', 'soil_temperature_level_2', 'soil_temperature_level_3',
            'soil_temperature_level_4', 'soil_type', 'total_precipitation', 'volumetric_soil_water_layer_1',
            'volumetric_soil_water_layer_2', 'volumetric_soil_water_layer_3', 'volumetric_soil_water_layer_4'
        ],
        'year': year,
        'month': month,
        'day': [str(day)],
        'time': [time],
        'area': area,
    }

    ## The per-year output folder may not exist yet on a fresh checkout.
    os.makedirs(os.path.dirname(file_name), exist_ok=True)

    ## Download to a temporary name and rename only on success. Otherwise an
    ## interrupted transfer would leave a truncated file under the final name,
    ## and the presence check above would skip it on every later run.
    partial_name = file_name + '.part'

    ## The client is only created when a download is actually needed.
    client = cdsapi.Client()
    try:
        ## CDS api call which saves the response as a NetCDF file
        client.retrieve('reanalysis-era5-single-levels', request, partial_name)
        os.replace(partial_name, file_name)
    finally:
        ## After a successful rename the partial file no longer exists, so
        ## this only cleans up after a failed or interrupted download.
        if os.path.exists(partial_name):
            os.remove(partial_name)


In [3]:
def group_fire_data_xarray_file_only(file_name, distance=1/111, data_dir="../dataset/firedata"):
    """Build one download request description per (date, day/night) group of fires.

    The CSV ``<data_dir>/<file_name>.csv`` is read, rows with
    ``confidence >= 90`` and ``type == 0`` are kept, and the remaining rows
    are averaged per ``acq_date`` / ``daynight`` pair. For every group a
    bounding box of ``distance`` degrees around the mean position is computed.

    Parameters
    ----------
    file_name : str
        CSV file name without the ``.csv`` extension.
    distance : float, optional
        Half-width of the bounding box, in the same units as the
        ``latitude`` / ``longitude`` columns. Defaults to ``1/111``
        (presumably about one kilometre expressed in degrees of latitude --
        TODO confirm).
    data_dir : str, optional
        Directory containing the CSV. Defaults to ``"../dataset/firedata"``.

    Returns
    -------
    list of dict
        One dict per group, ordered by ``acq_date`` then ``daynight``, with
        keys ``"date"``, ``"latitude"``, ``"longitude"``, ``"area"``
        (``[north, west, south, east]``) and ``"day_night_ind"``.
    """
    file_path = f"{data_dir}/{file_name}.csv"
    fire_data = pd.read_csv(file_path)

    filtered_fire_data = fire_data.query("`confidence` >= 90 and `type` == 0")
    grouped_fire_data = (
        filtered_fire_data
        .groupby(['acq_date', 'daynight'])
        .agg({
            'latitude': 'mean',
            'longitude': 'mean',
            'brightness': 'mean',
            'confidence': 'mean',
            'frp': 'mean',
            'bright_t31': 'mean',
        })
        .reset_index()
    )

    params = []
    # Plain dict records avoid building a Series per row as iterrows() does.
    for record in grouped_fire_data.to_dict('records'):
        # Every row that entered the mean already had confidence >= 90, so
        # this is expected to hold for all groups; kept as a safeguard.
        if not record['confidence'] >= 90:
            continue

        latitude = record['latitude']
        longitude = record['longitude']
        params.append({
            "date": record['acq_date'],
            "latitude": latitude,
            "longitude": longitude,
            # Order is north, west, south, east.
            "area": [
                latitude + distance,
                longitude - distance,
                latitude - distance,
                longitude + distance,
            ],
            "day_night_ind": record['daynight'],
        })

    return params

In [4]:
# Year of fire records to process. The CSV read is "canada_<year>.csv";
# earlier revisions of this cell listed the years 2010 through 2023 as
# alternatives (presumably one CSV per year exists -- verify in the data folder).
FIRE_DATA_YEAR = 2023

file_params = group_fire_data_xarray_file_only(f"canada_{FIRE_DATA_YEAR}")

In [5]:
# Upper bound on the number of download threads.
# NOTE(review): the CDS service may queue or throttle concurrent requests per
# user, so a smaller pool could be just as fast -- confirm against CDS limits.
MAX_DOWNLOAD_WORKERS = 200

# Run one download task per aggregated fire record on a thread pool.
with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_DOWNLOAD_WORKERS) as executor:
    # Map every future back to the record it downloads so a failure can be
    # attributed to a specific date instead of being reported anonymously.
    futures = {
        executor.submit(download_climate_data_xarray_file_only, params): params
        for params in file_params
    }

    # Handle tasks as they finish; one failed download must not stop the rest.
    for future in concurrent.futures.as_completed(futures):
        params = futures[future]
        try:
            future.result()
        except Exception as e:
            print(f"An error occurred for {params['date']} ({params['day_night_ind']}): {e}")

Alreaday present : ../dataset/nc_files/2023/weather--117.14828947368422-53.40439473684211-2023-01-03-D.nc
Alreaday present : ../dataset/nc_files/2023/weather--117.3086-53.0825-2023-01-01-D.nc
Alreaday present : ../dataset/nc_files/2023/weather--117.966975-53.4962-2023-01-02-D.nc
Alreaday present : ../dataset/nc_files/2023/weather--117.544-54.41256666666666-2023-01-01-N.nc
Alreaday present : ../dataset/nc_files/2023/weather--117.73523333333333-55.16806666666667-2023-01-03-N.nc
Alreaday present : ../dataset/nc_files/2023/weather--117.1609-54.921654545454544-2023-01-04-D.nc
Alreaday present : ../dataset/nc_files/2023/weather--115.65476666666666-54.16863333333333-2023-01-06-D.nc
Alreaday present : ../dataset/nc_files/2023/weather--117.94454-56.11586-2023-01-07-N.nc
Alreaday present : ../dataset/nc_files/2023/weather--120.230525-56.843975-2023-01-08-N.nc
Alreaday present : ../dataset/nc_files/2023/weather--117.4282-53.3016-2023-01-04-N.nc
Alreaday present : ../dataset/nc_files/2023/weather-