In [2]:
import pandas as pd
import os
import xarray as xr


def isFilePresent(file_name):
    """Return True when ``file_name`` points to an existing regular file.

    Directories and non-existent paths both yield False, as reported by
    ``os.path.isfile``.
    """
    return os.path.isfile(file_name)

## This function reads the climate data from a .nc file and appends it to the result dataframe, one row per grid point.
## The .nc file holds climate data for 2-4 latitude/longitude combinations around the requested area.
def convert_nc_to_climate_data_row(date, latitude, longitude, area, day_night_ind, result_df):
    """Append the climate readings stored in one .nc file to ``result_df``.

    The file path is built from the year part of ``date`` together with
    ``longitude``, ``latitude``, ``date`` and ``day_night_ind``. For every
    (latitude, longitude) grid point found in the file one row is appended to
    ``result_df`` in place, taking the first entry along the leading axis of
    each climate variable.

    Args:
        date: Dash-separated date string; the first part is used as the year
            (sub-directory name) and the second as the month.
        latitude: Latitude used in the file name.
        longitude: Longitude used in the file name.
        area: Not used by this function; accepted so existing callers keep
            working.
        day_night_ind: Day/night indicator used in the file name and stored in
            the ``daynight`` column.
        result_df: DataFrame that receives the new rows (mutated in place).

    Returns:
        None. Errors raised by ``xr.open_dataset`` (for example when the file
        does not exist) propagate to the caller.
    """
    date_parts = date.split("-")
    year = date_parts[0]
    month = date_parts[1]  # kept as the string taken from the date, e.g. "06"

    file_name = ('../dataset/nc_files/' + year + '/weather-' + str(longitude) + '-' + str(latitude)
                 + '-' + date + '-' + day_night_ind + '.nc')

    # (output column, variable name inside the .nc file), in output order.
    column_to_variable = (
        ('10m_u_component_of_wind', 'u10'),
        ('10m_v_component_of_wind', 'v10'),
        ('2m_temperature', 't2m'),
        ('soil_temperature_level_1', 'stl1'),
        ('soil_temperature_level_2', 'stl2'),
        ('soil_temperature_level_3', 'stl3'),
        ('soil_temperature_level_4', 'stl4'),
        ('soil_type', 'slt'),
        ('total_precipitation', 'tp'),
        ('volumetric_soil_water_layer_1', 'swvl1'),
        ('volumetric_soil_water_layer_2', 'swvl2'),
        ('volumetric_soil_water_layer_3', 'swvl3'),
        ('volumetric_soil_water_layer_4', 'swvl4'),
    )

    # Read everything that is needed while the file is open, then release the
    # handle; previously the dataset was never closed.
    with xr.open_dataset(file_name) as ds:
        latitudes = ds['latitude'].values
        longitudes = ds['longitude'].values
        # values[0,] selects the first entry along the leading axis, leaving a
        # grid indexed as [latitude index, longitude index].
        grids = {column: ds[variable].values[0,] for column, variable in column_to_variable}

    for i, ds_latitude in enumerate(latitudes):
        for j, ds_longitude in enumerate(longitudes):
            new_row = {'longitude': ds_longitude,
                       'latitude': ds_latitude,
                       'date': date,
                       'month': month,
                       'daynight': day_night_ind}
            for column, grid in grids.items():
                new_row[column] = grid[i, j]
            # Append in place so the caller's DataFrame object is the one that grows.
            result_df.loc[len(result_df)] = new_row

## This function reads the fire records and converts the already-downloaded climate .nc files into a csv file.
## Some of this code duplicates the logic used to download the climate data.
def convert_nc_to_csv(file_name):
    """Build the climate csv that corresponds to one fire-data csv.

    Reads ``../dataset/firedata/<file_name>.csv``, keeps records with
    ``confidence >= 90`` and ``type == 0``, averages them per
    (``acq_date``, ``daynight``) group, and for each group calls
    ``convert_nc_to_climate_data_row`` to collect the climate rows of the
    matching .nc file. The collected rows are written to
    ``../dataset/climatedata/<file_name>.csv``.

    Args:
        file_name: Base name (no directory, no extension) shared by the input
            fire-data csv and the output climate csv.

    Returns:
        None. The result is written to disk.
    """
    file_path = "../dataset/firedata/" + file_name + ".csv"
    fire_data = pd.read_csv(file_path)

    result_filename = "../dataset/climatedata/" + file_name + ".csv"
    # Make sure the output directory exists before the per-group work starts,
    # so a missing directory cannot make the final write fail after all the
    # .nc files have been processed.
    os.makedirs(os.path.dirname(result_filename), exist_ok=True)

    filtered_fire_data = fire_data.query("`confidence` >= 90 and `type` == 0")
    grouped_fire_data = filtered_fire_data.groupby(['acq_date', 'daynight']).agg(
        {'latitude': 'mean', 'longitude': 'mean', 'brightness': 'mean', 'confidence': 'mean', 'frp': 'mean',
         'bright_t31': 'mean'}).reset_index()

    # Output schema: (column name, dtype), in output order.
    schema = (
        ('longitude', 'float'),
        ('latitude', 'float'),
        ('date', 'str'),
        ('month', 'int'),
        ('daynight', 'str'),
        ('10m_u_component_of_wind', 'float'),
        ('10m_v_component_of_wind', 'float'),
        ('2m_temperature', 'float'),
        ('soil_temperature_level_1', 'float'),
        ('soil_temperature_level_2', 'float'),
        ('soil_temperature_level_3', 'float'),
        ('soil_temperature_level_4', 'float'),
        ('soil_type', 'float'),
        ('total_precipitation', 'float'),
        ('volumetric_soil_water_layer_1', 'float'),
        ('volumetric_soil_water_layer_2', 'float'),
        ('volumetric_soil_water_layer_3', 'float'),
        ('volumetric_soil_water_layer_4', 'float'),
    )
    result_df = pd.DataFrame({column: pd.Series(dtype=dtype) for column, dtype in schema})

    # Half-width of the bounding box, in degrees.
    # NOTE(review): presumably ~1 km (1 degree is roughly 111 km) - confirm.
    distance = 1 / 111
    for modis_data_index, modis_row in grouped_fire_data.iterrows():
        date_value = modis_row['acq_date']
        confidence = modis_row['confidence']
        latitude = modis_row['latitude']
        longitude = modis_row['longitude']
        day_night_ind = modis_row['daynight']
        # The records were already filtered to confidence >= 90, so the group
        # mean normally passes; the check is kept as a guard.
        if confidence >= 90:
            north_coord = latitude + distance
            south_coord = latitude - distance
            west_coord = longitude - distance
            east_coord = longitude + distance
            area = [north_coord, west_coord, south_coord, east_coord]
            convert_nc_to_climate_data_row(date_value, latitude, longitude, area, day_night_ind, result_df)

    result_df.to_csv(result_filename)


In [3]:
## Execute convert_nc_to_csv for each fire data csv file listed in the tuple below.
## Only "canada_2023" is enabled; to convert earlier years as well, add
## "canada_2022" down to "canada_2010" to the tuple.

for fire_data_name in ("canada_2023",):
    convert_nc_to_csv(fire_data_name)