In [11]:
#import fwiVis.fwiVis as fv
import s3fs
s3 = s3fs.S3FileSystem(anon=False)
from math import cos, asin, sqrt
import re

import numpy as np
import geopandas as gpd
import pandas as pd
from matplotlib import pyplot as plt
import os
import rioxarray as rio
import xarray as xr
import rasterio
import glob
from shapely.errors import ShapelyDeprecationWarning
from shapely.geometry import Point
import warnings
import folium
import datetime
import time
from folium import plugins
warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning) 
warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning) 
import contextily as cx
from shapely.geometry import box
import sys
from datetime import datetime, timedelta
from itertools import chain
from bs4 import BeautifulSoup # I mamba installed bs4
import requests

from datetime import date


sys.path.insert(0, '/projects/old_shared/fire_weather_vis/base-fwi-vis/')
import fwiVis.fwiVis as fv



This is a notebook for extracting gridded FWI data associated with largefire files. This is different from the "Fire_timeline_extraction" notebook because this notebook assumes two things:
1) That the fire perimeters that are being associated with gridded FWI data are a big GDF generated from "remerge fix", where each fireID represents the *unmerged* portion of the fire. So if fires A and B will eventually merge, the FWI will be associated with the centroid of "A", the centroid of "B", and eventually the centroid of "A_B". The DF of the centroids is "old_shared/fire_weather_vis/Lightning_analysis/lf_centroids/March27_centroids_from_lf_kept_umerged_merged_seperate.csv". 

2) That the fire perimeters only need to be associated with FWI, not with the raw VIIRS timing or candidate lightning strikes, as the fire_Timeline_extraction function does. 

In [2]:
## Steps
# - go into remerge_fix and write out the unmerged dataset (April_1_unmerged_fires.csv)
# - read in
# - steal the merging piece from the previously written function -- might even be easier honestly, and could be accomplished with just a bunch of merges? Maybe a groupby merge apply? 
# - Go redo some of the suppression stuff to see if it was sensitive to that
# - be free!!!!!! Yay!!!

In [56]:
# Load the unmerged fire table through the project helper.
unmerged_fires_csv = "/projects/old_shared/fire_weather_vis/Lightning_analysis/fwi_timeline_only/April_1_unmerged_fires.csv"
fires = fv.prep_fire_files(unmerged_fires_csv)
#fires[fires.fireID.str.contains("_")]

In [None]:
#centroids = fv.prep_fire_files("/projects/old_shared/fire_weather_vis/Lightning_analysis/fwi_timeline_only/April_1_unmerged_fires.csv")

In [58]:
# Keep only the time steps that have a perimeter geometry, i.e. when the fire is active.
has_perimeter = fires.geometry.notna()
# Lat and lon are left out on purpose: they were NaN and carried no information.
kept_columns = [
    'fireID', 't', 'geometry',
    'n_pixels', 'n_newpixels', 'farea', 'fperim', 'flinelen',
    'duration', 'pixden', 'meanFRP',
]
fires = fires[has_perimeter][kept_columns]

In [59]:
fires.columns

Index(['fireID', 't', 'geometry', 'n_pixels', 'n_newpixels', 'farea', 'fperim',
       'flinelen', 'duration', 'pixden', 'meanFRP'],
      dtype='object')

In [60]:
def get_nccs_url(pattern, url = 'https://portal.nccs.nasa.gov/datashare/GlobalFWI/ForecastFWIEXPERIMENTAL/QuebecAllFires.Radius.25.km.401.biggestFires/GEOS-5/GEOS-5.IMERGEARLY/chicletDataNoSmoothing/', ext = 'csv'):
    """Return the one URL in a directory listing that contains ``pattern + "_Lat"``.

    Parameters
    ----------
    pattern : str
        Text to look for in the listed URLs, e.g. ``"FWI.9791"``. It is matched
        as a literal substring, so ``.`` and other regex metacharacters have no
        special meaning.
    url : str
        Directory page whose links are listed with ``listFD``.
    ext : str
        Only links ending with this extension are considered.

    Returns
    -------
    str or None
        The matching URL, or ``None`` (after printing a notice) when nothing matches.

    Raises
    ------
    ValueError
        If more than one listed URL matches.
    """
    # NOTE(review): the "_Lat" suffix presumably terminates the fire ID inside the
    # file name so that one ID cannot match a longer one -- confirm against the listing.
    needle = pattern + "_Lat"

    # Literal substring test: the previous regex-based match let the "." in
    # "FWI.<id>" stand for any character.
    matches = [file_url for file_url in listFD(url, ext) if needle in file_url]

    if not matches:
        print("No matches found to pattern. Returning None.")
        return None
    if len(matches) >= 2:
        print("Multiple matches found:")
        print(matches)
        raise ValueError(
            f"Pattern {needle!r} matched {len(matches)} files; expected exactly one."
        )
    return matches[0]

def get_gridded_fwi(fireID):
    """Download the gridded FWI table for one fire and normalise its columns.

    The file is located with ``get_nccs_url`` using the pattern ``"FWI.<fireID>"``.
    ``INITDATE`` is renamed to ``t``, column ``"0"`` to ``FWI`` and columns
    ``"1"``..``"8"`` to ``FWI_lead_1``..``FWI_lead_8``. ``t`` is then rewritten as
    a string of the form ``YYYY-MM-DD 12:00:00``.

    Parameters
    ----------
    fireID : str or int
        Fire identifier; it is converted with ``str`` before building the pattern.

    Returns
    -------
    pandas.DataFrame or None
        The renamed table, or ``None`` when ``get_nccs_url`` finds no file.
    """
    file_url = get_nccs_url(pattern = "FWI." + str(fireID))

    # Guard clause: nothing to read when no file matched this fire.
    if file_url is None:
        return None

    # NOTE(review): "0".."8" look like forecast lead indices -- confirm with the data provider.
    new_names = {
        'INITDATE': 't',
        "0": "FWI",
        "1": "FWI_lead_1",
        "2": "FWI_lead_2",
        "3": "FWI_lead_3",
        "4": "FWI_lead_4",
        "5": "FWI_lead_5",
        "6": "FWI_lead_6",
        "7": "FWI_lead_7",
        "8": "FWI_lead_8",
    }
    fwi_table = pd.read_csv(file_url).rename(columns=new_names)

    # Every date is stamped with a fixed 12:00:00 time of day and stored as text.
    stamped = fwi_table.t.astype("datetime64[ns]").dt.strftime('%Y-%m-%d 12:00:00')
    fwi_table["t"] = stamped

    return fwi_table

def listFD(url, ext='', timeout=60):
    """List the links on an HTML directory page whose href ends with ``ext``.

    Parameters
    ----------
    url : str
        Address of the directory page. A trailing ``/`` is tolerated.
    ext : str
        Suffix each href must end with; the default ``''`` keeps every link.
    timeout : float
        Seconds to wait for the server before giving up.

    Returns
    -------
    list of str
        ``url`` joined with each matching href by a single ``/``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as an empty listing.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Avoid a doubled slash when the caller's url already ends with "/".
    base = url.rstrip('/')

    # href=True skips <a> tags that carry no href attribute (their href would be None).
    return [
        base + '/' + link['href']
        for link in soup.find_all('a', href=True)
        if link['href'].endswith(ext)
    ]
    

In [68]:
### Define function for groupby apply

def get_unmerged_gridded_fwi(df):
    """Attach the gridded FWI time series to the rows of a single fire.

    Intended for ``fires.groupby("fireID").apply(...)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of exactly one fire, with a ``fireID`` column and a datetime ``t`` column.

    Returns
    -------
    pandas.DataFrame
        ``df`` outer-merged on ``t`` with the FWI rows that fall between the
        fire's first and last ``t`` (inclusive). When no FWI file exists for the
        fire, the fire's rows are returned without FWI columns.

    Raises
    ------
    ValueError
        If ``df`` does not contain exactly one distinct ``fireID``.
    """
    fire_ids = df.fireID.unique()
    if len(fire_ids) != 1:
        raise ValueError(
            f"Expected rows for exactly one fireID, got {len(fire_ids)}: {list(fire_ids)}"
        )
    fid = str(fire_ids[0])

    gridded = get_gridded_fwi(fid)
    if gridded is None:
        # No FWI file for this fire: keep its rows rather than aborting the whole
        # groupby-apply. The index is reset to match what the merge below returns.
        warnings.warn(f"No gridded FWI found for fireID {fid}; returning rows without FWI.")
        return df.reset_index(drop=True)

    gridded = gridded.assign(t=gridded.t.astype("datetime64[ns]"))

    # Only keep the part of the FWI series that overlaps the fire itself.
    overlaps_fire = gridded.t.between(df.t.min(), df.t.max())

    return df.merge(gridded[overlaps_fire], on=['t'], how='outer')


### Run the function
# Cast the timestamps first so they can be compared and merged with the FWI dates.
fires["t"] = fires["t"].astype("datetime64[ns]")
# One call per fire: each group is merged with that fire's own FWI series.
fires_by_id = fires.groupby("fireID")
fires_fwi = fires_by_id.apply(get_unmerged_gridded_fwi)
    



In [70]:
### Write out
# Destination for the fire rows with their FWI columns attached.
fires_with_fwi_csv = "/projects/old_shared/fire_weather_vis/Lightning_analysis/fwi_timeline_only/April_1_unmerged_fires_with_FWI.csv"
fires_fwi.to_csv(fires_with_fwi_csv)

In [20]:
str(*fires[fires.fireID == '9791'].fireID.unique())

'9791'

In [41]:
fires[fires.fireID == "12156"]

Unnamed: 0,fireID,t,geometry,n_pixels,n_newpixels,farea,fperim,flinelen,duration,pixden,meanFRP
98802,12156,2023-07-03 00:00:00,"POLYGON ((3459232.273 1336913.119, 3459216.258...",62.0,62.0,7.982687,10.982714,10.815178,0.0,7.766809,1.89871
98803,12156,2023-07-03 12:00:00,"MULTIPOLYGON (((3458707.167 1338743.537, 34587...",265.0,132.0,19.119226,24.109721,20.877198,0.5,13.860394,35.443561


In [55]:
fires[fires.fireID.str.contains("10713_10054")].fireID.unique()

array(['10713_10054'], dtype=object)

In [52]:
len(fires.fireID.unique())

348

In [44]:
fires[fires.fireID.str.contains("_")]

Unnamed: 0,fireID,t,geometry,n_pixels,n_newpixels,farea,fperim,flinelen,duration,pixden,meanFRP


In [None]:
fires[fires.fireID.str.contains("_")]