In [2]:
#import fwiVis.fwiVis as fv
import s3fs
s3 = s3fs.S3FileSystem(anon=False)
from math import cos, asin, sqrt
import re

import numpy as np
import geopandas as gpd
import pandas as pd
from matplotlib import pyplot as plt
import os
import rioxarray as rio
import xarray as xr
import rasterio
import glob
from shapely.errors import ShapelyDeprecationWarning
from shapely.geometry import Point
import warnings
import folium
import datetime
import time
from folium import plugins
warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning) 
import contextily as cx
from shapely.geometry import box
import sys
from datetime import datetime, timedelta
from itertools import chain

sys.path.insert(0, '/projects/old_shared/fire_weather_vis/base-fwi-vis/')
import fwiVis.fwiVis as fv

In [45]:
### Function fire_timeline

def concat_subsets(files):
    df = []
    for f in files:
        manyfr = pd.read_csv(f)

        manyfr = gpd.GeoDataFrame(manyfr)

        manyfr.t = manyfr.t.astype("datetime64[ns]")
        df.append(manyfr)
    df = pd.concat(df)
    return(df)

def get_lt(lt_string = "Lt_CA_Boreal_"):
    files = glob.glob("/projects/old_shared/fire_weather_vis/Lightning_analysis/computed_data/" + lt_string +"*.csv")
    return(concat_subsets(files))


def fire_timeline(fireID, 
                  lt,
                  year = '2023',
                  path_region="QuebecGlobalNRT_3571" , 
                  check_last = False, 
                  FWI_source = "station" ):
    
    '''
    '''
    
    ## Read in the largefire file of the fireID
    fr = fv.load_large_fire(fireID, year = year, path_region= path_region) ## Cluster of 2 fires. 

        ## TO DO Filter? 
            ## VIIRS Static source filter?
            ## WUI filter? 
            
    ## Subset lightning by time and space
    fr = fr.to_crs("3571")
    lt = gpd.GeoDataFrame(lt, geometry=gpd.points_from_xy(lt.lon, lt.lat), crs=4326) #4674
    lt = lt.to_crs("3571")
    
    ## TO DO: Figure out which CA ecoregion/province the fire is in and subset lighting by that? 
    print("Not yet subseting spatially beyond quebec. Assuming quebec bounding box")
    
    min_threshold = fr.t.astype('datetime64[ns]').min() - timedelta(days = 10)
    possible_lt = lt[lt.t <= fr.t.min()]
    possible_lt = possible_lt[possible_lt.t >= min_threshold]

    oldest_perim = fr[fr.t == fr.t.max()]
    first_perim = fr[fr.t == fr.t.min()]
    first_perim.geometry = first_perim.buffer(750*2) ## Two viirs pixels???
    join_lt = gpd.sjoin(possible_lt, first_perim, predicate = 'within', how = "inner")
    
    if len(join_lt) == 0:
        join_lt["num_candidates"] = 0
        join_lt["num_strikes"] = len(possible_lt)
        join_lt["num_strikes_10_days"] = len(possible_lt)
    else:
        ## Extract "denominator" or the # of trikes from same period
        denominator = possible_lt[possible_lt.t >= join_lt.t_left.min()]
        denominator = denominator[denominator.t <= join_lt.t_left.max()]
        join_lt["num_candidates"] = len(join_lt)
        join_lt["num_strikes"] = len(denominator)
        join_lt["num_strikes_10_days"] = len(possible_lt)
        
    ## Get distance to individuals ignitions
    # fr["perim_rank"] = fr.t.rank()
    # first_geom = fr[fr.perim_rank == 1].geometry
    # first_geom = first_geom.iloc[0]
    # num_starts = len(first_geom.geoms)
    # for i in range(0, num_starts):
    #     join_lt["dist_start_" + str(i)] = join_lt.distance(first_geom.geoms[i].centroid)
    #     print(fr[fr.perim_rank == 1].to_crs("4326").geometry.iloc[0].geoms[i].centroid)
        
    ## Rank candidate by distance
    # range_geoms = list(range(0, num_starts))
    # string = "dist_start_"
    # columns_dists = [string + str(x) for x in range_geoms]
    # top = len(join_lt) * 1 # Top 100%. Could cut to smaller range
    # dist_bool = join_lt[columns_dists].rank() <= top ## NEED a max distance cutoff. 
    # join_lt["candidate"] = dist_bool.any(axis = 1)
    
    ## Get raw VIIRS pixel timing
    date_string = fr.t.astype("datetime64[ns]").max().strftime("%Y%m%d%p")
    raw_obs_times = fv.raw_pixel_times(int(fireID), date_string = date_string)
    raw_obs_times = raw_obs_times.reset_index()
    
    ## get station data
    if(FWI_source == "station"):
        print("Assuming Single Quebec Station. 718270-99999.")
        st = pd.read_csv("s3://veda-data-store-staging/EIS/other/station-FWI/19900101.NRT/FWI/718270-99999.linear.HourlyFWIFromHourlyInterpContinuous.csv") ## Corrected record from Robert
        st.HH = st.HH.astype("int")
        st.YYYY = st.YYYY.astype("int")
        st.MM = st.MM.astype("int")
        st.DD = st.DD.astype("int")
        st = fv.date_convert(st)
        
        st_rm = st[["time", "TEMP_C", 'RH_PERC', 'VPD_HPA', 'WDSPD_KPH',
       'PREC_MM', 'SNOWD_M', 'VIS_KM', 'FFMC', 'DMC', 'DC', 'BUI', 'ISI',
       'FWI', 'OBSMINUTEDIFF_TEMP', 'OBSMINUTEDIFF_RH', 'OBSMINUTEDIFF_WDSPD',
       'ISPRECREPORTED', 'OBSMINUTEDIFF_SNOW', 'OBSMINUTEDIFF_VIS']]
        st_rm = st_rm.rename(columns = {"time":"t"})
        #### Subset station data by time. 
        st_rm = st_rm[st_rm.t >= min_threshold]
        st_rm = st_rm[st_rm.t <= fr.t.max()]
        
    else:
        #print("No other FWI extraction method ready. Sorry. ")
        raise Exception("No other FWI extraction method ready. Sorry. ")
    
    ## Do merging of all dfs 
    foo = join_lt[["InterCloud", "t_left", "lat_left", "lon_left", "current_mag", "error_elps", "num_station"]]
    foo = foo.rename(columns = {"t_left":"t", "lat_left":"lat", "lon_left":"lon"})
    foo.t = foo.t.astype('datetime64[ns]')
    raw_obs_times = raw_obs_times.rename(columns={"count": "viirs_pix_count"}) 
    raw_obs_times.t = raw_obs_times.t.astype("datetime64[ns]")
    merged = foo.merge(raw_obs_times, on = ["t"], how = "outer")
        
    fr_rm = fr.rename(columns = {"lat":"lat_centroid", "lon":"lon_centroid"})
    fr_rm.t = fr_rm.t.astype("datetime64[ns]")
    merged = merged.merge(fr_rm, on = ["t"], how = "outer")
    
    merged = merged.merge(st_rm, on = ["t"], how = "outer")
    merged["fireID"] = fireID
    
    return(merged)
    

def lf_ids(year = None, regnm = 'backup_BOREAL_NRT_3571_DPS_largefire'):
    
    diroutdata = "s3://maap-ops-workspace/shared/gsfc_landslides/FEDSoutput-s3-conus/"

    if year == None:
        year = date.today().year

    if diroutdata.startswith("s3://"):
        # Can't use glob for S3. Use s3.ls instead.
        import s3fs
        s3 = s3fs.S3FileSystem(anon=False)
        s3path = os.path.join(diroutdata, regnm, str(year), "Largefire")
        fnms = [f for f in s3.ls(s3path)]


    fnms.sort()
    ids = []
    for f in fnms:
        fnm_lts = os.path.basename(f) 
        one_id = fnm_lts[1:-11]
        ids.append(one_id)
    tmp_ids = pd.DataFrame(ids, columns=["ids"])
    tmp_ids = tmp_ids.ids.unique()
    return(tmp_ids.values)

def unique(list1):
 
    # insert the list to the set
    list_set = set(list1)
    # convert the set to the list
    unique_list = (list(list_set))
    return(unique_list)

def get_listed_ids(quebec_stats):
    newlist = [x.strip('][\n').split(' ') for x in quebec_stats.fireID.unique()]
    newlist = list(chain(*newlist))
    newlist = [x.replace('\n', ' ') for x in newlist]
    newlist = unique(newlist)
    return(newlist)
    

In [25]:
# lt = get_lt()       

# tmp = fire_timeline('615', lt = lt, path_region="QuebecGlobalNRT_DPS") #QuebecGlobalNRT_3571

# tmp
    

In [None]:
# date_range = pd.date_range(start = "2023-05-01 12:00:00", end = "2023-07-01 12:00:00", freq="12H")
# #date_range_format = datetime.strptime(date_rage, 
# date_snap = date_range.strftime("%Y%m%d%p")

In [4]:
## Get IDs. These IDs come from csvs made by old_shared/fire_weather_vis/Lightning_analysis/snap_prov_lightning.ipynb
# by going through the snapshot files, doing a spatial join, and collecting IDs. 

files = glob.glob("/projects/old_shared/fire_weather_vis/Lightning_analysis/snap_stats//boreal_snapstats*.csv")

fire_stats = concat_subsets(files)
#fire_stats = pd.read_csv("/projects/old_shared/fire_weather_vis/Lightning_analysis/snap_stats/boreal_snapstats_20231024.csv")

In [46]:
quebec_stats = fire_stats[fire_stats.prov_name_en == "Quebec"]

tmp_list = get_listed_ids(quebec_stats)

In [47]:
tmp_list

['',
 '7593',
 '12517 ',
 '13019',
 '8623',
 '11372',
 '1131',
 '13984 ',
 '8985',
 '12831 ',
 '8276',
 '14853 ',
 '2493',
 '12150',
 '12652',
 '17236 ',
 '9249',
 '11086 ',
 '17022 ',
 '16039',
 '8630',
 '10010',
 '2199',
 '12628',
 '12280',
 '10054',
 '8867',
 '10522',
 '12199',
 '12147',
 '2201 ',
 '10717',
 '17021',
 '14653 ',
 '10396 ',
 '18653 ',
 '7606 ',
 '11117',
 '13269',
 '14275 ',
 '12332 ',
 '18248',
 '16112',
 '10789',
 '12360',
 '18418 ',
 '14653',
 '17126 ',
 '16456',
 '1132',
 '12201',
 '18101',
 '11384',
 '12458 ',
 '11161',
 '9918 ',
 '1672',
 '15051',
 '8343 ',
 '11375',
 '8569',
 '7003 ',
 '14665 ',
 '12385',
 '11470',
 '8605',
 '5693 ',
 '15554 ',
 '9917',
 '18648',
 '12278 ',
 '14854 ',
 '10844 ',
 '8042',
 '13342 ',
 '18247',
 '16651',
 '15333 ',
 '13984',
 '10756',
 '11374 ',
 '10809',
 '12832 ',
 '13670',
 '9031 ',
 '11357 ',
 '12655 ',
 '11804',
 '18787',
 '8536',
 '15476 ',
 '10546',
 '16042',
 '10543 ',
 '8635 ',
 '13679 ',
 '10786 ',
 '9348',
 '2793',
 '16

In [30]:
#.strip('][').split(' ')

?quebec_stats.fireID.strip

Object `quebec_stats.fireID.strip` not found.


In [36]:
newlist = [x.strip('][\n').split(' ') for x in quebec_stats.fireID.unique()]

In [42]:
from itertools import chain

third_list = list(chain(*newlist))
third_list = [x.replace('\n', ' ') for x in third_list]


In [43]:
third_list = unique(third_list)

In [44]:
third_list

['',
 '7593',
 '12517 ',
 '13019',
 '8623',
 '11372',
 '1131',
 '13984 ',
 '8985',
 '12831 ',
 '8276',
 '14853 ',
 '2493',
 '12150',
 '12652',
 '17236 ',
 '9249',
 '11086 ',
 '17022 ',
 '16039',
 '8630',
 '10010',
 '2199',
 '12628',
 '12280',
 '10054',
 '8867',
 '10522',
 '12199',
 '12147',
 '2201 ',
 '10717',
 '17021',
 '14653 ',
 '10396 ',
 '18653 ',
 '7606 ',
 '11117',
 '13269',
 '14275 ',
 '12332 ',
 '18248',
 '16112',
 '10789',
 '12360',
 '18418 ',
 '14653',
 '17126 ',
 '16456',
 '1132',
 '12201',
 '18101',
 '11384',
 '12458 ',
 '11161',
 '9918 ',
 '1672',
 '15051',
 '8343 ',
 '11375',
 '8569',
 '7003 ',
 '14665 ',
 '12385',
 '11470',
 '8605',
 '5693 ',
 '15554 ',
 '9917',
 '18648',
 '12278 ',
 '14854 ',
 '10844 ',
 '8042',
 '13342 ',
 '18247',
 '16651',
 '15333 ',
 '13984',
 '10756',
 '11374 ',
 '10809',
 '12832 ',
 '13670',
 '9031 ',
 '11357 ',
 '12655 ',
 '11804',
 '18787',
 '8536',
 '15476 ',
 '10546',
 '16042',
 '10543 ',
 '8635 ',
 '13679 ',
 '10786 ',
 '9348',
 '2793',
 '16

In [None]:
fires = []
for n,i in enumerate(ids, start = 0):
    try:
        foo = centroid_fire(i)

    except Exception as e:
        print("Error at ID: ",i,)
        continue

    fires.append([foo.lat.iloc[0], foo.lon.iloc[0], foo.farea.iloc[0], foo.data_source.iloc[0]])
    #print(fires)
    if((n%5 == 0) | (n == (len(ids) -1))):
        print(i)
        fr_pd = pd.DataFrame(fires, columns=["lat", "lon", "farea", "data_source"])
        fr_pd.to_csv("/projects/old_shared/fire_weather_vis/Lightning_analysis/lf_centroids/"+"fire_centroids_" + regnm +".csv")