In [None]:
# IMPORT NEEDED LIBRARIES
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import numpy as np
from numpy import asarray
import seaborn as sns
import rioxarray as rxr
import rasterio as rio
from rasterio.plot import plotting_extent
from rasterio.enums import Resampling
import geopandas as gpd
import scipy
from scipy.stats import gaussian_kde
from scipy.stats import stats
from scipy.stats import mode
import seaborn as sns
import xarray as xr
import pandas as pd
import glob
import os
import fnmatch
import math
import statistics as st
from math import e
import matplotlib.ticker
from mpl_toolkits.axes_grid1 import make_axes_locatable
#from multispectral_functions import *

In [None]:
# SET IMAGE AND SHAPEFILE DIRECTORIES
# Every directory below is written with a trailing '/' so that a path behaves the
# same whether it is combined with os.path.join or with plain string concatenation.

# Pre-fire & RGB directory
dirImgLoc = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/sentinel_2b/raw/S2B_MSIL1C_20171128T184719_N0206_R070_T11SKU_20171128T203222.SAFE/GRANULE/L1C_T11SKU_A003814_20171128T184714/IMG_DATA/'

# Post-fire directory
dirImgLocPost = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/sentinel_2b/raw/S2B_MSIL1C_20171228T184749_N0206_R070_T11SKU_20171228T221718.SAFE/GRANULE/L1C_T11SKU_A004243_20171228T184751/IMG_DATA/'

# Shapefile directory, ensure correct number of shapefiles for analysis
dirPolyLoc = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/shapefiles/'

# AVIRIS imagery (pre-fire cube, post-fire cube, precomputed AVIRIS dNBR)
pre_h = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/registered_data/171206_av_area_final_clip_warp_warp_warp.dat'

post_h = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/registered_data/171221_av_area_final_warp_warp.dat'

av_dnbr = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/registered_data/dnbr_aviris_warp.dat'


# Output folders
rgb_out = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/batch_output/rgb/'
nbr_out = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/batch_output/nbr/'
dnbr_out = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/batch_output/dnbr/'
usgs_dnbr_out = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/batch_output/usgs_dnbr/'
# Trailing '/' added: this was the only folder path without one.
usgs_pha_out = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/batch_output/usgs_pha/'
ppp_out = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/batch_output/ppp/'
fig_out = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/batch_output/fig/'
general = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/batch_output/general/'
final = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/final_again/'
article = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/manuscript/'
folders = '/mnt/nfs/lss/meerdink/home/skzebarth/masters/folders/'

In [None]:
# ALL FUNCTIONS

def nbr_clip(dir, shape, plot=False, save=False):
    '''Compute the Normalized Burn Ratio (NBR) of one scene, clipped to a study basin.

    NBR = (NIR - SWIR) / (NIR + SWIR), using the first ``*B8A.jp2`` file found in
    ``dir`` as NIR and the first ``*B12.jp2`` file as SWIR.

    NOTE: the ``shape`` argument is not used as passed. It is replaced by the
    shapefile read from ``dirPolyLoc`` / ``filename``, so those notebook-level
    globals must be set before calling. The basin label is the part of
    ``filename`` before the first '.'.

    Parameters
    ----------
    dir : str
        Folder holding the band ``.jp2`` files.
    shape :
        Ignored (overwritten inside the function, see note above).
    plot : bool
        When True, draw the NBR in grayscale on a -1..1 scale with a basin/date title.
    save : bool
        When True, write ``<date>_<basin>_nbr.tif`` into ``nbr_out``.

    Returns
    -------
    The clipped NBR raster.

    Clip options used for both bands:
        all_touched=False -- only pixels whose center is within the polygon (or that
            are selected by Bresenham's line algorithm) are burned in.
        drop=True -- data outside the extent of the mask geometries is dropped
            rather than returned masked.
        invert=False -- pixels that do not overlap the shapes are set as nodata.
    '''
    clip_options = dict(all_touched=False, drop=True, invert=False)

    nir_files = glob.glob(dir + '/*B8A.jp2')
    nir_band = rxr.open_rasterio(nir_files[0], masked=True)
    swir_files = glob.glob(dir + '/*B12.jp2')
    swir_band = rxr.open_rasterio(swir_files[0], masked=True)

    # The second '_'-separated token of the NIR file name starts with YYYYMMDD.
    stamp = os.path.basename(nir_files[0]).split('_')[1]
    date = stamp[:8]
    sdc = f"{stamp[4:6]}/{stamp[6:8]}/{stamp[0:4]}"

    # Basin polygon comes from the globals, not from the argument.
    shape = gpd.read_file(os.path.join(dirPolyLoc, filename))
    basin = filename.split('.')[0]

    basin_NIR, basin_SWIR = (
        band.rio.clip(shape.geometry.values, shape.crs, **clip_options)
        for band in (nir_band, swir_band)
    )

    nbr = (basin_NIR - basin_SWIR) / (basin_NIR + basin_SWIR)

    if plot == True:
        nbr.plot(cmap='gray', vmax=1, vmin=-1)
        plt.title(f"Basin {basin} Normalized Burn Ratio: {sdc}")

    if save == True:
        nbr.rio.to_raster(os.path.join(nbr_out, f"{date}_{basin}_nbr.tif"), driver='GTIFF')

    return nbr


def av_nbr_clip(image, shape, save=False):
    '''Compute the Normalized Burn Ratio of an AVIRIS cube, clipped to a study basin.

    NBR = (NIR - SWIR) / (NIR + SWIR), with NIR taken from band index 62 and SWIR
    from band index 217 of the opened raster (presumably the AVIRIS bands closest
    to the Sentinel-2 NIR/SWIR bands -- TODO confirm).

    NOTE: ``shape`` is not used as passed; it is replaced by the shapefile read
    from the globals ``dirPolyLoc`` / ``filename``. When ``save`` is True the
    output name also needs the global ``basin``.

    Parameters
    ----------
    image : str
        Path of the raster; the text before its first '_' is used as the date tag.
    shape :
        Ignored (overwritten inside the function).
    save : bool
        When True, write ``<date>_<basin>_h_nbr.tif`` into ``nbr_out``.

    Returns
    -------
    The clipped NBR raster.
    '''
    date = os.path.basename(image).split('_')[0]

    cube = rxr.open_rasterio(image, masked=True)
    nir = cube[62, :, :]
    swir = cube[217, :, :]

    shape = gpd.read_file(os.path.join(dirPolyLoc, filename))

    def _clip_to_basin(band):
        # Same clip settings for both bands.
        return band.rio.clip(shape.geometry.values,
                             shape.crs,
                             all_touched=False,
                             drop=True,
                             invert=False)

    basin_NIR = _clip_to_basin(nir)
    basin_SWIR = _clip_to_basin(swir)

    nbr = (basin_NIR - basin_SWIR) / (basin_NIR + basin_SWIR)

    if save == True:
        target = os.path.join(nbr_out, f"{date}_{basin}_h_nbr.tif")
        nbr.rio.to_raster(target, driver='GTIFF')
    return nbr
#####
def dnbr(pre_fire, post_fire, plot=False, save=False):
    '''Return the differenced Normalized Burn Ratio: ``pre_fire - post_fire``.

    Parameters
    ----------
    pre_fire, post_fire :
        NBR rasters for one basin (outputs of the NBR functions).
    plot : bool
        When True, draw the result in grayscale on a -1..1 scale. The title uses
        the notebook-level global ``basin``.
    save : bool
        When True, write ``<basin>_dnbr.tif`` into ``dnbr_out`` (also uses the
        global ``basin``).

    Returns
    -------
    The difference ``pre_fire - post_fire``.
    '''
    difference = pre_fire - post_fire

    if plot == True:
        difference.plot(cmap='gray', vmax=1, vmin=-1)
        plt.title(f"Basin {basin} Differenced Normalized Burn Ratio")

    if save == True:
        target = os.path.join(dnbr_out, f"{basin}_dnbr.tif")
        difference.rio.to_raster(target, driver='GTIFF')

    return difference

def avg_dnbr(pre_fire, 
         post_fire, 
         plot=False, 
         save=False):
    '''Build a raster in which every valid dNBR pixel holds the basin's tabulated ``x2``.

    The dNBR (``pre_fire - post_fire``) is only used to find which pixels are
    valid (not NaN); those pixels are filled with the ``x2`` value stored in
    ``x_values_df`` for the current basin, all other pixels stay NaN.

    Notebook-level globals used: ``x_values_df`` (columns ``basin_id``, ``x2``),
    ``basin`` (converted with ``int``), ``dnbr_val`` (the valid dNBR pixel values
    are appended to it as a side effect) and, when saving, ``usgs_dnbr_out``.

    Parameters
    ----------
    pre_fire, post_fire :
        NBR rasters; their difference must have shape (band, y, x) because the
        result is reshaped to ``(shape[1], shape[2])``.
    plot : bool
        When True, draw the result in grayscale on a -1..1 scale.
    save : bool
        When True, write ``<basin>_usgs_dnbr.tif`` into ``usgs_dnbr_out``.

    Returns
    -------
    xarray.DataArray with dims ('y', 'x') carrying the x/y coordinates of the dNBR.
    '''
    dnbr = pre_fire - post_fire
    dnbr_flat = dnbr.values.flatten()
    valid = np.where(~np.isnan(dnbr_flat))
    dnbrdata = dnbr_flat[valid]
    dnbr_val.append(dnbrdata)

    # The basin look-up is the same for every pixel, so do it once instead of
    # once per pixel inside a Python loop.
    basin_rows = x_values_df.index[x_values_df['basin_id'] == int(basin)]
    x_2 = np.asarray(x_values_df.loc[basin_rows, 'x2']).ravel()

    # NaN everywhere, basin value on the valid pixels, then back to image shape.
    pixarr = np.full(dnbr_flat.shape[0], np.nan, dtype=float)
    pixarr[valid] = np.tile(x_2, dnbrdata.shape[0])
    pixarrImg = pixarr.reshape((dnbr.shape[1], dnbr.shape[2]))
    out_dnbr = xr.DataArray(pixarrImg, dims = ('y', 'x'),
                            coords = {'x': dnbr.coords['x'],
                                      'y': dnbr.coords['y']})

    if plot:
        out_dnbr.plot(
            cmap = 'gray',
            vmax = 1,
            vmin = -1)
        plt.title(f"Basin {basin} USGS Differenced Normalized Burn Ratio")
        
    if save:
        outname = f"{basin}_usgs_dnbr.tif"
        out_dnbr.rio.to_raster(os.path.join(usgs_dnbr_out, outname), driver='GTIFF')
        
    return out_dnbr
        
def pha(dnbr, 
          plot=False, 
          save=False):
    '''Fill a basin with its basin-wide debris-flow initiation probability.

    The probability comes from the logistic model coded below::

        lnx = b + b_1*x1*val + b_2*x2*val + b_3*x3*val
        P   = e**lnx / (1 + e**lnx)

    where ``x1``, ``x2`` and ``x3`` are read from ``x_values_df`` for the current
    basin. Every valid (non-NaN) pixel of ``dnbr`` receives the same ``P``; other
    pixels stay NaN.

    Notebook-level globals used: ``x_values_df`` (columns ``basin_id``, ``x1``,
    ``x2``, ``x3``), ``basin``, ``val`` (rainfall term; reported as
    ``int(val*4)`` mm/hr), ``pha_list`` (the result is appended to it) and, when
    saving, ``usgs_pha_out``.

    Parameters
    ----------
    dnbr :
        dNBR raster with shape (band, y, x); only its NaN pattern, shape and x/y
        coordinates are used.
    plot : bool
        When True, draw the probability with the 'gnuplot' colormap on 0..1.
    save : bool
        When True, write ``<basin>_usgsP_<rate>mmhr.tif`` into ``usgs_pha_out``.

    Returns
    -------
    xarray.DataArray with dims ('y', 'x').
    '''
    # Coefficients for Southern California
    b = -3.63
    b_1 = 0.41
    b_2 = 0.67
    b_3 = 0.7

    dnbr_flat = dnbr.values.flatten()
    valid = np.where(~np.isnan(dnbr_flat))
    n_valid = dnbr_flat[valid].shape[0]

    # Matching index of x_values_df[basin_id] to shapefile
    basin_rows = x_values_df.index[x_values_df['basin_id'] == int(basin)]
    x_1 = x_values_df.loc[basin_rows, 'x1']
    x_2 = x_values_df.loc[basin_rows, 'x2']
    x_3 = x_values_df.loc[basin_rows, 'x3']

    # None of the terms depends on the pixel, so the probability is computed
    # once and repeated for every valid pixel (no per-pixel Python loop).
    lnx = b + (b_1 * x_1 * val) + (b_2 * x_2 * val) + (b_3 * x_3 * val)
    odds = e ** lnx
    prob = odds / (1.0 + odds)
    p_final = np.tile(np.asarray(prob, dtype=float).ravel(), n_valid)

    # NaN everywhere, probability on the valid pixels, then back to image shape.
    pixarr = np.full(dnbr_flat.shape[0], np.nan, dtype=float)
    pixarr[valid] = p_final
    pixarrImg = pixarr.reshape((dnbr.shape[1], dnbr.shape[2]))
    out_dnbr = xr.DataArray(pixarrImg, dims = ('y', 'x'),
                            coords = {'x': dnbr.coords['x'],
                                      'y': dnbr.coords['y']})
    
    pha_list.append(out_dnbr)
    if plot:
        out_dnbr.plot(
            cmap = 'gnuplot',
            vmax = 1,
            vmin = 0)
        plt.title(f"Basin {basin} Initiation Probablilty, R = {int(val*4)}mm/hr")
    
    if save:
        outname = f"{basin}_usgsP_{int(val*4)}mmhr.tif"
        out_dnbr.rio.to_raster(os.path.join(usgs_pha_out, outname), driver='GTIFF')
        print(f"Basin {basin} {int(val*4)}mm/hr prob analysis saved as geotiff")
        
    print(f"Basin {basin} {int(val*4)}mm/hr prob analysis complete")
    return out_dnbr

def pha_mode(dnbr, 
          plot=False, 
          save=False):
    '''Basin-wide initiation probability using the global ``mode`` as the ``x2`` term.

    Same logistic model as the other probability functions in this notebook::

        lnx = b + b_1*x1*val + b_2*mode*val + b_3*x3*val
        P   = e**lnx / (1 + e**lnx)

    Every valid (non-NaN) pixel of ``dnbr`` receives the same ``P``; other pixels
    stay NaN.

    Notebook-level globals used: ``x_values_df`` (columns ``basin_id``, ``x1``,
    ``x3``), ``basin``, ``val`` (rainfall term; reported as ``int(val*4)`` mm/hr),
    ``mode``, ``pha_mode_list`` (the result is appended to it) and, when saving,
    ``usgs_pha_out``.

    NOTE(review): the import cell binds ``mode`` to ``scipy.stats.mode``, so the
    notebook must rebind ``mode`` to a number before this is called -- confirm.

    Parameters
    ----------
    dnbr :
        dNBR raster with shape (band, y, x); only its NaN pattern, shape and x/y
        coordinates are used.
    plot : bool
        When True, draw the probability with the 'gnuplot' colormap on 0..1.
    save : bool
        When True, write ``<basin>_usgsP_<rate>mmhr_mode.tif`` into ``usgs_pha_out``.

    Returns
    -------
    xarray.DataArray with dims ('y', 'x').
    '''
    # Coefficients for Southern California
    b = -3.63
    b_1 = 0.41
    b_2 = 0.67
    b_3 = 0.7

    dnbr_flat = dnbr.values.flatten()
    valid = np.where(~np.isnan(dnbr_flat))
    n_valid = dnbr_flat[valid].shape[0]

    # Matching index of x_values_df[basin_id] to shapefile
    basin_rows = x_values_df.index[x_values_df['basin_id'] == int(basin)]
    x_1 = x_values_df.loc[basin_rows, 'x1']
    x_2 = mode
    x_3 = x_values_df.loc[basin_rows, 'x3']

    # None of the terms depends on the pixel, so the probability is computed
    # once and repeated for every valid pixel (no per-pixel Python loop).
    lnx = b + (b_1 * x_1 * val) + (b_2 * x_2 * val) + (b_3 * x_3 * val)
    odds = e ** lnx
    prob = odds / (1.0 + odds)
    p_final = np.tile(np.asarray(prob, dtype=float).ravel(), n_valid)

    # NaN everywhere, probability on the valid pixels, then back to image shape.
    pixarr = np.full(dnbr_flat.shape[0], np.nan, dtype=float)
    pixarr[valid] = p_final
    pixarrImg = pixarr.reshape((dnbr.shape[1], dnbr.shape[2]))
    out_dnbr = xr.DataArray(pixarrImg, dims = ('y', 'x'),
                            coords = {'x': dnbr.coords['x'],
                                      'y': dnbr.coords['y']})
    
    pha_mode_list.append(out_dnbr)
    if plot:
        out_dnbr.plot(
            cmap = 'gnuplot',
            vmax = 1,
            vmin = 0)
        plt.title(f"Basin {basin} Initiation Probablilty, R = {int(val*4)}mm/hr")
    
    if save:
        outname = f"{basin}_usgsP_{int(val*4)}mmhr_mode.tif"
        out_dnbr.rio.to_raster(os.path.join(usgs_pha_out, outname), driver='GTIFF')
        print(f"Basin {basin} {int(val*4)}mm/hr prob analysis saved as geotiff")
        
    # Reported on every call, as pha() does, not only when the raster is saved.
    print(f"Basin {basin} {int(val*4)}mm/hr prob analysis complete")
    return out_dnbr

def pha_median(dnbr, 
          plot=False, 
          save=False):
    '''Basin-wide initiation probability using the global ``median`` as the ``x2`` term.

    Same logistic model as the other probability functions in this notebook::

        lnx = b + b_1*x1*val + b_2*median*val + b_3*x3*val
        P   = e**lnx / (1 + e**lnx)

    Every valid (non-NaN) pixel of ``dnbr`` receives the same ``P``; other pixels
    stay NaN.

    Notebook-level globals used: ``x_values_df`` (columns ``basin_id``, ``x1``,
    ``x3``), ``basin``, ``val`` (rainfall term; reported as ``int(val*4)`` mm/hr),
    ``median`` (must be defined by the notebook before calling),
    ``pha_median_list`` (the result is appended to it) and, when saving,
    ``usgs_pha_out``.

    Parameters
    ----------
    dnbr :
        dNBR raster with shape (band, y, x); only its NaN pattern, shape and x/y
        coordinates are used.
    plot : bool
        When True, draw the probability with the 'gnuplot' colormap on 0..1.
    save : bool
        When True, write ``<basin>_usgsP_<rate>mmhr_median.tif`` into ``usgs_pha_out``.

    Returns
    -------
    xarray.DataArray with dims ('y', 'x').
    '''
    # Coefficients for Southern California
    b = -3.63
    b_1 = 0.41
    b_2 = 0.67
    b_3 = 0.7

    dnbr_flat = dnbr.values.flatten()
    valid = np.where(~np.isnan(dnbr_flat))
    n_valid = dnbr_flat[valid].shape[0]

    # Matching index of x_values_df[basin_id] to shapefile
    basin_rows = x_values_df.index[x_values_df['basin_id'] == int(basin)]
    x_1 = x_values_df.loc[basin_rows, 'x1']
    x_2 = median
    x_3 = x_values_df.loc[basin_rows, 'x3']

    # None of the terms depends on the pixel, so the probability is computed
    # once and repeated for every valid pixel (no per-pixel Python loop).
    lnx = b + (b_1 * x_1 * val) + (b_2 * x_2 * val) + (b_3 * x_3 * val)
    odds = e ** lnx
    prob = odds / (1.0 + odds)
    p_final = np.tile(np.asarray(prob, dtype=float).ravel(), n_valid)

    # NaN everywhere, probability on the valid pixels, then back to image shape.
    pixarr = np.full(dnbr_flat.shape[0], np.nan, dtype=float)
    pixarr[valid] = p_final
    pixarrImg = pixarr.reshape((dnbr.shape[1], dnbr.shape[2]))
    out_dnbr = xr.DataArray(pixarrImg, dims = ('y', 'x'),
                            coords = {'x': dnbr.coords['x'],
                                      'y': dnbr.coords['y']})
    
    pha_median_list.append(out_dnbr)
    if plot:
        out_dnbr.plot(
            cmap = 'gnuplot',
            vmax = 1,
            vmin = 0)
        plt.title(f"Basin {basin} Initiation Probablilty, R = {int(val*4)}mm/hr")
    
    if save:
        outname = f"{basin}_usgsP_{int(val*4)}mmhr_median.tif"
        out_dnbr.rio.to_raster(os.path.join(usgs_pha_out, outname), driver='GTIFF')
        print(f"Basin {basin} {int(val*4)}mm/hr prob analysis saved as geotiff")
        
    # Reported on every call, as pha() does, not only when the raster is saved.
    print(f"Basin {basin} {int(val*4)}mm/hr prob analysis complete")
    return out_dnbr

def ppp(dnbr, 
          plot=False, 
          save=False):
    '''Per-pixel debris-flow initiation probability from a dNBR raster.

    Same logistic model as the basin-wide functions, but the ``x2`` term is the
    dNBR of each individual pixel::

        lnx = b + b_1*x1*val + b_2*dnbr_pixel*val + b_3*x3*val
        P   = e**lnx / (1 + e**lnx)

    NaN pixels of ``dnbr`` stay NaN in the result.

    Notebook-level globals used: ``x_values_df`` (columns ``basin_id``, ``x1``,
    ``x3``), ``basin``, ``val`` (rainfall term; reported as ``int(val*4)`` mm/hr),
    ``dist_list`` (the flat probabilities of the valid pixels are appended),
    ``ppp_list`` (the result raster is appended) and, when saving, ``ppp_out``.

    Parameters
    ----------
    dnbr :
        dNBR raster with shape (band, y, x).
    plot : bool
        When True, draw the probability with the 'gnuplot' colormap on 0..1.
    save : bool
        When True, write ``<basin>_ppp_<rate>mmhr.tif`` into ``ppp_out``.

    Returns
    -------
    xarray.DataArray with dims ('y', 'x').
    '''
    # Coefficients for Southern California
    b = -3.63
    b_1 = 0.41
    b_2 = np.array(0.67)
    b_3 = 0.7

    dnbr_flat = dnbr.values.flatten()
    valid = np.where(~np.isnan(dnbr_flat))
    dnbrdata = dnbr_flat[valid]

    # Matching index of x_values_df[basin_id] to shapefile
    basin_rows = x_values_df.index[x_values_df['basin_id'] == int(basin)]
    x_1 = x_values_df.loc[basin_rows, 'x1']
    x_3 = x_values_df.loc[basin_rows, 'x3']

    # Pixel-dependent term b_2 * dNBR * val (val is read from the notebook globals).
    s1 = np.multiply(np.multiply(dnbrdata, b_2), val)

    # Basin-wide terms are computed once; broadcasting adds them to every pixel
    # in the same order as the scalar formula (intercept + pixel term + x3 term).
    intercept_term = np.asarray(b + (b_1 * x_1 * val), dtype=float).ravel()
    x3_term = np.asarray(b_3 * x_3 * val, dtype=float).ravel()
    lnx = (intercept_term[np.newaxis, :] + s1[:, np.newaxis]) + x3_term[np.newaxis, :]
    odds = e ** lnx
    p_final = (odds / (1.0 + odds)).flatten()

    dist_list.append(p_final)

    # NaN everywhere, probability on the valid pixels, then back to image shape.
    pixarr = np.full(dnbr_flat.shape[0], np.nan, dtype=float)
    pixarr[valid] = p_final
    pixarrImg = pixarr.reshape((dnbr.shape[1], dnbr.shape[2]))
    out_dnbr = xr.DataArray(pixarrImg, dims = ('y', 'x'),
                            coords = {'x': dnbr.coords['x'],
                                      'y': dnbr.coords['y']})
    ppp_list.append(out_dnbr)
    if plot:
        out_dnbr.plot(
            cmap = 'gnuplot',
            vmax = 1,
            vmin = 0)
        plt.title(f"Basin {basin} Per Pixel Initiation Probablilty, R = {int(val*4)}mm/hr")
    
    if save:
        outname = f"{basin}_ppp_{int(val*4)}mmhr.tif"
        out_dnbr.rio.to_raster(os.path.join(ppp_out, outname), driver='GTIFF')
        print(f"Basin {basin} {int(val*4)}mm/hr per pixel analysis saved as geotiff")
        
    # Reported on every call, as pha() does, not only when the raster is saved.
    print(f"Basin {basin} {int(val*4)}mm/hr per pixel analysis complete")
    return out_dnbr

#####

def rgb(dir, shape, plot=False, save=False):
    '''Build a true-colour composite of one scene, clipped to a study basin.

    The first ``*B04.jp2`` (red), ``*B03.jp2`` (green) and ``*B02.jp2`` (blue)
    files found in ``dir`` are clipped to the basin polygon, each rescaled to
    0..1 and resampled to half their width and height, then stacked.

    NOTE: ``shape`` is not used as passed; it is replaced by the shapefile read
    from the globals ``dirPolyLoc`` / ``filename``. The basin label is the part
    of ``filename`` before the first '.'.

    Parameters
    ----------
    dir : str
        Folder holding the band ``.jp2`` files.
    shape :
        Ignored (overwritten inside the function).
    plot : bool
        When True, show the composite with a basin/date title.
    save : bool
        When True, write ``<basin>_<date>_rgb.tif`` into ``rgb_out``.

    Returns
    -------
    xarray.DataArray with dims ('band', 'y', 'x'); bands are red, green, blue.
    '''

    def normalize(array):
        """Rescale ``array`` linearly so its minimum maps to 0.0 and its maximum to 1.0."""
        lowest, highest = array.min(), array.max()
        return ((array - lowest)/(highest - lowest))

    def resample(rst):
        """Halve the raster's width and height using nearest-neighbour resampling."""
        scale_factor = 1/2
        target = (int(rst.rio.height * scale_factor), int(rst.rio.width * scale_factor))
        return rst.rio.reproject(rst.rio.crs,
                                 shape=target,
                                 resampling=Resampling.nearest)

    # Set RGB Bands,Sentinel-2 is red(B04), green(B03), blue(B02)
    band_files = {
        'red': glob.glob(dir + '/*B04.jp2'),
        'green': glob.glob(dir + '/*B03.jp2'),
        'blue': glob.glob(dir + '/*B02.jp2'),
    }

    # The second '_'-separated token of the red file name starts with YYYYMMDD.
    stamp = os.path.basename(band_files['red'][0]).split('_')[1]
    date = stamp[:8]
    sdc = f"{stamp[4:6]}/{stamp[6:8]}/{stamp[0:4]}"

    # Each stage handles the bands in the order red, blue, green.
    stage_order = ('red', 'blue', 'green')
    opened = {}
    for colour in stage_order:
        opened[colour] = rxr.open_rasterio(band_files[colour][0], masked=True)

    # Basin polygon comes from the globals, not from the argument.
    shape = gpd.read_file(os.path.join(dirPolyLoc, filename))
    basin = filename.split('.')[0]

    clipped = {}
    for colour in stage_order:
        clipped[colour] = opened[colour].rio.clip(shape.geometry.values,
                                                  shape.crs,
                                                  all_touched=False,
                                                  drop=True,
                                                  invert=False)

    # Normalize clip so rgb display is bright enough
    normalized = {}
    for colour in stage_order:
        normalized[colour] = normalize(clipped[colour])

    # Resample from 10m to 20m spatial resolution
    resampled = {}
    for colour in stage_order:
        resampled[colour] = resample(normalized[colour])

    # Pull band information from each array
    layers = {}
    for colour in stage_order:
        layers[colour] = resampled[colour].data[0, :, :]

    # Stack as (rows, cols, 3) in red/green/blue order and mask the NaN pixels.
    red_rs = resampled['red']
    dataPlot = np.zeros((layers['red'].shape[0], layers['red'].shape[1], 3))
    for position, colour in enumerate(('red', 'green', 'blue')):
        dataPlot[:, :, position] = layers[colour]
    dataPlotMask = np.ma.masked_where(np.isnan(dataPlot), dataPlot)
    img = xr.DataArray(dataPlotMask,
                       dims=('y', 'x', 'band'),
                       coords={'y': red_rs.coords['y'],
                               'x': red_rs.coords['x'],
                               'band': ["red", "green", "blue"]})

    # Transpose array for geotiff output
    out_img = img.transpose('band', 'y', 'x')

    if plot == True:
        img.plot.imshow()
        plt.title(f'Basin {basin} RGB: {sdc}')
        plt.show()

    #Saving
    if save == True:
        target = os.path.join(rgb_out, f"{basin}_{date}_rgb.tif")
        out_img.rio.to_raster(target, driver='GTIFF')
    print(f"Basin {basin} RGB combine complete")
    return out_img

####

def ppp_av(dnbr, 
          plot=False, 
          save=False):
    '''Per-pixel debris-flow initiation probability from a dNBR raster (no raster list).

    Same computation as ``ppp`` except that the result raster is not appended to
    ``ppp_list``::

        lnx = b + b_1*x1*val + b_2*dnbr_pixel*val + b_3*x3*val
        P   = e**lnx / (1 + e**lnx)

    NaN pixels of ``dnbr`` stay NaN in the result.

    Notebook-level globals used: ``x_values_df`` (columns ``basin_id``, ``x1``,
    ``x3``), ``basin``, ``val`` (rainfall term; reported as ``int(val*4)`` mm/hr),
    ``dist_list`` (the flat probabilities of the valid pixels are appended) and,
    when saving, ``ppp_out``.

    NOTE(review): the saved file name and folder are the same as those used by
    ``ppp``, so saving both for one basin and rainfall value overwrites the
    earlier file -- confirm whether that is intended.

    Parameters
    ----------
    dnbr :
        dNBR raster with shape (band, y, x).
    plot : bool
        When True, draw the probability with the 'gnuplot' colormap on 0..1.
    save : bool
        When True, write ``<basin>_ppp_<rate>mmhr.tif`` into ``ppp_out``.

    Returns
    -------
    xarray.DataArray with dims ('y', 'x').
    '''
    # Coefficients for Southern California
    b = -3.63
    b_1 = 0.41
    b_2 = np.array(0.67)
    b_3 = 0.7

    dnbr_flat = dnbr.values.flatten()
    valid = np.where(~np.isnan(dnbr_flat))
    dnbrdata = dnbr_flat[valid]

    # Matching index of x_values_df[basin_id] to shapefile
    basin_rows = x_values_df.index[x_values_df['basin_id'] == int(basin)]
    x_1 = x_values_df.loc[basin_rows, 'x1']
    x_3 = x_values_df.loc[basin_rows, 'x3']

    # Pixel-dependent term b_2 * dNBR * val (val is read from the notebook globals).
    s1 = np.multiply(np.multiply(dnbrdata, b_2), val)

    # Basin-wide terms are computed once; broadcasting adds them to every pixel
    # in the same order as the scalar formula (intercept + pixel term + x3 term).
    intercept_term = np.asarray(b + (b_1 * x_1 * val), dtype=float).ravel()
    x3_term = np.asarray(b_3 * x_3 * val, dtype=float).ravel()
    lnx = (intercept_term[np.newaxis, :] + s1[:, np.newaxis]) + x3_term[np.newaxis, :]
    odds = e ** lnx
    p_final = (odds / (1.0 + odds)).flatten()

    dist_list.append(p_final)

    # NaN everywhere, probability on the valid pixels, then back to image shape.
    pixarr = np.full(dnbr_flat.shape[0], np.nan, dtype=float)
    pixarr[valid] = p_final
    pixarrImg = pixarr.reshape((dnbr.shape[1], dnbr.shape[2]))
    out_dnbr = xr.DataArray(pixarrImg, dims = ('y', 'x'),
                            coords = {'x': dnbr.coords['x'],
                                      'y': dnbr.coords['y']})
    
    if plot:
        out_dnbr.plot(
            cmap = 'gnuplot',
            vmax = 1,
            vmin = 0)
        plt.title(f"Basin {basin} Per Pixel Initiation Probablilty, R = {int(val*4)}mm/hr")
    
    if save:
        outname = f"{basin}_ppp_{int(val*4)}mmhr.tif"
        out_dnbr.rio.to_raster(os.path.join(ppp_out, outname), driver='GTIFF')
        print(f"Basin {basin} {int(val*4)}mm/hr per pixel analysis saved as geotiff")
        
    # Reported on every call, as pha() does, not only when the raster is saved.
    print(f"Basin {basin} {int(val*4)}mm/hr per pixel analysis complete")
    return out_dnbr

####

#Loads in the appropriate probability based on USGS Values.
def p_choice(val):
    '''Append the tabulated probability for the current basin and rainfall value to ``p_list``.

    Notebook-level globals used: ``x_values_df`` (columns ``basin_id`` and
    ``P_3`` .. ``P_10``), ``basin`` (converted with ``int``) and ``p_list``
    (the selected value is appended as a float).

    Parameters
    ----------
    val : number
        Selects the column: 3 -> 'P_3', 4 -> 'P_4', ..., 10 -> 'P_10'.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``val`` is not one of 3..10, or if the basin does not match exactly
        one row of ``x_values_df``.
    '''
    columns = {float(level): f'P_{level}' for level in range(3, 11)}
    column = columns.get(val)
    if column is None:
        raise ValueError(f"No probability column for val={val!r}; expected one of 3-10")

    # Select by label with a boolean mask, so this also works when x_values_df
    # does not have the default 0..n-1 index.
    matches = x_values_df.loc[x_values_df['basin_id'] == int(basin), column]
    if len(matches) != 1:
        raise ValueError(
            f"Expected exactly one row for basin {basin!r} in x_values_df, found {len(matches)}")

    p_list.append(float(matches.iloc[0]))
    return 

def final_compare_nbr2(mnbr_pre, mnbr_post, hnbr_pre, hnbr_post, savefig=False):
    '''Plot a 3x2 figure comparing Sentinel-2b and AVIRIS NBR before and after the fire.

    Left column is pre-fire, right column is post-fire. Row 0 shows the
    Sentinel-2b NBR map, row 1 the AVIRIS NBR map, and row 2 overlaid histograms
    of both sensors with each mean drawn as a vertical line.

    Notebook-level globals used: ``shape`` (plotted with alpha=0 on every map
    axes, presumably to fix the map extent -- TODO confirm), ``nbr_bin_val``
    (histogram bins) and, when saving, ``basin`` and ``article``.

    Parameters
    ----------
    mnbr_pre, mnbr_post :
        Sentinel-2b NBR rasters (pre- and post-fire).
    hnbr_pre, hnbr_post :
        AVIRIS NBR rasters (pre- and post-fire).
    savefig : bool
        When True, save the figure as ``<basin>_nbr_final.pdf`` in ``article``.

    Returns
    -------
    None
    '''
    mean1 = np.mean(mnbr_pre).item()
    mean2 = np.mean(mnbr_post).item()
    mean3 = np.mean(hnbr_pre).item()
    mean4 = np.mean(hnbr_post).item()

    fig, ax = plt.subplots(3, 2, 
                           figsize=(14,18),
                           gridspec_kw={'height_ratios':[1, 1, .8]})

    def _nbr_map(axis, raster, title):
        # Draw one NBR map and enlarge the tick labels of its colorbar.
        shape.plot(ax=axis, alpha=0)
        raster.plot(ax=axis,
                    cmap='RdBu',
                    vmax=1,
                    vmin=-1)
        axis.set_title(title, fontweight="bold", size=18)
        axis.axis('off')
        # The colorbar just created by raster.plot() is the newest axes of the
        # figure, so it is always the last entry.
        axis.figure.axes[-1].tick_params(labelsize=14)

    def _nbr_hist(axis, sentinel, aviris, sentinel_mean, aviris_mean, title):
        # Overlay both sensors' NBR histograms and mark each mean.
        sentinel.plot.hist(
                      bins = nbr_bin_val,
                      color = 'c',
                      alpha = .5, label = (f'Sentinel-2b mean: {sentinel_mean: .3f}'),
                      edgecolor = 'k', ax=axis)
        aviris.plot.hist(
                      bins = nbr_bin_val,
                      color = 'r',
                      alpha = .5, label = (f'AVIRIS mean: {aviris_mean: .3f}'),
                      edgecolor = 'k', ax=axis)
        axis.yaxis.set_label_position('right')
        axis.yaxis.tick_right()
        axis.tick_params(axis='x', labelsize=14)
        axis.tick_params(axis='y', labelsize=14)
        axis.set_ylabel('Frequency', fontsize=14)
        axis.set_title(title, fontweight="bold", size=18)
        axis.legend(loc='best', fontsize=12)
        axis.axvline(sentinel_mean, color='b', linestyle='solid', linewidth=2)
        axis.axvline(aviris_mean, color='r', linestyle='solid', linewidth=2)

    # Pre-fire column
    _nbr_map(ax[0,0], mnbr_pre, 'Sentinel-2b Pre-fire NBR')
    _nbr_map(ax[1,0], hnbr_pre, 'AVIRIS Pre-fire NBR')
    _nbr_hist(ax[2,0], mnbr_pre, hnbr_pre, mean1, mean3, 'Pre-fire NBR Distribution')

    # Post-fire column
    _nbr_map(ax[0,1], mnbr_post, 'Sentinel-2b Post-fire NBR')
    _nbr_map(ax[1,1], hnbr_post, 'AVIRIS Post-fire NBR')
    _nbr_hist(ax[2,1], mnbr_post, hnbr_post, mean2, mean4, 'Post-fire NBR Distribution')

    if savefig:
         filename = os.path.join(article, f'{basin}_nbr_final.pdf')
         plt.savefig(filename, bbox_inches='tight', dpi=1200)   
    plt.show()
    return



def final_compare_mh(dnbrm, dnbrh, pppm, ppph, p, R, savefig=False):
    '''Plot a 3x2 figure comparing Sentinel-2b and AVIRIS dNBR and per-pixel probability.

    Left column: Sentinel-2b dNBR map, AVIRIS dNBR map, overlaid dNBR histograms
    with each mean drawn as a vertical line. Right column: Sentinel-2b
    probability map, AVIRIS probability map, overlaid probability histograms
    with ``p`` drawn as a vertical line.

    Notebook-level globals used: ``shape`` (plotted with alpha=0 on every map
    axes, presumably to fix the map extent -- TODO confirm), ``dnbr_bin_val`` and
    ``bin_val`` (histogram bins), ``y_max`` (upper y limit of the probability
    histogram) and, when saving, ``basin`` and ``article``.

    Parameters
    ----------
    dnbrm, dnbrh :
        Sentinel-2b and AVIRIS dNBR rasters.
    pppm, ppph :
        Sentinel-2b and AVIRIS per-pixel probability rasters.
    p : float
        Probability drawn as the green vertical line (legend label 'M1').
    R : number
        Rainfall value; only used in the saved file name as ``int(R*4)``.
    savefig : bool
        When True, save ``<basin>_<int(R*4)>_final_SentvsAV.pdf`` in ``article``.

    Returns
    -------
    None
    '''
    mean1 = np.mean(dnbrm).item()
    mean2 = np.mean(dnbrh).item()

    fig, ax = plt.subplots(3, 2, 
                           figsize=(14,18),
                           gridspec_kw={'height_ratios':[1, 1, .8]})

    ax1 = ax[0,0] 
    ax4 = ax[0,1]
    ax2 = ax[1,0]
    ax5 = ax[1,1]
    ax3 = ax[2,0]
    ax6 = ax[2,1]

    def _map_with_colorbar(axis, raster, title, title_size, **style):
        # Draw one map and enlarge the tick labels of the colorbar it creates.
        shape.plot(ax=axis, alpha=0)
        raster.plot(ax=axis, **style)
        axis.set_title(title, fontweight="bold", size=title_size)
        axis.axis('off')
        # The colorbar just created by raster.plot() is the newest axes of the figure.
        axis.figure.axes[-1].tick_params(labelsize=14)

    ## dNBR column
    _map_with_colorbar(ax1, dnbrm, 'Sentinel-2b dNBR', 18,
                       cmap='RdBu_r', vmax=1.5, vmin=-1.5)

    # The AVIRIS map gets a hand-made colorbar with an empty label.
    shape.plot(ax = ax2, alpha=0)
    h = dnbrh.plot(ax=ax2, 
                   cmap = 'RdBu_r',
                   vmax = 1.5,
                   vmin = -1.5,
                   add_colorbar=False)
    ax2.set_title('AVIRIS dNBR',fontweight="bold", size=18,)
    ax2.axis('off')
    # Attach the colorbar explicitly to ax2 and style it through the returned
    # object, so it gets the same tick size as the other colorbars.
    cbar = plt.colorbar(h, ax=ax2)
    cbar.set_label('')
    cbar.ax.tick_params(labelsize=14)

    dnbrm.plot.hist(
                  bins = dnbr_bin_val,
                  color = 'c',
                  alpha = .5, label = (f'Sentinel-2b mean: {mean1: .3f}'),
                  edgecolor = 'k', ax=ax3)
    dnbrh.plot.hist(
                  bins = dnbr_bin_val,
                  color = 'r',
                  alpha = .5, label = (f'AVIRIS mean: {mean2: .3f}'),
                  edgecolor = 'k', ax=ax3)
    ax3.yaxis.set_label_position('right')
    ax3.yaxis.tick_right()
    ax3.tick_params(axis='x', labelsize=14)  
    ax3.tick_params(axis='y', labelsize=14) 
    ax3.set_title('dNBR Distribution',fontweight="bold", size=18, y=1.08)
    ax3.set(xlabel='')
    ax3.set_ylabel('Frequency', fontsize=14)
    ax3.legend(loc='best', fontsize=12)
    ax3.axvline(mean1, color='b', linestyle='solid', linewidth=2)
    ax3.axvline(mean2, color='r', linestyle='solid', linewidth=2)

    ## Probability column
    _map_with_colorbar(ax4, pppm, 'Sentinel-2b Per-Pixel Probability', 17,
                       cmap='gnuplot', vmax=1, vmin=0)
    _map_with_colorbar(ax5, ppph, 'AVIRIS Per-Pixel Probability', 18,
                       cmap='gnuplot', vmax=1, vmin=0)

    pppm.plot.hist(
                  bins = bin_val,
                  color = 'c',
                  alpha = .5, label = 'MSM1',
                  edgecolor = 'k', ax=ax6)
    ppph.plot.hist(
                  bins = bin_val,
                  color = 'r',
                  alpha = .5, label = 'HSM1',
                  edgecolor = 'k', ax=ax6)
    ax6.set_title('Probability Distribution',fontweight="bold", size=18,)
    ax6.yaxis.set_label_position('right')
    ax6.yaxis.tick_right()
    ax6.set_ylabel('Frequency', fontsize =14)
    ax6.set_xlim([0,1])
    ax6.set_ylim(0, y_max)
    ax6.tick_params(axis = 'x', labelsize = 14)
    ax6.tick_params(axis = 'y', labelsize = 14)
    ax6.axvline(p, 
                 color='g', 
                 linestyle='solid', 
                 linewidth=2,
                 label = f'M1: {p: .3f}'
                 ) 
    # Legend is built last so that it includes the labelled vertical line.
    ax6.legend(loc='best', fontsize=12)

    if savefig:
         filename = os.path.join(article, f'{basin}_{int(R*4)}_final_SentvsAV.pdf')
         plt.savefig(filename, bbox_inches='tight', dpi=1200)   
    plt.show()
    return


def kde_plot(x, y, z, p, p2, p3, savefig=False):
    """Overlay the MSM1 probability histograms for three design storms.

    Draws one seaborn histogram-plus-KDE per sample on the current figure
    (x, y, z are labelled 12, 24 and 36 mm/hr), then marks p, p2 and p3 with
    dashed vertical lines in the matching colours and shows the figure.

    Relies on notebook-level names: ``bin_val`` (passed as the histogram
    ``bins``) and, when saving, ``article`` (first component of the output
    path) and ``basin`` (prefix of the output file name).

    Parameters
    ----------
    x, y, z : array-like
        Samples handed to ``sns.histplot`` for the 12, 24 and 36 mm/hr series.
    p, p2, p3 : float
        x-positions of the dashed reference lines for the three series.
    savefig : bool, default False
        When equal to True, the figure is written to ``<basin>_rdist.pdf``.

    Returns
    -------
    None
    """
    # (sample, reference value, colour, histogram label, reference-line label)
    series = (
        (x, p, 'blue', 'MSM1 12 mm/hr', 'M1 12 mm/hr'),
        (y, p2, 'orange', 'MSM1 24 mm/hr', 'M1 24 mm/hr'),
        (z, p3, 'green', 'MSM1 36 mm/hr', 'M1 36 mm/hr'),
    )

    sns.set(rc={'figure.figsize':(12,6)},style='white')

    # Histograms first, in series order, so the legend order is unchanged.
    for sample, _, colour, hist_label, _ in series:
        sns.histplot(sample, kde=True, color=colour, fill=True,
                     bins=bin_val, stat="count", alpha=.1, label=hist_label)

    plt.xlabel('Probability',fontweight="bold", size=16,)
    plt.xlim(0,1)
    plt.ylabel('Frequency',fontweight="bold", size=16)

    # Dashed reference line per series, drawn after the axes are labelled.
    for _, marker, colour, _, line_label in series:
        plt.axvline(x=marker, color=colour, linestyle='--',
                    label=f'{line_label}: {marker: .3f}')

    plt.legend(loc='best')

    if savefig == True:
        out_path = os.path.join(article, f'{basin}_rdist.pdf')
        plt.savefig(out_path, dpi=1200)
    plt.show()
    return


def kde_plot_h(x, y, z, p, p2, p3, savefig=False):
    """Overlay the HSM1 probability histograms for three design storms.

    Draws one seaborn histogram-plus-KDE per sample on the current figure
    (x, y, z are labelled 12, 24 and 36 mm/hr), then marks p, p2 and p3 with
    dashed vertical lines in the matching colours and shows the figure.

    Relies on notebook-level names: ``bin_val`` (passed as the histogram
    ``bins``) and, when saving, ``article`` (first component of the output
    path) and ``basin`` (prefix of the output file name).

    Parameters
    ----------
    x, y, z : array-like
        Samples handed to ``sns.histplot`` for the 12, 24 and 36 mm/hr series.
    p, p2, p3 : float
        x-positions of the dashed reference lines for the three series.
    savefig : bool, default False
        When equal to True, the figure is written to ``<basin>_hrdist.pdf``.

    Returns
    -------
    None
    """
    # (sample, reference value, colour, histogram label, reference-line label)
    series = (
        (x, p, 'blue', 'HSM1 12 mm/hr', 'M1 12 mm/hr'),
        (y, p2, 'orange', 'HSM1 24 mm/hr', 'M1 24 mm/hr'),
        (z, p3, 'green', 'HSM1 36 mm/hr', 'M1 36 mm/hr'),
    )

    sns.set(rc={'figure.figsize':(12,6)},style='white')

    # Histograms first, in series order, so the legend order is unchanged.
    for sample, _, colour, hist_label, _ in series:
        sns.histplot(sample, kde=True, color=colour, fill=True,
                     bins=bin_val, stat="count", alpha=.1, label=hist_label)

    plt.xlabel('Probability',fontweight="bold", size=16,)
    plt.xlim(0,1)
    plt.ylabel('Frequency',fontweight="bold", size=16)

    # Dashed reference line per series, drawn after the axes are labelled.
    for _, marker, colour, _, line_label in series:
        plt.axvline(x=marker, color=colour, linestyle='--',
                    label=f'{line_label}: {marker: .3f}')

    plt.legend(loc='best')

    if savefig == True:
        out_path = os.path.join(article, f'{basin}_hrdist.pdf')
        plt.savefig(out_path, dpi=1200)
    plt.show()
    return

def count_above(x):
    """Count the elements of ``x`` that are strictly greater than its mean.

    Parameters
    ----------
    x : array-like of numbers
        Values to inspect. Any shape is accepted; every element is counted
        individually.

    Returns
    -------
    int
        Number of elements with ``value > mean(x)``. If ``x`` contains NaN
        the mean is NaN and nothing compares greater, so the result is 0.
    """
    values = np.asarray(x)
    # One vectorised comparison instead of a Python-level loop over every pixel.
    return int(np.count_nonzero(values > values.mean()))

def count_total(x):
    """Return how many items iterating over ``x`` yields.

    Counting is done by iteration (not ``len``), so unsized iterables such as
    generators are supported and are consumed in the process.
    """
    return sum(1 for _ in x)


def count_50(x):
    """Count the elements of ``x`` that are strictly greater than 0.5.

    Parameters
    ----------
    x : array-like of numbers
        Values to inspect. Any shape is accepted; every element is counted
        individually. NaN entries never compare greater and are not counted.

    Returns
    -------
    int
        Number of elements with ``value > 0.5``.
    """
    # Vectorised comparison replaces the per-element Python loop.
    return int(np.count_nonzero(np.asarray(x) > .5))

def count_90(x):
    """Count the elements of ``x`` that are strictly greater than 0.9.

    Parameters
    ----------
    x : array-like of numbers
        Values to inspect. Any shape is accepted; every element is counted
        individually. NaN entries never compare greater and are not counted.

    Returns
    -------
    int
        Number of elements with ``value > 0.9``.
    """
    # Vectorised comparison replaces the per-element Python loop.
    return int(np.count_nonzero(np.asarray(x) > .9))

def count_99(x):
    """Count the elements of ``x`` that are strictly greater than 0.99.

    Parameters
    ----------
    x : array-like of numbers
        Values to inspect. Any shape is accepted; every element is counted
        individually. NaN entries never compare greater and are not counted.

    Returns
    -------
    int
        Number of elements with ``value > 0.99``.
    """
    # Vectorised comparison replaces the per-element Python loop.
    return int(np.count_nonzero(np.asarray(x) > .99))

In [None]:
#Set USGS x and P values; this is only to generate results shown in related manuscript.
x_values_df = pd.read_csv('/mnt/nfs/lss/meerdink/home/skzebarth/masters/basin_data/ma_data.csv', 
                          delimiter=',')

#Open the hyperspectral (AVIRIS) dNBR raster and set the NBR and dNBR histogram bin edges.
hyper_dnbr = rxr.open_rasterio(av_dnbr, masked=True)

nbr_bin_val = [-1, -.9, -.8, -.7, -.6, -.5, -.4, -.3, -.2, -.1, 0, .1, .2, .3, 
                            .4, .5, .6, .7, .8, .9, 1]
dnbr_bin_val = [-1, -.9, -.8, -.7, -.6, -.5, -.4, -.3, -.2, -.1, 0, .1, .2, .3, 
                            .4, .5, .6, .7, .8, .9, 1, 1.1, 1.2, 1.3, 1.4, 1.5]

#This loop runs the entire PHA for an individual basin or for all basins, depending on
#the suffix passed to filename.endswith() below.

    #Individual = if filename.endswith('BASIN-ID.shp')
    #all = if filename.endswith('.shp')
for filename in sorted(os.listdir(dirPolyLoc)):
    if filename.endswith('12997.shp'):
        #Basin ID is the part of the shapefile name before the first '.'.
        b_name = filename.split('.')
        basin = b_name[0]
        
        #Clip the hyperspectral dNBR raster to the basin polygon.
        shape = gpd.read_file(os.path.join(dirPolyLoc, filename))
        av_dnbr_clip = hyper_dnbr.rio.clip(shape.geometry.values,
                                shape.crs,
                                all_touched = False,
                                drop = True,
                                invert = False)
        
        #Keep only the non-NaN hyperspectral dNBR pixels of the clipped raster.
        hypdnbrdata = []
        av_dnbr_flat = av_dnbr_clip.values.flatten()
        #NOTE(review): av_pixarr is not read again in the visible part of this cell.
        av_pixarr = np.empty((av_dnbr_clip.shape[0]), dtype= float)
        av_pixarr[:] = np.nan
        av_x = np.where(~np.isnan(av_dnbr_flat))
        av_dnbrdata = av_dnbr_flat[av_x]
        hypdnbrdata.append(av_dnbrdata)
        
        #Uncomment if user desires to create RGB imagery
        #rgb_img = rgb(dirImgLoc, shape, plot=False, save=False)
        #rgb_img2 = rgb(dirImgLocPost, shape, plot=False, save=False)
        
        #Prefire & Postfire imagery nbr function calls. 
        pre_fire_img = nbr_clip(dirImgLoc, shape, plot=False, save=False)
        post_fire_img = nbr_clip(dirImgLocPost, shape, plot=False, save=False)
        h_pre_fire_img = av_nbr_clip(pre_h, shape, save=False)
        h_post_fire_img = av_nbr_clip(post_h, shape, save=False)
        
        #NBR comparison
        test2 = final_compare_nbr2(pre_fire_img, post_fire_img, 
                             h_pre_fire_img, h_post_fire_img, savefig=False)
        
        
        
        #Prep dNBR data to pull out peak frequency and set eventual distribution bin sizes. 
        #NOTE(review): dnbr_val is created empty and never appended to in this cell;
        #presumably avg_dnbr()/dnbr() fill it as a global - confirm against their definitions.
        dnbr_val = []
        avg_dnbr_img = avg_dnbr(pre_fire_img, post_fire_img, plot=False, save=False)
        dnbr_img = dnbr(pre_fire_img, post_fire_img, plot=False, save=False)
        y = np.round(dnbr_val, decimals = 3)
        y_flat = y.flatten()        
        ad = np.array(dnbr_val)
        ax = ad.flatten()
        #Two basins use a coarser histogram (20 bins instead of 50) when locating the peak.
        if basin == '20029' or basin == '21634':
            h_bin = 20
        else:
            h_bin = 50
        hist, bins = np.histogram(ax, bins=h_bin)
        peak_bin_index = np.argmax(hist)
        #Left edge of the most populated histogram bin.
        #NOTE(review): this rebinds the name `mode` imported from scipy.stats at the top of
        #the notebook; left unchanged because helpers such as pha_mode() may read this
        #global - confirm before renaming.
        mode = bins[peak_bin_index]
        print(f'peak frequency is {mode}')
        multi_above = count_above(ax)
        print(f'the number of pixels above the S2 mean is: {multi_above}')
        median = np.nanmedian(dnbr_img)
        #Normality tests on the flattened dNBR values (D'Agostino-Pearson and Shapiro-Wilk).
        ntest = scipy.stats.normaltest(ax)
        ntestsw = scipy.stats.shapiro(ax)
        
        #Rainfall and bin setup
        #Each R value is reported as R*4 mm/hr below (3.0 -> 12 mm/hr ... 10.0 -> 40 mm/hr).
        R = [3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]
        bin_val = [0.0, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50, 0.55,
           0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95, 1.0]
        #Same edges as the dnbr_bin_val defined before the loop.
        dnbr_bin_val = [-1, -.9, -.8, -.7, -.6, -.5, -.4, -.3, -.2, -.1, 0, .1, .2, .3, 
                        .4, .5, .6, .7, .8, .9, 1, 1.1, 1.2, 1.3, 1.4, 1.5]
        
        #Creation of empty lists to store xarrays for future plotting.
        #NOTE(review): apart from ppp_hyper_list, none of these lists is appended to in this
        #cell, yet they are indexed below; presumably p_choice(), pha(), pha_mode(),
        #pha_median(), ppp() and ppp_av() append to them as globals - confirm.
        pha_list = []
        pha_mode_list = []
        pha_median_list = []
        ppp_list = []
        ppp_hyper_list = []
        dist_list = []
        p_list = []
        
        #PHA for each rainfall intensity design storm. 
        for val in R:
            p = p_choice(val)
            pha_img = pha(dnbr_img, plot=False, save=False)
            pha_mode_img = pha_mode(dnbr_img, plot=False, save=False)
            pha_median_img = pha_median(dnbr_img, plot=False, save=False)
            ppp_img = ppp(dnbr_img, plot=False, save=False)
            ppp_hyp = ppp_av(av_dnbr_clip)
            ppp_hyper_list.append(ppp_hyp)

            print(f'Per Pixel Analysis complete for basin {basin}, with a design storm of {int(val)*4} mm/hr.')

        #Use one shared y-axis limit for this basin's probability-distribution plots: the
        #largest bin count found across all rainfall intensities, plus a margin of 10.
        bf = []
        for val in dist_list:
            test_bin = pd.cut(val, bin_val)
            x = max(test_bin.value_counts())
            bf.append(x)

        y_max = round(max(bf) +10)
        print(f'Basin {basin} Debris Flow Likelihood analysis completed with all R values')
        
        
        #Creates dataframe of xarray data to ensure each data point pulled is associated with the appropriate rainfall intensity
        fig_list_df = pd.DataFrame({
            'R': R,
            'pha': pha_list,
            'ppp': ppp_list,
            'ppp_h': ppp_hyper_list,
            'pha_mode': pha_mode_list,
            'pha_median': pha_median_list,
            'p': p_list
        })
    
        #Generation of Figures.
        #KDE setup for multi & hyper
        #This cell reads dist_list as alternating entries per rainfall intensity:
        #even indices = multispectral, odd indices = hyperspectral.
        multi_rainfall_int = []
        r12 = dist_list[0]
        r16 = dist_list[2]
        r20 = dist_list[4]
        r24 = dist_list[6]
        r28 = dist_list[8]
        r32 = dist_list[10]
        r36 = dist_list[12]
        r40 = dist_list[14]
        multi_rainfall_int.append(r12)
        multi_rainfall_int.append(r16)
        multi_rainfall_int.append(r20)
        multi_rainfall_int.append(r24)
        multi_rainfall_int.append(r28)
        multi_rainfall_int.append(r32)
        multi_rainfall_int.append(r36)
        multi_rainfall_int.append(r40)


        r_list_df = pd.DataFrame({
                        '12mm/hr': r12,
                        '24mm/hr': r24,
                        '36mm/hr': r36

                    })

        #p_list is indexed like R: 0 -> 12 mm/hr, 3 -> 24 mm/hr, 6 -> 36 mm/hr.
        p36 = p_list[6]
        p24 = p_list[3]
        p12 = p_list[0]

        #Hyperspectral 12/24/36 mm/hr results are at odd indices 1, 7 and 13, matching the
        #hyper_rainfall_int mapping below. The previous indices (1 -> r36h, 7 -> r24h,
        #11 -> r12h) put the 12 mm/hr data under '36mm/hr' and the 32 mm/hr data under
        #'12mm/hr' in r_list_dfh.
        r12h = dist_list[1]
        r24h = dist_list[7]
        r36h = dist_list[13]

        r_list_dfh = pd.DataFrame({
                        '12mm/hr': r12h,
                        '24mm/hr': r24h,
                        '36mm/hr': r36h

                    })

        hyper_rainfall_int = []
        r12h = dist_list[1]
        r16h = dist_list[3]
        r20h = dist_list[5]
        r24h = dist_list[7]
        r28h = dist_list[9]
        r32h = dist_list[11]
        r36h = dist_list[13]
        r40h = dist_list[15]
        hyper_rainfall_int.append(r12h)
        hyper_rainfall_int.append(r16h)
        hyper_rainfall_int.append(r20h)
        hyper_rainfall_int.append(r24h)
        hyper_rainfall_int.append(r28h)
        hyper_rainfall_int.append(r32h)
        hyper_rainfall_int.append(r36h)
        hyper_rainfall_int.append(r40h)
        
        #KDE imagery
        #Report, per rainfall intensity, how many multispectral pixels exceed 0.5, 0.9 and 0.99.
        #count starts at 3 so that count*4 matches the first design storm (12 mm/hr).
        count = 3
        for array in multi_rainfall_int:

            x = count_50(array)
            y = count_90(array)
            a = count_99(array)
            z = x/len(array) *100
            z1 = y/len(array) *100
            z2 = a/len(array) *100
            print(f"for an RI of {count *4}, pix above .5 and .9 = {x} ({z: .2f}%) and {y} ({z1: .2f}%).")
            print(f"pix above .99 = {a} ({z2: .2f}%)")
            count+=1

        #Same report for the hyperspectral results.
        counth = 3
        hyper_plot = []
        for array in hyper_rainfall_int:

            x = count_50(array)
            y = count_90(array)
            a = count_99(array)
            z = x/len(array) *100
            z1 = y/len(array) *100
            z2 = a/len(array) *100
            print(f"for an RI of {counth *4}, pix above .5 and .9 = {x} ({z: .2f}%) and {y} ({z1: .2f}%).")
            print(f"pix above .99 = {a} ({z2: .2f}%)")
            counth+=1
        #Indices 0, 3 and 6 of the per-sensor lists are the 12, 24 and 36 mm/hr storms.
        multi_kde = kde_plot(multi_rainfall_int[0], multi_rainfall_int[3], 
                         multi_rainfall_int[6], p12, p24, p36, savefig=False)
        hyper_kde = kde_plot_h(hyper_rainfall_int[0], hyper_rainfall_int[3], hyper_rainfall_int[6],
                            p12, p24, p36, savefig=False)
        
        #General basin plotting. 
        #One comparison figure per row of fig_list_df, i.e. per rainfall intensity.
        for i in range(len(fig_list_df)):

            fig = final_compare_mh(dnbr_img, av_dnbr_clip, 
                             fig_list_df.loc[i, 'ppp'], fig_list_df.loc[i, 'ppp_h'],
                             fig_list_df.loc[i, 'p'],fig_list_df.loc[i, 'R'], 
                             savefig=False)