## Functions for loading and plotting SD and DALEC data from multiple days

Some of this was already prototyped in `SD_8-20Aug22.ipynb`, so this notebook generalises that prototype into reusable functions.

In [4]:
# probably won't need all of this stuff, but why not import it all?

import SD_raster_loading
import SD_NC_loading
import netCDF4
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import dalecLoad
import spectralConv
import os

In [17]:
# now gonna try to look at all SD files:

def load_multiple_SDs(SD_directory, coord, pixel_grid_shape=(3, 3)):
    '''
    Loads all L2R netcdfs in a given directory, then extracts a grid of
    shape=pixel_grid_shape at the given coord from each of them.

    Parameters
    ----------
    SD_directory : path of the folder to search; every file whose name ends
        with "L2R.nc" is loaded.
    coord : two-element sequence. coord[0] and coord[1] are passed on, in that
        order, to `SD_NC_loading.get_SD_NC_Spectra_grid()`
        (presumably latitude then longitude - confirm against that function).
    pixel_grid_shape : forwarded as the `shape` argument of
        `SD_NC_loading.get_SD_NC_Spectra_grid()`. This argument used to be
        ignored in favour of a hard-coded (3, 3); the default is therefore
        (3, 3) so that existing calls keep returning the same grid.

    Returns
    -------
    A pandas DF indexed by (Date, Wavelength) holding the rho_s columns.
    Note that the rho_s columns will be formatted as per
    `SD_NC_loading.get_SD_NC_Spectra_grid()`.
    When several images share a calendar day, only the earliest one (by the
    file's `isodate` attribute) is kept.

    Raises
    ------
    FileNotFoundError if the directory contains no "L2R.nc" files.
    '''
    # os.listdir() gives no ordering guarantee, so sort for reproducibility
    SD_files = sorted(
        os.path.join(SD_directory, name)
        for name in os.listdir(SD_directory)
        if name.endswith("L2R.nc")
    )
    if not SD_files:
        raise FileNotFoundError("No 'L2R.nc' files found in " + str(SD_directory))

    dated_spectra = []
    for path in SD_files:
        # the context manager makes sure each dataset is closed again
        with netCDF4.Dataset(path) as nc_file:
            spectra = SD_NC_loading.get_SD_NC_Spectra_grid(nc_file, coord[0], coord[1],
                                                           shape=pixel_grid_shape)
            dated_spectra.append((nc_file.isodate, spectra))

    # Sort on the timestamp only. Sorting the raw tuples would fall back to
    # comparing DataFrames whenever two timestamps are identical, which raises.
    dated_spectra.sort(key=lambda pair: pair[0])

    frames = []
    previous_day = None  # the first image can never be a duplicate
    for isodate, spectra in dated_spectra:
        day = isodate[:10]  # 'YYYY-MM-DD' part of the ISO timestamp
        if day == previous_day:
            # only one image per day is kept for now
            print('...skipping duplicate date entry')
            continue
        previous_day = day

        frame = spectra.copy()
        frame['Date'] = pd.to_datetime(isodate)
        frame['Date'] = frame['Date'].dt.date  # just removes the time aspect from the variable
        frames.append(frame.set_index(['Date', 'Wavelength']))

    # don't really need to sort, but it keeps the output stable if the above changes
    return pd.concat(frames).sort_values(['Date', 'Wavelength'])

In [26]:
# Folder holding the ACOLITE output netCDFs for the 8-20 Aug scenes.
# NOTE: this is an absolute, machine-specific path - adjust it before re-running elsewhere.
SD_directory = ''.join([
    'C:/Users/daa5/',
    'OneDrive - University of Stirling/',
    'PlanetData/',
    'Airthrey_8-20Aug-TOAR_psscene_analytic_8b_udm2/',
    'acolite_output/',
])

# approximate location of the DALEC (presumably latitude, longitude - confirm)
coord = [56.14693897799395, -3.923458784671348]

# one spectra table per day, indexed by (Date, Wavelength)
SD_df = load_multiple_SDs(SD_directory, coord)
SD_df

...skipping duplicate date entry
...skipping duplicate date entry


Unnamed: 0_level_0,Unnamed: 1_level_0,rho_s_49_86,rho_s_49_87,rho_s_49_88,rho_s_50_86,rho_s_50_87,rho_s_50_88,rho_s_51_86,rho_s_51_87,rho_s_51_88
Date,Wavelength,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022-08-10,444.0,0.014849082,0.017234027,0.013689813,0.01235939,0.015292178,0.014030832,0.0114720315,0.011847491,0.0111647835
2022-08-10,492.0,0.013610188,0.013732793,0.014223221,0.011954372,0.012537072,0.012107725,0.011647631,0.011893033,0.010819308
2022-08-10,533.0,0.024336988,0.023728922,0.022576505,0.02337684,0.022384407,0.021423724,0.021872072,0.021455752,0.022000153
2022-08-10,566.0,0.02928955,0.026712634,0.025994726,0.028898228,0.027299918,0.025407244,0.028311161,0.027365169,0.024950268
2022-08-10,612.0,0.021671271,0.020159846,0.020778213,0.0168949,0.016929273,0.018407362,0.018407362,0.018029293,0.018441731
2022-08-10,666.0,0.019844772,0.02232088,0.020670263,0.01711617,0.017331634,0.015966881,0.015751362,0.014601784,0.014961051
2022-08-10,707.0,0.04264811,0.041237798,0.038301744,0.03692855,0.0368904,0.033723194,0.030363355,0.032196227,0.030325167
2022-08-10,866.0,0.06340983,0.06696183,0.07192138,0.05546401,0.05813098,0.052272867,0.057346676,0.052115895,0.047928654
2022-08-11,444.0,0.012887616,0.012312349,0.012752273,0.0104163885,0.010788912,0.010890513,0.008485096,0.009332335,0.010484124
2022-08-11,492.0,0.016868625,0.01659519,0.01553158,0.014984443,0.013859486,0.013160004,0.014102763,0.012734146,0.012947078


In [32]:
def load_SD_summarise_multiple_DALEC_days(DALEC_directory, RSR_doves_file='non-DALEC-data/RSR-Superdove.csv',
                                          file_names=None, dalec_summary_function=dalecLoad.uniform_grid_spectra_mean,
                                          DALEC_col_name='DALEC_mean_Rrs'):
    '''
    Loads multiple DALEC log files and carries out the specified summary operation on each of them.
    Each summary is then resampled to the superdoves wavebands and the results are collected in a
    dataframe indexed by Date and Wavelength, with a single data column named DALEC_col_name.

    Parameters
    ----------
    DALEC_directory : folder containing the DALEC log files.
    RSR_doves_file : csv of the superdoves relative spectral response; it must contain a
        'Wavelength (nm)' column, and is passed whole to `spectralConv.SD_band_calc()`.
    file_names : optional list of file names inside DALEC_directory. If None, every
        DALEC transect (.dtf) file in the directory is loaded.
    dalec_summary_function : called as f(dalec_log, spect_wavelengths); its result must
        provide an 'Rrs_mean' column. Only `dalecLoad.uniform_grid_spectra_mean` has
        been tested - a warning is printed for anything else.
    DALEC_col_name : name given to the resampled DALEC column in the output.

    Returns
    -------
    pandas DataFrame indexed by (Date, Wavelength). The Date of a file is taken from the
    first ' UTC Date' entry of its log.

    Raises
    ------
    FileNotFoundError if there are no DALEC files to load.
    '''
    supported_functions = [dalecLoad.uniform_grid_spectra_mean]
    if dalec_summary_function not in supported_functions:
        print("I've not tested this summary function yet! It might not (probably won't) work! ")

    RSR_doves = pd.read_csv(RSR_doves_file)

    if file_names is None:  # if None, then load all DALEC transect (.dtf) files in the directory
        # os.listdir() gives no ordering guarantee, so sort for reproducibility
        DALEC_files = sorted(
            os.path.join(DALEC_directory, name)
            for name in os.listdir(DALEC_directory)
            if name.endswith(".dtf")
        )
    else:
        # os.path.join also copes with a directory given without a trailing separator
        DALEC_files = [os.path.join(DALEC_directory, name) for name in file_names]

    if not DALEC_files:
        raise FileNotFoundError('No DALEC files to load from ' + str(DALEC_directory))

    # assuming that the spectral wavelength info is the same for each file
    # this is almost definitely always the case... (unless perhaps we used a different DALEC?)
    spect_wavelengths = dalecLoad.load_DALEC_spect_wavelengths(DALEC_files[0])
    # would be a bit nicer to not hard code these, but would require also reading a SD file
    # see SD_NC_loading.get_SD_NC_Spectra() for how to do this
    doves_wavelengths = [444., 492., 533., 566., 612., 666., 707., 866.]

    # loading DALEC files is not that fast...
    # currently some weird stuff will happen if we include log files which are from serial output
    # basically need dalecLoad.load_DALEC_log() to be super robust for this to work!
    daily_frames = []
    for file in DALEC_files:
        print('loading ... ' + str(file))
        dalec_log = dalecLoad.load_DALEC_log(file)
        mean_spect = dalec_summary_function(dalec_log, spect_wavelengths)
        DALEC_SD = spectralConv.SD_band_calc(RSR_doves, mean_spect['Rrs_mean'].values,
                                             RSR_doves['Wavelength (nm)'].values)
        daily = pd.DataFrame(data=DALEC_SD, columns=[DALEC_col_name])
        daily['Date'] = pd.to_datetime(dalec_log[' UTC Date'].iloc[0])
        daily['Date'] = daily['Date'].dt.date  # just removes the time aspect from the variable
        daily['Wavelength'] = doves_wavelengths
        daily_frames.append(daily.set_index(['Date', 'Wavelength']))

    # concatenate once at the end rather than growing the frame inside the loop
    return pd.concat(daily_frames)





In [34]:
# Summarise every DALEC transect log in the folder, using the default mean-spectrum summary
DALEC_df = load_SD_summarise_multiple_DALEC_days(DALEC_directory='data/log82_89/')
DALEC_df

loading ... data/log82_89/LOG_0082.dtf
loading ... data/log82_89/LOG_0083.dtf
loading ... data/log82_89/LOG_0084.dtf
loading ... data/log82_89/LOG_0085.dtf
loading ... data/log82_89/LOG_0086.dtf
loading ... data/log82_89/LOG_0087.dtf
loading ... data/log82_89/LOG_0088.dtf
loading ... data/log82_89/LOG_0089.dtf


Unnamed: 0_level_0,Unnamed: 1_level_0,DALEC_mean_Rrs
Date,Wavelength,Unnamed: 2_level_1
2022-08-08,444.0,0.001780
2022-08-08,492.0,0.002995
2022-08-08,533.0,0.005177
2022-08-08,566.0,0.006222
2022-08-08,612.0,0.004774
...,...,...
2022-08-15,566.0,0.012465
2022-08-15,612.0,0.008536
2022-08-15,666.0,0.007134
2022-08-15,707.0,0.011419


In [35]:
# Left-join the SuperDove spectra onto the DALEC summary by (Date, Wavelength).
# dropna() then discards every row with a missing value, so only the dates
# (and wavebands) present in both data sets remain.
superDuperDF = (
    DALEC_df
    .join(SD_df, on=['Date', 'Wavelength'])
    .dropna()
)
superDuperDF

Unnamed: 0_level_0,Unnamed: 1_level_0,DALEC_mean_Rrs,rho_s_49_86,rho_s_49_87,rho_s_49_88,rho_s_50_86,rho_s_50_87,rho_s_50_88,rho_s_51_86,rho_s_51_87,rho_s_51_88
Date,Wavelength,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-08-10,444.0,0.002926,0.014849082,0.017234027,0.013689813,0.01235939,0.015292178,0.014030832,0.0114720315,0.011847491,0.0111647835
2022-08-10,492.0,0.003997,0.013610188,0.013732793,0.014223221,0.011954372,0.012537072,0.012107725,0.011647631,0.011893033,0.010819308
2022-08-10,533.0,0.006104,0.024336988,0.023728922,0.022576505,0.02337684,0.022384407,0.021423724,0.021872072,0.021455752,0.022000153
2022-08-10,566.0,0.007177,0.02928955,0.026712634,0.025994726,0.028898228,0.027299918,0.025407244,0.028311161,0.027365169,0.024950268
2022-08-10,612.0,0.005585,0.021671271,0.020159846,0.020778213,0.0168949,0.016929273,0.018407362,0.018407362,0.018029293,0.018441731
2022-08-10,666.0,0.004232,0.019844772,0.02232088,0.020670263,0.01711617,0.017331634,0.015966881,0.015751362,0.014601784,0.014961051
2022-08-10,707.0,0.006749,0.04264811,0.041237798,0.038301744,0.03692855,0.0368904,0.033723194,0.030363355,0.032196227,0.030325167
2022-08-10,866.0,0.001269,0.06340983,0.06696183,0.07192138,0.05546401,0.05813098,0.052272867,0.057346676,0.052115895,0.047928654
2022-08-11,444.0,0.002647,0.012887616,0.012312349,0.012752273,0.0104163885,0.010788912,0.010890513,0.008485096,0.009332335,0.010484124
2022-08-11,492.0,0.003834,0.016868625,0.01659519,0.01553158,0.014984443,0.013859486,0.013160004,0.014102763,0.012734146,0.012947078


In [None]:
def multiDaySpectraPlot(superDuperDF, dalec_col='DALEC_mean_Rrs', ylim=(0, 0.03), panel_size=(25 / 3, 7)):
    '''
    Plots one panel per date, comparing the DALEC spectrum with the SuperDove spectra.

    Parameters
    ----------
    superDuperDF : DataFrame indexed by (Date, Wavelength) that holds the DALEC column
        plus one column per SuperDove pixel.
    dalec_col : name of the DALEC column; every other column is plotted as a SuperDove
        pixel spectrum.
    ylim : y-axis limits applied to every panel.
    panel_size : (width, height) in inches of a single panel; the default gives the
        25 x 7 figure previously used for three dates.

    Returns
    -------
    (fig, axes) where axes is a 1-D array holding one matplotlib Axes per date.

    Raises
    ------
    ValueError if superDuperDF contains no dates.
    '''
    dates = superDuperDF.index.get_level_values(0).unique()
    n_days = len(dates)
    if n_days == 0:
        raise ValueError('superDuperDF contains no dates to plot')

    # squeeze=False keeps `axes` 2-D even when there is only a single date
    fig, axes = plt.subplots(1, n_days,
                             figsize=(panel_size[0] * n_days, panel_size[1]),
                             squeeze=False)
    axes = axes[0]

    SD_cols = [col for col in superDuperDF.columns if col != dalec_col]

    for ax, date in zip(axes, dates):
        day = superDuperDF.loc[[date]]
        # take the wavelengths of this date only, so x and y always have equal length
        x = day.index.get_level_values(1)

        ax.plot(x, day[dalec_col].values, label='DALEC Mean')
        for k, col in enumerate(SD_cols):
            # the SuperDove values are divided by pi so they can be shown on the Rrs axis
            ax.plot(x, day[col].values / np.pi,
                    color='red',
                    # label only the first pixel so the legend has a single SuperDove entry
                    label='SuperDoves @ DALEC loc.' if k == 0 else '_nolegend_',
                    marker='o',
                    alpha=0.2)

        ax.set_ylim(list(ylim))
        ax.set_title(str(date))
        ax.set_xlabel('Wavelength (nm)')
        ax.set_ylabel('$R_{rs}$ $(sr^{-1}$)')
        ax.legend()
        ax.grid()

    return fig, axes


fig, ax = multiDaySpectraPlot(superDuperDF)
plt.show()