This notebook defines some functions that can be read into the primary notebook and applied to the EUSEDcollab database. The first is a basic function that expands every database entry into the individual years it covers. This allows the total number of entries covering a particular year to be counted.

In [10]:
import pandas as pd 
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt 
import os
import json
import ipynb

In [11]:
def get_n_unique_years(md_all):
    '''
    Expand every record of the EUSEDcollab metadata table into the calendar
    years that it covers. E.g. a record that starts in 2000 and ends in 2010
    contributes the integers 2000, 2001, ..., 2010. The years of all records
    are concatenated (duplicates are kept) so that the number of entries
    covering each year can be counted afterwards.

    Parameters
    ----------
    md_all : DATAFRAME
        The dataframe with the full metadata table of EUSEDcollab. The
        'Start year' and 'End year' columns are read.

    Returns
    -------
    DATAFRAME
        A dataframe with a single column 'year' that holds one row per record
        and covered year

    '''
    first_years = md_all['Start year'].values
    last_years = md_all['End year'].values

    covered_years = []
    for first, last in zip(first_years, last_years):
        #np.arange excludes the stop value, so 1 is added to keep the end year
        covered_years.extend(np.arange(first, last + 1, 1))

    return pd.DataFrame(covered_years, columns = ['year'])

Define a function to return some average annual quantifications for each time series input. This gives a relatively simple example of how we can extract annual average sediment yields from the data time series while taking into account the completeness of each record (e.g. removing years in which there are an insufficient number of data points).

In [12]:
def get_avg_y_vals(ts_path, d_format, min_completeness):
    '''
    Get summary statistics (annual average values) of a time series entry 
    of EUSEDcollab. The time series is aggregated to annual sums and the
    annual sums are summarised. For the fixed timestep formats, years with an
    insufficient number of data points are removed before averaging.

    Parameters
    ----------
    ts_path : STRING
        The path to the csv file with the time series entry of EUSEDcollab.
        The file needs a column containing 'SSL' (sediment load), a column
        containing 'Q' but not 'max' (discharge) and a column containing
        'date' or 'Date'. If several columns match, the last one is used.
    d_format : STRING
        The data time series type. The completeness filter is only applied
        when this is exactly 'Daily data - fixed timestep' or 'Monthly data'.
    min_completeness : INTEGER
        The minimum % completeness of the data entries for each year to include
        in the average. A year is kept if its completeness is strictly larger.

    Returns
    -------
    Dictionary
        A dictionary with the keys 'SedY_med t yr-1', 'SedY_mean t yr-1', 
        'SedY_CV', 'Q_med m3 yr-1', 'Q_mean m3 yr-1', 'N years in avg', 
        'SedY_min t yr-1' and 'SedY_max t yr-1'. All values are nan if more 
        than 50% of the sediment load values are missing.
    '''
    #names of the returned quantifications (in output order)
    cols = ['SedY_med t yr-1', 'SedY_mean t yr-1', 'SedY_CV', 'Q_med m3 yr-1',
            'Q_mean m3 yr-1','N years in avg', 'SedY_min t yr-1', 
            'SedY_max t yr-1']
    #number of timesteps in a complete year for the fixed timestep formats
    steps_per_year = {'Daily data - fixed timestep': 365, 'Monthly data': 12}

    #read time series based on provided csv path
    ts = pd.read_csv(ts_path)
    #find the relevant columns and assign them to variables
    for col in ts.columns:
        if 'SSL' in col:
            sed_y_col = col
        if 'Q' in col and 'max' not in col:
            Q_col = col
        if 'date' in col or 'Date' in col:
            date_col = col

    #set the dataframe index to the datetime column
    #NOTE(review): dates are parsed with the pandas defaults (no dayfirst) -
    #confirm that this matches the date format stored in the csv files
    ts = ts.set_index(pd.to_datetime(ts[date_col]))

    #Make an initial evaluation of the number of nan values in the time series 
    #if it exceeds 50%, skip the record and return nan for every result
    pcnt_missing = (ts[sed_y_col].isna().sum() / len(ts)) * 100
    if pcnt_missing > 50:
        return dict.fromkeys(cols, np.nan)

    #an offset object is used instead of the 'Y' string alias because the 
    #alias is deprecated in recent pandas releases
    year_end = pd.offsets.YearEnd()

    #resample to the annual sum. Only the two columns that are summarised are
    #aggregated so that text columns (e.g. the raw date strings) are left out
    needed_cols = list(dict.fromkeys([sed_y_col, Q_col]))
    ts_y = ts[needed_cols].resample(year_end).sum()

    #if data is a fixed timestep format we can exclude years with insufficient data
    #(exact match on the format name, a partial name does not qualify)
    if d_format in steps_per_year:
        #count() only counts the non-nan sediment load values of each year
        n_obs = ts[sed_y_col].resample(year_end).count()
        pcnt_complete = n_obs / steps_per_year[d_format] * 100
        ts_y = ts_y[pcnt_complete > min_completeness]

    sed_y = ts_y[sed_y_col]
    Q_y = ts_y[Q_col]

    #get some annual average values from the timeseries
    #convert sediment values to tonnes (assumes the SSL column is in kg)
    results = {
        'SedY_med t yr-1': sed_y.median() / 1000,
        'SedY_mean t yr-1': sed_y.mean() / 1000,
        #coefficient of variation of the annual sediment yield
        'SedY_CV': sed_y.std() / sed_y.mean(),
        'Q_med m3 yr-1': Q_y.median(),
        'Q_mean m3 yr-1': Q_y.mean(),
        #number of years included in the average
        'N years in avg': sed_y.count(),
        'SedY_min t yr-1': sed_y.min() / 1000,
        'SedY_max t yr-1': sed_y.max() / 1000,
    }

    return results

Define some plotting functions to create summary-level plots of the EUSEDcollab database. These two functions give overviews of the relationships between: 1) the specific annual average sediment yield and the catchment area, and 2) the annual average sediment yield and the annual average discharge.

In [13]:
def plot_sedY_vs_area(md_all, colour_var):
    '''
    Plot the specific annual average sediment yield of every entry against 
    its catchment drainage area on log-log axes.

    Parameters
    ----------
    md_all : DATAFRAME
        The metadata table. The columns 'Drainage area km2' and 
        'SedY_mean t km-2 yr-1' are plotted.
    colour_var : STRING
        The name of the column in md_all that sets both the colour and the 
        size of the points

    '''
    xvar = 'Drainage area km2'
    yvar = 'SedY_mean t km-2 yr-1'

    f, ax = plt.subplots(figsize=(15, 10))
    sns.despine(f, left=True, bottom=True)
    sns.scatterplot(data = md_all, x = xvar, y = yvar, 
                    hue = colour_var, size = colour_var, sizes=(20, 300), ax = ax)

    ax.set_yscale('log')
    ax.set_xscale('log')
    #raw strings keep the mathtext backslashes without invalid escape sequences
    ax.set_ylabel(r'Specific sediment yield (t $\mathregular{km^{-2}}$ $\mathregular{yr^{-1}}$)')
    ax.set_xlabel(r'Catchment drainage area ($\mathregular{km^{2}}$)')
    #the legend title is fixed and does not follow colour_var
    ax.legend(title='N years in average')

  
def plot_sedY_vs_Q(md_all, colour_var, size_var, export = False, export_path = None):
    '''
    Plot the annual average sediment yield of every entry against its annual
    average water discharge on log-log axes. A vertical bar per entry spans 
    the range between the minimum and maximum annual sediment yield.

    Parameters
    ----------
    md_all : DATAFRAME
        The metadata table. The columns 'Q_mean m3 yr-1', 'SedY_mean t yr-1', 
        'SedY_min t yr-1' and 'SedY_max t yr-1' are read. Rows without a mean
        sediment yield are left out.
    colour_var : STRING
        The name of the column in md_all that sets the colour of the points
    size_var : STRING
        The name of the column in md_all that sets the size of the points
    export : BOOLEAN, optional
        If True and export_path is given, the figure is saved to export_path. 
        The default is False.
    export_path : STRING, optional
        The file path to save the figure to. The default is None.

    '''
    xvar = 'Q_mean m3 yr-1'
    yvar = 'SedY_mean t yr-1'
    est_upper = 'SedY_max t yr-1' 
    est_lower = 'SedY_min t yr-1'

    #only entries with an annual average sediment yield can be plotted
    md_all = md_all[md_all[yvar].notna()]

    #asymmetric error bars: the distance from the mean down to the annual 
    #minimum and from the mean up to the annual maximum
    e_lower = md_all[yvar].values - md_all[est_lower].values
    e_upper = md_all[est_upper].values - md_all[yvar].values

    f, ax = plt.subplots(figsize=(15, 10))
    sns.despine(f, left=True, bottom=True)

    ax.errorbar(x = md_all[xvar], y = md_all[yvar], yerr = [e_lower, e_upper], 
                fmt = 'none', elinewidth = 10, alpha = 0.3, capsize = 15)

    sns.scatterplot(data = md_all, x = xvar, y = yvar,
                    hue = colour_var, size = size_var,
                    sizes=(100, 600), ax = ax)

    ax.set_yscale('log')
    ax.set_xscale('log')
    #raw strings keep the mathtext backslashes without invalid escape sequences
    ax.set_ylabel(r'Annual average sediment yield (t $\mathregular{yr^{-1}}$)')
    ax.set_xlabel(r'Annual average water discharge ($\mathregular{m^{3}}$ $\mathregular{yr^{-1}}$)')
    #place the legend outside of the axes (top right). A single call is used:
    #every legend call replaces the previous legend of the axes
    ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

    #save the figure if requested. bbox_inches='tight' keeps the legend, which
    #sits outside of the axes, inside the saved image
    if export and export_path is not None:
        f.savefig(export_path, bbox_inches='tight')

Now we define a function for individual catchment-level plots. We can use this later to inspect some individual time series of catchments that we may be interested in analysing.

In [14]:
def generate_catchment_plots_old(df_ts, data_format):
    '''
    Generate a selection of statistical overview plots from the time series 
    record of an individual catchment in EUSEDcollab. Four panels are drawn: 
    the annual sediment load with the number of data points per year, a 
    histogram of the sediment load, the sediment load against the discharge
    and box plots of the sediment load per month.

    Parameters
    ----------
    df_ts : DATAFRAME
        The dataframe with an individual time series from a catchment. It 
        needs a datetime index, a column containing 'SSL' and a column 
        containing 'Q'. A column containing 'SSC' is optional. The event 
        time series formats also need the columns 'Event_index' and 
        'Date (DD/MM/YYYY)'. The dataframe is not modified.
    data_format : STRING
        The time series format of the data. One of 'Daily data - fixed timestep',
        'Event data - variable timestep', 'Event data - fixed timestep', 
        'Event data - aggregated' or 'Monthly data'.

    Raises
    ------
    ValueError
        If data_format is not one of the supported formats or if a required 
        column is not found

    '''
    #find the relevant column keys (if several columns match, the last is used)
    sed_key = Q_key = ssc_key = None
    for col in df_ts.columns:
        if 'SSL' in col:
            sed_key = col
        elif 'Q' in col:
            Q_key = col
        elif 'SSC' in col:
            #not all datasets have an ssc column (e.g. monthly data)
            ssc_key = col

    if sed_key is None:
        raise ValueError("no column containing 'SSL' found in the time series")

    #an offset object is used instead of the 'Y' string alias because the 
    #alias is deprecated in recent pandas releases
    year_end = pd.offsets.YearEnd()

    #create a dataframe with the annual sum by resampling time series
    #only the sediment load is aggregated, the other columns are not plotted
    ts_y = df_ts[[sed_key]].resample(year_end).sum()
    ts_y['Year'] = ts_y.index.year
    #convert to tonnes (assumes the SSL column is in kg)
    ts_y['SSL (t yr-1)'] = ts_y[sed_key] / 1000

    #upper limit of the count axis, only known for the fixed timestep formats
    n_ts = None

    #format dataframes to get relevant fields for plotting
    #this is done in a specific way for each time series type
    if data_format == 'Daily data - fixed timestep':

        sed_key_t = 'SSL (t d-1)'
        #count days in annual average 
        ts_y['Count'] = df_ts[sed_key].resample(year_end).count()
        #remove values under 1 kg d-1 - insignificant
        df_ts_h = df_ts[df_ts[sed_key] >= 1].copy(deep=True)
        #define the labels for plotting
        ly1 = 'Number of days in data record'
        ly2 = r'Suspended sediment load ($\mathregular{t \ d^{-1}}$)'
        #the plotted sediment load is in tonnes
        ly3 = r"Sediment load ($\mathregular{t \ d^{-1}}$)"
        lx3 = r"Water discharge ($\mathregular{m^{3} \ day^{-1}}$)" 
        ly4 = r"Sediment load ($\mathregular{t \ d^{-1}}$)"
        n_ts = 366

    elif data_format == 'Event data - variable timestep' or data_format == 'Event data - fixed timestep':

        sed_key_t = 'SSL (t event-1)'
        #count the number of events per year. needs an event index column
        #the counts are matched by year so that years without any event get 0
        events_per_year = df_ts.groupby(df_ts.index.year)['Event_index'].nunique()
        ts_y['Count'] = ts_y['Year'].map(events_per_year).fillna(0).astype(int)
        #aggregate the timeseries into events
        events = df_ts.groupby('Event_index')
        df_ts_h = events.sum(numeric_only=True)
        #replace the ssc with the mean instead of sum
        if ssc_key is not None:
            df_ts_h[ssc_key] = events[ssc_key].mean()
        #add the first date of the event as the index 
        df_ts_h.index = events['Date (DD/MM/YYYY)'].first()

        #define the labels for plotting
        ly1 = 'Number of events in data record'
        ly2 = r'Suspended sediment load ($\mathregular{t \ event^{-1}}$)'
        ly3 = r"Sediment load ($\mathregular{t \ event^{-1}}$)"  
        lx3 = r"Water discharge ($\mathregular{m^{3} \ event^{-1}}$)" 
        ly4 = r"Sediment load ($\mathregular{t \ event^{-1}}$)"

    elif data_format == 'Event data - aggregated':

        sed_key_t = 'SSL (t event-1)'
        #get the count per year
        ts_y['Count'] = df_ts[sed_key].resample(year_end).count()
        #remove insignificant events
        df_ts_h = df_ts[df_ts[sed_key] >= 1].copy(deep=True)

        #define the labels for plotting
        ly1 = 'Number of events in data record'
        ly2 = 'Suspended sediment load (t event-1)'
        ly3 = r"Sediment load ($\mathregular{t \ event^{-1}}$)"
        lx3 = r"Water discharge ($\mathregular{m^{3} \ event^{-1}}$)" 
        ly4 = r"Sediment load ($\mathregular{t \ event^{-1}}$)"

    elif data_format == 'Monthly data':

        sed_key_t = 'SSL (t month-1)'
        #count the months in the annual sum
        ts_y['Count'] = df_ts[sed_key].resample(year_end).count()
        #remove values under 1 kg - insignificant
        df_ts_h = df_ts[df_ts[sed_key] >= 1].copy(deep=True)

        #define the labels for plotting
        ly1 = 'Number of months in data record'
        ly2 = r'Suspended sediment load ($\mathregular{t \ month^{-1}}$)'
        ly3 = r"Sediment load ($\mathregular{t \ month^{-1}}$)"
        lx3 = Q_key
        ly4 = r"Sediment load ($\mathregular{t \ month^{-1}}$)"
        n_ts = 13

    else:
        raise ValueError('specify a compatible data format')

    #get ssl in tonnes
    df_ts_h[sed_key_t] = df_ts_h[sed_key] / 1000
    #set the year as the dataframe index
    ts_y = ts_y.set_index('Year')
    #add a column with the month 
    df_ts_h['Month'] = df_ts_h.index.month

    if Q_key is None:
        raise ValueError("no column containing 'Q' found in the time series")

    #set the plotting style and initiate a subplots figure
    sns.set_style('darkgrid')
    sns.set(font_scale = 2)
    fig = plt.figure(figsize=(25,10), constrained_layout = True) # Create matplotlib figure   

    #the first subplot is a bar chart of the annual SSL and a count of the data
    #contained in the sum (e.g. n events or n days/months)
    ax = fig.add_subplot(2,2,1) # Create matplotlib axes
    ax2 = ax.twinx() # Create another axes that shares the same x-axis as ax
    width = .3
    ts_y['SSL (t yr-1)'].plot(kind='bar',ax=ax,width=width, position=1, alpha = 0.8)
    ts_y['Count'].plot(kind='bar', edgecolor = 'black', fill = False , ax=ax2, width = width,position=0)
    ax.set_xlabel("Year")
    ax.set_ylabel(r"Sediment load ($\mathregular{t \ yr^{-1}}$)")
    ax2.set_ylabel(ly1)
    if n_ts is not None:
        ax2.set_ylim((0, n_ts))
    ax.grid(False)
    ax2.grid(False)

    #the second plot is a histogram of the sediment yield distribution
    ax = fig.add_subplot(2,2,2) # Create matplotlib axes
    ax.hist(x = df_ts_h[sed_key_t], bins = 100, log = True, color = 'black',
            alpha=.5)
    ax.set_ylabel("Count")
    ax.set_xlabel(ly2)
    ax.grid(False)

    #the third is a plot of the discharge vs the ssl
    ax = fig.add_subplot(2,2,3) # Create matplotlib axes
    if ssc_key is not None:
        sns.scatterplot(x = Q_key, y = sed_key_t, hue = ssc_key , data = df_ts_h, ax = ax) 
    else:
        sns.scatterplot(x = Q_key, y = sed_key_t, data = df_ts_h, ax = ax) 

    #a legend only exists if the points are coloured by ssc
    if data_format != 'Monthly data' and ax.get_legend() is not None:
        sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))

    ax.set_ylabel(ly3)
    ax.set_xlabel(lx3)
    ax.grid(False)

    #the last are box plots of the sediment load distribution per month
    ax = fig.add_subplot(2,2,4) # Create matplotlib axes
    sns.boxplot(x = 'Month', y = sed_key_t, data = df_ts_h, ax = ax, color = 'black', 
                boxprops=dict(alpha=.5))
    ax.set_ylabel(ly4)
    ax.set_xlabel('Month')
    ax.grid(False)

The function below is used to create overview plots of the time series data for a desired catchment in EUSEDcollab. The function needs a time series dataframe for the catchment as well as a 'data_format' string (extracted from the catchment metadata) in order to format the data in the appropriate way.

In [None]:
def generate_catchment_plots(df_ts, data_format, plot = True):
    '''
    Generate a selection of statistical overview plots from the time series 
    record of an individual catchment in EUSEDcollab. Four panels are drawn: 
    the annual sediment load with the number of data points per year, a 
    histogram of the sediment load, the sediment load against the discharge
    (log-log axes) and box plots of the sediment load per month.

    Parameters
    ----------
    df_ts : DATAFRAME
        The dataframe with an individual time series from a catchment. It 
        needs a datetime index, a column containing 'SSL' and a column 
        containing 'Q'. A column containing 'SSC' is optional. The event 
        time series formats also need the columns 'Event_index' and 
        'Date (DD/MM/YYYY)'. The dataframe is not modified.
    data_format : STRING
        The time series format of the data. One of 'Daily data - fixed timestep',
        'Event data - variable timestep', 'Event data - fixed timestep', 
        'Event data - aggregated' or 'Monthly data'.
    plot : BOOLEAN, optional
        If False, the data are only formatted and no figure is created. 
        The default is True.

    Raises
    ------
    ValueError
        If data_format is not one of the supported formats or if a required 
        column is not found

    '''
    #find the relevant column keys (if several columns match, the last is used)
    sed_key = Q_key = ssc_key = None
    for col in df_ts.columns:
        if 'SSL' in col:
            sed_key = col
        elif 'Q' in col:
            Q_key = col
        elif 'SSC' in col:
            #not all datasets have an ssc column (e.g. monthly data)
            ssc_key = col

    if sed_key is None:
        raise ValueError("no column containing 'SSL' found in the time series")

    #an offset object is used instead of the 'Y' string alias because the 
    #alias is deprecated in recent pandas releases
    year_end = pd.offsets.YearEnd()

    #create a dataframe with the annual sum by resampling time series
    #only the sediment load is aggregated, the other columns are not plotted
    ts_y = df_ts[[sed_key]].resample(year_end).sum()
    ts_y['Year'] = ts_y.index.year
    #convert to tonnes (assumes the SSL column is in kg)
    ts_y['SSL (t yr-1)'] = ts_y[sed_key] / 1000

    #upper limit of the count axis, only known for the fixed timestep formats
    n_ts = None

    #format dataframes to get relevant fields for plotting
    #this is done in a specific way for each time series type
    if data_format == 'Daily data - fixed timestep':

        sed_key_t = 'SSL (t d-1)'
        #count days in annual average 
        ts_y['Count'] = df_ts[sed_key].resample(year_end).count()
        #remove values under 1 kg d-1 - insignificant
        df_ts_h = df_ts[df_ts[sed_key] >= 1].copy(deep=True)
        #define the labels for plotting
        ly1 = 'N days in data record'
        ly2 = r'Suspended sediment load ($\mathregular{t \ d^{-1}}$)'
        #the plotted sediment load is in tonnes
        ly3 = r"Sediment load ($\mathregular{t \ d^{-1}}$)"
        lx3 = r"Water discharge ($\mathregular{m^{3} \ day^{-1}}$)" 
        ly4 = r"Sediment load ($\mathregular{t \ d^{-1}}$)"
        n_ts = 366

    elif data_format == 'Event data - variable timestep' or data_format == 'Event data - fixed timestep':

        sed_key_t = 'SSL (t event-1)'
        #count the number of events per year. needs an event index column
        #the counts are matched by year so that years without any event get 0
        events_per_year = df_ts.groupby(df_ts.index.year)['Event_index'].nunique()
        ts_y['Count'] = ts_y['Year'].map(events_per_year).fillna(0).astype(int)
        #aggregate the timeseries into events
        events = df_ts.groupby('Event_index')
        df_ts_h = events.sum(numeric_only=True)
        #replace the ssc with the mean instead of sum
        if ssc_key is not None:
            df_ts_h[ssc_key] = events[ssc_key].mean()
        #add the first date of the event as the index 
        df_ts_h.index = events['Date (DD/MM/YYYY)'].first()

        #define the labels for plotting
        ly1 = 'N events in data record'
        ly2 = r'Suspended sediment load ($\mathregular{t \ event^{-1}}$)'
        ly3 = r"Sediment load ($\mathregular{t \ event^{-1}}$)"  
        lx3 = r"Water discharge ($\mathregular{m^{3} \ event^{-1}}$)" 
        ly4 = r"Sediment load ($\mathregular{t \ event^{-1}}$)"

    elif data_format == 'Event data - aggregated':

        sed_key_t = 'SSL (t event-1)'
        #get the count per year
        ts_y['Count'] = df_ts[sed_key].resample(year_end).count()
        #remove insignificant events
        df_ts_h = df_ts[df_ts[sed_key] >= 1].copy(deep=True)

        #define the labels for plotting
        ly1 = 'N events in data record'
        ly2 = 'Suspended sediment load (t event-1)'
        ly3 = r"Sediment load ($\mathregular{t \ event^{-1}}$)"
        lx3 = r"Water discharge ($\mathregular{m^{3} \ event^{-1}}$)" 
        ly4 = r"Sediment load ($\mathregular{t \ event^{-1}}$)"

    elif data_format == 'Monthly data':

        sed_key_t = 'SSL (t month-1)'
        #count the months in the annual sum
        ts_y['Count'] = df_ts[sed_key].resample(year_end).count()
        #remove values under 1 kg - insignificant
        df_ts_h = df_ts[df_ts[sed_key] >= 1].copy(deep=True)

        #define the labels for plotting
        ly1 = 'N months in data record'
        ly2 = r'Suspended sediment load ($\mathregular{t \ month^{-1}}$)'
        ly3 = r"Sediment load ($\mathregular{t \ month^{-1}}$)"
        lx3 = Q_key
        ly4 = r"Sediment load ($\mathregular{t \ month^{-1}}$)"
        n_ts = 13

    else:
        raise ValueError('specify a compatible data format')

    #get ssl in tonnes
    df_ts_h[sed_key_t] = df_ts_h[sed_key] / 1000
    #set the year as the dataframe index
    ts_y = ts_y.set_index('Year')
    #add a column with the month 
    df_ts_h['Month'] = df_ts_h.index.month

    if not plot:
        return

    if Q_key is None:
        raise ValueError("no column containing 'Q' found in the time series")

    #set the plotting style and initiate a subplots figure
    sns.set_style('darkgrid')
    sns.set(font_scale = 2.5)
    fig = plt.figure(figsize=(25,10), constrained_layout = True) # Create matplotlib figure   

    #the first subplot is a bar chart of the annual SSL and a count of the data
    #contained in the sum (e.g. n events or n days/months)
    ax = fig.add_subplot(2,2,1) # Create matplotlib axes
    ax2 = ax.twinx() # Create another axes that shares the same x-axis as ax
    width = .3
    ts_y['SSL (t yr-1)'].plot(kind='bar',color = 'royalblue', ax=ax,width=width, position=1, alpha = 0.8)
    ts_y['Count'].plot(kind='bar', edgecolor = 'black', fill = False , ax=ax2, width = width,position=0)
    ax.set_xlabel("Year")
    ax.set_ylabel(r"Sediment load ($\mathregular{t \ yr^{-1}}$)")
    ax2.set_ylabel(ly1)
    if n_ts is not None:
        ax2.set_ylim((0, n_ts))
    ax.grid(False)
    ax2.grid(False)

    #the second plot is a histogram of the sediment yield distribution
    ax = fig.add_subplot(2,2,2) # Create matplotlib axes
    ax.hist(x = df_ts_h[sed_key_t], bins = 50, log = True, color = 'royalblue',
            alpha=.8)
    ax.set_ylabel("Count")
    ax.set_xlabel(ly2)
    ax.grid(False)

    #the third is a plot of the discharge vs the ssl
    ax = fig.add_subplot(2,2,3) # Create matplotlib axes
    if ssc_key is not None:
        sns.scatterplot(x = Q_key, y = sed_key_t, hue = ssc_key, 
                        palette = 'mako', data = df_ts_h, s = 150, ax = ax) 
    else:
        sns.scatterplot(x = Q_key, y = sed_key_t, data = df_ts_h, ax = ax) 

    #a legend only exists if the points are coloured by ssc. Moving a legend
    #that does not exist raises an error
    if ax.get_legend() is not None:
        sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
    ax.set_ylabel(ly3)
    ax.set_xlabel(lx3)
    ax.set_yscale('log')
    ax.set_xscale('log')
    ax.grid(False)

    #the last are box plots of the sediment load distribution per month
    ax = fig.add_subplot(2,2,4) # Create matplotlib axes
    sns.boxplot(x = 'Month', y = sed_key_t, data = df_ts_h, ax = ax, color = 'royalblue', 
                boxprops=dict(alpha=.8))
    ax.set_ylabel(ly4)
    ax.set_xlabel('Month')
    ax.set_yscale('log')
    ax.grid(False)