# Solid ice discharge in southeast Greenland fjords

## Purpose
This notebook generates figures and spreadsheets of solid ice discharge for each fjord in the study. The plots show the discharge time series as well as the mean annual, mean hydrological year, and mean seasonal discharge for each fjord. The spreadsheets contain cumulative and per-fjord statistics about solid ice discharge.

## Requirements
This notebook analyzes data from Mankoff _et al._ (2020), which can be downloaded here: https://doi.org/10.22008/promice/data/ice_discharge/d/v02. Set `solid_ice_dir` in the user parameters below to the directory where you saved those files.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pathlib

In [None]:
# User parameters

# Analysis window; samples on both the start and end date are kept
startdate, enddate = '2015-01-01', '2019-12-31'

# Coverage threshold for solid ice discharge, used by Mankoff
threshold = 0.5

# Path to files downloaded from https://doi.org/10.22008/promice/data/ice_discharge/d/v02
solid_ice_dir = '../IceDischarge_Mankoff/'

In [None]:
# Load the space-delimited lookup table that links Moon pointID, Joughin glacierID,
# and Mankoff gate ID, and index it by Moon pointID
id_map = pd.read_csv('./relate_pointID_glacierID.txt', sep=' ').set_index('pointID')
id_map.head()

In [None]:
# Load the comma-delimited table that assigns each glacier ID to a fjord number
# (alternative source, currently disabled: pd.read_excel('../Databases/Glacier database.xlsx'))
glacier_fjord_db = pd.read_csv('./glaciers_fjords.txt', sep=',')
glacier_fjord_db.head()

In [None]:
# Load the solid ice discharge time series (one column per gate), indexed by the 'Date' column
gate_discharge = pd.read_csv(f'{solid_ice_dir}/gate_D.csv').set_index('Date')
gate_discharge.head()

In [None]:
# Load the error of the solid ice discharge time series at each gate, indexed by the 'Date' column
gate_error = pd.read_csv(f'{solid_ice_dir}/gate_err.csv').set_index('Date')
gate_error.head()

In [None]:
# Load the coverage of the solid ice discharge time series at each gate, indexed by the 'Date' column
gate_coverage = pd.read_csv(f'{solid_ice_dir}/gate_coverage.csv').set_index('Date')
gate_coverage.head()

In [None]:
# Define functions

def getMankoffID(pointID, id_map):
    """Find the Mankoff gate ID(s) associated with a Moon glacier point ID.

    Parameters
    ----------
    pointID
        Row label of ``id_map`` (the Moon point ID).
    id_map : pandas.DataFrame
        Lookup table indexed by point ID with a ``mankoffID`` column holding
        one gate ID or several comma-separated gate IDs; a missing value means
        the glacier has no associated gate.

    Returns
    -------
    list of str or numpy.nan
        The gate IDs as strings (surrounding whitespace removed), or the
        ``np.nan`` singleton when no gate is associated with the point.
    """
    mankoffID = id_map.loc[pointID].mankoffID
    # Callers test the result with ``is np.nan``; an identity test only works for
    # the np.nan singleton, so every missing-value flavour is normalised to it.
    if pd.isna(mankoffID):
        return np.nan
    # A column without any comma-separated entry may be parsed as numeric;
    # render whole numbers without a trailing ".0" so they still match column labels.
    if isinstance(mankoffID, (float, np.floating)) and float(mankoffID).is_integer():
        mankoffID = int(mankoffID)
    return [gate.strip() for gate in str(mankoffID).split(',')]

def timeSeries(mankoffID, gate_discharge, gate_error, gate_coverage):
    """Assemble the discharge, error, and coverage time series of one Mankoff gate.

    ``mankoffID`` selects the gate column in each of the three tables (a column
    label, or a one-item list of labels). The result is a single dataframe with
    columns 'discharge', 'error', and 'coverage', indexed by the discharge
    table's index converted to datetimes.
    """
    sources = (
        ('discharge', gate_discharge),
        ('error', gate_error),
        ('coverage', gate_coverage),
    )
    columns = {}
    for column_name, table in sources:
        # flatten() turns a one-column selection into a plain 1-D array
        columns[column_name] = table[mankoffID].values.flatten()
    timestamps = pd.to_datetime(gate_discharge[mankoffID].index)
    return pd.DataFrame(index=timestamps, data=columns)

def filterDates(data, startdate, enddate):
    """Keep only the rows whose index lies between startdate and enddate (both inclusive)."""
    in_window = (data.index >= startdate) & (data.index <= enddate)
    return data[in_window]

def filterCoverage(data, threshold):
    """Keep only the rows whose 'coverage' value is at or above the threshold."""
    enough_coverage = data['coverage'] >= threshold
    return data[enough_coverage]

def rmse(error_data):
    """Calculate the root mean squared error (RMSE) used when averaging data.

    Parameters
    ----------
    error_data : sized iterable of numbers
        The individual errors of the samples being averaged.

    Returns
    -------
    float
        sqrt(sum(error**2)) / sqrt(n), or NaN when ``error_data`` is empty
        (no samples means the error is undefined, rather than a division by zero).
    """
    n_samples = len(error_data)
    if n_samples == 0:
        return float('nan')
    return sum(error ** 2 for error in error_data) ** 0.5 / n_samples ** 0.5

def season_aggregate(dates):
    """Map each date to the datetime label of its season.

    Seasons are labelled by a month of the same year: Jan-Feb -> January,
    Mar-May -> April, Jun-Aug -> July, Sep-Nov -> October. December is rolled
    over to the January label of the following year.
    """
    # month -> (label month, number of years to add)
    season_of_month = {
        1: ('01', 0), 2: ('01', 0),
        3: ('04', 0), 4: ('04', 0), 5: ('04', 0),
        6: ('07', 0), 7: ('07', 0), 8: ('07', 0),
        9: ('10', 0), 10: ('10', 0), 11: ('10', 0),
        12: ('01', 1),
    }
    labels = []
    for date in dates:
        label_month, year_shift = season_of_month[date.month]
        labels.append('{}-{}'.format(date.year + year_shift, label_month))
    return pd.to_datetime(labels, format='%Y-%m')

def hydroyear_aggregate(dates):
    """Map each date to the datetime label of its hydrological year (September start).

    Dates from September onward belong to the hydrological year labelled with
    the following calendar year; every label is placed in March of that year.
    """
    labels = [
        '{}-03'.format(date.year + 1 if date.month >= 9 else date.year)
        for date in dates
    ]
    return pd.to_datetime(labels, format='%Y-%m')

def getDischargeData(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold):
    """Get the filtered discharge, error, and coverage time series for a glacier point ID.

    The gate(s) of the glacier are looked up in ``id_map``; each gate's series is
    restricted to [startdate, enddate] and to samples with coverage >= threshold.
    For a glacier with several gates the series are combined: discharge is summed,
    errors are added in quadrature, coverage is averaged, and dates that end up
    with a missing value are dropped.

    Returns a pandas dataframe, or nan (after printing a message) when the
    glacier has no associated gate.
    """
    gate_ids = getMankoffID(pointID, id_map)

    if gate_ids is np.nan:
        print('No discharge data associated with glacier point {}'.format(pointID))
        return np.nan

    def load_filtered(gate):
        # Time series of one gate selection with the date and coverage filters applied
        series = timeSeries(gate, gate_discharge, gate_error, gate_coverage)
        series = filterDates(series, startdate, enddate)
        return filterCoverage(series, threshold)

    if len(gate_ids) == 1:
        # Single gate: the one-item list itself selects the column
        return load_filtered(gate_ids)

    per_gate = [load_filtered(gate) for gate in gate_ids]
    total_discharge = sum(frame.discharge for frame in per_gate)
    total_error = sum(frame.error**2 for frame in per_gate)**0.5
    average_coverage = sum(frame.coverage for frame in per_gate)/len(per_gate)
    combined = pd.DataFrame(
        index=total_discharge.index,
        data={'discharge': total_discharge, 'error': total_error, 'coverage': average_coverage},
    )
    return combined.dropna()

def meanAnnualDischarge(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold):
    """Calculate the mean discharge of each calendar year from the full time series.

    Discharge and coverage are averaged per year and the error is aggregated
    with ``rmse``; each year is labelled with a July datetime of that year.
    Returns nan when the glacier has no discharge data.
    """
    data = getDischargeData(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)
    if data is np.nan:
        return np.nan

    aggregations = {'discharge': 'mean', 'error': rmse, 'coverage': 'mean'}
    yearly = data.groupby(data.index.year).agg(aggregations)
    yearly.index = pd.to_datetime(['{}-07'.format(year) for year in yearly.index], format='%Y-%m')
    return yearly

def meanHydroAnnualDischarge(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold):
    """Calculate the mean discharge of each hydrological year (September start) from the full time series.

    Discharge and coverage are averaged per hydrological year and the error is
    aggregated with ``rmse``. Returns nan when the glacier has no discharge data.
    """
    data = getDischargeData(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)
    if data is np.nan:
        return np.nan

    aggregations = {'discharge': 'mean', 'error': rmse, 'coverage': 'mean'}
    data['hydroyear'] = hydroyear_aggregate(data.index)
    return data.groupby('hydroyear').agg(aggregations)

def meanSeasonalDischarge(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold):
    """Calculate the mean discharge of each season from the full time series.

    Discharge and coverage are averaged per season and the error is aggregated
    with ``rmse``. Returns nan when the glacier has no discharge data.
    """
    data = getDischargeData(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)
    if data is np.nan:
        return np.nan

    aggregations = {'discharge': 'mean', 'error': rmse, 'coverage': 'mean'}
    data['season'] = season_aggregate(data.index)
    return data.groupby('season').agg(aggregations)

def getFjordNumber(pointID):
    """Get the fjord number associated with a given glacier point ID.

    The lookup uses the module-level ``glacier_fjord_db`` table, matching
    ``pointID`` against its 'GlacierID' column and reading 'FjordID'.

    Parameters
    ----------
    pointID
        Glacier ID to look up.

    Returns
    -------
    int
        The fjord number of the glacier.

    Raises
    ------
    ValueError
        If the table does not hold exactly one non-missing fjord number for
        the glacier.
    """
    is_glacier = glacier_fjord_db['GlacierID'] == pointID
    fjord_ids = glacier_fjord_db.loc[is_glacier, 'FjordID'].dropna()
    if len(fjord_ids) != 1:
        raise ValueError(
            'Expected exactly one fjord for glacier point {}, found {}'.format(pointID, len(fjord_ids))
        )
    # Extract the scalar explicitly: calling int() on a one-element array is
    # deprecated in recent NumPy releases.
    return int(fjord_ids.iloc[0])

def plotGlacierDischarge(ax, data, pointID):
    """Plot the solid ice discharge time series of a glacier (given by point ID) on ``ax``.

    Draws the discharge as a dotted line with a shaded discharge +/- error band,
    labels the axes, and titles the plot with the glacier and its fjord number.
    Nothing is drawn when ``data`` is nan.
    """
    if data is np.nan:
        return

    times = data.index
    upper_bound = data.discharge + data.error
    lower_bound = data.discharge - data.error

    ax.plot(times, data.discharge, '.-')
    ax.fill_between(times, upper_bound, lower_bound, alpha=0.2, label='error')
    ax.set_xlabel('Time')
    ax.set_ylabel('Discharge [Gt yr$^{-1}$]')
    fjord_number = getFjordNumber(pointID)
    ax.set_title(f'Glacier #{pointID} discharge\n(Fjord #{fjord_number})')
    ax.grid('on')


## Complete discharge time series for individual glaciers

In [None]:
# Plot the complete (filtered) discharge time series of an individual glacier
pointID = 147
data = getDischargeData(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate=startdate, enddate=enddate, threshold=threshold)
if data is not np.nan:
    fig, ax = plt.subplots()
    plotGlacierDischarge(ax, data, pointID)
    # Give tick positions as timestamps rather than bare strings so they are
    # placed on the date axis independently of matplotlib's string handling
    ax.set_xticks(pd.to_datetime(['2015-01-01', '2016-01-01', '2017-01-01', '2018-01-01', '2019-01-01', '2020-01-01']))
    ax.set_xticklabels(['2015', '2016', '2017', '2018', '2019', '2020'])
    plt.show()

## Mean annual discharge time series for individual glaciers

In [None]:
# Plot the mean annual discharge of an individual glacier
pointID = 147
mean_annual_data = meanAnnualDischarge(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)

if mean_annual_data is not np.nan:
    fig, ax = plt.subplots()
    plotGlacierDischarge(ax, mean_annual_data, pointID)
    # Give tick positions as timestamps rather than bare strings so they are
    # placed on the date axis independently of matplotlib's string handling
    ax.set_xticks(pd.to_datetime(['2015-01-01', '2016-01-01', '2017-01-01', '2018-01-01', '2019-01-01']))
    ax.set_xticklabels(['2015', '2016', '2017', '2018', '2019'])
    # Replaces the generic title set by plotGlacierDischarge
    ax.set_title('Glacier #{} mean annual discharge'.format(pointID))
    plt.show()

## Mean hydrological year discharge time series for individual glaciers

In [None]:
# Plot the mean hydrological year discharge of an individual glacier
pointID = 147
mean_hydroyear_data = meanHydroAnnualDischarge(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)
if mean_hydroyear_data is not np.nan:
    fig, ax = plt.subplots()
    plotGlacierDischarge(ax, mean_hydroyear_data, pointID)
    # Give tick positions as timestamps rather than bare strings so they are
    # placed on the date axis independently of matplotlib's string handling
    ax.set_xticks(pd.to_datetime(['2015-01-01', '2016-01-01', '2017-01-01', '2018-01-01', '2019-01-01', '2020-01-01']))
    ax.set_xticklabels(['2015', '2016', '2017', '2018', '2019', '2020'])
    # Replaces the generic title set by plotGlacierDischarge
    ax.set_title('Glacier #{} mean hydrological year discharge'.format(pointID))
    plt.show()

## Mean seasonal discharge time series for individual glaciers

In [None]:
# Plot the mean seasonal discharge of an individual glacier
pointID = 147
mean_seasonal_data = meanSeasonalDischarge(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)
if mean_seasonal_data is not np.nan:
    fig, ax = plt.subplots()
    plotGlacierDischarge(ax, mean_seasonal_data, pointID)
    # Give tick positions as timestamps rather than bare strings so they are
    # placed on the date axis independently of matplotlib's string handling
    ax.set_xticks(pd.to_datetime(['2015-01-01', '2016-01-01', '2017-01-01', '2018-01-01', '2019-01-01', '2020-01-01']))
    ax.set_xticklabels(['2015', '2016', '2017', '2018', '2019', '2020'])
    # Replaces the generic title set by plotGlacierDischarge
    ax.set_title('Glacier #{} mean seasonal discharge'.format(pointID))
    plt.show()

## Combine all plots

In [None]:
# Overlay the full time series with its seasonal, annual, and hydrological year means
pointID = 175
data = getDischargeData(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)
mean_annual_data = meanAnnualDischarge(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)
mean_hydroyear_data = meanHydroAnnualDischarge(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)
mean_seasonal_data = meanSeasonalDischarge(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)

if data is not np.nan:
    fig, ax = plt.subplots()
    plotGlacierDischarge(ax, data, pointID)
    # Drawing order must match the order of the legend labels below
    for averaged in (mean_seasonal_data, mean_annual_data, mean_hydroyear_data):
        averaged.discharge.plot(ax=ax, style='.-', alpha=0.8)
    ax.grid('on')
    ax.legend(
        handles=ax.get_lines(),
        labels=['discharge', 'seasonal mean', 'annual mean', 'hydrological year mean'],
        loc='center left',
        bbox_to_anchor=(1, 0.5),
    )
    plt.show()

# Save figures for all glaciers

In [None]:
# Save one combined discharge figure per glacier that has discharge data
pathlib.Path('../figures/solid_ice_discharge').mkdir(parents=True, exist_ok=True)
for pointID in id_map.index:
    data = getDischargeData(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)

    # Glaciers without an associated gate have nothing to plot
    if data is np.nan:
        continue

    mean_annual_data = meanAnnualDischarge(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)
    mean_hydroyear_data = meanHydroAnnualDischarge(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)
    mean_seasonal_data = meanSeasonalDischarge(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)

    fig, ax = plt.subplots()
    plotGlacierDischarge(ax, data, pointID)
    # Drawing order must match the order of the legend labels below
    for averaged in (mean_seasonal_data, mean_annual_data, mean_hydroyear_data):
        averaged.discharge.plot(ax=ax, style='.-', alpha=0.8)
    ax.grid('on')
    ax.legend(
        handles=ax.get_lines(),
        labels=['discharge', 'seasonal mean', 'annual mean', 'hydrological year mean'],
        loc='center left',
        bbox_to_anchor=(1, 0.5),
    )

    figure_path = '../figures/solid_ice_discharge/g{:03}_discharge_fjord{}.png'.format(pointID, getFjordNumber(pointID))
    plt.savefig(figure_path, bbox_inches='tight', dpi=300)
    plt.close()

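# Calculate cumulative discharge and mean means (averages of each glacier's annual, hydrological-year, and seasonal means)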

In [None]:
# Per-glacier summary table. Rows are filled positionally from `mean_data` below,
# so the column order here and the key order there must stay in sync.
discharge_data = pd.DataFrame(index=id_map.index, columns=['Fjord number', 'Cumulative discharge (Gt)', 'Cumulative error (Gt)', 'Mean annual mean discharge (Gt/yr)', 'Mean annual mean discharge error (Gt/yr)', 'Mean hydroyear mean discharge (Gt/yr)', 'Mean hydroyear mean discharge error (Gt/yr)', 'Mean winter mean discharge (Gt/yr)', 'Mean winter mean discharge error (Gt/yr)', 'Mean spring mean discharge (Gt/yr)', 'Mean spring mean discharge error (Gt/yr)', 'Mean summer mean discharge (Gt/yr)', 'Mean summer mean discharge error (Gt/yr)', 'Mean autumn mean discharge (Gt/yr)', 'Mean autumn mean discharge error (Gt/yr)'])

for pointID in id_map.index:

    # Fjord number (getFjordNumber already returns an int)
    fjord_number = getFjordNumber(pointID)

    # Cumulative discharge: plain sums of the filtered discharge and error samples
    # NOTE(review): the samples are plotted elsewhere as Gt/yr while the column is
    # labelled Gt -- confirm the intended units of this sum.
    data = getDischargeData(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate=startdate, enddate=enddate, threshold=threshold)
    if data is not np.nan:
        cumulative_discharge = data.agg({'discharge': 'sum', 'error': 'sum'})
    else:
        # NaN placeholder so the attribute lookups below still work
        cumulative_discharge = pd.DataFrame(index=[pointID], data={'discharge': np.nan, 'error': np.nan, 'coverage': np.nan})

    # Mean annual mean
    mean_annual_data = meanAnnualDischarge(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)
    if mean_annual_data is not np.nan:
        mean_annual_mean = mean_annual_data.agg({'discharge': 'mean', 'error': rmse, 'coverage': 'mean'})
    else:
        mean_annual_mean = pd.DataFrame(index=[pointID], data={'discharge': np.nan, 'error': np.nan, 'coverage': np.nan})

    # Mean hydrological year mean
    mean_hydroyear_data = meanHydroAnnualDischarge(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)
    if mean_hydroyear_data is not np.nan:
        mean_hydroyear_mean = mean_hydroyear_data.agg({'discharge': 'mean', 'error': rmse, 'coverage': 'mean'})
    else:
        mean_hydroyear_mean = pd.DataFrame(index=[pointID], data={'discharge': np.nan, 'error': np.nan, 'coverage': np.nan})

    # Mean seasonal means, grouped by the season label month
    # (1 = winter, 4 = spring, 7 = summer, 10 = autumn)
    mean_seasonal_data = meanSeasonalDischarge(pointID, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold)
    if mean_seasonal_data is not np.nan:
        mean_seasonal_mean = mean_seasonal_data.groupby(mean_seasonal_data.index.month).agg({'discharge': 'mean', 'error': rmse, 'coverage': 'mean'})
        # A glacier may have no samples left in some season after filtering;
        # reindex so the lookups below give NaN instead of raising KeyError
        mean_seasonal_mean = mean_seasonal_mean.reindex([1, 4, 7, 10])
    else:
        mean_seasonal_mean = pd.DataFrame(index=[1, 4, 7, 10], columns=['discharge', 'error', 'coverage'])

    # Compile dataframe of all means and errors
    mean_data = pd.DataFrame(index=[pointID], data={
        'Fjord number': fjord_number,
        'Cumulative discharge': cumulative_discharge.discharge,
        'Cumulative error': cumulative_discharge.error,
        'Mean annual mean discharge': mean_annual_mean.discharge,
        'Mean annual mean discharge error': mean_annual_mean.error,
        'Mean hydroyear mean discharge': mean_hydroyear_mean.discharge,
        'Mean hydroyear mean discharge error': mean_hydroyear_mean.error,
        'Mean winter mean discharge': mean_seasonal_mean.loc[1].discharge,
        'Mean winter mean discharge error': mean_seasonal_mean.loc[1].error,
        'Mean spring mean discharge': mean_seasonal_mean.loc[4].discharge,
        'Mean spring mean discharge error': mean_seasonal_mean.loc[4].error,
        'Mean summer mean discharge': mean_seasonal_mean.loc[7].discharge,
        'Mean summer mean discharge error': mean_seasonal_mean.loc[7].error,
        'Mean autumn mean discharge': mean_seasonal_mean.loc[10].discharge,
        'Mean autumn mean discharge error': mean_seasonal_mean.loc[10].error
    })

    # Input data into main dataframe (positional: see the note on column order above)
    discharge_data.loc[pointID] = mean_data.values

discharge_data['Fjord number'] = discharge_data['Fjord number'].astype('int')

pathlib.Path('../databases').mkdir(parents=True, exist_ok=True)
discharge_data.to_csv('../databases/solid_discharge.csv')

In [None]:
# Sum the per-glacier cumulative discharge within each fjord and save it to CSV
pathlib.Path('../databases').mkdir(parents=True, exist_ok=True)
cumulative_by_glacier = discharge_data.groupby('Fjord number')['Cumulative discharge (Gt)']
fjord_discharge = cumulative_by_glacier.sum()
fjord_discharge.to_csv('../databases/fjord_cumulative_solid_discharge.csv')
fjord_discharge.head()

## Get combined discharge for each fjord

In [None]:
# Combine the discharge of all glaciers draining into a single fjord
f = 24
glacier_id_list = glacier_fjord_db[glacier_fjord_db['FjordID'] == f]['GlacierID'].values
print(f'Glaciers in fjord: {glacier_id_list}')
if not np.isnan(glacier_id_list).all():
    data = [getDischargeData(glacier_id, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold) for glacier_id in glacier_id_list]
    # keep only glaciers with real data (DataFrame, not nan)
    combined_data = [x for x in data if isinstance(x, pd.DataFrame)]
    if bool(combined_data): # if combined_data is not empty
        # discharge is summed, errors are added in quadrature, coverage is averaged
        combined_discharge = sum([df.discharge for df in combined_data])
        combined_error = sum([df.error**2 for df in combined_data])**0.5
        # average the coverage column (this previously averaged the discharge by mistake)
        combined_coverage = sum([df.coverage for df in combined_data])/len(combined_data)
        final_data = pd.DataFrame(index=combined_discharge.index, data={'discharge': combined_discharge, 'error': combined_error, 'coverage': combined_coverage})
        # drop dates for which any glacier has no value
        final_data = final_data.dropna()

        # fig, ax = plt.subplots()
        # plotGlacierDischarge(ax, final_data, glacier_id_list[0])

In [None]:
# For every fjord: combine the discharge of its glaciers, save the combined time
# series to CSV, and save a figure of the combined series with its seasonal mean
fjord_list = sorted(glacier_fjord_db['FjordID'].unique())
pathlib.Path('../databases/fjord_solid_ice_discharge').mkdir(parents=True, exist_ok=True)
pathlib.Path('../figures/combined_fjord_solid_discharge').mkdir(parents=True, exist_ok=True)

for f in fjord_list:
    print(f'Fjord {f}:')
    # get list of glacier ids in this fjord
    glacier_id_list = glacier_fjord_db[glacier_fjord_db['FjordID'] == f]['GlacierID'].values
    if np.isnan(glacier_id_list).all():
        print(f'No glaciers associated with Fjord {f} in Glacier Database XLSX.')
        continue

    print(f'Glaciers: {glacier_id_list}')
    fig, ax = plt.subplots()
    # one label per line actually drawn, in drawing order, so the legend matches the plot
    legend_labels = []

    # get list of discharge data for each glacier in fjord and reduce to only glaciers with real data (DataFrame, not nan)
    combined_data = [getDischargeData(glacier_id, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold) for glacier_id in glacier_id_list]
    combined_data = [x for x in combined_data if isinstance(x, pd.DataFrame)]
    if combined_data: # not empty after removing nan-data glaciers
        # calculate combined discharge (sum), error (quadrature sum), and coverage (mean)
        combined_discharge = sum([df.discharge for df in combined_data])
        combined_error = sum([df.error**2 for df in combined_data])**0.5
        combined_coverage = sum([df.coverage for df in combined_data])/len(combined_data)
        # combine discharge, error, and coverage into new dataframe and drop nans
        final_data = pd.DataFrame(index=combined_discharge.index, data={'discharge': combined_discharge, 'error': combined_error, 'coverage': combined_coverage})
        final_data = final_data.dropna()
        # plot final data (the title set here is replaced by the fjord title below)
        plotGlacierDischarge(ax, final_data, glacier_id_list[0])
        legend_labels.append('discharge')
        # save final data to CSV
        final_data.discharge.to_csv(f'../databases/fjord_solid_ice_discharge/fjord{f:02}_solid_ice_discharge.csv', header=['Discharge (Gt/yr)'], index=True)

    # repeat for seasonal discharge data for each glacier in fjord
    combined_mean_seasonal_data = [meanSeasonalDischarge(glacier_id, id_map, gate_discharge, gate_error, gate_coverage, startdate, enddate, threshold) for glacier_id in glacier_id_list]
    combined_mean_seasonal_data = [x for x in combined_mean_seasonal_data if isinstance(x, pd.DataFrame)]
    if combined_mean_seasonal_data:
        combined_seasonal_discharge = sum([df.discharge for df in combined_mean_seasonal_data])
        combined_seasonal_error = sum([df.error**2 for df in combined_mean_seasonal_data])**0.5
        combined_seasonal_coverage = sum([df.coverage for df in combined_mean_seasonal_data])/len(combined_mean_seasonal_data)
        final_seasonal_data = pd.DataFrame(index=combined_seasonal_discharge.index, data={'discharge': combined_seasonal_discharge, 'error': combined_seasonal_error, 'coverage': combined_seasonal_coverage})
        final_seasonal_data = final_seasonal_data.dropna()
        final_seasonal_data.discharge.plot(ax=ax, style='.-', alpha=0.8)
        legend_labels.append('seasonal mean')

    # glacier_id_list has at least one entry here (an empty list is skipped above)
    noun = 'glacier' if len(glacier_id_list) == 1 else 'glaciers'
    glacier_str = f'({noun} ' + str([int(x) for x in glacier_id_list]).strip('[]') + ')'
    ax.set_title(f'Fjord #{f} combined ice discharge\n{glacier_str}')
    ax.set_xlabel('Year')
    ax.set_xticks(pd.to_datetime(['2015-01-01', '2016-01-01', '2017-01-01', '2018-01-01', '2019-01-01', '2020-01-01']))
    ax.set_xticklabels(['2015', '2016', '2017', '2018', '2019', '2020'], rotation=0, horizontalalignment='center')
    ax.grid('on')
    # only the combined discharge and its seasonal mean are drawn in this figure,
    # so the legend lists exactly those lines
    if legend_labels:
        ax.legend(handles=ax.get_lines(), labels=legend_labels, loc='center left', bbox_to_anchor=(1, 0.5))
    plt.savefig(f'../figures/combined_fjord_solid_discharge/fjord{f:02}_combined_ice_discharge.png', bbox_inches='tight', dpi=300)
    plt.close()