# flux figures

In [1]:
%matplotlib inline
%config InlineBackend.figure_format = 'png'
%config InlineBackend.print_figure_kwargs = {'dpi':300, 'bbox_inches': 'tight'}

# standard imports
import os
import scipy
import numpy as np
import xarray as xr
import pandas as pd
import glob

# modeling
import sklearn
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import pgml

from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb

# plotting
import cmocean as cm
import matplotlib as mpl
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import cartopy.feature as cfeature
from matplotlib.ticker import AutoMinorLocator
from plotting_tools.spatial_map import SpatialMap
from plotting_tools.time_series_diagram import TimeSeriesPlot

# regression metrics
from sklearn.metrics import r2_score
from sklearn.metrics import max_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import median_absolute_error
#from sklearn.metrics import explained_variance_score

import pgml
import os
# settings: let pandas show up to 100 columns before it truncates wide DataFrames
pd.set_option("display.max_columns", 100)

In [2]:
# Data directories from an earlier project layout, kept commented out for reference
#dir_raw = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw'
#dir_clean = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/clean'

# Directory path for figures (not referenced by the cells visible here).
# NOTE(review): absolute, user-specific path -- consider making it configurable.
dir_figs = '/home/gloege/projects/ldeo_hpd/reports/figures'

## Function Definitions

In [3]:
### ================================================
### force time vector to be proper format and range
### ================================================
def make_dates(start=None, end=None):
    '''
    Build a monthly time axis whose stamps fall on the 15th of each month.

    Input
    =========
    start : str 'YYYY-MM', first month (inclusive)
    end :   str 'YYYY-MM', last month (inclusive)

    Output
    =========
    dates : xarray.DataArray with a single 'time' dimension

    Example
    ==========
    dates = make_dates(start='1990-01', end='2017-12')
    '''
    # first day of every month in the requested span
    month_starts = pd.date_range(start=f'{start}-01T00:00:00.000000000',
                                 end=f'{end}-01T00:00:00.000000000',
                                 freq='MS')

    # shifting by 14 days moves each stamp from the 1st to the 15th
    mid_month = month_starts + np.timedelta64(14, 'D')

    return xr.DataArray(mid_month, dims='time')

In [4]:
def convert_to_datetime64(times, use_cftime):
    """
    Return times as np.datetime64[ns] or cftime.DatetimeProlepticGregorian
    depending on whether the dates fall within the year range this function
    treats as safe for np.datetime64[ns]: [1678-01-01 AD, 2261-12-31 AD].

    Alternatively, always returns cftime.DatetimeProlepticGregorian if
    `use_cftime` is True.

    Parameters
    ----------
    times : iterable of datetime-like objects
        Each element must expose `.year` and `.timetuple()`
        (e.g. `datetime.datetime`).
    use_cftime : bool
        Force conversion to cftime objects regardless of the date range.

    Returns
    -------
    list
        np.datetime64[ns] scalars, or cftime.DatetimeProlepticGregorian
        objects when cftime is requested or required.

    Warns
    -----
    UserWarning
        When `use_cftime` is False but a date lies outside the supported range.

    source : https://gitlab.com/deltares/imod/imod-python/commit/9b1ae0e9a51b0fefe6ef86c2a2f9ef988024b464
    """
    # The notebook's import cell does not import `warnings`, so bring it into
    # scope here; otherwise the out-of-bounds branch raises NameError.
    import warnings

    def _as_cftime(values):
        # cftime is only needed on this path; importing lazily keeps the
        # plain-numpy path usable when cftime is not installed.
        import cftime
        return [
            cftime.DatetimeProlepticGregorian(*value.timetuple()[:6]) for value in values
        ]

    # Materialise once so one-shot iterables survive the bounds scan below.
    times = list(times)

    if use_cftime:
        return _as_cftime(times)

    out_of_bounds = any(time.year < 1678 or time.year > 2261 for time in times)
    if out_of_bounds:
        warnings.warn(
            "Dates are outside of np.datetime64[ns] timespan. "
            "Converting to cftime.DatetimeProlepticGregorian."
        )
        return _as_cftime(times)

    return [np.datetime64(time, "ns") for time in times]

In [5]:
def area(lat, dlat=1, dlon=1):
    '''
    Area (m^2) of a dlat x dlon degree grid cell evaluated at latitude `lat`,
    using a latitude-dependent Earth radius from the WGS 84 ellipsoid.

    Input
    =========
    lat  : latitude in degrees (scalar or array-like accepted by numpy)
    dlat : cell height in degrees (default 1)
    dlon : cell width in degrees (default 1)

    Output
    =========
    R^2 * dlat * dlon * cos(lat), with the angles converted to radians

    References
    ==========
    WGS 84: https://earth-info.nga.mil/GandG/publications/tr8350.2/tr8350.2-a/Chapter%203.pdf
    (see table 3.7)
    R equation: https://planetcalc.com/7721/
    '''
    to_radians = np.pi/180
    phi = lat * to_radians
    dphi = dlat * to_radians
    dlambda = dlon * to_radians

    # WGS 84 ellipsoid axes; the semi-minor axis is rounded from 6356752.3142
    semi_major = 6378137 # (m)
    semi_minor = 6356752 # (m)

    cos_phi = np.cos(phi)
    sin_phi = np.sin(phi)

    # radius of an oblate spheroid as a function of latitude
    ratio_top = ( (semi_major**2 * cos_phi)**2 + (semi_minor**2 * sin_phi)**2 )
    ratio_bottom = ( (semi_major * cos_phi)**2 + (semi_minor * sin_phi)**2 )
    radius = np.sqrt(ratio_top / ratio_bottom)

    return radius * radius * dphi * dlambda  * cos_phi

## broadcast area to lon and time
#lon = np.arange(0.5,360,1)
#time = pd.date_range(start='1982-01-01T00:00:00.000000000', 
#                     end='2016-12-01T00:00:00.000000000',freq='MS')+ np.timedelta64(14, 'D')
#ds_bc = xr.DataArray(np.zeros([len(time),len(lon)]), coords=[('time', time),('lon', lon)])
#ds_data, ds_mask = xr.broadcast(area(ds_hpd['lat']), ds_bc)
#ds_data = ds_data.transpose('time','lat','lon')
#ds_area = ds_data.rename('area').to_dataset()

## Load HPD

In [39]:
%%time
# Open the LDEO-HPD fgco2 file (1x1 grid, 198201-201812 according to the file name)
ds_hpd = xr.open_dataset('/local/data/artemis/workspace/gloege/ldeo-hpd/LDEO-HPD_fgco2_v20210426_1x1_198201-201812.nc')

CPU times: user 5.75 ms, sys: 6.92 ms, total: 12.7 ms
Wall time: 37.8 ms


# Load products

In [40]:
# Open the gap-filled FCO2 data products file
ds_prod = xr.open_dataset('/local/data/artemis/workspace/gloege/gregor/IPCC-AR6_FCO2_DataProducts_filled_20201118.nc')

# Map negative longitudes onto the 0-360 convention. The modulo leaves values
# that are already in [0, 360) untouched -- including lon == 0, which the
# previous `lon if lon>0 else lon+360` test moved to 360.
ds_prod['lon'] = np.mod(ds_prod['lon'].values, 360)

# Replace the time axis with monthly stamps on the 15th, 1985-01 .. 2018-12.
# NOTE(review): assumes the file holds exactly these months, in order -- confirm.
ds_prod['time'] = pd.date_range(
    start='1985-01-01T00:00:00.000000000',
    end='2018-12-01T00:00:00.000000000',
    freq='MS') + np.timedelta64(14, 'D')

# Scale by 365 -- presumably per-day to per-year, matching the units attribute
# written below (TODO confirm the units stored in the file).
ds_prod['fgco2'] = ds_prod['fgco2']*365
ds_prod['fgco2'].attrs = {'description':'Air-sea CO2 fluxes, where negative is into the ocean',
                         'units':'mol/m2/yr'}

# Flux density

## models

In [41]:
# Table of yearly model fluxes: one column per model plus 'Mean models' and 'model std'
flux_den_mod = pd.read_csv('/local/data/artemis/workspace/gloege/GCB/GCB2020_fluxes.csv')

In [42]:
# Flip the sign of the multi-model mean (presumably so that uptake is negative,
# like the data products -- confirm).
# NOTE: this mutates the frame in place and is NOT idempotent -- running the cell
# a second time flips the sign back. Re-run the read_csv cell first.
flux_den_mod['Mean models'] = flux_den_mod['Mean models']*(-1)

In [43]:
# preview the first rows; 'Mean models' should now be negative
flux_den_mod.head()

Unnamed: 0,years,CESM-ETH,CSIRO,FESOM,MPI,CNRM,PlankTOM,NorESM,Princeton,IPSL,Mean models,model std
0,1959,0.95,1.03,1.01,0.8,0.79,0.89,1.1,0.35,0.83,-0.86,0.22
1,1960,0.89,1.03,1.03,0.73,0.77,0.84,1.07,0.48,0.71,-0.84,0.19
2,1961,0.62,0.89,0.9,0.72,0.5,0.8,1.01,0.42,0.59,-0.72,0.2
3,1962,0.72,1.0,0.84,0.65,0.44,0.94,1.19,0.51,0.62,-0.77,0.24
4,1963,0.9,1.18,0.93,0.72,0.63,1.06,1.35,0.66,0.81,-0.92,0.24


## products

In [44]:
# factor from mol of carbon to Pg of carbon: 12.01 g/mol over 1e15 g/Pg
mol_to_pg = 12.01/10**15

# average over the 'wind' dimension, weight by cell area, then integrate over the grid
fgco2_wind_mean = ds_prod['fgco2'].mean('wind')
flux_den_prods = (fgco2_wind_mean*mol_to_pg*ds_prod['area']).sum(['lat','lon'])

# collapse the integrated series to one mean value per calendar year
flux_den_prods_yr = flux_den_prods.groupby('time.year').mean('time')

  return np.nanmean(a, axis=axis, dtype=dtype)


# decadal mean flux

In [7]:
# Open the LDEO-HPD spco2 file (1x1 grid, 198201-201812 according to the file name)
ds_spco2  = xr.open_dataset('/local/data/artemis/workspace/gloege/ldeo-hpd/LDEO-HPD_spco2_v20210426_1x1_198201-201812.nc')

In [8]:
# 1 where the time-mean spco2 of the 'cesm' member is positive, 0 elsewhere
# (cells whose mean is NaN compare False and therefore get 0).
# NOTE(review): presumably marks cells that have data before filling -- confirm.
mask = (ds_spco2['spco2'].sel(model='cesm').mean('time')>0)*1

  return np.nanmean(a, axis=axis, dtype=dtype)


In [21]:
#plt.pcolor(mask)
#plt.colorbar()

In [27]:
%%time
ds_tmp = xr.open_dataset('/local/data/artemis/workspace/gloege/ldeo-hpd/LDEO-HPD_fgco2_v20210426_1x1_198201-201812.nc')
ds_fgco2 = ds_tmp ['fgco2_avg'].mean('model')
ds_area = ds_tmp['area']

CPU times: user 2.8 s, sys: 5.79 s, total: 8.59 s
Wall time: 8.67 s


  return np.nanmean(a, axis=axis, dtype=dtype)


In [36]:
# total area of ocean
#total_area = ds_area.sum()

# unit-conversion factors
grams_in_mol = 12.01            # g/mol
sec_to_year = 86400 * 365       # sec/year
gram_to_petagram = 1 / (10**15) # Pg/g

# converts mol/s to Pg/yr
# NOTE(review): assumes ds_fgco2 is in mol/m2/s and ds_area in m2 -- confirm
conversion = grams_in_mol * gram_to_petagram * sec_to_year

# time ranges
slices = [slice('1982-01','1989-12'),
          slice('1990-01','1999-12'),
          slice('2000-01','2009-12'),
          slice('2010-01','2018-12')]

# Integrate over the grid once. These sums do not depend on the time window,
# so computing them inside the loop repeated the full-grid work for every
# decade. `mask` restricts the "unfilled" total to the cells it flags with 1.
filled_series = ((ds_fgco2*conversion)*ds_area).sum(['lat','lon'])
unfilled_series = ((ds_fgco2*conversion)*ds_area*mask).sum(['lat','lon'])

print('Unfilled  Filled')
for time_range in slices:
    # mean of the integrated flux over the window (Pg/yr)
    filled = filled_series.sel(time=time_range).mean('time').values
    unfilled = unfilled_series.sel(time=time_range).mean('time').values
    print(f'{unfilled:0.3}, {filled:0.3}')
    print('')


Unfilled  Filled
-1.38, -1.53

-1.48, -1.65

-1.49, -1.69

-1.96, -2.23

