# What does the PDF of fluxes look like at high and low res?

In [1]:
%matplotlib inline
import os
from glob import glob

import xarray as xr
import dask
import numpy as np
import pandas as pd

import ncar_jobqueue

import cftime

import cesm_orcas_sci as cesm
import grid_tools

import matplotlib.pyplot as plt

xr.set_options(keep_attrs=True)

<xarray.core.options.set_options at 0x2aab613caf60>

In [2]:
xr_open = dict(decode_times=False, decode_coords=False)

def set_coords(ds, varname):
    """Set all variables except varname to be coords."""
    coord_vars = set(ds.data_vars) - set(varname)
    return ds.set_coords(coord_vars)

def open_dataset(file, data_vars, drop_vars):
    ds = xr.open_dataset(f, chunks={'time': 5}, **xr_open)
    ds = ds.drop(drop_vars(ds, data_vars))
    return set_coords(ds, data_vars)

def concat_time_levels(dsets, time_coord_name='time'):
    """
    Concatenate datasets across "time" levels, taking time invariant variables
    from the first dataset.
    Parameters
    ----------
    dsets : list
        A list of datasets to concatenate.
    time_coord_name : string
        Default name of the time coordinate
    Returns
    -------
    dset : xarray.Dataset,
        The concatenated dataset.
    """
    dsets = dask.compute(*dsets)
    if len(dsets) == 1:
        return dsets[0]

    # get static vars from first dataset
    first = dsets[0]
    
    def drop_unnecessary_coords(ds):
        """Drop coordinates that do not correspond with dimensions."""
        non_dim_coords = set(ds.coords) - set(ds.dims)
        non_dim_coords_drop = [
            coord for coord in non_dim_coords if time_coord_name not in ds[coord].dims
        ]
        return ds.drop(non_dim_coords_drop)

    rest = [drop_unnecessary_coords(ds) for ds in dsets[1:]]
    objs_to_concat = [first] + rest

    ds = xr.concat(objs_to_concat, dim=time_coord_name, coords='minimal')

    return ds

mol_per_nmol = 1e-9 
cm2_per_m2 = 1e4 
s_per_yr = 365.0 * 86400.0
nmolcm2s_to_molm2yr = mol_per_nmol * cm2_per_m2 * s_per_yr

In [3]:
cluster = ncar_jobqueue.NCARCluster()
client = dask.distributed.Client(cluster)
n_workers = 9 * 4
cluster.scale(n_workers)

  "diagnostics_port has been deprecated. "


In [4]:
!qstat | grep dask-worker

/usr/bin/sh: module: line 1: syntax error: unexpected end of file
/usr/bin/sh: error importing function definition for `BASH_FUNC_module'
/usr/bin/sh: ml: line 1: syntax error: unexpected end of file
/usr/bin/sh: error importing function definition for `BASH_FUNC_ml'


In [None]:
client

0,1
Client  Scheduler: tcp://10.148.8.164:49525  Dashboard: http://10.148.8.164/proxy/8787/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [None]:
case = 'bgeos5.B20TRC5CN.f09_g16.BPRD_orcas_sci.004'

dslo = cesm.open_casedata(case, 'atm', 'cam.h0', ['SFCO2_OCN', 'SFO2_OCN'])

dslo = dslo.sel(time=slice('2011', '2015')).sel(lat=slice(-90., -30.))

dslo = cesm.convert_dataset(dslo, case)
area = grid_tools.compute_grid_area(dslo.lon.values, dslo.lat.values)
dslo['area'] = xr.DataArray(area, dims=('lat', 'lon'))
dslo

total area = 1.2691127007642938e+14
check area = 5.1009969907076156e+14


<xarray.Dataset>
Dimensions:    (ilev: 57, lat: 64, lev: 56, lon: 288, nbnd: 2, slat: 191, slon: 288, time: 1826)
Coordinates:
  * lat        (lat) float64 -90.0 -89.06 -88.12 -87.17 ... -32.51 -31.57 -30.63
  * lon        (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8
  * ilev       (ilev) float64 1.651 2.085 2.62 3.276 ... 955.0 970.0 985.0 1e+03
  * lev        (lev) float64 1.868 2.353 2.948 3.677 ... 947.5 962.5 977.5 992.5
  * slat       (slat) float64 -89.53 -88.59 -87.64 -86.7 ... 87.64 88.59 89.53
  * slon       (slon) float64 -0.625 0.625 1.875 3.125 ... 355.6 356.9 358.1
  * time       (time) object 2011-01-01 12:00:00 ... 2015-12-31 12:00:00
Dimensions without coordinates: nbnd
Data variables:
    P0         float64 1e+05
    SFCO2_OCN  (time, lat, lon) float32 dask.array<shape=(1826, 64, 288), chunksize=(3, 64, 288)>
    ch4vmr     (time) float64 dask.array<shape=(1826,), chunksize=(3,)>
    co2vmr     (time) float64 dask.array<shape=(1826,), chunksize=(3,)

In [None]:
files = glob('/glade/scratch/mclong/hi-res-eco/g.e11.G.T62_t12.eco.006/ocn/hist/g.e11.G.T62_t12.eco.006.pop.h.????-??-??.nc')

drop_vars = lambda ds, data_vars: [v for v in ds.variables if v not in data_vars+['TAREA', 'TLAT', 'TLONG', 'time', 'time_bound']]

i = 0
dsets = []
for f in files:
    dsets.append(dask.delayed(open_dataset)(f, data_vars=['FG_CO2', 'STF_O2'], drop_vars=drop_vars))
    if i > 2: break
    i += 1

In [None]:
dshi = concat_time_levels(dsets)
nlat30S = np.where(dshi.TLAT[:, 0] > -30.)[0][0]
dshi = dshi.isel(nlat=slice(0, nlat30S))

date = cftime.num2date(dshi.time_bound.mean('d2'), units=dshi.time.units, 
                       calendar=dshi.time.calendar)
dshi['time'] = xr.CFTimeIndex(date) 

dshi['FG_CO2'] = (-1.0) * dshi.FG_CO2 * nmolcm2s_to_molm2yr
dshi.FG_CO2.attrs['units'] = 'mol m$^{-1}$ yr$^{-1}$'

dshi['STF_O2'] = (-1.0) * dshi.STF_O2 * nmolcm2s_to_molm2yr
dshi.STF_O2.attrs['units'] = 'mol m$^{-1}$ yr$^{-1}$'

dshi['TAREA'] = dshi.TAREA * 1e-4
dshi = dshi.reset_coords('TAREA')

In [None]:
co2_flux_bins = np.arange(-10., 10.2, 0.2)

In [None]:
fco2_area_bin_lo = dslo.groupby_bins('SFCO2_OCN', co2_flux_bins).sum(xr.ALL_DIMS).area.compute()
fco2_area_bin_lo



<xarray.DataArray 'area' (SFCO2_OCN_bins: 100)>
array([7.234382e+13, 7.861453e+13, 8.631224e+13, 9.427856e+13, 1.020207e+14,
       1.114192e+14, 1.224123e+14, 1.355316e+14, 1.465408e+14, 1.618879e+14,
       1.771524e+14, 1.950053e+14, 2.177370e+14, 2.359450e+14, 2.645455e+14,
       2.917997e+14, 3.196110e+14, 3.558798e+14, 4.006309e+14, 4.381523e+14,
       4.848564e+14, 5.433413e+14, 6.103320e+14, 6.761336e+14, 7.543384e+14,
       8.438695e+14, 9.514930e+14, 1.062496e+15, 1.192699e+15, 1.328694e+15,
       1.483101e+15, 1.662815e+15, 1.849145e+15, 2.060926e+15, 2.294616e+15,
       2.585725e+15, 2.896879e+15, 3.226147e+15, 3.608532e+15, 4.042972e+15,
       4.546810e+15, 5.115115e+15, 5.763734e+15, 6.486616e+15, 7.351072e+15,
       8.337291e+15, 9.557645e+15, 1.097985e+16, 1.267606e+16, 1.949869e+16,
       4.945872e+16, 9.896105e+15, 7.530550e+15, 6.042692e+15, 4.940283e+15,
       4.047407e+15, 3.356289e+15, 2.787876e+15, 2.320118e+15, 1.929392e+15,
       1.606873e+15, 1.34820

In [None]:
fco2_area_bin_hi = dshi.groupby_bins('FG_CO2', co2_flux_bins).sum(xr.ALL_DIMS).TAREA.compute()
fco2_area_bin_hi



<xarray.DataArray 'TAREA' (FG_CO2_bins: 100)>
array([         nan, 3.315582e+08,          nan,          nan, 9.933456e+08,
       5.682364e+08, 2.251416e+09, 2.534417e+09, 3.888648e+09, 4.737059e+09,
       1.254718e+10, 1.967431e+10, 3.675547e+10, 4.267835e+10, 4.744559e+10,
       7.196820e+10, 1.930109e+11, 2.654444e+11, 3.193308e+11, 3.988935e+11,
       5.275528e+11, 6.462753e+11, 7.558320e+11, 9.923501e+11, 1.204871e+12,
       1.334062e+12, 1.593047e+12, 1.890123e+12, 2.426977e+12, 3.083901e+12,
       3.841861e+12, 5.394927e+12, 6.713413e+12, 7.789577e+12, 8.431594e+12,
       9.490144e+12, 1.034743e+13, 1.110576e+13, 1.191293e+13, 1.227624e+13,
       1.408922e+13, 1.654623e+13, 1.942131e+13, 2.245567e+13, 2.427023e+13,
       2.769270e+13, 2.902386e+13, 2.398825e+13, 2.028433e+13, 2.740887e+13,
       4.663697e+13, 1.565846e+13, 1.071145e+13, 8.830692e+12, 7.030171e+12,
       4.786178e+12, 3.077822e+12, 1.789584e+12, 1.024792e+12, 7.394162e+11,
       5.573921e+11, 4.619617e

In [None]:
fco2_area_bin_lo.plot()

[<matplotlib.lines.Line2D at 0x2aaccd1276d8>]

In [None]:
fco2_area_bin_lo.plot()
fco2_area_bin_hi.plot()