# Make Files 
Need to make sure units are correct

Follow Peter's example for the README file: https://www.ncei.noaa.gov/access/ocean-carbon-data-system/oceans/

Requirements for the README:
- Original study citation and dataset citation, if applicable.
- Gas transfer velocity (Kw) used and its global mean.
- The temperature, salinity, and wind products used to calculate Kw and alpha should be listed.
- Handling of sea ice with respect to air-sea CO2 fluxes.
- The procedure used to calculate pco2atm, with details of the following: xCO2 product, interpolation, pH2O correction used, and pressure product.
- Other comments or idiosyncrasies in the dataset that will affect global or regional comparison.

In [1]:
import air_sea_co2_exchange as ase
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import glob
import seaflux
import pandas as pd
import datetime
from datetime import date
import datetime

# Put pCO2 and error into common file

In [2]:
%%time
# Open every XGB*.nc file in the run directory, combine them into a single
# Dataset and keep the years 1982 through 2018.
data_dir = '/home/gloege/projects/ldeo_hpd/data/model_output/XGB/GCB_2020_plus_xco2'
ds_spco2 = xr.merge(list(map(xr.open_dataset, glob.glob(f'{data_dir}/XGB*.nc'))))
ds_spco2 = ds_spco2.sel(time=slice("1982","2018"))

# Data variables whose name contains "corrected_".
variables = list(ds_spco2)
match_corr = [name for name in variables if "corrected_" in name]

# One DataArray per matching variable: add a length-1 'model' dimension
# labelled with the second '_'-separated token of the variable name, and
# give every member the common name 'spco2' so they stack on merge.
merge_files_corr = []
for var in match_corr:
    model_label = var.split('_')[1]
    member = ds_spco2[var].expand_dims('model')
    member = member.assign_coords(model=[f"{model_label}"])
    merge_files_corr.append(member.rename('spco2'))

# Merge the per-model members into one Dataset with a 'model' dimension.
ds_corr = xr.merge(merge_files_corr)

CPU times: user 28.2 s, sys: 51.8 s, total: 1min 20s
Wall time: 1min 21s


In [5]:
# Drop the intermediate lists; only ds_corr is needed from here on.
del match_corr
del merge_files_corr

In [6]:
%%time
# Re-open the XGB*.nc files, combine them into a single Dataset and keep
# the years 1982 through 2018.
data_dir = '/home/gloege/projects/ldeo_hpd/data/model_output/XGB/GCB_2020_plus_xco2'
ds_spco2 = xr.merge(list(map(xr.open_dataset, glob.glob(f'{data_dir}/XGB*.nc'))))
ds_spco2 = ds_spco2.sel(time=slice("1982","2018"))

# Data variables whose name contains "error_".
variables = list(ds_spco2)
match_error = [name for name in variables if "error_" in name]

# One DataArray per matching variable: add a length-1 'model' dimension
# labelled with the second '_'-separated token of the variable name and
# name it 'error'.
# HPD stores the error as (obs - model); multiplying by -1 flips it to
# (model - obs).
merge_files_error = []
for var in match_error:
    model_label = var.split('_')[1]
    member = ds_spco2[var].expand_dims('model')
    member = member.assign_coords(model=[f"{model_label}"])
    merge_files_error.append(member.rename('error')*(-1))

# Merge the per-model members into one Dataset with a 'model' dimension.
ds_error = xr.merge(merge_files_error)

CPU times: user 14.1 s, sys: 22.5 s, total: 36.6 s
Wall time: 37.6 s


In [8]:
# Drop the intermediate lists; only ds_error is needed from here on.
del match_error
del merge_files_error

In [9]:
# Combine the corrected pCO2 ('spco2') and the error fields ('error')
# into a single Dataset.
ds_merge = xr.merge((ds_corr, ds_error))

In [17]:
# Metadata written into the output netCDF file.
# NOTE(review): the 'standard_name' values below are free text, not entries
# from the CF standard-name table -- confirm whether CF compliance is needed.

# variable attributes
ds_merge['spco2'].attrs = {'long_name':'sea surface pCO2',
                       'standard_name': 'sea surface pCO2' ,
                       'units': 'uatm',}

ds_merge['error'].attrs = {'long_name':'model minus observations',
                       'standard_name': 'model error' ,
                       'units': 'uatm',}

# coordinate attributes
ds_merge['time'].attrs = {'long_name':'time',}

ds_merge['lat'].attrs = {'long_name':'latitude',
                       'units': 'degrees_north',}
# fixed typo: this long_name was previously written to the file as 'longtude'
ds_merge['lon'].attrs = {'long_name':'longitude',
                       'units': 'degrees_east',}
ds_merge['model'].attrs = {'long_name':'model name',
                       'units': 'NA',}

# global attributes
# Read the clock once so the version tag and the creation date always refer
# to the same day (two separate reads could disagree around midnight).
now = datetime.datetime.now()
ds_merge.attrs = {'institution': 'Lamont-Doherty Earth Observatory (LDEO)',
                'version': f"LDEO-HPD_v{now.strftime('%Y%m%d')}",
                'contact': 'Luke Gloege (ljg2157@columbia.edu)',
                'creation_date': f'{now.strftime("%Y-%m-%d")}'}

In [18]:
# Print a netCDF-style summary (dimensions, variables, attributes) of the
# merged dataset as a check before writing it to disk.
ds_merge.info()

xarray.Dataset {
dimensions:
	lat = 180 ;
	lon = 360 ;
	model = 9 ;
	time = 444 ;

variables:
	object model(model) ;
		model:long_name = model name ;
		model:units = NA ;
	datetime64[ns] time(time) ;
		time:long_name = time ;
	float64 lat(lat) ;
		lat:long_name = latitude ;
		lat:units = degrees_north ;
	float64 lon(lon) ;
		lon:long_name = longtude ;
		lon:units = degrees_east ;
	float64 spco2(model, time, lat, lon) ;
		spco2:long_name = sea surface pCO2 ;
		spco2:standard_name = sea surface pCO2 ;
		spco2:units = uatm ;
	float32 error(model, time, lat, lon) ;
		error:long_name = model minus observations ;
		error:standard_name = model error ;
		error:units = uatm ;

// global attributes:
	:institution = Lamont-Doherty Earth Observatory (LDEO) ;
	:version = LDEO-HPD_v20210425 ;
	:contact = Luke Gloege (ljg2157@columbia.edu) ;
	:creation_date = 2021-04-25 ;
}

## Save file

In [21]:
# Write the merged pCO2/error dataset; the file name carries today's date
# (YYYYMMDD) as the version tag.
version_stamp = str(date.today()).replace('-','')
ds_merge.to_netcdf(
    f"/local/data/artemis/workspace/gloege/ldeo-hpd/LDEO-HPD_v{version_stamp}_1x1_198201-201812.nc"
)

# Fluxes via fluxkit

In [2]:
%%time
'''
wind_products : 'NCEP1' 'NCEP2' 'CCMP2' 'ERA5' 'JRA55'
sol   : mol/m3/uatm
k2    : cm/hr
spco2 : uatm
flux  : mol/m2/s
'''
# ---------------------------------------------------------------------------
# pCO2 product written earlier in this notebook, restricted to 1985-2018.
# NOTE(review): the version tag in this path is hard-coded, while the save
# step names the file after the current date -- confirm this is the file
# you intend to read.
# ---------------------------------------------------------------------------
ds_spco2 = xr.open_dataset(
    '/local/data/artemis/workspace/gloege/ldeo-hpd/LDEO-HPD_v20210425_1x1_198201-201812.nc'
).sel(time=slice("1985","2018"))

# ---------------------------------------------------------------------------
# FluxKit file supplying the remaining terms of the flux equation.
# ---------------------------------------------------------------------------
fluxkit_dir = '/data/artemis/observations/SOCOM/extra_files'
fluxkit_path=f'{fluxkit_dir}/FluxKit_calculation_1982-2019_v20201120.nc'
fk = xr.open_dataset(fluxkit_path)

# Overwrite the FluxKit time axis with month starts shifted by 14 days
# (i.e. the 15th of each month), 1985-01 through 2018-12.
# NOTE(review): this replaces the coordinate rather than selecting by date,
# so it assumes the file's time axis has exactly these monthly steps --
# TODO confirm (the file name says 1982-2019).
month_starts = pd.date_range(
    start='1985-01T00:00:00.000000000',
    end='2018-12T00:00:00.000000000',
    freq='MS',
)
fk['time'] = month_starts + np.timedelta64(14, 'D')

# Shift negative longitudes by +360, then order the grid by longitude.
fk['lon'] = [x + 360 if x < 0 else x for x in fk['lon'].values]
fk = fk.sortby('lon')

# Fill missing pCO2 with the scaled FluxKit climatology (uatm).
spco2_filled = ds_spco2['spco2'].fillna(fk['spco2_clim'] * fk['spco2_scaling'])

# Sea-air pCO2 difference (uatm).
dpco2 = spco2_filled - fk['atm']

# Open-water fraction: 1 - ice, with missing ice treated as 0.
ice_weighting = 1 - fk['ice'].fillna(0)

# Solubility (mol/m3/uatm) and gas transfer velocity converted from
# cm/hr to m/s.
cm_to_m = 1 / 100
hr_to_s = 1 / 3600
sol = fk['sol_Weiss74']
kw = fk['kw_scaled'] * cm_to_m * hr_to_s

# Bulk flux (mol/m2/s).
flux = sol * kw * dpco2 * ice_weighting


CPU times: user 28 s, sys: 20.3 s, total: 48.4 s
Wall time: 48.2 s


In [3]:
# ---------------------------------------------------------------------------
# Store the grid-cell area, the flux and the gap-filled pCO2 alongside the
# original pCO2 in ds_spco2.
# ---------------------------------------------------------------------------
flux_dims = ('wind','model','time','lat','lon')

ds_spco2['area'] = fk['area']
ds_spco2['fgco2'] = flux.transpose(*flux_dims)   # mol/m2/s
ds_spco2['spco2_filled'] = spco2_filled          # uatm

In [4]:
# Keep only the variables destined for the output files and restrict the
# wind dimension to the three listed products.
keep_vars = ['fgco2','spco2','spco2_filled','area']
keep_winds = ['CCMP2','ERA5','JRA55']
ds_out = ds_spco2[keep_vars].sel(wind=keep_winds)

In [5]:
# Describe the wind coordinate.
# NOTE(review): 'wnd_sdt' in the description may be a typo for 'wnd_std' --
# confirm before changing the stored text.
ds_out['wind'].attrs = dict(
    long_name='wind products',
    description='Three wind products used for flux calculation (wnd_avg^2 + wnd_sdt^2)^0.5',
)

In [6]:
# Print a netCDF-style summary of the assembled output dataset.
ds_out.info()

xarray.Dataset {
dimensions:
	lat = 180 ;
	lon = 360 ;
	model = 9 ;
	time = 408 ;
	wind = 3 ;

variables:
	float64 fgco2(wind, model, time, lat, lon) ;
	float64 spco2(model, time, lat, lon) ;
		spco2:long_name = sea surface pCO2 ;
		spco2:standard_name = sea surface pCO2 ;
		spco2:units = uatm ;
	float64 spco2_filled(model, time, lat, lon) ;
		spco2_filled:long_name = sea surface pCO2 ;
		spco2_filled:standard_name = sea surface pCO2 ;
		spco2_filled:units = uatm ;
	float64 area(lat, lon) ;
		area:long_name = area_per_pixel ;
		area:description = area per pixel ;
		area:units = m^2 ;
	object model(model) ;
		model:long_name = model name ;
		model:units = NA ;
	object wind(wind) ;
		wind:long_name = wind products ;
		wind:description = Three wind products used for flux calculation (wnd_avg^2 + wnd_sdt^2)^0.5 ;
	float64 lon(lon) ;
		lon:long_name = longtude ;
		lon:units = degrees_east ;
	float64 lat(lat) ;
		lat:long_name = latitude ;
		lat:units = degrees_north ;
	datetime64[ns] time(t

# global average

In [7]:
%%time
# Average the flux over the wind products, one model at a time, then stack
# the per-model results back along 'model'.
file_list = []
for mod in ds_out.model.values:
    wind_mean = ds_out['fgco2'].sel(model=mod).mean('wind')
    file_list.append(wind_mean)

da_fgco2 = xr.concat(file_list, dim='model')

  return np.nanmean(a, axis=axis, dtype=dtype)


CPU times: user 9.83 s, sys: 5.93 s, total: 15.8 s
Wall time: 15.8 s


In [8]:
%%time
# Factors for turning a carbon flux in mol/s into PgC/yr.
grams_in_mol = 12.01            # g/mol
sec_to_year = 365 * 86400       # seconds in a 365-day year
gram_to_petagram = 1 / 10**15   # Pg/g

# mol/s -> Pg/yr
conversion = grams_in_mol * gram_to_petagram * sec_to_year

# Per-cell flux times cell area, expressed in Pg/yr; summed over the grid
# in a later cell.
flux_tmp = da_fgco2 * ds_out['area'] * conversion

CPU times: user 729 ms, sys: 2.25 s, total: 2.98 s
Wall time: 2.98 s


In [9]:
%%time
# Time mask: a lat/lon sum turns all-NaN months into 0, so record which
# months have data (using the member at index 1 along the first axis).
# The mask is 1 where the lat/lon mean is not null and NaN elsewhere.
has_data = flux_tmp[1].mean(['lat','lon']).notnull()
tmp = has_data * 1
mask = tmp.where(tmp==1)

CPU times: user 152 ms, sys: 118 ms, total: 270 ms
Wall time: 269 ms


## calculate global average

In [10]:
# RECCAP2 region masks
regions_path = '/home/gloege/projects/ldeo_hpd/data/regions/RECCAP2_region_masks_all.nc'
ds_regions = xr.open_dataset(regions_path)

In [11]:
# 1 where the 'open_ocean' mask is >= 1, 0 elsewhere.
in_open_ocean = ds_regions['open_ocean'] >= 1
region_global = in_open_ocean * 1

In [12]:
%%time
# Area-integrated flux over the open-ocean mask, one model at a time.
# The number of models comes from the 'model' dimension instead of a
# hard-coded 9, so the cell keeps working if the ensemble size changes.
n_models = flux_tmp.sizes['model']
flux_region1 = [
    (flux_tmp.isel(model=i) * region_global)
    .sum(['lat','lon'])
    .rename('fgco2_glob')
    for i in range(n_models)
]

CPU times: user 1.31 s, sys: 1.69 s, total: 3 s
Wall time: 3 s


In [13]:
# Stack the per-model integrated fluxes along 'model'.
ds_tmp = xr.concat(objs=flux_region1, dim='model')

In [14]:
# Apply the time mask (1 / NaN) so months that the lat/lon sum turned into
# 0 become NaN again.
flux_region = ds_tmp * mask

In [15]:
# Store the masked, area-integrated flux in the output dataset.
ds_out['fgco2_glob'] = flux_region

In [16]:
# Store the wind-averaged flux field in the output dataset.
ds_out['fgco2_avg'] = da_fgco2.rename('fgco2_avg')

In [17]:
# fgco2_glob is flux * area summed over lat/lon and converted to PgC/yr,
# i.e. an area-integrated flux, so it is no longer labelled as a density.
ds_out['fgco2_glob'].attrs = {'long_name':'globally integrated CO2 flux',
                       'standard_name': 'global CO2 flux' ,
                       'units': 'PgC/yr' ,}
ds_out['fgco2'].attrs = {'long_name':'CO2 flux density',
                       'standard_name': 'CO2 flux density' ,
                       'units': 'mol/m2/s' ,}
# fgco2_avg (mean of fgco2 over the wind dimension) is saved as well and
# previously carried no metadata at all.
ds_out['fgco2_avg'].attrs = {'long_name':'CO2 flux density averaged over wind products',
                       'standard_name': 'CO2 flux density' ,
                       'units': 'mol/m2/s' ,}

In [18]:
# Display the assembled output dataset.
ds_out

<xarray.Dataset>
Dimensions:       (lat: 180, lon: 360, model: 9, time: 408, wind: 3)
Coordinates:
  * model         (model) object 'cesm' 'cnrm' ... 'planktom' 'princeton'
  * wind          (wind) object 'CCMP2' 'ERA5' 'JRA55'
  * lon           (lon) float64 0.5 1.5 2.5 3.5 4.5 ... 356.5 357.5 358.5 359.5
  * lat           (lat) float64 -89.5 -88.5 -87.5 -86.5 ... 86.5 87.5 88.5 89.5
  * time          (time) datetime64[ns] 1985-01-15 1985-02-15 ... 2018-12-15
Data variables:
    fgco2         (wind, model, time, lat, lon) float64 nan nan nan ... nan nan
    spco2         (model, time, lat, lon) float64 ...
    spco2_filled  (model, time, lat, lon) float64 nan nan nan ... 324.6 324.1
    area          (lat, lon) float64 nan nan nan ... 1.074e+08 1.074e+08
    fgco2_glob    (model, time) float64 -2.169 -1.467 -1.993 ... -2.826 -2.955
    fgco2_avg     (model, time, lat, lon) float64 nan nan nan ... nan nan nan
Attributes:
    institution:    Lamont-Doherty Earth Observatory (LDEO)
    v

## save data

In [19]:
# Write the pCO2 fields (original and gap-filled); the file name carries
# today's date (YYYYMMDD) as the version tag.
# NOTE(review): the name says 198201-201812, but ds_out derives from data
# sliced to 1985-2018 -- confirm the intended period label.
version_stamp = str(date.today()).replace('-','')
spco2_path = f"/local/data/artemis/workspace/gloege/ldeo-hpd/LDEO-HPD_spco2_v{version_stamp}_1x1_198201-201812.nc"
ds_out[['spco2','spco2_filled']].to_netcdf(spco2_path)

In [20]:
# Write the integrated flux, the wind-averaged flux field and the cell
# areas; the file name carries today's date (YYYYMMDD) as the version tag.
# NOTE(review): the name says 198201-201812, but ds_out derives from data
# sliced to 1985-2018 -- confirm the intended period label.
version_stamp = str(date.today()).replace('-','')
fgco2_path = f"/local/data/artemis/workspace/gloege/ldeo-hpd/LDEO-HPD_fgco2_v{version_stamp}_1x1_198201-201812.nc"
ds_out[['fgco2_glob','fgco2_avg','area']].to_netcdf(fgco2_path)

In [22]:
# Write the full flux field (all wind products and models); the file name
# carries today's date (YYYYMMDD) as the version tag.
# NOTE(review): the name says 198201-201812, but ds_out derives from data
# sliced to 1985-2018 -- confirm the intended period label.
version_stamp = str(date.today()).replace('-','')
fgco2_full_path = f"/local/data/artemis/workspace/gloege/ldeo-hpd/LDEO-HPD_fgco2-full_v{version_stamp}_1x1_198201-201812.nc"
ds_out[['fgco2']].to_netcdf(fgco2_full_path)

In [21]:
# Print a netCDF-style summary of the output dataset, including the
# attributes added in the earlier cells.
ds_out.info()

xarray.Dataset {
dimensions:
	lat = 180 ;
	lon = 360 ;
	model = 9 ;
	time = 408 ;
	wind = 3 ;

variables:
	float64 fgco2(wind, model, time, lat, lon) ;
		fgco2:long_name = CO2 flux density ;
		fgco2:standard_name = CO2 flux density ;
		fgco2:units = mol/m2/s ;
	float64 spco2(model, time, lat, lon) ;
		spco2:long_name = sea surface pCO2 ;
		spco2:standard_name = sea surface pCO2 ;
		spco2:units = uatm ;
	float64 spco2_filled(model, time, lat, lon) ;
		spco2_filled:long_name = sea surface pCO2 ;
		spco2_filled:standard_name = sea surface pCO2 ;
		spco2_filled:units = uatm ;
	float64 area(lat, lon) ;
		area:long_name = area_per_pixel ;
		area:description = area per pixel ;
		area:units = m^2 ;
	object model(model) ;
		model:long_name = model name ;
		model:units = NA ;
	object wind(wind) ;
		wind:long_name = wind products ;
		wind:description = Three wind products used for flux calculation (wnd_avg^2 + wnd_sdt^2)^0.5 ;
	float64 lon(lon) ;
		lon:long_name = longtude ;
		lon:units = degrees

# Old stuff

In [None]:
%%time
# Old version: area-integrated flux over the open-ocean mask for each of
# three positions along the first axis and nine along the second.
# NOTE(review): this indexes flux_tmp with five positions, so it presumably
# expects a leading wind axis; the flux_tmp built earlier in this notebook
# is already averaged over wind -- confirm before re-running.
flux_region1, flux_region2, flux_region3 = [
    [
        (flux_tmp[wind_idx,i,:,:,:] * region_global)
        .sum(['lat','lon'])
        .rename('fgco2_glob')
        for i in range(0,9)
    ]
    for wind_idx in (0, 1, 2)
]

In [37]:
# Old version: stack each list of per-model sums along 'model'.
ds_wind1, ds_wind2, ds_wind3 = [
    xr.concat(members, dim='model')
    for members in (flux_region1, flux_region2, flux_region3)
]

In [62]:
# Old version: stack the three per-wind results along a new 'wind' dimension.
per_wind = (ds_wind1, ds_wind2, ds_wind3)
ds_tmp = xr.concat(per_wind, dim='wind')

In [41]:
# Old version: apply the time mask (1 / NaN) to the stacked sums.
flux_region = ds_tmp * mask

In [47]:
# Old version: store the masked sums in the output dataset.
ds_out['fgco2_glob'] = flux_region