# Define : function definitions

This defines all the functions used to process output

**Updates**
- June 17 2018 -> added read_peter()
- July 09 2018 -> added read_jones()

In [1]:
import xarray as xr
import numpy as np
import pandas as pd

# read_biomes : reads the Fay and McKinley biomes and stores them in an xarray.Dataset

In [13]:
def read_biomes():
    '''
    read_biomes : reads the Fay and McKinley biomes into an xarray.Dataset

    Returns
    ==============
    output : xarray.Dataset with variable 'mean_biomes' on dims (lat, lon)

    notes  : - data are rolled by 180 points along lon and 180 is added to
               the lon coordinate (presumably -180/180 to 0/360 grid, as in
               the SOMFFN readers -- confirm against the biomes file)
    '''
    ### Read raw biomes file
    data_raw = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw'
    ds_biomes = xr.open_dataset(f'{data_raw}/biomes/fay_mckinley_biomes.nc')

    ### Roll the data along lon. Only the values are kept (the lon coordinate
    ### is rebuilt below), so the coordinate itself does not need rolling.
    ### roll_coords is set explicitly so the result does not depend on the
    ### xarray version's default. The transpose puts the array in the
    ### (lat, lon) order declared below.
    rolled = ds_biomes['MeanBiomes'].roll(lon=-180, roll_coords=False)
    mean_biomes = rolled.T.values

    ### Plain arrays are passed in the (dims, data) tuples: handing xarray a
    ### DataArray there is ambiguous and deprecated.
    ds = xr.Dataset(
        {
        'mean_biomes': (['lat', 'lon'], mean_biomes),
        },

        coords={
        'lat': (['lat'], ds_biomes['lat'].values),
        'lon': (['lon'], (ds_biomes['lon'] + 180).values),
        })

    return ds

# read_peter : reads SOMFFN reconstruction.

This function reads the SOMFFN reconstruction of one member from one of the large ensembles (CESM, GFDL, MPI, or CanESM2); 25 members are used from each ensemble.

### Data storage
`/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw/interpolated/SOMFFN/`

In [1]:
def read_peter(model=None, member=None):
    '''
    read_peter : reads SOMFFN-reconstructed pco2 for the model and member defined

    Inputs
    ==============
    model  : CESM, GFDL, MPI, CanESM2 (matched case-insensitively for the
             file name; the directory name uses model exactly as given)
    member : 1-30, if CanESM2 then r1r1-r5r10
             (zero-padded to 3 digits for CESM/MPI and 2 digits for GFDL)

    Returns
    ==============
    output : xarray.Dataset with variable 'pco2' on dims (time, lat, lon)

    Raises
    ==============
    ValueError : model or member is missing, or model is not recognized

    notes  : - lon is changed from -180/180 to 0/360 grid to be consistent with model
               (data rolled 180 points along lon, 180 added to the lon coordinate)
             - masked same way as SOMFFN SOCAT product
               (values kept only where mask == 1 in mask_somffn.nc)
    '''
    ### ================================================
    ### Validate inputs before touching the file system
    ### ================================================
    if model is None or member is None:
        raise ValueError('read_peter requires both model and member')

    ### File name template for each supported model
    file_templates = {
        'CESM': 'pCO2_2D_mon_CESM{member:0>3}_1x1_198201-201701_SOMFFN.nc',
        'GFDL': 'pCO2_2D_mon_GFDL{member:0>2}_1x1_198201-201701_SOMFFN.nc',
        'MPI': 'pCO2_2D_mon_MPI{member:0>3}_1x1_198201-201701_SOMFFN.nc',
        'CANESM2': 'pCO2_2D_mon_CanESM2{member}_1x1_198201-201712_SOMFFN.nc',
    }
    model_key = model.upper()
    if model_key not in file_templates:
        raise ValueError(
            f'unknown model {model!r}; expected one of CESM, GFDL, MPI, CanESM2')

    ### ================================================
    ### Get file path
    ### ================================================
    data_dir = f'/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw/interpolated/SOMFFN/{model}'
    fl_name = file_templates[model_key].format(member=member)
    fl = f'{data_dir}/{fl_name}'

    ### ================================================
    ### Open data structure
    ### ================================================
    ds = xr.open_dataset(fl)

    ### ================================================
    ### roll(lon=180) moves the last 180 lon points to the front.
    ### roll_coords takes a bool; only the rolled values are used here
    ### (the lon coordinate is rebuilt below), so coords are not rolled.
    ### ================================================
    pco2 = ds['pCO2_SOMFFN'].roll(lon=180, roll_coords=False).values

    ### ================================================
    ### Output dataset
    ### Plain arrays are passed in the (dims, data) tuples: handing
    ### xarray a DataArray there is ambiguous and deprecated.
    ### ================================================
    ds_out = xr.Dataset(
            {
            'pco2': (['time', 'lat', 'lon'], pco2),
            },

            coords={
            'lat': (['lat'], ds['lat'].values),
            'lon': (['lon'], (ds['lon'] + 180).values),
            'time': (['time'], ds['time'].values)
            })

    ### ================================================
    ### SOM-FFN mask
    ### mask created using _create_SOMFFN_mask.ipynb
    ### ================================================
    fl_mask = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/clean/mask_somffn.nc'
    ds_mask = xr.open_dataset(fl_mask)
    ds_out['pco2'] = ds_out['pco2'].where(ds_mask['mask'] == 1)

    return ds_out

In [None]:
def read_somffn(model=None, member=None):
    '''
    read_somffn : reads SOMFFN-reconstructed pco2 for the model and member defined

    Inputs
    ==============
    model  : CESM, GFDL, MPI, CanESM2 (matched case-insensitively for the
             file name; the directory name uses model exactly as given)
    member : 1-30, if CanESM2 then r1r1-r5r10
             (zero-padded to 3 digits for CESM/MPI and 2 digits for GFDL)

    Returns
    ==============
    output : xarray.Dataset with variable 'pco2' on dims (time, lat, lon)

    Raises
    ==============
    ValueError : model or member is missing, or model is not recognized

    notes  : - lon is changed from -180/180 to 0/360 grid to be consistent with model
               (data rolled 180 points along lon, 180 added to the lon coordinate)
             - masked same way as SOMFFN SOCAT product
               (values kept only where mask == 1 in mask_somffn.nc)
    '''
    ### ================================================
    ### Validate inputs before touching the file system
    ### ================================================
    if model is None or member is None:
        raise ValueError('read_somffn requires both model and member')

    ### File name template for each supported model
    file_templates = {
        'CESM': 'pCO2_2D_mon_CESM{member:0>3}_1x1_198201-201701_SOMFFN.nc',
        'GFDL': 'pCO2_2D_mon_GFDL{member:0>2}_1x1_198201-201701_SOMFFN.nc',
        'MPI': 'pCO2_2D_mon_MPI{member:0>3}_1x1_198201-201701_SOMFFN.nc',
        'CANESM2': 'pCO2_2D_mon_CanESM2{member}_1x1_198201-201712_SOMFFN.nc',
    }
    model_key = model.upper()
    if model_key not in file_templates:
        raise ValueError(
            f'unknown model {model!r}; expected one of CESM, GFDL, MPI, CanESM2')

    ### ================================================
    ### Get file path
    ### ================================================
    data_dir = f'/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw/interpolated/SOMFFN/{model}'
    fl_name = file_templates[model_key].format(member=member)
    fl = f'{data_dir}/{fl_name}'

    ### ================================================
    ### Open data structure
    ### ================================================
    ds = xr.open_dataset(fl)

    ### ================================================
    ### roll(lon=180) moves the last 180 lon points to the front.
    ### roll_coords takes a bool; only the rolled values are used here
    ### (the lon coordinate is rebuilt below), so coords are not rolled.
    ### ================================================
    pco2 = ds['pCO2_SOMFFN'].roll(lon=180, roll_coords=False).values

    ### ================================================
    ### Output dataset
    ### Plain arrays are passed in the (dims, data) tuples: handing
    ### xarray a DataArray there is ambiguous and deprecated.
    ### ================================================
    ds_out = xr.Dataset(
            {
            'pco2': (['time', 'lat', 'lon'], pco2),
            },

            coords={
            'lat': (['lat'], ds['lat'].values),
            'lon': (['lon'], (ds['lon'] + 180).values),
            'time': (['time'], ds['time'].values)
            })

    ### ================================================
    ### SOM-FFN mask
    ### mask created using _create_SOMFFN_mask.ipynb
    ### ================================================
    fl_mask = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/clean/mask_somffn.nc'
    ds_mask = xr.open_dataset(fl_mask)
    ds_out['pco2'] = ds_out['pco2'].where(ds_mask['mask'] == 1)

    return ds_out

In [None]:
def read_somffn_float(model=None, member=None):
    '''
    read_somffn_float : reads pco2 from the SOMFFN_float files for the
                        model and member defined

    Inputs
    ==============
    model  : CESM, GFDL, MPI, CanESM2 (matched case-insensitively for the
             file name; the directory name uses model exactly as given)
    member : 1-30, if CanESM2 then r1r1-r5r10
             (zero-padded to 3 digits for CESM/MPI and 2 digits for GFDL)

    Returns
    ==============
    output : xarray.Dataset with variable 'pco2' on dims (time, lat, lon)

    Raises
    ==============
    ValueError : model or member is missing, or model is not recognized

    notes  : - lon is changed from -180/180 to 0/360 grid to be consistent with model
               (data rolled 180 points along lon, 180 added to the lon coordinate)
             - masked same way as SOMFFN SOCAT product
               (values kept only where mask == 1 in mask_somffn.nc)
    '''
    ### ================================================
    ### Validate inputs before touching the file system
    ### ================================================
    if model is None or member is None:
        raise ValueError('read_somffn_float requires both model and member')

    ### File name template for each supported model
    ### e.g. pCO2_2D_mon_MPI018_1x1_198201-201701_SOMFFN_float.nc
    file_templates = {
        'CESM': 'pCO2_2D_mon_CESM{member:0>3}_1x1_198201-201701_SOMFFN_float.nc',
        'GFDL': 'pCO2_2D_mon_GFDL{member:0>2}_1x1_198201-201701_SOMFFN_float.nc',
        'MPI': 'pCO2_2D_mon_MPI{member:0>3}_1x1_198201-201701_SOMFFN_float.nc',
        'CANESM2': 'pCO2_2D_mon_CanESM2{member}_1x1_198201-201701_SOMFFN_float.nc',
    }
    model_key = model.upper()
    if model_key not in file_templates:
        raise ValueError(
            f'unknown model {model!r}; expected one of CESM, GFDL, MPI, CanESM2')

    ### ================================================
    ### Get file path
    ### ================================================
    data_dir = f'/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw/interpolated/SOMFFN_float/{model}'
    fl_name = file_templates[model_key].format(member=member)
    fl = f'{data_dir}/{fl_name}'

    ### ================================================
    ### Open data structure
    ### ================================================
    ds = xr.open_dataset(fl)

    ### ================================================
    ### roll(lon=180) moves the last 180 lon points to the front.
    ### roll_coords takes a bool; only the rolled values are used here
    ### (the lon coordinate is rebuilt below), so coords are not rolled.
    ### ================================================
    pco2 = ds['pCO2_SOMFFN'].roll(lon=180, roll_coords=False).values

    ### ================================================
    ### Output dataset
    ### Plain arrays are passed in the (dims, data) tuples: handing
    ### xarray a DataArray there is ambiguous and deprecated.
    ### ================================================
    ds_out = xr.Dataset(
            {
            'pco2': (['time', 'lat', 'lon'], pco2),
            },

            coords={
            'lat': (['lat'], ds['lat'].values),
            'lon': (['lon'], (ds['lon'] + 180).values),
            'time': (['time'], ds['time'].values)
            })

    ### ================================================
    ### SOM-FFN mask
    ### mask created using _create_SOMFFN_mask.ipynb
    ### ================================================
    fl_mask = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/clean/mask_somffn.nc'
    ds_mask = xr.open_dataset(fl_mask)
    ds_out['pco2'] = ds_out['pco2'].where(ds_mask['mask'] == 1)

    return ds_out

# read_model : read model pCO2

This function reads pCO2 for a single member of the CESM, GFDL, MPI, or CanESM2 large ensemble.

### Data storage
`/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw/large_ensembles/`




In [None]:
def read_model(model=None, member=None):
    '''
    read_model : reads model pco2 for the model and member defined

    Inputs
    ==============
    model  : CESM, GFDL, MPI, CanESM2 (matched case-insensitively for the
             file name; the directory name uses model exactly as given)
    member : 1-30, if CanESM2 then r1r1-r5r10
             (zero-padded to 3 digits for CESM/MPI and 2 digits for GFDL)

    Returns
    ==============
    output : xarray.Dataset with variable 'pco2' on dims (time, lat, lon)

    Raises
    ==============
    ValueError : model or member is missing, or model is not recognized

    notes  : - January 2017 is removed, since SOMFFN needs complete years
               (the last time step is dropped)
             - time is replaced by the 15th of each month, 1982-01 to 2016-12
             - masked same way as SOMFFN SOCAT product
               (values kept only where mask == 1 in mask_somffn.nc)
    '''
    ### ================================================
    ### Validate inputs before touching the file system
    ### ================================================
    if model is None or member is None:
        raise ValueError('read_model requires both model and member')

    ### Path below the model directory for each supported model
    file_templates = {
        'CESM': 'member_{member:0>3}/pCO2_2D_mon_CESM{member:0>3}_1x1_198201-201701.nc',
        'GFDL': 'member_{member:0>2}/pCO2_2D_mon_GFDL{member:0>2}_1x1_198201-201701.nc',
        'MPI': 'member_{member:0>3}/pCO2_2D_mon_MPI{member:0>3}_1x1_198201-201701.nc',
        'CANESM2': 'member_{member}/pCO2_2D_mon_CanESM2{member}_1x1_198201-201712.nc',
    }
    model_key = model.upper()
    if model_key not in file_templates:
        raise ValueError(
            f'unknown model {model!r}; expected one of CESM, GFDL, MPI, CanESM2')

    ### ================================================
    ### Get file path
    ### ================================================
    data_dir = f'/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw/large_ensembles/{model}'
    fl_name = file_templates[model_key].format(member=member)
    fl = f'{data_dir}/{fl_name}'

    ### ================================================
    ### Open data structure
    ### rename returns a new dataset (the inplace argument is deprecated)
    ### ================================================
    ds = xr.open_dataset(fl)
    ds = ds.rename({'ylat': 'lat', 'xlon': 'lon'})

    ### ================================================
    ### pco2, remove January 2017, since peter doesn't model it
    ### NOTE(review): the CanESM2 file name advertises 198201-201712; if it
    ### really holds 432 months, dropping one step leaves 431, which will not
    ### match the 420 dates forced below -- confirm against the file.
    ### ================================================
    pco2 = ds['pCO2'][:-1, :, :].values

    ### ================================================
    ### force time vector to be proper format and range
    ### (month starts shifted by 14 days -> 15th of each month)
    ### ================================================
    dates = pd.date_range(start='1982-01-01T00:00:00.000000000',
                          end='2016-12-01T00:00:00.000000000',
                          freq='MS') + np.timedelta64(14, 'D')

    # old way was ds['time'][:-1]
    ### ================================================
    ### Output dataset
    ### Plain arrays are passed in the (dims, data) tuples: handing
    ### xarray a DataArray there is ambiguous and deprecated.
    ### ================================================
    ds_out = xr.Dataset(
            {
            'pco2': (['time', 'lat', 'lon'], pco2),
            },

            coords={
            'lat': (['lat'], ds['lat'].values),
            'lon': (['lon'], ds['lon'].values),
            'time': (['time'], dates.values)
            })

    ### ================================================
    ### SOM-FFN mask
    ### mask created using _create_SOMFFN_mask.ipynb
    ### ================================================
    fl_mask = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/clean/mask_somffn.nc'
    ds_mask = xr.open_dataset(fl_mask)
    ds_out['pco2'] = ds_out['pco2'].where(ds_mask['mask'] == 1)

    return ds_out

# read_model2 : read any model variable
This function reads the variable from model

In [None]:
def read_model2(model=None, member=None, variable=None):
    '''
    read_model2 : reads variable from the model and member defined

    Inputs
    ==============
    model    : CESM, GFDL, MPI, CanESM2 (matched case-insensitively for the
               file name; the directory name uses model exactly as given)
    member   : 1-30, if CanESM2 then r1r1-r5r10
               (zero-padded to 3 digits for CESM/MPI and 2 digits for GFDL)
    variable : SST, SSS, pCO2, iceFrac, U10, pATM, XCO2

    Returns
    ==============
    output : xarray.Dataset holding variable on dims (time, lat, lon),
             or on (time) only when variable is XCO2

    Raises
    ==============
    ValueError : variable is missing; or, for any variable other than XCO2,
                 model or member is missing or model is not recognized

    notes  : - January 2017 is removed, since SOMFFN needs complete years
               (the last time step is dropped)
             - time is replaced by the 15th of each month, 1982-01 to 2016-12
             - masked same way as SOMFFN SOCAT product
               (values kept only where mask == 1 in mask_somffn.nc)
             - XCO2 is always read from the CESM member_001 file, whatever
               model and member are, and is not masked
    '''
    if variable is None:
        raise ValueError('read_model2 requires a variable')

    root_dir = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw/large_ensembles'

    ### ================================================
    ### force time vector to be proper format and range
    ### (month starts shifted by 14 days -> 15th of each month)
    ### ================================================
    dates = pd.date_range(start='1982-01-01T00:00:00.000000000',
                          end='2016-12-01T00:00:00.000000000',
                          freq='MS') + np.timedelta64(14, 'D')

    ### ================================================
    ### XCO2 : single 1D time series, independent of model / member
    ### ================================================
    if variable.upper() == 'XCO2':
        fl = f'{root_dir}/CESM/member_001/XCO2_1D_mon_CESM001_native_198201-201701.nc'
        ds = xr.open_dataset(fl)

        ### remove January 2017, since peter doesn't model it
        trimmed = ds[variable][:-1].values

        return xr.Dataset(
                {
                variable: (['time'], trimmed),
                },

                coords={
                'time': (['time'], dates.values)
                })

    ### ================================================
    ### Validate inputs before touching the file system
    ### ================================================
    if model is None or member is None:
        raise ValueError('read_model2 requires both model and member')

    ### Path below the model directory for each supported model
    file_templates = {
        'CESM': 'member_{member:0>3}/{variable}_2D_mon_CESM{member:0>3}_1x1_198201-201701.nc',
        'GFDL': 'member_{member:0>2}/{variable}_2D_mon_GFDL{member:0>2}_1x1_198201-201701.nc',
        'MPI': 'member_{member:0>3}/{variable}_2D_mon_MPI{member:0>3}_1x1_198201-201701.nc',
        'CANESM2': 'member_{member}/{variable}_2D_mon_CanESM2{member}_1x1_198201-201712.nc',
    }
    model_key = model.upper()
    if model_key not in file_templates:
        raise ValueError(
            f'unknown model {model!r}; expected one of CESM, GFDL, MPI, CanESM2')

    ### ================================================
    ### Get file path
    ### ================================================
    fl_name = file_templates[model_key].format(member=member, variable=variable)
    fl = f'{root_dir}/{model}/{fl_name}'

    ### ================================================
    ### Open data structure
    ### rename returns a new dataset (the inplace argument is deprecated)
    ### ================================================
    ds = xr.open_dataset(fl)
    ds = ds.rename({'ylat': 'lat', 'xlon': 'lon'})

    ### ================================================
    ### remove January 2017, since peter doesn't model it
    ### NOTE(review): the CanESM2 file name advertises 198201-201712; if it
    ### really holds 432 months, dropping one step leaves 431, which will not
    ### match the 420 dates forced above -- confirm against the file.
    ### ================================================
    trimmed = ds[variable][:-1, :, :].values

    # old way was ds['time'][:-1]
    ### ================================================
    ### Output dataset
    ### Plain arrays are passed in the (dims, data) tuples: handing
    ### xarray a DataArray there is ambiguous and deprecated.
    ### ================================================
    ds_out = xr.Dataset(
            {
            variable: (['time', 'lat', 'lon'], trimmed),
            },

            coords={
            'lat': (['lat'], ds['lat'].values),
            'lon': (['lon'], ds['lon'].values),
            'time': (['time'], dates.values)
            })

    ### ================================================
    ### SOM-FFN mask
    ### mask created using _create_SOMFFN_mask.ipynb
    ### ================================================
    fl_mask = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/clean/mask_somffn.nc'
    ds_mask = xr.open_dataset(fl_mask)
    ds_out[variable] = ds_out[variable].where(ds_mask['mask'] == 1)

    return ds_out

In [None]:
def read_somffn_floats(model=None, 
                       member=None):
    '''
    read_somffn_floats : reads pco2 from the SOMFFN_float files for the
                         model and member defined

    Inputs
    ==============
    model  : CESM, GFDL, MPI, CanESM2
             (used verbatim in both the directory and the file name)
    member : member label exactly as it appears in the file name
             (no zero-padding is applied here, e.g. '018' for MPI)

    Returns
    ==============
    output : xarray.Dataset with variable 'pco2' on dims (time, lat, lon)

    Raises
    ==============
    ValueError : model or member is missing

    notes  : - lon is changed from -180/180 to 0/360 grid to be consistent with model
               (data rolled 180 points along lon, lon replaced by 0.5 ... 359.5)
             - masked same way as SOMFFN SOCAT product
               (values kept only where mask == 1 in mask_somffn.nc)
    '''
    ### A missing argument would otherwise end up as the text "None" in the path
    if model is None or member is None:
        raise ValueError('read_somffn_floats requires both model and member')

    ###======================================
    ### Load data
    ###======================================
    # define path
    dir_data = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw'
    dir_float = f'{dir_data}/interpolated/SOMFFN_float'
    # load data
    ds = xr.open_dataset(f'{dir_float}/{model}/pCO2_2D_mon_{model}{member}_1x1_198201-201701_SOMFFN_float.nc')
    da = ds['pCO2_SOMFFN']

    ### ================================================
    ### roll(lon=180) moves the last 180 lon points to the front.
    ### roll_coords takes a bool; only the rolled values are used here
    ### (lon is replaced by a fixed grid below), so coords are not rolled.
    ### ================================================
    pco2 = da.roll(lon=180, roll_coords=False).values
    lon = np.arange(0.5, 360, 1)

    ### ================================================
    ### Output dataset
    ### Plain arrays are passed in the (dims, data) tuples: handing
    ### xarray a DataArray there is ambiguous and deprecated.
    ### ================================================
    ds_out = xr.Dataset(
            {
            'pco2': (['time', 'lat', 'lon'], pco2),
            },

            coords={
            'lat': (['lat'], da['lat'].values),
            'lon': (['lon'], lon),
            'time': (['time'], da['time'].values)
            })

    ### ================================================
    ### SOM-FFN mask
    ### mask created using _create_SOMFFN_mask.ipynb
    ### ================================================
    fl_mask = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/clean/mask_somffn.nc'
    ds_mask = xr.open_dataset(fl_mask)
    ds_out['pco2'] = ds_out['pco2'].where(ds_mask['mask'] == 1)

    return ds_out




# _Deprecated

In [None]:
'''
def read_peter(model='CESM', member='001'):
    
    read_peter(model, member) : reads pco2 from model and member defined
    
    input  : model  = 'CESM' or 'GFDL'
             member = 1-30 
             
    output : dictionary{pco2, lon, lat, time} 
    
    notes  : - lon is changed from -180/180 to 0/360 grid to be consistent with model
             - arctic points north of 79N are NaNed out
    ### Data directory
    data_dir = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw/SOMFFN/' + model + '/'
    
    ### Get file path
    if model.upper()=='CESM':
        fl = data_dir + 'pCO2_2D_mon_' + model.upper() + '{0:0>3}'.format(member) + '_1x1_198201-201701_SOMFFN.nc'
    
    if model.upper()=='GFDL':
        fl = data_dir + 'pCO2_2D_mon_' + model.upper() + '{0:0>2}'.format(member) + '_1x1_198201-201701_SOMFFN.nc'
        
    ### Open data struction 
    ds = xr.open_dataset(fl)

    ### load data
    time = ds['time']
    lat = ds['lat']
    
    ### The +180 converts to 0-360 grid
    #lon = ds['lon'].roll(lon=180)
    lon = ds['lon']
    #lon = np.arange(0.5,360,1)
    
    ### Remove north of 79
    #inds_arctic, = np.where(lat>79)
    #inds_arctic, = np.where(lat>70)
    
    ### roll(lon=180) moves the last 180 long points to end 
    pco2 = ds['pCO2_SOMFFN'].roll(lon=180)

    ### remove arctic
    #pco2 = pco2.where(~((lat>79)))
    #pco2 = pco2.where(~((lat>67) & (lat<80) & (lon>20) & (lon<180)))
    #pco2 = pco2.where(~((lat>67) & (lat<80) & (lon>-180) & (lon<-100)))

    ### remove hudson bay
    #pco2 = pco2.where(~((lat>40) & (lat<70) & (lon>-100) & (lon<-70)))
    #pco2 = pco2.where(~((lat>70) & (lat<80) & (lon>-130) & (lon<-80)))

    ### remove caspian sea, black sea, mediterranean sea, and baltic sea
    #pco2 = pco2.where(~((lat>24) & (lat<70) & (lon>10) & (lon<70)))
    
    
    
    ### remove arctic
    pco2 = pco2.where(~((lat>79)))
    pco2 = pco2.where(~((lat>67) & (lat<80) & (lon>20) & (lon<180)))
    pco2 = pco2.where(~((lat>67) & (lat<80) & (lon>-180) & (lon<-100)))

    ### remove hudson bay
    pco2 = pco2.where(~((lat>40) & (lat<70) & (lon>-100) & (lon<-70)))
    pco2 = pco2.where(~((lat>70) & (lat<80) & (lon>-130) & (lon<-80)))

    ### remove caspian sea, black sea, mediterranean sea, and baltic sea
    
    pco2 = pco2.where(~((lat>24) & (lat<65) & (lon>10) & (lon<70)))
    
    ### Remove little seas in indian ocean
    pco2 = pco2.where(~((lat>10) & (lat<25) & (lon>10) & (lon<45)))
    pco2 = pco2.where(~((lat>20) & (lat<50) & (lon>0) & (lon<20)))
    
    
    ### Store data in dictionary
    out = {'pco2': pco2,
          'lat': lat,
          'lon': lon +180,
          'time': time}
    return out
'''

In [None]:
def read_jones(model='CESM', member='001'):
    '''
    read_jones(model, member) : reads UEA-SI pco2 for the model and member defined

    input  : model  = 'CESM' or 'GFDL' (matched case-insensitively for the
                      file name; the directory name uses model as given)
             member = 1-30
                      (zero-padded to 3 digits for CESM and 2 digits for GFDL)

    output : xarray.Dataset with variable 'pco2' on dims (time, lat, lon)

    raises : ValueError if model or member is missing, or model is not
             'CESM' or 'GFDL'

    notes  : - lon, lat and time are taken from the file unchanged
               (no roll / shift is applied here)
             - masked same way as SOMFFN SOCAT product
               (values kept only where mask == 1 in mask_somffn.nc)
    '''
    ### Validate inputs before touching the file system
    if model is None or member is None:
        raise ValueError('read_jones requires both model and member')

    ### File name template for each supported model
    file_templates = {
        'CESM': 'pCO2_2D_mon_CESM{0:0>3}_1x1_198201_201701_UEA-SI.nc',
        'GFDL': 'pCO2_2D_mon_GFDL{0:0>2}_1x1_198201_201701_UEA-SI.nc',
    }
    model_key = model.upper()
    if model_key not in file_templates:
        raise ValueError(
            f'unknown model {model!r}; expected one of CESM, GFDL')

    ### Data directory and file path
    data_dir = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw/UEA-SI/' + model + '/'
    fl = data_dir + file_templates[model_key].format(member)

    ### Open data structure
    ds = xr.open_dataset(fl)

    ### pco2 is used as stored in the file
    pco2 = ds['pCO2_UEA-SI'].values

    ### Output dataset
    ### Plain arrays are passed in the (dims, data) tuples: handing
    ### xarray a DataArray there is ambiguous and deprecated.
    ### NOTE(review): assumes the UEA-SI files name their coordinates
    ### 'lat' / 'lon' (older commented-out code used 'ylat' / 'xlon') -- confirm.
    ds_out = xr.Dataset(
        {
        'pco2': (['time', 'lat', 'lon'], pco2),
        },

        coords={
        'lat': (['lat'], ds['lat'].values),
        'lon': (['lon'], ds['lon'].values),
        'time': (['time'], ds['time'].values)
        })

    ### ================================================
    ### SOM-FFN mask
    ### mask created using _create_SOMFFN_mask.ipynb
    ### ================================================
    fl_mask = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/clean/mask_somffn.nc'
    ds_mask = xr.open_dataset(fl_mask)
    ds_out['pco2'] = ds_out['pco2'].where(ds_mask['mask'] == 1)

    return ds_out

In [None]:
'''
original read_model function
def read_model(model='CESM', member='001'):
    #read_model(model, member) : reads pco2 from model and member defined
   # 
   # input  : model  = 'CESM' or 'GFDL'
   #          member = 1-30 
   #          
   # output : dictionary{pco2, lon, lat, time} 
   # 
   # notes  : - January 2017 is removed, since SOMFFN needs complete years
   #          - arctic points north of 79N are NaNed out
    ### ================================================
    ### Data directory
    ### ================================================
    data_dir = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw/' + model + '/'
    
    ### ================================================
    ### Get file path
    ### ================================================
    if model.upper()=='CESM':
        fl = data_dir + 'pCO2_2D_mon_' + model.upper() + '{0:0>3}'.format(member) + '_1x1_198201-201701.nc'
    
    if model.upper()=='GFDL':
        fl = data_dir + 'pCO2_2D_mon_' + model.upper() + '{0:0>2}'.format(member) + '_1x1_198201-201701.nc'
        
    if model.upper()=='MPI':
        fl = data_dir + 'pCO2_2D_mon_' + model.upper() + '{0:0>3}'.format(member) + '_1x1_198201-201701.nc'
        
    ### ================================================
    ### Open data struction 
    ### ================================================
    ds = xr.open_dataset(fl)

    ds.rename({'ylat':'lat'}, inplace=True)
    ds.rename({'xlon':'lon'}, inplace=True)
    
    ### Time remove last month (january 2017)
    #time = ds['time'][:-1]
    
    ### ================================================
    ### Remove north of 79
    ### ================================================
    #inds_arctic, = np.where(ds['lat']>79)
    
    #if jones:
    #    inds_arctic, = np.where(ds['lat']>70)
    
    ### ================================================
    ### pco2
    ### ================================================
    pco2 = ds['pCO2'][:-1,:,:]
    #pco2[:,inds_arctic, :] = np.NaN
    
    ### ================================================
    ### remove arctic
    ### ================================================
    #pco2 = pco2.where(~((ds['lat']>79)))
    #pco2 = pco2.where(~((ds['lat']>67) & (ds['lat']<80) & (ds['lon']>20) & (ds['lon']<180)))
    #pco2 = pco2.where(~((ds['lat']>67) & (ds['lat']<80) & (ds['lon']>360-180) & (ds['lon']<360-100)))

    ### ================================================
    ### remove hudson bay
    ### ================================================
    #pco2 = pco2.where(~((ds['lat']>40) & (ds['lat']<70) & (ds['lon']>360-100) & (ds['lon']<360-70)))
    #pco2 = pco2.where(~((ds['lat']>70) & (ds['lat']<80) & (ds['lon']>360-130) & (ds['lon']<360-80)))

    ### ================================================
    ### remove caspian sea, black sea, mediterranean sea, and baltic sea
    ### ================================================
    #pco2 = pco2.where(~((ds['lat']>24) & (ds['lat']<65) & (ds['lon']>10) & (ds['lon']<70)))
    
    ### ================================================
    ### Remove little seas in indian ocean
    ### ================================================
    #pco2 = pco2.where(~((ds['lat']>10) & (ds['lat']<25) & (ds['lon']>10) & (ds['lon']<45)))
    #pco2 = pco2.where(~((ds['lat']>20) & (ds['lat']<50) & (ds['lon']>0) & (ds['lon']<20)))
    
    ### ================================================
    ### Output dataset
    ### ================================================
    ds_out = xr.Dataset(
            {
            'pco2':(['time','lat','lon'], pco2),
            },

            coords={
            'lat': (['lat'], ds['lat']),
            'lon': (['lon'], ds['lon']),
            'time': (['time'], ds['time'][:-1])
            })
    
    ### ================================================
    ### SOM-FFN mask
    ### mask created using _create_SOMFFN_mask.ipynb
    ### ================================================
    fl_mask = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/clean/mask_somffn.nc'
    ds_mask = xr.open_dataset(fl_mask)
    ds_out['pco2'] = ds_out['pco2'].where(ds_mask['mask']==1)
    
    return ds_out
'''

In [None]:
'''
original read_peter
def read_peter(model='CESM', member='001'):
    #read_peter(model, member) : reads pco2 from model and member defined
   # 
   # input  : model  = 'CESM' or 'GFDL'
   #          member = 1-30 
   #          
   # output : dictionary{pco2, lon, lat, time} 
   # 
   # notes  : - lon is changed from -180/180 to 0/360 grid to be consistent with model
   #          - arctic points north of 79N are NaNed out
    ### ================================================
    ### Data directory
    ### ================================================
    data_dir = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/raw/SOMFFN/' + model + '/'
        
    ### ================================================
    ### Get file path
    ### ================================================
    if model.upper()=='CESM':
        fl = data_dir + 'pCO2_2D_mon_' + model.upper() + '{0:0>3}'.format(member) + '_1x1_198201-201701_SOMFFN.nc'
    
    if model.upper()=='GFDL':
        fl = data_dir + 'pCO2_2D_mon_' + model.upper() + '{0:0>2}'.format(member) + '_1x1_198201-201701_SOMFFN.nc'
        
    if model.upper()=='MPI':
        fl = data_dir + 'pCO2_2D_mon_' + model.upper() + '{0:0>3}'.format(member) + '_1x1_198201-201701_SOMFFN.nc'
        
    ### ================================================
    ### Open data struction 
    ### ================================================
    ds = xr.open_dataset(fl)

    ### ================================================
    ### roll(lon=180) moves the last 180 long points to end
    ### ================================================
    pco2 = ds['pCO2_SOMFFN'].roll(lon=180)
    
    ### ================================================
    ### remove arctic
    ### ================================================
    #pco2 = pco2.where(~((ds['lat']>79)))
    #pco2 = pco2.where(~((ds['lat']>67) & (ds['lat']<80) & (ds['lon']>20)   & (ds['lon']<180)))
    #pco2 = pco2.where(~((ds['lat']>67) & (ds['lat']<80) & (ds['lon']>-180) & (ds['lon']<-100)))

    ### ================================================
    ### remove hudson bay
    ### ================================================
    #pco2 = pco2.where(~((ds['lat']>40) & (ds['lat']<70) & (ds['lon']>-100) & (ds['lon']<-70)))
    #pco2 = pco2.where(~((ds['lat']>70) & (ds['lat']<80) & (ds['lon']>-130) & (ds['lon']<-80)))

    ### ================================================
    ### remove caspian sea, black sea, mediterranean sea, and baltic sea
    ### ================================================
    #pco2 = pco2.where(~((ds['lat']>24) & (ds['lat']<65) & (ds['lon']>10) & (ds['lon']<70)))
    
    ### ================================================
    ### Remove little seas in indian ocean
    ### ================================================
    #pco2 = pco2.where(~((ds['lat']>10) & (ds['lat']<25) & (ds['lon']>10) & (ds['lon']<45)))
    #pco2 = pco2.where(~((ds['lat']>20) & (ds['lat']<50) & (ds['lon']>0)  & (ds['lon']<20)))
    
    ### ================================================
    ### Output dataset
    ### ================================================
    ds_out = xr.Dataset(
            {
            'pco2':(['time','lat','lon'], pco2),
            },

            coords={
            'lat': (['lat'], ds['lat']),
            'lon': (['lon'], ds['lon'] +180),
            'time': (['time'], ds['time'])
            })
    
    ### ================================================
    ### SOM-FFN mask
    ### mask created using _create_SOMFFN_mask.ipynb
    ### ================================================
    fl_mask = '/local/data/artemis/workspace/gloege/SOCAT-LE/data/clean/mask_somffn.nc'
    ds_mask = xr.open_dataset(fl_mask)
    ds_out['pco2'] = ds_out['pco2'].where(ds_mask['mask']==1)
    
    return ds_out
'''