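### Aim: make yearly MEDUSA subset files (combine the 12 monthly diad-T files of a year into one file, keeping only the variables of interest)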

### date: Jan 5, 2023

In [3]:
from IPython.display import HTML

# Render a small script plus a button that shows/hides the notebook's input
# (code) cells, i.e. the 'div.input' elements. code_toggle is also run once when
# the document is ready, so the inputs start out hidden.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')


In [4]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])?  y


In [5]:
import numpy as np
from cmocean import cm
import cartopy as cp
import cartopy.crs as ccrs
import netCDF4 as nc
import matplotlib.pyplot as plt
import xarray as xr
import sys
sys.path.append('/gpfs/home/mep22dku/scratch/SOZONE')
#list of models
sys.path.append('/gpfs/home/mep22dku/scratch/SOZONE/UTILS')
import lom
import utils as ut

%matplotlib inline
import warnings
from datetime import datetime
warnings.filterwarnings('ignore')
import cartopy.feature as cfeature
from importlib import reload
import matplotlib.path as mpath
import glob
import pickle
import pandas as pd
import seawater
import time
# Global figure font: 12 pt, regular weight.
# The family must be a real family name or generic alias; 'normal' is a weight,
# not a family, and an unknown family makes matplotlib log
# "findfont: Font family ['normal'] not found" and fall back to its default.
font = {'family': 'sans-serif',
        'weight': 'normal',
        'size': 12}

# plt.rc('font', size=12) sets rcParams['font.size'], so a separate
# rcParams.update for the size is not needed.
plt.rc('font', **font)

In [16]:
# Open a single monthly diad-T file of run cj198 (file dates 19500101-19500201)
# so its dimensions and variables can be inspected.
q = xr.open_dataset('/gpfs/data/greenocean/software/resources/MEDUSA/cj198_1950/medusa_cj198o_1m_19500101-19500201_diad-T.nc')

In [17]:
# print(q)

<xarray.Dataset>
Dimensions:               (axis_nbounds: 2, deptht: 75, nvertex: 4, time_counter: 1, x: 362, y: 332)
Coordinates:
    nav_lat               (y, x) float32 ...
    nav_lon               (y, x) float32 ...
  * deptht                (deptht) float32 0.50576 1.5558553 ... 5902.0576
    time_centered         (time_counter) object ...
  * time_counter          (time_counter) float64 0.0
Dimensions without coordinates: axis_nbounds, nvertex, x, y
Data variables:
    bounds_lon            (y, x, nvertex) float32 ...
    bounds_lat            (y, x, nvertex) float32 ...
    area                  (y, x) float32 ...
    deptht_bounds         (deptht, axis_nbounds) float32 ...
    thkcello              (time_counter, deptht, y, x) float32 ...
    time_centered_bounds  (time_counter, axis_nbounds) object ...
    INVTN                 (time_counter, y, x) float32 ...
    INVTSI                (time_counter, y, x) float32 ...
    INVTFE                (time_counter, y, x) float32 ...

In [26]:
def make_yearly_subset_nc(yr,runname):
    """Combine one year of monthly MEDUSA diad-T files into a single yearly subset file.

    Looks for the 12 monthly ``*_1m_{yr}*01-*diad-T.nc`` files of ``runname``,
    keeps only OCN_PCO2 and CO2FLUX (one slice per month) plus area (read from
    the first month), and writes them to
    ``.../MEDUSA/PROCESSED/medusa_{runname}_1y_{yr}_diad-T-subset.nc``.

    Parameters
    ----------
    yr : int
        Year to process.
    runname : str
        Run identifier (e.g. 'cj198'); used in the input glob pattern and in
        the output file name.

    Returns
    -------
    None
        If the number of monthly files found is not 12, a message is printed
        and nothing is written. A failure while writing the output is reported
        with a printed message (including the underlying error), not raised.
    """
    #what is new file going to be called:
    savenam = f'/gpfs/data/greenocean/software/resources/MEDUSA/PROCESSED/medusa_{runname}_1y_{yr}_diad-T-subset.nc'
    print(savenam)

    #get the files we are converting; bail out before opening anything if the year is incomplete
    td = f'/gpfs/data/greenocean/software/resources/MEDUSA/{runname}*/*_1m_{yr}*01-*diad-T.nc'
    fils = sorted(glob.glob(td))
    if len(fils) != 12:
        print(f'missing files for year {yr} in {runname}: we have {len(fils)}')
        return

    #by-month time axis for this year: the 12 month-end dates (Jan 31 ... Dec 31).
    #built from month starts rolled forward to month end, which avoids the
    #date_range `closed=` argument (removed in pandas 2.0) and the 'M' alias
    #(deprecated in pandas 2.2) while giving the same stamps.
    times = pd.date_range(f"{yr}-01-01", periods=12, freq='MS') + pd.offsets.MonthEnd(0)

    #get the spatial dimensions from a reference file; the values are loaded
    #inside the with-block so the file handle is released straight away
    with xr.open_dataset('/gpfs/data/greenocean/software/resources/MEDUSA/cj198_1950/medusa_cj198o_1m_19500101-19500201_diad-T.nc') as q:
        nav_lat = q['nav_lat'].values
        nav_lon = q['nav_lon'].values
        deptht = q['deptht'].values

    ## define variables that we are saving (sized from the reference grid)
    ny, nx = nav_lat.shape
    OCN_PCO2 = np.zeros([12, ny, nx])
    CO2FLUX = np.zeros([12, ny, nx])
    area = np.zeros([ny, nx])

    for i, fil in enumerate(fils):
        # each monthly file is closed as soon as its fields have been copied
        with xr.open_dataset(fil) as tfil:
            if i == 0:
                area_src = tfil['area']
                area[:,:] = area_src[:,:].values
                # describe area with its own metadata rather than the CO2-flux
                # attributes; fallbacks are used if the source has none
                # (presumably the cell area is in m2 -- TODO confirm)
                area_attrs = {'units': area_src.attrs.get('units', 'm2'),
                              'long_name': area_src.attrs.get('long_name', 'Grid cell area')}
            # the monthly fields carry a leading length-1 time axis, which
            # numpy drops when assigning into the 2-D slice
            OCN_PCO2[i,:,:] = tfil['OCN_PCO2'][:,:].values
            CO2FLUX[i,:,:] = tfil['CO2FLUX'][:,:].values

    # define data with variable attributes
    data_vars = {'OCN_PCO2':(['time_counter', 'nav_lat', 'nav_lon'], OCN_PCO2,
                             {'units': 'µatm',
                              'long_name':'Surface ocean pCO2'}),
                 'CO2FLUX':(['time_counter', 'nav_lat', 'nav_lon'], CO2FLUX,
                             {'units': 'mmol-C/m2/d',
                              'long_name':'Air-sea CO2 flux'}),
                 'area':(['nav_lat', 'nav_lon'], area, area_attrs)
                }

    # define coordinates
    # NOTE(review): the 1-D nav_lat/nav_lon coordinates are the first column/row
    # of the 2-D fields; if the grid is curvilinear they are only approximate
    # away from that column/row -- confirm this is acceptable for downstream use.
    coords = {'time_counter': (['time_counter'], times),
              'nav_lat': (['nav_lat'], nav_lat[:,0]),
              'nav_lon': (['nav_lon'], nav_lon[0,:]),
              'deptht': (['deptht'], deptht)}

    # define global attributes
    attrs = {'made in':'SOZONE/MEDUSA/makeYearlyMEDUSAsubsetfiles.ipynb',
             'desc': 'yearly medusa files, saving only variables of interest'
            }

    ds = xr.Dataset(data_vars=data_vars,
                    coords=coords,
                    attrs=attrs)

    # best-effort write: keep going if one year fails, but report the real
    # reason instead of assuming the file already exists
    try:
        ds.to_netcdf(savenam)
    except Exception as err:
        print(f'could not write {savenam} (it may already exist and be open/locked): {err!r}')
    
# Run the conversion for a single year (1954) of run cj198.
make_yearly_subset_nc(1954,'cj198')


/gpfs/data/greenocean/software/resources/MEDUSA/PROCESSED/medusa_cj198_1y_1954_diad-T-subset.nc


The next cell records the runs and year ranges that have already been processed (the loops are kept commented out so they are not re-run).

In [29]:
# Record of the batches already converted with make_yearly_subset_nc.
# Each commented-out loop below is one batch (run name + range of years);
# uncomment a loop to (re)generate those yearly files.
runname = 'cj198'

# yrs = np.arange(1950,1970,1)
# for y in yrs:
#     make_yearly_subset_nc(y,runname)
    
# yrs = np.arange(2000,2015,1)
# for y in yrs:
#     make_yearly_subset_nc(y,runname)

# yrs = np.arange(1990,2000,1)
# for y in yrs:
#     make_yearly_subset_nc(y,runname)
    
# runname = 'cj200'
# yrs = np.arange(1990,2015,1)
# for y in yrs:
#     make_yearly_subset_nc(y,runname)

In [30]:
# Open two of the processed yearly subset files (2000 and 2001 of run cj198)
# together as one multi-file dataset.
ls = [
    '/gpfs/data/greenocean/software/resources/MEDUSA/PROCESSED/medusa_cj198_1y_2000_diad-T-subset.nc',
    '/gpfs/data/greenocean/software/resources/MEDUSA/PROCESSED/medusa_cj198_1y_2001_diad-T-subset.nc',
]

w = xr.open_mfdataset(ls)