### aim: make yearly medusa subset files

### date: Jan 5, 2023

In [2]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')


In [3]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])?  y


In [4]:
import numpy as np
from cmocean import cm
import cartopy as cp
import cartopy.crs as ccrs
import netCDF4 as nc
import matplotlib.pyplot as plt
import xarray as xr
import sys
sys.path.append('/gpfs/home/mep22dku/scratch/SOZONE' )
#list of models
sys.path.append('/gpfs/home/mep22dku/scratch/SOZONE/UTILS')
import lom
import utils as ut

%matplotlib inline
import warnings
from datetime import datetime
warnings.filterwarnings('ignore')
import cartopy.feature as cfeature
from importlib import reload
import matplotlib.path as mpath
import glob
import pickle
import pandas as pd
import seawater
import time
plt.rcParams.update({'font.size': 12})
font = {'family' : 'normal',
'weight' : 'normal',
'size'   : 12}

plt.rc('font', **font)

## temperature and salinity, historic 1A

In [1]:
def make_yearly_subset_gridV(yr,runname,pref = 'nemo', dtype = 'grid-V', tdir = 'ukesm_1A_gridT_TS'):

    #what is new file going to be called:
    savenam = f'/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/{pref}_{runname}_1y_{yr}_{dtype}-vomecrty.nc'
    print(savenam)
    
    #get by-month dimension for this year:
    times = pd.date_range(f"{yr}/01/01",f"{yr+1}/01/01",freq='M',closed='left')
    #get the spatial dimensions from a variable


    #get the files we are converting
    td = f'/gpfs/data/greenocean/software/resources/MEDUSA/{tdir}/{pref}_{runname}*_1m_{yr}*01-*{dtype}.nc'
    fils = glob.glob(td)
    fils.sort()
    
    if len(fils) != 12:
        print(f'missing files for year {yr} in {runname}: we have {len(fils)}')
        return

    else:
        ## define variables that we are saving
        
        q = xr.open_dataset(fils[0])
        nav_lat = q['nav_lat'].values
        nav_lon = q['nav_lon'].values
        deptht = q['deptht'].values
        
        vo = np.zeros([12,75,332,362])
        thetao = np.zeros([12,75,332,362])
        
        for i in range(0,12):
            tfil = xr.open_dataset(fils[i])
            vo[i,:,:,:] = tfil['vo'][:,:,:].values

        # define data with variable attributes
        data_vars = {'vomecrty':(['time_counter', 'deptht','y', 'x'], vo,
                                 {'units': 'm/s',
                                  'long_name':'vomecrty'}),

                    }

        # define coordinates
        coords = {'time_counter': (['time_counter'], times),
            'nav_lat': (['y','x'], nav_lat),
            'nav_lon': (['y','x'], nav_lon),
            'deptht': (['deptht'], deptht)}

        # define global attributes
        attrs = {'made in':'SOZONE/MEDUSA/makeYearlyMEDUSAsubsetfiles.ipynb',
                'desc': 'yearly medusa files, saving only variables of interest'
                }

        ds = xr.Dataset(data_vars=data_vars,
                        coords=coords,
                        attrs=attrs)

        try:
            ds.to_netcdf(savenam)
        except:
            print(f'seems like {savenam} exists already')
    
#make_yearly_subset_nc(1954,'cj198')


In [16]:
#1h
runname = 'bc370'
yrs = np.arange(1950,2015,1)
for y in yrs:
    make_yearly_subset_gridV(y,runname)    

#1a
runname = 'be682'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_gridV(y,runname)

#1b
runname = 'ce417'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_gridV(y,runname)

#2h
runname = 'cj198'
yrs = np.arange(1950,2015,1)
for y in yrs:
    make_yearly_subset_gridV(y,runname)    

#2a
runname = 'cj880'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_gridV(y,runname)
    
#2b
runname = 'cj881'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_gridV(y,runname)
    
    
#3h
runname = 'cj200'
yrs = np.arange(1990,2015,1)
for y in yrs:
    make_yearly_subset_gridV(y,runname)    
    
#3a
runname = 'cj484'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_gridV(y,runname)    
    
#3a
runname = 'cj504'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_gridV(y,runname)   

NameError: name 'make_yearly_subset_gridV' is not defined

In [14]:
def make_yearly_subset_gridT(yr,runname,pref = 'nemo', dtype = 'grid-T', tdir = 'ukesm_1A_gridT_TS'):

    #what is new file going to be called:
    savenam = f'/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/{pref}_{runname}_1y_{yr}_{dtype}-TS.nc'
    print(savenam)
    
    #get by-month dimension for this year:
    times = pd.date_range(f"{yr}/01/01",f"{yr+1}/01/01",freq='M',closed='left')
    #get the spatial dimensions from a variable


    #get the files we are converting
    td = f'/gpfs/data/greenocean/software/resources/MEDUSA/{tdir}/{pref}_{runname}*_1m_{yr}*01-*{dtype}.nc'
    fils = glob.glob(td)
    fils.sort()
    
    if len(fils) != 12:
        print(f'missing files for year {yr} in {runname}: we have {len(fils)}')
        return

    else:
        ## define variables that we are saving
        
        q = xr.open_dataset(fils[0])
        nav_lat = q['nav_lat'].values
        nav_lon = q['nav_lon'].values
        deptht = q['deptht'].values
        
        so = np.zeros([12,75,332,362])
        thetao = np.zeros([12,75,332,362])
        
        for i in range(0,12):
            tfil = xr.open_dataset(fils[i])
            so[i,:,:,:] = tfil['so'][:,:,:].values
            thetao[i,:,:,:] = tfil['thetao'][:,:,:].values
        # define data with variable attributes
        data_vars = {'vosaline':(['time_counter', 'deptht','y', 'x'], so,
                                 {'units': 'g/kg',
                                  'long_name':'salinity'}),
                     'votemper':(['time_counter', 'deptht', 'y', 'x'], thetao,
                                 {'units': '',
                                  'long_name':'temperature'}),   
                    }

        # define coordinates
        coords = {'time_counter': (['time_counter'], times),
            'nav_lat': (['y','x'], nav_lat),
            'nav_lon': (['y','x'], nav_lon),
            'deptht': (['deptht'], deptht)}

        # define global attributes
        attrs = {'made in':'SOZONE/MEDUSA/makeYearlyMEDUSAsubsetfiles.ipynb',
                'desc': 'yearly medusa files, saving only variables of interest'
                }

        ds = xr.Dataset(data_vars=data_vars,
                        coords=coords,
                        attrs=attrs)

        try:
            ds.to_netcdf(savenam)
        except:
            print(f'seems like {savenam} exists already')
    
#make_yearly_subset_nc(1954,'cj198')


In [15]:
#1h
runname = 'bc370'
yrs = np.arange(1950,2100,1)
for y in yrs:
    make_yearly_subset_gridT(y,runname)    

#1a
runname = 'be682'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_gridT(y,runname)

#1b
runname = 'ce417'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_gridT(y,runname)

#2h
runname = 'cj198'
yrs = np.arange(1950,2015,1)
for y in yrs:
    make_yearly_subset_gridT(y,runname)    

#2a
runname = 'cj880'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_gridT(y,runname)
    
#2b
runname = 'cj881'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_gridT(y,runname)
    
    
#3h
runname = 'cj200'
yrs = np.arange(1990,2015,1)
for y in yrs:
    make_yearly_subset_gridT(y,runname)    
    
#3a
runname = 'cj484'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_gridT(y,runname)    
    
#3a
runname = 'cj504'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_gridT(y,runname)   

/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/nemo_bc370_1y_1950_grid-T-TS.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/nemo_bc370_1y_1951_grid-T-TS.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/nemo_bc370_1y_1952_grid-T-TS.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/nemo_bc370_1y_1953_grid-T-TS.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/nemo_bc370_1y_1954_grid-T-TS.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/nemo_bc370_1y_1955_grid-T-TS.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/nemo_bc370_1y_1956_grid-T-TS.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/nemo_bc370_1y_1957_grid-T-TS.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/nemo_bc370_1y_1958_grid-T-TS.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/nemo_bc370_1y_1959_grid-T-TS.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/nemo_bc370_1y_1960_grid-T-TS.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/

## co2 and flux

In [13]:
def check_filetype_present_TADIC(yr,runname,rtype):

    savenam = f'/gpfs/data/greenocean/software/resources/MEDUSA/ukesm_allscen_gridT_TS/nemo_{runname}*1m_{yr}*{rtype}*nc'

    tw = glob.glob(savenam)
    #print(tw)
    if len(tw) != 12:
        print(f'{yr}, {runname} {rtype} missing')
    else:
        print(f'{yr} OKOKOKOKOKOKOK {runname}')
    
#1h OK
rtype = 'grid-T'
runname = 'bc370'
yrs = np.arange(1950,2015,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)    

#1a
runname = 'be682'
yrs = np.arange(2015,2100,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)
print()
#1b
runname= 'ce417'
yrs = np.arange(2015,2100,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)
print()
# #2h
runname = 'cj198'
yrs = np.arange(1950,2015,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)    
print()
# #2a
runname = 'cj880'
yrs = np.arange(2015,2100,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)
print()    
# #2b
runname= 'cj881'
yrs = np.arange(2015,2100,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)

print()    
# #3h
runname = 'cj200'
yrs = np.arange(1990,2015,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)    
print()
# #3a
runname = 'cj484'
yrs = np.arange(2015,2100,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)    
print()    
# #3a
runname = 'cj504'
yrs = np.arange(2015,2100,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)   

1950 OKOKOKOKOKOKOK bc370
1951 OKOKOKOKOKOKOK bc370
1952 OKOKOKOKOKOKOK bc370
1953 OKOKOKOKOKOKOK bc370
1954 OKOKOKOKOKOKOK bc370
1955 OKOKOKOKOKOKOK bc370
1956 OKOKOKOKOKOKOK bc370
1957 OKOKOKOKOKOKOK bc370
1958 OKOKOKOKOKOKOK bc370
1959 OKOKOKOKOKOKOK bc370
1960 OKOKOKOKOKOKOK bc370
1961 OKOKOKOKOKOKOK bc370
1962 OKOKOKOKOKOKOK bc370
1963 OKOKOKOKOKOKOK bc370
1964 OKOKOKOKOKOKOK bc370
1965 OKOKOKOKOKOKOK bc370
1966 OKOKOKOKOKOKOK bc370
1967 OKOKOKOKOKOKOK bc370
1968 OKOKOKOKOKOKOK bc370
1969 OKOKOKOKOKOKOK bc370
1970 OKOKOKOKOKOKOK bc370
1971 OKOKOKOKOKOKOK bc370
1972 OKOKOKOKOKOKOK bc370
1973 OKOKOKOKOKOKOK bc370
1974 OKOKOKOKOKOKOK bc370
1975 OKOKOKOKOKOKOK bc370
1976 OKOKOKOKOKOKOK bc370
1977 OKOKOKOKOKOKOK bc370
1978 OKOKOKOKOKOKOK bc370
1979 OKOKOKOKOKOKOK bc370
1980 OKOKOKOKOKOKOK bc370
1981 OKOKOKOKOKOKOK bc370
1982 OKOKOKOKOKOKOK bc370
1983 OKOKOKOKOKOKOK bc370
1984 OKOKOKOKOKOKOK bc370
1985 OKOKOKOKOKOKOK bc370
1986 OKOKOKOKOKOKOK bc370
1987 OKOKOKOKOKOKOK bc370
1988 OKOKOKO

In [11]:
def make_yearly_subset_diadT_co2(yr,runname):

    #what is new file going to be called:
    savenam = f'/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/medusa_{runname}_1y_{yr}_diad-T-co2.nc'
    print(savenam)
    
    #get by-month dimension for this year:
    times = pd.date_range(f"{yr}/01/01",f"{yr+1}/01/01",freq='M',closed='left')
    #get the spatial dimensions from a variable


    #get the files we are converting
    td = f'/gpfs/data/greenocean/software/resources/MEDUSA/ukesm_allscen_diadT_co2/*/medusa_{runname}*_1m_{yr}*01-*diad-T.nc'
    fils = glob.glob(td)
    fils.sort()
    
    if len(fils) != 12:
        print(f'missing files for year {yr} in {runname}: we have {len(fils)}')
        return

    else:
        ## define variables that we are saving
        
        q = xr.open_dataset(fils[0])
        #print(q)
        nav_lat = q['nav_lat'].values
        nav_lon = q['nav_lon'].values

        OCN_PCO2 = np.zeros([12,332,362])
        CO2FLUX = np.zeros([12,332,362])
        
        for i in range(0,12):
            tfil = xr.open_dataset(fils[i])
            OCN_PCO2[i,:,:] = tfil['OCN_PCO2'][:,:].values
            CO2FLUX[i,:,:] = tfil['CO2FLUX'][:,:].values
            
        # define data with variable attributes
        print(np.nanmax(OCN_PCO2))
        
        data_vars = {'OCN_PCO2':(['time_counter','y', 'x'], OCN_PCO2,
                                 {'units': 'µatm',
                                  'long_name':'Surface ocean pCO2'}),
                     'CO2FLUX':(['time_counter', 'y', 'x'], CO2FLUX,
                                 {'units': 'mmol-C/m2/d',
                                  'long_name':'Air-sea CO2 flux'}),   
                    }

        # define coordinates
        coords = {'time_counter': (['time_counter'], times),
            'nav_lat': (['y','x'], nav_lat),
            'nav_lon': (['y','x'], nav_lon)}

        # define global attributes
        attrs = {'made in':'SOZONE/MEDUSA/makeYearlyMEDUSAsubsetfiles.ipynb',
                'desc': 'yearly medusa files, saving only variables of interest'
                }

        ds = xr.Dataset(data_vars=data_vars,
                        coords=coords,
                        attrs=attrs)

        try:
            ds.to_netcdf(savenam)
        except:
            print(f'seems like {savenam} exists already')
    
#make_yearly_subset_nc(1954,'cj198')


## pco2 yearly files

In [7]:
#1h
# runname = 'bc370'
# yrs = np.arange(1950,2100,1)
# for y in yrs:
#     make_yearly_subset_diadT_co2(y,runname)    

#1a
# runname = 'be682'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_diadT_co2(y,runname)

# #1b
# runname = 'ce417'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_diadT_co2(y,runname)

# #2h
# runname = 'cj198'
# yrs = np.arange(1950,2015,1)
# for y in yrs:
#     make_yearly_subset_diadT_co2(y,runname)    

# #2a
# runname = 'cj880'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_diadT_co2(y,runname)
    
# #2b
# runname = 'cj881'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_diadT_co2(y,runname)
    
    
# #3h
# runname = 'cj200'
# yrs = np.arange(1990,2015,1)
# for y in yrs:
#     make_yearly_subset_diadT_co2(y,runname)    
    
# #3a
# runname = 'cj484'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_diadT_co2(y,runname)    
    
# #3a
# runname = 'cj504'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_diadT_co2(y,runname)   

In [8]:
def make_yearly_subset_gridT_mld(yr,runname):

    #what is new file going to be called:
    savenam = f'/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/medusa_{runname}_1y_{yr}_grid-T-MLD.nc'
    print(savenam)
    
    #get by-month dimension for this year:
    times = pd.date_range(f"{yr}/01/01",f"{yr+1}/01/01",freq='M',closed='left')
    #get the spatial dimensions from a variable


    #get the files we are converting
    td = f'/gpfs/data/greenocean/software/resources/MEDUSA/ukesm_allscen_gridT_mld/nemo_{runname}*_1m_{yr}*01-*grid-T.nc'
    fils = glob.glob(td)
    fils.sort()
    
    if len(fils) != 12:
        print(f'missing files for year {yr} in {runname}: we have {len(fils)}')
        return

    else:
        ## define variables that we are saving
        
        q = xr.open_dataset(fils[0])
        #print(q)
        nav_lat = q['nav_lat'].values
        nav_lon = q['nav_lon'].values

        somxl010 = np.zeros([12,332,362])
        #CO2FLUX = np.zeros([12,332,362])
        
        for i in range(0,12):
            tfil = xr.open_dataset(fils[i])
            somxl010[i,:,:] = tfil['somxl010'][:,:].values
            #CO2FLUX[i,:,:] = tfil['CO2FLUX'][:,:].values
        # define data with variable attributes
        data_vars = {'somxl010':(['time_counter','y', 'x'], somxl010,
                                 {'units': 'µatm',
                                  'long_name':'MLD'}),   
                    }

        # define coordinates
        coords = {'time_counter': (['time_counter'], times),
            'nav_lat': (['y','x'], nav_lat),
            'nav_lon': (['y','x'], nav_lon)}

        # define global attributes
        attrs = {'made in':'SOZONE/MEDUSA/makeYearlyMEDUSAsubsetfiles.ipynb',
                'desc': 'yearly medusa files, saving only variables of interest'
                }

        ds = xr.Dataset(data_vars=data_vars,
                        coords=coords,
                        attrs=attrs)

        try:
            ds.to_netcdf(savenam)
        except:
            print(f'seems like {savenam} exists already')
    
#make_yearly_subset_nc(1954,'cj198')


In [9]:
# #1h
# runname = 'bc370'
# yrs = np.arange(1950,2015,1)
# for y in yrs:
#     make_yearly_subset_gridT_mld(y,runname)    

# #1a
# runname = 'be682'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_gridT_mld(y,runname)

# #1b
# runname = 'ce417'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_gridT_mld(y,runname)

# #2h
# runname = 'cj198'
# yrs = np.arange(1950,2015,1)
# for y in yrs:
#     make_yearly_subset_gridT_mld(y,runname)    

# #2a
# runname = 'cj880'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_gridT_mld(y,runname)
    
# #2b
# runname = 'cj881'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_gridT_mld(y,runname)
    
    
# #3h
# runname = 'cj200'
# yrs = np.arange(1990,2015,1)
# for y in yrs:
#     make_yearly_subset_gridT_mld(y,runname)    
    
# #3a
# runname = 'cj484'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_gridT_mld(y,runname)    
    
# #3a
# runname = 'cj504'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_gridT_mld(y,runname)   

### diadT auxillary variables

In [10]:
v = xr.open_dataset('/gpfs/data/greenocean/software/resources/MEDUSA/ukesm_allscen_diadT_aux/medusa_cj484o_1m_20991201-21000101_diad-T.nc')
v

In [11]:
def make_yearly_subset_diadT_aux(yr,runname):

    #what is new file going to be called:
    savenam = f'/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/medusa_{runname}_1y_{yr}_diad-T-aux.nc'
    print(savenam)
    
    #get by-month dimension for this year:
    times = pd.date_range(f"{yr}/01/01",f"{yr+1}/01/01",freq='M',closed='left')
    #get the spatial dimensions from a variable


    #get the files we are converting
    td = f'/gpfs/data/greenocean/software/resources/MEDUSA/ukesm_allscen_diadT_aux/*_{runname}*_1m_{yr}*01-*diad-T.nc'
    fils = glob.glob(td)
    fils.sort()
    
    if len(fils) != 12:
        print(f'missing files for year {yr} in {runname}: we have {len(fils)}')
        return

    else:
        ## define variables that we are saving
        
        q = xr.open_dataset(fils[0])
        #print(q)
        nav_lat = q['nav_lat'].values
        nav_lon = q['nav_lon'].values

        CHL_MLD = np.zeros([12,332,362])
        CO2FLUX = np.zeros([12,332,362])
        ML_PRD = np.zeros([12,332,362])
        ML_PRN = np.zeros([12,332,362])
        OCN_DPCO2 = np.zeros([12,332,362])
        OCN_PCO2 = np.zeros([12,332,362])
        WIND = np.zeros([12,332,362])
        #CO2FLUX = np.zeros([12,332,362])
        
        for i in range(0,12):
            tfil = xr.open_dataset(fils[i])
            CHL_MLD[i,:,:] = tfil['CHL_MLD'][:,:].values
            CO2FLUX[i,:,:] = tfil['CO2FLUX'][:,:].values
            ML_PRD[i,:,:] = tfil['ML_PRD'][:,:].values
            ML_PRN[i,:,:] = tfil['ML_PRN'][:,:].values
            OCN_DPCO2[i,:,:] = tfil['OCN_DPCO2'][:,:].values
            OCN_PCO2[i,:,:] = tfil['OCN_PCO2'][:,:].values
            WIND[i,:,:] = tfil['WIND'][:,:].values
            #CO2FLUX[i,:,:] = tfil['CO2FLUX'][:,:].values
        # define data with variable attributes
        data_vars = {'CHL_MLD':(['time_counter','y', 'x'], CHL_MLD,
                                 {'units': '',
                                  'long_name':''}),   
                    'CO2FLUX':(['time_counter','y', 'x'], CO2FLUX,
                                 {'units': '',
                                  'long_name':''}), 
                     'ML_PRD':(['time_counter','y', 'x'], ML_PRD,
                                 {'units': '',
                                  'long_name':''}), 
                     'ML_PRN':(['time_counter','y', 'x'], ML_PRN,
                                 {'units': '',
                                  'long_name':''}), 
                     'OCN_DPCO2':(['time_counter','y', 'x'], OCN_DPCO2,
                                 {'units': '',
                                  'long_name':''}), 
                     'OCN_PCO2':(['time_counter','y', 'x'], OCN_PCO2,
                                 {'units': '',
                                  'long_name':''}), 
                     'WIND':(['time_counter','y', 'x'], WIND,
                                 {'units': '',
                                  'long_name':''}), 
                    }

        # define coordinates
        coords = {'time_counter': (['time_counter'], times),
            'nav_lat': (['y','x'], nav_lat),
            'nav_lon': (['y','x'], nav_lon)}

        # define global attributes
        attrs = {'made in':'SOZONE/MEDUSA/makeYearlyMEDUSAsubsetfiles.ipynb',
                'desc': 'yearly medusa files, saving only variables of interest'
                }

        ds = xr.Dataset(data_vars=data_vars,
                        coords=coords,
                        attrs=attrs)

        try:
            ds.to_netcdf(savenam)
        except:
            print(f'----------------seems like {savenam} exists already')
    
#make_yearly_subset_nc(1954,'cj198')


In [12]:
# #1h
# runname = 'bc370'
# yrs = np.arange(1950,2100,1)
# for y in yrs:
#     make_yearly_subset_gridT(y,runname)    

# #1a
# runname = 'be682'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_gridT(y,runname)

# #1b
# runname = 'ce417'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_gridT(y,runname)

# #2h
# runname = 'cj198'
# yrs = np.arange(1950,2015,1)
# for y in yrs:
#     make_yearly_subset_gridT(y,runname)    

# #2a
# runname = 'cj880'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_gridT(y,runname)
    
# #2b
# runname = 'cj881'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_gridT(y,runname)
    
    
# #3h
# runname = 'cj200'
# yrs = np.arange(1990,2015,1)
# for y in yrs:
#     make_yearly_subset_gridT(y,runname)    
    
# #3a
# runname = 'cj484'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_gridT(y,runname)    
    
# #3a
# runname = 'cj504'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_gridT(y,runname)   

In [13]:
# #1h
# runname = 'bc370'
# yrs = np.arange(1950,2100,1)
# for y in yrs:
#     make_yearly_subset_diadT_aux(y,runname)    

# #1a
# runname = 'be682'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_diadT_aux(y,runname)

# #1b
# runname = 'ce417'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_diadT_aux(y,runname)

# #2h
# runname = 'cj198'
# yrs = np.arange(1950,2015,1)
# for y in yrs:
#     make_yearly_subset_diadT_aux(y,runname)    

# #2a
# runname = 'cj880'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_diadT_aux(y,runname)
    
# #2b
# runname = 'cj881'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_diadT_aux(y,runname)
    
    
# #3h
# runname = 'cj200'
# yrs = np.arange(1990,2015,1)
# for y in yrs:
#     make_yearly_subset_diadT_aux(y,runname)    
    
# #3a
# runname = 'cj484'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_diadT_aux(y,runname)    
    
# #3a
# runname = 'cj504'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     make_yearly_subset_diadT_aux(y,runname)   

## check for files?

In [14]:
def check_filetype_present(yr,runname,rtype):

    savenam = f'/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/medusa_{runname}_1y_{yr}_*{rtype}*.nc'

    tw = glob.glob(savenam)
    if len(tw) != 1:
        print(f'{yr}, {runname} {rtype} missing')
    
#make_yearly_subset_nc(1954,'cj198')


In [15]:
def check_filetype_present_TADIC(yr,runname,rtype):

    savenam = f'/gpfs/data/greenocean/software/resources/MEDUSA/ukesm_allscen_ptrcT_TADIC/medusa_{runname}*1m_{yr}*{rtype}*nc'

    tw = glob.glob(savenam)
    #print(tw)
    if len(tw) != 12:
        print(f'{yr}, {runname} {rtype} missing')
    else:
        print(f'{yr} OKOKOKOKOKOKOK {runname}')
    
#1h OK
rtype = 'ptrc-T'
runname = 'bc370'
yrs = np.arange(1950,2015,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)    

#1a
runname = 'be682'
yrs = np.arange(2015,2100,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)
print()
#1b
runname= 'ce417'
yrs = np.arange(2015,2100,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)
print()
# #2h
runname = 'cj198'
yrs = np.arange(1950,2015,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)    
print()
# #2a
runname = 'cj880'
yrs = np.arange(2015,2100,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)
print()    
# #2b
runname= 'cj881'
yrs = np.arange(2015,2100,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)

print()    
# #3h
runname = 'cj200'
yrs = np.arange(1990,2015,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)    
print()
# #3a
runname = 'cj484'
yrs = np.arange(2015,2100,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)    
print()    
# #3a
runname = 'cj504'
yrs = np.arange(2015,2100,1)
for y in yrs:
    check_filetype_present_TADIC(y,runname,rtype)   

1950 OKOKOKOKOKOKOK bc370
1951 OKOKOKOKOKOKOK bc370
1952 OKOKOKOKOKOKOK bc370
1953 OKOKOKOKOKOKOK bc370
1954 OKOKOKOKOKOKOK bc370
1955 OKOKOKOKOKOKOK bc370
1956 OKOKOKOKOKOKOK bc370
1957 OKOKOKOKOKOKOK bc370
1958 OKOKOKOKOKOKOK bc370
1959 OKOKOKOKOKOKOK bc370
1960 OKOKOKOKOKOKOK bc370
1961 OKOKOKOKOKOKOK bc370
1962 OKOKOKOKOKOKOK bc370
1963 OKOKOKOKOKOKOK bc370
1964 OKOKOKOKOKOKOK bc370
1965 OKOKOKOKOKOKOK bc370
1966 OKOKOKOKOKOKOK bc370
1967 OKOKOKOKOKOKOK bc370
1968 OKOKOKOKOKOKOK bc370
1969 OKOKOKOKOKOKOK bc370
1970 OKOKOKOKOKOKOK bc370
1971 OKOKOKOKOKOKOK bc370
1972 OKOKOKOKOKOKOK bc370
1973 OKOKOKOKOKOKOK bc370
1974 OKOKOKOKOKOKOK bc370
1975 OKOKOKOKOKOKOK bc370
1976 OKOKOKOKOKOKOK bc370
1977 OKOKOKOKOKOKOK bc370
1978 OKOKOKOKOKOKOK bc370
1979 OKOKOKOKOKOKOK bc370
1980 OKOKOKOKOKOKOK bc370
1981 OKOKOKOKOKOKOK bc370
1982 OKOKOKOKOKOKOK bc370
1983 OKOKOKOKOKOKOK bc370
1984 OKOKOKOKOKOKOK bc370
1985 OKOKOKOKOKOKOK bc370
1986 OKOKOKOKOKOKOK bc370
1987 OKOKOKOKOKOKOK bc370
1988 OKOKOKO

### checking that certain file types are present

ok:
T-co2
MLD

missing

In [16]:
# #1h
# rtype = 'aux'
# runname = 'bc370'
# yrs = np.arange(1950,2015,1)
# for y in yrs:
#     check_filetype_present(y,runname,rtype)    

# #1a
# runname = 'be682'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     check_filetype_present(y,runname,rtype)

# #1b
# runname= 'ce417'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     check_filetype_present(y,runname,rtype)

# #2h
# runname = 'cj198'
# yrs = np.arange(1950,2015,1)
# for y in yrs:
#     check_filetype_present(y,runname,rtype)    

# #2a
# runname = 'cj880'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     check_filetype_present(y,runname,rtype)
    
# #2b
# runname= 'cj881'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     check_filetype_present(y,runname,rtype)
    
    
# #3h
# runname = 'cj200'
# yrs = np.arange(1990,2015,1)
# for y in yrs:
#     check_filetype_present(y,runname,rtype)    
    
# #3a
# runname = 'cj484'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     check_filetype_present(y,runname,rtype)    
    
# #3a
# runname = 'cj504'
# yrs = np.arange(2015,2100,1)
# for y in yrs:
#     check_filetype_present(y,runname,rtype)   

## make yearly wind files

In [17]:
scen = '1A'
ts = f'/gpfs/home/mep22dku/scratch/SOZONE/MO_pipeline/EXTRACT/ncs/scen_{scen}_monthly_wspd.nc'
tw = xr.open_dataset(ts)

In [18]:
def make_yearly_subset_ptrc_TADICCHL(yr,runname):

    #what is new file going to be called:
    savenam = f'/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/medusa_{runname}_1y_{yr}_diad-T-co2.nc'
    print(savenam)
    
    #get by-month dimension for this year:
    times = pd.date_range(f"{yr}/01/01",f"{yr+1}/01/01",freq='M',closed='left')
    #get the spatial dimensions from a variable


    #get the files we are converting
    td = f'/gpfs/data/greenocean/software/resources/MEDUSA/ukesm_allscen_ptrcT_TADIC/medusa_{runname}*_1m_{yr}*01-*ptrc-T.nc'
    fils = glob.glob(td)
    fils.sort()
    
    if len(fils) != 12:
        print(f'missing files for year {yr} in {runname}: we have {len(fils)}')
        return

    else:
        ## define variables that we are saving
        
        q = xr.open_dataset(fils[0])
        #print(q)
        nav_lat = q['nav_lat'].values
        nav_lon = q['nav_lon'].values
        deptht = q['deptht'].values

        ALK = np.zeros([12,75,332,362])
        DIC = np.zeros([12,75,332,362])
        CHD = np.zeros([12,75,332,362])
        CHN = np.zeros([12,75,332,362])
        
        for i in range(0,12):
            tfil = xr.open_dataset(fils[i])
            ALK[i,:,:,:] = tfil['ALK'][:,:,:].values
            DIC[i,:,:,:] = tfil['DIC'][:,:,:].values
            CHD[i,:,:,:] = tfil['CHD'][:,:,:].values
            CHN[i,:,:,:] = tfil['CHN'][:,:,:].values
            
        # define data with variable attributes

        
        data_vars = {'ALK':(['time_counter','deptht','y', 'x'], ALK,
                                 {'units': 'mmol/m3',
                                  'long_name':''}),
                     'DIC':(['time_counter','deptht', 'y', 'x'], DIC,
                                 {'units': 'mmol/m3',
                                  'long_name':''}),   
                     'CHD':(['time_counter','deptht', 'y', 'x'], CHD,
                                 {'units': 'mg-Chl/m3',
                                  'long_name':''}), 
                     'CHN':(['time_counter','deptht', 'y', 'x'], CHN,
                                 {'units': 'mg-Chl/m3',
                                  'long_name':''}), 
                    }

        # define coordinates
        coords = {'time_counter': (['time_counter'], times),
            'nav_lat': (['y','x'], nav_lat),
            'nav_lon': (['y','x'], nav_lon),
            'deptht': (['deptht'], deptht)}


        # define global attributes
        attrs = {'made in':'SOZONE/MEDUSA/makeYearlyMEDUSAsubsetfiles.ipynb',
                'desc': 'yearly medusa files, saving only variables of interest'
                }

        ds = xr.Dataset(data_vars=data_vars,
                        coords=coords,
                        attrs=attrs)

        try:
            ds.to_netcdf(savenam)
        except:
            print(f'seems like {savenam} exists already')
    
#make_yearly_subset_nc(1954,'cj198')


In [None]:
#1h
runname = 'bc370'
yrs = np.arange(1950,2100,1)
for y in yrs:
    make_yearly_subset_ptrc_TADICCHL(y,runname)    

#1a
runname = 'be682'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_ptrc_TADICCHL(y,runname)

#1b
runname = 'ce417'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_ptrc_TADICCHL(y,runname)

#2h
runname = 'cj198'
yrs = np.arange(1950,2015,1)
for y in yrs:
    make_yearly_subset_ptrc_TADICCHL(y,runname)    

#2a
runname = 'cj880'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_ptrc_TADICCHL(y,runname)
    
#2b
runname = 'cj881'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_ptrc_TADICCHL(y,runname)
    
    
#3h
runname = 'cj200'
yrs = np.arange(1990,2015,1)
for y in yrs:
    make_yearly_subset_ptrc_TADICCHL(y,runname)    
    
#3a
runname = 'cj484'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_ptrc_TADICCHL(y,runname)    
    
#3a
runname = 'cj504'
yrs = np.arange(2015,2100,1)
for y in yrs:
    make_yearly_subset_ptrc_TADICCHL(y,runname)   

/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/medusa_bc370_1y_1950_diad-T-co2.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/medusa_bc370_1y_1951_diad-T-co2.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/medusa_bc370_1y_1952_diad-T-co2.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/medusa_bc370_1y_1953_diad-T-co2.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/medusa_bc370_1y_1954_diad-T-co2.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/medusa_bc370_1y_1955_diad-T-co2.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/medusa_bc370_1y_1956_diad-T-co2.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/medusa_bc370_1y_1957_diad-T-co2.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/medusa_bc370_1y_1958_diad-T-co2.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/medusa_bc370_1y_1959_diad-T-co2.nc
/gpfs/data/greenocean/software/resources/MEDUSA/PROC2/medusa_bc370_1y_1960_diad-T-co2.nc
/gpfs/data/greenocean