# Data Processing U850/V850 all files
Teagan King, John Truesdale, Katie Dagon
Updated Feb 2022

## Import libraries

In [1]:
import glob
import xarray as xr
import cftime
import geocat.comp as gc
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt

### Set up Dask

In [2]:
# Import dask
import dask

# Use dask jobqueue
from dask_jobqueue import PBSCluster

# Import a client
from dask.distributed import Client

# Setup your PBSCluster
nmem='35GB' # specify memory here so it duplicates below
cluster = PBSCluster(
    cores=1, # The number of cores you want
    memory=nmem, # Amount of memory
    processes=1, # How many processes
    queue='casper', # The type of queue to utilize (/glade/u/apps/dav/opt/usr/bin/execcasper)
    local_directory='/glade/scratch/$USER/local_dask', # Use your local directory
    resource_spec='select=1:ncpus=1:mem='+nmem, # Specify resources
    account='P93300313', # Input your project ID here, previously this was known as 'project', now is 'account'
    walltime='04:30:00', # Amount of wall time
    interface='ib0', # Interface to use
)

# Scale up
cluster.scale(50)

# Change your url to the dask dashboard so you can see it
dask.config.set({'distributed.dashboard.link':'https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status'})

# Setup your client
client = Client(cluster)

In [3]:
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/tking/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/tking/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.206.47:43957,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/tking/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [None]:
# Use this cell to stop the dask workers
# cluster.scale(0)

## Read in files and set pressure level(s)

In [4]:
# location of data
datadir = '/glade/scratch/tking/cgnet/rcp85_2086_2100/'

ufile_format = '*.U.*.nc' # U files are b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4, 3hourly avg from 2080-2100
vfile_format = '*.V.*.nc' # V files are b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4, 3hourly avg from 2080-2100
psfile_format = '*.PS.*.nc' # PS files are b.e13.BRCP85C5CN.ne120_g16.003a.cam.h3, 3hourly avg from 2080-2100

# desired pressure level:
plevel = 850.0  # hPa

# use glob.glob() to get actual files, and sort in order to retain correctly matching files!
ufilenames = sorted(glob.glob(datadir+ufile_format))
vfilenames = sorted(glob.glob(datadir+vfile_format))
psfilenames = sorted(glob.glob(datadir+psfile_format))

# make new .nc files but replace U/V with U850/V850
unew_files = []
for filename in ufilenames:
    unew_files.append((filename.replace('.U.', '.U850.')).split('/')[-1])

vnew_files = []
for filename in vfilenames:
    vnew_files.append((filename.replace('.V.', '.V850.')).split('/')[-1])

In [7]:
# Use this cell if need to re-run notebook for subset of files, eg if connection breaks before all files are complete
# ufilenames = ufilenames[4:]
# vfilenames = vfilenames[4:]
# psfilenames = psfilenames[4:]
# unew_files = unew_files[4:]
# vnew_files = vnew_files[4:]

In [8]:
# print(ufilenames)
print(vfilenames)
print(psfilenames)
print(vnew_files)
print(unew_files)

['/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.V.2098010100Z-2098123121Z.nc', '/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.V.2099010100Z-2099123121Z.nc', '/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.V.2100010100Z-2100123121Z.nc']
['/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h3.PS.2098010100Z-2098123121Z.nc', '/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h3.PS.2099010100Z-2099123121Z.nc', '/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h3.PS.2100010100Z-2100123121Z.nc']
['b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.V850.2098010100Z-2098123121Z.nc', 'b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.V850.2099010100Z-2099123121Z.nc', 'b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.V850.2100010100Z-2100123121Z.nc']
['b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2098010100Z-2098123121Z.nc', 'b.e13.BRCP85

### Run all files
This can take a bit over an hour per file...

In [9]:
# define dummy values for t_bot and phi_sfc
# reasoning for this described in GitHub issue: https://github.com/NCAR/geocat-comp/issues/26
# Hopefully the need for this will be removed soon.
t_bot=xr.DataArray([])
phi_sfc=xr.DataArray([])

for wind in ['V']:
    for file_index in range(len(psfilenames)):        
        if wind=='U':
            windfile = xr.open_dataset(ufilenames[file_index], chunks={"time": 100, "ncol":10000}) # adjusting chunks for ~100mb size chunks
        elif wind=='V':
            windfile = xr.open_dataset(vfilenames[file_index], chunks={"time": 100, "ncol":10000}) # adjusting chunks for ~100mb size chunks
        psfile = xr.open_dataset(psfilenames[file_index], chunks={"time": 100, "ncol":10000}) # using the same chunk size for consistency
        print('working on {}'.format(psfilenames[file_index].split('.PS.')[-1][:4]))

        file_wind = windfile[wind] # wind at each level
        file_PS = psfile['PS'] # surface pressure

        pref = psfile['P0'] # reference pressure

        # hybrid level coordinates
        hyam = windfile['hyam']
        hybm = windfile['hybm']
        
        # interpolate to get correct grid levels using GeoCAT's interpolate hybrid to pressure function:
        plevdata = gc.interpolation.interp_hybrid_to_pressure(file_wind,  # 3d field U/V (time x lev x ncol)
                            file_PS,  # surface pressure (time x ncol)
                            hyam, hybm,  # coefficients to calculate pressure at each level
                            p0=pref.values,  # reference pressure
                            new_levels=np.array([85000], dtype='float32'),  # interpolate to 850 pressure level
                            lev_dim=None,  # lev is default
                            method='log', # use log because pressure falls off logarithmically

                            extrapolate=True, # extrapolate below ground values
                            variable='other',
                            t_bot=t_bot,  # xarray.DataArray Temperature in Kelvin at the lowest layer of the model.
                                             # Not necessarily the same as surface temperature.
                                             # Required if ``extrapolate`` is True.
                            phi_sfc=phi_sfc  # Geopotential in J/kg at the lowest layer of the model.
                                             # Not necessarily the same as surface geopotential.
                                             # Required if ``extrapolate`` is True.
                            )   #TODO: implement dask here
        print('finished interpolation')
        if wind == 'U':
            file_save = unew_files[file_index]
        elif wind == 'V':
            file_save = vnew_files[file_index]
        plevdata.to_netcdf('/glade/scratch/tking/cgnet/rcp85_2086_2100/{}'.format(file_save))
        print('generated {}'.format(file_save))

working on 2098
finished interpolation
generated b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.V850.2098010100Z-2098123121Z.nc
working on 2099
finished interpolation
generated b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.V850.2099010100Z-2099123121Z.nc
working on 2100
finished interpolation
generated b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.V850.2100010100Z-2100123121Z.nc


In [None]:
# check dashboard listed above for dask progress board

In [11]:
ls /glade/scratch/tking/cgnet/rcp85_2086_2100/*U850*.nc

/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2080010100Z-2080123118Z.nc
/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2081010100Z-2081123118Z.nc
/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2086010100Z-2086123121Z.nc
/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2087010100Z-2087123121Z.nc
/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2088010100Z-2088123121Z.nc
/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2089010100Z-2089123121Z.nc
/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2090010100Z-2090123121Z.nc
/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2091010100Z-2091123121Z.nc
/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2

In [12]:
# %%bash
# module load nco/4.7.9

# copy printed files above into U850_V850_to_regrid list below..

# RCP 8.5
U850_V850_to_regrid=['/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2080010100Z-2080123118Z.nc',
'/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2081010100Z-2081123118Z.nc',
'/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2086010100Z-2086123121Z.nc',
'/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2087010100Z-2087123121Z.nc',
'/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2088010100Z-2088123121Z.nc',
'/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2089010100Z-2089123121Z.nc',
'/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2090010100Z-2090123121Z.nc',
'/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2091010100Z-2091123121Z.nc',
'/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2092010100Z-2092123121Z.nc',
'/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2093010100Z-2093123121Z.nc',
'/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2094010100Z-2094123121Z.nc',
'/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2095010100Z-2095123121Z.nc',
'/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2096010100Z-2096123121Z.nc',
'/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2097010100Z-2097123121Z.nc',
'/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2098010100Z-2098123121Z.nc',
'/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2099010100Z-2099123121Z.nc',
'/glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2100010100Z-2100123121Z.nc']

for file in U850_V850_to_regrid:
    print('ncremap -m /glade/campaign/cgd/amp/jet/ClimateNet/data_processing/maps/map_ne120_to_0.23x0.31_bilinear.nc -i {} -o /glade/scratch/tking/cgnet/rcp85_2086_2100/regridded_U850_V850/{}'.format(file, file))

# update batch script included in /scripts directory to run these commands from casper terminal with `qsub <batch_script_name>`


ncremap -m /glade/campaign/cgd/amp/jet/ClimateNet/data_processing/maps/map_ne120_to_0.23x0.31_bilinear.nc -i /glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2080010100Z-2080123118Z.nc -o /glade/scratch/tking/cgnet/rcp85_2086_2100/regridded_U850_V850//glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2080010100Z-2080123118Z.nc
ncremap -m /glade/campaign/cgd/amp/jet/ClimateNet/data_processing/maps/map_ne120_to_0.23x0.31_bilinear.nc -i /glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2081010100Z-2081123118Z.nc -o /glade/scratch/tking/cgnet/rcp85_2086_2100/regridded_U850_V850//glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U850.2081010100Z-2081123118Z.nc
ncremap -m /glade/campaign/cgd/amp/jet/ClimateNet/data_processing/maps/map_ne120_to_0.23x0.31_bilinear.nc -i /glade/scratch/tking/cgnet/rcp85_2086_2100/b.e13.BRCP85C5CN.ne120_g16.003a.cam.h4.U85