<a href="https://colab.research.google.com/github/sanAkel/ufs_diurnal_diagnostics/blob/main/RTOFS/binary_nc_converter/read_archive.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import xarray as xr
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# mount drive
# Makes the user's Google Drive visible under /content/drive in the Colab runtime,
# so the files below can be opened with ordinary paths.
from google.colab import drive
drive.mount('/content/drive')

# Directory holding the files read by the cells below.
# NOTE: the value ends with "/" and later cells append "/<file name>", so the
# resulting paths contain a doubled slash.
data_path="/content/drive/MyDrive/datasets/tmp/read_archive/"

In [None]:
# Long listing (hidden entries included) of the directory named by data_path.
!ls -al $data_path

### These functions are based on those in [pyhycom](https://github.com/uwincm/pyhycom/blob/master/pyhycom.py).

In [None]:
# Open the "a" companion of a file and hand back the open file object.
def open_a_file(filename, mode):
    """Open `filename` with its last character replaced by 'a'.

    Parameters
    ----------
    filename : str
        Path whose final character is swapped for 'a' before opening.
    mode : str
        Mode string forwarded to the built-in `open`.

    Returns
    -------
    file object
        The open handle; closing it is the caller's responsibility.
    """
    a_path = filename[:-1] + 'a'
    return open(a_path, mode=mode)

# Name of the HYCOM "b" file that belongs to the given file.
def get_b_filename(fName):
    """Return `fName` with its final character replaced by 'b'."""
    stem = fName[:-1]
    return stem + 'b'

#Return a list where each element contains text from each line of `b file`
def getTextFile(fName):
    """Read a text file and return its lines with trailing whitespace removed.

    Parameters
    ----------
    fName : str
        Path of the text file to read.

    Returns
    -------
    list of str
        One entry per line, in file order; trailing whitespace (including the
        newline) is stripped, leading whitespace is kept.
    """
    # The context manager closes the handle even if reading fails; the previous
    # version left the file open until garbage collection.
    with open(fName, 'r') as text_file:
        return [line.rstrip() for line in text_file]

In [None]:
# get dimensions of an archive from .b file
def getDims(fName, topo_file=False):
  """Read the horizontal dimensions from the ".b" text file paired with `fName`.

  Parameters
  ----------
  fName : str
      Path of the file; its ".b" companion (see `get_b_filename`) is parsed.
  topo_file : bool, optional
      False (default): scan for lines containing 'idm' and 'jdm' and take the
      first whitespace-separated token of each as the value.
      True: scan for one line containing 'i/jdm' and take its third token as
      idm and its fourth token as jdm.

  Returns
  -------
  tuple of int
      `(jdm, idm)` -- note the order.

  Raises
  ------
  ValueError
      If the dimension line(s) are not present, or a value is not an integer.
  """
  idm = jdm = None

  for line in getTextFile(get_b_filename(fName)):
    if topo_file:
      if 'i/jdm' in line:
        tokens = line.split()
        idm = tokens[2]
        # Cut the jdm token at ';' rather than keeping a fixed four characters,
        # so dimensions with fewer or more than four digits parse correctly.
        # (Presumably the header reads "i/jdm = <idm> <jdm>; ..." -- TODO confirm.)
        jdm = tokens[3].split(';')[0]
    else:
      if 'idm' in line:
        idm = line.split()[0]
      if 'jdm' in line:
        jdm = line.split()[0]
    if idm is not None and jdm is not None:
      break

  if idm is None or jdm is None:
    # Previously this fell through to an UnboundLocalError at the return.
    raise ValueError(
        f"Could not find idm/jdm in {get_b_filename(fName)} (topo_file={topo_file})")

  return int(jdm), int(idm)

In [None]:
def getFieldIndex(field, fName):
    """Return the record numbers of `field` listed in the ".b" file of `fName`.

    Header lines are dropped first (10 when the base name of `fName` contains
    'arch', 3 when it contains 'grid'); the remaining lines are numbered from 0.
    A line matches when its first whitespace-separated token, with every '.'
    and ':' removed, equals `field`.

    Parameters
    ----------
    field : str
        Field name to look for, e.g. 'plat' or 'u-vel'.
    fName : str
        Path of the file; its ".b" companion is the one parsed.

    Returns
    -------
    list of int
        Zero-based positions of the matching lines (empty if none match).
    """
    lines = getTextFile(get_b_filename(fName))
    base_name = fName.split('/')[-1]
    if 'arch' in base_name:
        lines = lines[10:]  # skip first 10 lines
    if 'grid' in base_name:
        lines = lines[3:]   # skip first 3 lines

    fieldIndex = []
    # enumerate gives each line its own position; list.index(line) would return
    # the first occurrence for duplicated lines and rescans the list every time.
    for record, line in enumerate(lines):
        tokens = line.split()
        if not tokens:
            # Blank line: nothing to compare, but it still occupies a position.
            continue
        if field == tokens[0].replace('.', '').replace(':', ''):
            fieldIndex.append(record)
    return fieldIndex

In [None]:
def getField(field, fName, undef=np.nan, x_range=None, y_range=None):
  """Read one 2-d field from the ".a" binary file paired with `fName`.

  Parameters
  ----------
  field : str
      Field name as matched by `getFieldIndex`; the first matching record is read.
  fName : str
      Path of the file; dimensions and record positions come from its ".b"
      companion, data from its ".a" companion.
  undef : scalar, optional
      Value written wherever the data equals float32(2**100). Default NaN.
  x_range, y_range : index, optional
      Anything NumPy accepts as an index along one axis (slice, integer
      sequence, ...). `x_range` selects along the last (idm) axis and
      `y_range` along the first (jdm) axis. None keeps the full axis.

  Returns
  -------
  numpy.ndarray
      float32 array of shape (jdm, idm), or the requested subset of it.

  Raises
  ------
  ValueError
      If `field` is not listed in the ".b" file.
  NotImplementedError
      If `getDims` reports anything other than two dimensions.
  """
  dims = getDims(fName)
  if len(dims) != 2:
    raise NotImplementedError("Read 3d archive is not yet ready!")
  jdm, idm = dims

  n_values = idm*jdm
  reclen = 4*n_values # Record length in bytes
  # HYCOM binary data is written out in chunks/"words" of multiples of 4096*4 bytes.
  # Length of one level of one variable (reclen) will be between
  # consecutive multiples of the wordlen. Data is padded to bring the volume
  # up to the next multiple, so consecutive records start `stride` bytes apart.
  wordlen = 4096*4
  stride = wordlen * -(-reclen // wordlen)  # reclen rounded up, in exact integer arithmetic

  fieldRecords = getFieldIndex(field, fName) # Get field record indices
  if not fieldRecords:
    raise ValueError(f"Field {field!r} is not listed in {get_b_filename(fName)}")

  # `with` guarantees the handle is closed even when the read fails.
  with open_a_file(fName, mode='rb') as file:
    file.seek(fieldRecords[0]*stride, 0) # Move to address of the field
    data = file.read(reclen)

  # Decode explicitly as big-endian float32 and convert to the native byte
  # order. On little-endian hosts this equals the former frombuffer+byteswap,
  # and astype returns a fresh writable array.
  values = np.frombuffer(data, dtype='>f4', count=n_values).astype(np.float32).reshape(jdm, idm)

  # The array is 2-d: x is the last axis, y the first. (Indexing with three
  # subscripts, as before, always raised IndexError.)
  if x_range is not None:
    values = values[..., x_range]
  if y_range is not None:
    values = values[y_range, ...]

  values[values == np.float32(2**100)] = undef

  return values

In [None]:
# Number of records in the binary file, read from .b
def getNumberOfRecords(fName):
  """Return the number of records described by the ".b" file of `fName`.

  The file kind is decided from the base name of `fName` (not the whole path,
  so a directory such as ".../read_archive/" no longer makes every file look
  like an archive). Kinds are tested in this order:

  - contains 'arch'    : lines in the ".b" file minus 10 header lines
  - contains 'grid'    : lines in the ".b" file minus 3 header lines
  - contains 'depth'   : always 1 (the ".b" file is not read)
  - contains 'restart' : lines in the ".b" file minus 2 header lines

  Parameters
  ----------
  fName : str
      Path of the file.

  Returns
  -------
  int or None
      The record count, or None when the base name matches none of the kinds.
  """
  base_name = fName.split('/')[-1]

  if 'arch' in base_name:
    header_lines = 10
  elif 'grid' in base_name:
    header_lines = 3
  elif 'depth' in base_name:
    return 1
  elif 'restart' in base_name:
    header_lines = 2
  else:
    return None

  return len(getTextFile(get_b_filename(fName))[header_lines:])

In [None]:
def getBathymetry(grid_fName, topog_fName, undef=np.nan):
  """Read the first (jdm x idm) record of a topography ".a" file.

  Parameters
  ----------
  grid_fName : str
      Path of the grid file; `getDims` reads (jdm, idm) from its ".b" companion.
  topog_fName : str
      Path of the topography file; its ".a" companion is read from the start.
  undef : scalar, optional
      Value written wherever the data is greater than 2**99. Default NaN.

  Returns
  -------
  numpy.ndarray
      float32 array of shape (jdm, idm). The shape is also printed.
  """
  jdm, idm = getDims(grid_fName)
  n_values = idm*jdm

  # `with` closes the handle even if the read raises.
  with open_a_file(topog_fName, mode='rb') as file:
    #Data is in float32, which has 4 bytes/value
    data = file.read(n_values*4)

  # Decode explicitly as big-endian float32 and convert to native byte order;
  # on little-endian hosts this equals the former frombuffer+byteswap.
  field = np.frombuffer(data, dtype='>f4', count=n_values).astype(np.float32).reshape(jdm, idm)

  print(f"field.shape={field.shape}")
  field[field>2**99] = undef

  return field

In [None]:
# Try the dimension reader on the grid file and then on the depth file.
grid_a_path = data_path+"/regional.grid.a"
bFile = get_b_filename(grid_a_path)
print(f"Reading {bFile}\n")

# (path, topo_file flag) for each call, in the order they are reported.
dims_cases = (
    (grid_a_path, False),
    (data_path+"/depth_GLBb0.08_09m11.a", True),
)
for dims_path, is_topo in dims_cases:
    jm, im = getDims(dims_path, topo_file=is_topo)
    print(f"im={im}, jm={jm}")

In [None]:
# Record positions of one grid field, then of two fields in the archs file.
fn1 = data_path+"/regional.grid.a"
fn2 = data_path+"/v2.4_rtofs_glo.t00z.n00.archs.a"

print(getFieldIndex('plat',fn1))
print("\n")

for archs_field in ('srfhgt', 'salin'):
    print(getFieldIndex(archs_field, fn2))

In [None]:
# Record counts for the grid, depth and archs files, separated by blank lines.
record_count_files = (
    "/regional.grid.a",
    "/depth_GLBb0.08_09m11.a",
    "/v2.4_rtofs_glo.t00z.n00.archs.a",
)
for position, file_suffix in enumerate(record_count_files):
    if position:
        print("\n")
    print(getNumberOfRecords(data_path+file_suffix))

In [None]:
# Bathymetry: dimensions come from the grid file, values from the depth file.
depth = getBathymetry(
    grid_fName=data_path+"/regional.grid.a",
    topog_fName=data_path+"/depth_GLBb0.08_09m11.a",
)

In [None]:
# Map of the bathymetry array. The array is (jdm, idm), so rows are j and
# columns are i; a title and axis labels let the figure stand on its own.
fig, ax = plt.subplots()
depth_image = ax.imshow(depth, origin='lower')
fig.colorbar(depth_image, ax=ax)
ax.set(title="Bathymetry (depth file)", xlabel="i index", ylabel="j index");

In [None]:
# Coordinates from the grid file.
grid_file = data_path+"/regional.grid.a"
plon = getField('plon', grid_file)
plat = getField('plat', grid_file)

# variables (all read from the same archs file)
archs_file = data_path+"/v2.4_rtofs_glo.t00z.n00.archs.a"
sst = getField('temp', archs_file)
sss = getField('salin', archs_file)
ssh = getField('srfhgt', archs_file)

uvel = getField('u-vel', archs_file)
vvel = getField('v-vel', archs_file)

In [None]:
# One panel per field read from the archs file; each panel is titled with the
# field name it was read under so the figure can be understood on its own.
speed = np.sqrt(uvel**2 + vvel**2)
panels = (
    ("temp", sst),
    ("salin", sss),
    ("srfhgt", ssh),
    ("sqrt(u-vel**2 + v-vel**2)", speed),
)

fig, axes = plt.subplots(2, 2, figsize=(10, 10))
for ax, (panel_title, panel_data) in zip(axes.flat, panels):
    image = ax.imshow(panel_data, origin='lower')
    fig.colorbar(image, ax=ax, shrink=0.4)
    ax.set_title(panel_title)

In [None]:
# Print the ".b" text file that accompanies the archs archive read above.
!cat $data_path/v2.4_rtofs_glo.t00z.n00.archs.b