In [None]:
import os, sys
import netCDF4 as nc
import numpy as np

How To: concatenate many OCO Lite files into one, using OPeNDAP in Python<br>
This is a simple Python script intended to give an idea how to exploit remote OPeNDAP access and concatenate many Orbiting Carbon Observatory-2 (OCO-2) Lite daily files into one data object, without downloading individual Lite files

# Set the time period to concatenate.

Note the ISO 8601 time format. It is critical to adhere to it.<br>
There are no checks to ensure the format is correct.

These time strings are used in the CMR query, and if the format is not correct, an empty<br>
result will be returned.

In [None]:
# Start and end of the time window, as ISO 8601 UTC timestamps.
t1 = '2020-03-01T00:00:00Z'
t2 = '2020-03-02T23:59:00Z'
# Comma-separated "start,end" pair used as the temporal range in the CMR query.
t1_t2 = ','.join([t1, t2])

# Earthdata dataset short name and dataset version strings.

Make sure to use the currently available dataset version.

In [None]:
# Dataset short name and version string, both inserted into the CMR query URL.
short_name, vid = 'OCO2_L2_Lite_FP', '11.1r'

# Variable names to access and concatenate.

In [None]:
# Names of the Lite-file variables of interest.
sdname = [
    'longitude',
    'latitude',
    'time',
    'xco2',
    'xco2_qf_simple_bitflag',
    'xco2_quality_flag',
]
# Number of variable names listed above.
Nsd = len(sdname)

#  Here we build a one-line command that executes the CMR URL query<br>
and parses the response from the CMR.<br>
The command should look like this one-liner string:<br>
curl -sS "https://cmr.earthdata.nasa.gov/search/granules.native?short_name=OCO2_L2_Lite_FP&version=11.1r&temporal[]=2020-03-01T00:01:00Z,2020-03-31T00:59:00Z&pretty=true&page_size=2000" | awk -F">|<" '/<URL>http.*opendap/ {print $3}'

You can try from your shell, but remember - It is a one-line command, so no line<br>
breaks in the above string! Make sure the OCO2_L2_Lite_FP version is right!<br>
Depending on the shell, the square brackets may need to be escaped, like this: \[ \]<br>
Note the page_size=2000  in the query URL.<br>
This is the CMR limit on the number of data files links<br>
to return per query, and is equivalent to ~5 years of Lite files.<br>
We don't recommend setting the time period (t1,t2, above)<br>
to more than ONE year, though.<br>
Aggregating one year will be a process about 20 minutes long, and if<br>
some timeout or other disruption occurs, you'll have to start all over.

The CMR query used here results in a response in XML format.<br>
If the query is successful, the response contains a lot of extra<br>
information, so we need to parse only the opendap URLs by simple means.<br>
Here we use "awk".<br>
The CMR query and awk parsing are stored as one<br>
string, "cmd", which will be passed to the Python call "os.system" to run in a child shell.

In [None]:
# CMR granule-search URL for the chosen dataset, version and time window.
cmr_url = (
    "https://cmr.earthdata.nasa.gov/search/granules.native"
    + "?short_name=" + short_name
    + "&version=" + vid
    + "&temporal[]=" + t1_t2
    + "&pretty=true&page_size=2000"
)
# awk program that prints the content of every <URL> element containing "opendap".
awk_filter = "awk -F\">|<\" '/<URL>http.*opendap/ {print $3}'"
# Full pipeline: fetch the XML response with curl and pipe it through awk.
cmr = "curl -sS \"" + cmr_url + "\" | " + awk_filter
# Redirect the extracted URLs into temp.txt in the current working directory.
cmd = cmr + ' > temp.txt'

Here we spawn a child shell process, which is the actual query of CMR and awk parsing<br>
of the response.

In [None]:
# Run the CMR query and awk parsing in a child shell; the shell command
# redirects the extracted OPeNDAP URLs into temp.txt.
os.system(cmd)
# Count the URLs (one per line). The with-block makes sure the file handle is
# closed as soon as the count is done instead of being left open.
with open('temp.txt') as url_file:
    Nf = sum(1 for _ in url_file)

If CMR returns no results, there is no point in continuing.

In [None]:
# Guard against an empty CMR response: print the diagnostics, then stop.
if Nf == 0:
    print("No granules found!")
    print("Query and parse: ", cmd)
    print("Check: Begin/End Dates; Dataset name and version")
    # Every later cell needs at least one URL (np.hstack fails on an empty
    # list), so raise here with a clear message instead of continuing.
    raise RuntimeError("CMR query returned no OPeNDAP URLs; nothing to concatenate.")

If we pass the above check, the CMR response contains at least one OPeNDAP URL.<br>
The list of all OPeNDAP URLs discovered within the set time period is<br>
stored in the text file "temp.txt".

In [None]:
# List of Nf empty strings, one slot per discovered URL.
flnm = [""] * Nf

In [None]:
# Read the OPeNDAP URLs, one per line, into the list "store".
# The with-block closes the file even if reading fails. Each line is stripped
# because the trailing newline would otherwise be passed on as part of the URL
# when the entry is handed to nc.Dataset.
with open('temp.txt', 'r') as f:
    store = [line.strip() for line in f]

This is only preparation of data objects in the name space.<br>
No storage is allocated here.

In [None]:
# Six independent empty lists used as accumulators. The download loop appends
# one array per file to lon, lat, time, xco2 and qcf; qfsmpl is declared but is
# not appended to by that loop.
lon, lat, time, xco2, qfsmpl, qcf = ([] for _ in range(6))

# This is the loop where OPeNDAP URLs are accessed by the netCDF,<br>
and where the concatenation takes place.

In [None]:
# This is the loop where the OPeNDAP URLs are accessed through netCDF4 and
# where the concatenation takes place.
# The whole loop is kept in ONE cell: an indented loop body cannot be continued
# in a following cell (that cell would fail with an IndentationError).
for i in range(0, Nf):
    # Lines read with readlines() keep their trailing newline; strip it so that
    # it is not sent as part of the URL.
    url = store[i].strip()
    print(url)
    # The with-block closes the remote file handle after the reads, even if one
    # of them fails. Without closing, a long run may exhaust the allowed number
    # of open handles.
    with nc.Dataset(url) as fid:
        xco20 = fid.variables['xco2'][:]
        lat0 = fid.variables['latitude'][:]
        lon0 = fid.variables['longitude'][:]
        qcf0 = fid.variables['xco2_quality_flag'][:]
        time0 = fid.variables['time'][:]

    # This is the concatenation step: one array per file is appended to each
    # accumulator. The collected data end up in lon, lat, time, xco2 and qcf.
    lon.append(lon0.filled())
    lat.append(lat0.filled())
    xco2.append(xco20.filled())
    qcf.append(qcf0.filled())
    time.append(time0.filled())

# Below, all the variables have been concatenated as 1D vector variables<br>
in the computer memory.

An example quality screening, based on variable "xco2_quality_flag"<br>
from the Lite Full-Physics files. This quality flag is stored as variable "qcf".<br>
The best quality is when qcf=0.

In [None]:
# Join the per-file arrays into one vector per variable.
# np.atleast_1d covers the corner case of a single data point in total:
# squeeze() alone would collapse it to a 0-d array, which the np.where-based
# selection that follows cannot index.
xco2_all = np.atleast_1d(np.hstack(xco2).squeeze())
lon_all = np.atleast_1d(np.hstack(lon).squeeze())
lat_all = np.atleast_1d(np.hstack(lat).squeeze())
qcf_all = np.atleast_1d(np.hstack(qcf).squeeze())
time_all = np.atleast_1d(np.hstack(time).squeeze())

Routine to subset best quality data points (qcf==0) out of all data points.

In [None]:
# Indices of the data points whose quality flag equals 0.
best = np.where(qcf_all == 0)
# Select those points from every variable.
# np.atleast_1d keeps the results 1-D when exactly one point passes the
# screening; squeeze() alone would return a 0-d array, and the later
# len(xco2_best) would then raise a TypeError.
xco2_best = np.atleast_1d(xco2_all[best].squeeze())
lon_best = np.atleast_1d(lon_all[best].squeeze())
lat_best = np.atleast_1d(lat_all[best].squeeze())
time_best = np.atleast_1d(time_all[best].squeeze())

# The following section is just an example of saving the concatenated data  into a netCDF-4 file.<br>
It is simplified to the bare minimum to write into your current working directory.<br>
Make sure you have enough space.<br>
It will create or overwrite netCDF-4 data file "new.nc"

Run the following cells:<br>
 Setting up NCDF Dimensions, Variables

In [None]:
# Number of selected data points; used below as the length of the fixed-size dimensions.
count = len(xco2_best)
# Open the output file "new.nc" for writing in NETCDF4_CLASSIC format.
foutid = nc.Dataset('new.nc', 'w', format='NETCDF4_CLASSIC')

In [None]:
# 'time' is created with size None, i.e. as an unlimited dimension.
dimtime = foutid.createDimension('time', None)
# 'lat', 'lon' and 'xco2' are created, in that order, with fixed length "count".
dimlat, dimlon, dimxco2 = (
    foutid.createDimension(dim_name, count) for dim_name in ('lat', 'lon', 'xco2')
)

In [None]:
# Longitude variable on the 'lon' dimension, with its descriptive attributes.
varlon = foutid.createVariable('lon', float, ('lon',))
varlon.setncatts({'units': 'degrees_east', 'long_name': 'longitude'})

In [None]:
# Latitude variable on the 'lat' dimension, with its descriptive attributes.
varlat = foutid.createVariable('lat',float, ('lat',))
varlat.units = 'degrees_north'
# The attribute is "long_name", as on the other variables ("long_names" was a typo).
varlat.long_name = 'latitude'

In [None]:
# Time variable on the unlimited 'time' dimension; only a long_name is set.
vartime = foutid.createVariable('time', float, ('time',))
vartime.setncattr('long_name', 'time')

In [None]:
# XCO2 variable on the 'xco2' dimension, with its descriptive attributes.
varxco2 = foutid.createVariable('xco2', np.float64, ('xco2',))
varxco2.setncatts({
    'units': 'ppm',
    'long_name': 'Bias-corrected, quality-filtered XCO2 on X2007 scale',
})

Fill the Variables

In [None]:
# Write each selected array into its output variable, in the order lon, lat, time, xco2.
for out_var, values in (
    (varlon, lon_best),
    (varlat, lat_best),
    (vartime, time_best),
    (varxco2, xco2_best),
):
    out_var[:] = values

In [None]:
# Close the output dataset; nothing more can be written through foutid afterwards.
foutid.close()