In [2]:
import netCDF4
import pandas as pd
import matplotlib.pyplot as plt

In [76]:
import numpy as np

In [3]:
# Open the OCO-2 subset file read-only; `nc` is the handle every later cell uses
nc = netCDF4.Dataset("oco_fl_data.nc", mode="r")

In [57]:
# Print the dataset summary (the conventional way to inspect netCDF metadata).
# The summary lists the data model and file format, the global attributes
# (source, dataset version, contact, institution, ...), the dimensions with
# their sizes, and the variables with their dtypes.

print(nc)

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    source_dataset_version: OCO-2 L2Std v10
    format: NetCDF-4/HDF-5
    conventions: CF-1.6
    source: OCO-2 Level 2 Full Physics Standard Product
    contact: http://co2.jpl.nasa.gov
    calendar: 365
    institution: Jet Propulsion Laboratory
    processing_level: L2
    title: OCO2L2Stdv10 - Level 2 Subsetted Product
    dimensions(sizes): points_dimension(3996)
    variables(dimensions): <class 'str'> time(points_dimension), float64 lon(points_dimension), float64 lat(points_dimension), float64 alt(points_dimension), float32 xco2(points_dimension), float32 xco2_uncert(points_dimension)
    groups: 


In [5]:
# Access the global attributes as a dictionary
# (printed as an OrderedDict mapping attribute name -> value)

print(nc.__dict__)

OrderedDict([('source_dataset_version', 'OCO-2 L2Std v10'), ('format', 'NetCDF-4/HDF-5'), ('conventions', 'CF-1.6'), ('source', 'OCO-2 Level 2 Full Physics Standard Product'), ('contact', 'http://co2.jpl.nasa.gov'), ('calendar', '365'), ('institution', 'Jet Propulsion Laboratory'), ('processing_level', 'L2'), ('title', 'OCO2L2Stdv10 - Level 2 Subsetted Product')])


In [7]:
# Inspect every dimension of the dataset. This is most useful when a file has
# more than one dimension; this file has only one.
# A single dimension can be accessed with nc.dimensions['dimension_name'].

for dim_name in nc.dimensions:
    dim = nc.dimensions[dim_name]
    print(dim)

<class 'netCDF4._netCDF4.Dimension'>: name = 'points_dimension', size = 3996


In [8]:
# Access variable metadata: print the summary of each variable in the dataset
# (dtype, dimensions, attributes, and current shape)

for var_name in nc.variables:
    var = nc.variables[var_name]
    print(var)

<class 'netCDF4._netCDF4.Variable'>
vlen time(points_dimension)
    long_name: Coordinated Universal Time (UTC)
    format: YYYYMMDDHHMMSS
vlen data type: <class 'str'>
unlimited dimensions: 
current shape = (3996,)
<class 'netCDF4._netCDF4.Variable'>
float64 lon(points_dimension)
    units: degrees east
    long_name: longitude
unlimited dimensions: 
current shape = (3996,)
filling on, default _FillValue of 9.969209968386869e+36 used
<class 'netCDF4._netCDF4.Variable'>
float64 lat(points_dimension)
    units: degrees north
    long_name: latitude
unlimited dimensions: 
current shape = (3996,)
filling on, default _FillValue of 9.969209968386869e+36 used
<class 'netCDF4._netCDF4.Variable'>
float64 alt(points_dimension)
    units: km
    long_name: altitude
unlimited dimensions: 
current shape = (3996,)
filling on, default _FillValue of 9.969209968386869e+36 used
<class 'netCDF4._netCDF4.Variable'>
float32 xco2(points_dimension)
    units: Moles Mole^{-1}
    invalid_value: -999999.0
   

### Retrieve Data Values of Interest 
In this case, the values of interest are the xCO2 values. The data was downloaded from the NASA website (https://co2.jpl.nasa.gov).

In [58]:
# Look up the 'time' variable object (a netCDF4 Variable, not yet an array of values)
time_var = nc.variables['time']

In [59]:
# Show the variable's metadata: 'time' has a long_name of
# 'Coordinated Universal Time (UTC)' and a format of 'YYYYMMDDHHMMSS',
# and stores variable-length strings

time_var

<class 'netCDF4._netCDF4.Variable'>
vlen time(points_dimension)
    long_name: Coordinated Universal Time (UTC)
    format: YYYYMMDDHHMMSS
vlen data type: <class 'str'>
unlimited dimensions: 
current shape = (3996,)

In [60]:
# This variable has only one dimension, named "points_dimension";
# .dimensions gives the dimension names as a tuple

time_var.dimensions

('points_dimension',)

In [62]:
# Read the variable's values with a full slice; here that yields a 1-D
# array of shape (3996,) holding 'YYYYMMDDHHMMSS' timestamp strings

time = time_var[:]
print(time)

['20140907191130' '20140907191131' '20140907191131' ... '20140922183058'
 '20140922183058' '20140922183058']


In [71]:
# Latitude of each point, in degrees north (stored as float64)
lat = nc.variables['lat'][:]
print(lat)

[25.68096352 25.71465683 25.70790291 ... 32.18590164 32.18090057
 32.17573547]


In [72]:
# Longitude of each point, in degrees east (stored as float64)
lon = nc.variables['lon'][:]
print(lon)

[-87.83446503 -87.85549164 -87.84738922 ... -81.47795868 -81.46549225
 -81.45307159]


In [73]:
# xCO2 value of each point, in units of Moles Mole^{-1} (stored as float32)
# NOTE(review): the variable's metadata lists invalid_value: -999999.0, which is
# a custom attribute rather than a _FillValue -- confirm no such sentinel values
# remain in this subset before using the numbers downstream.
xco2 = nc.variables['xco2'][:]
print(xco2)

[0.00038603 0.00039089 0.00039106 ... 0.00039077 0.00038847 0.00038641]


In [81]:
# Values of the 'xco2_uncert' variable (stored as float32)
# NOTE(review): presumably the uncertainty on xco2, in the same units --
# confirm against the variable's attributes.
xco2_uncert = nc.variables['xco2_uncert'][:]
print(xco2_uncert)

[1.3657205e-06 1.1430939e-06 1.3678181e-06 ... 6.7961605e-07 6.2806271e-07
 6.1870622e-07]


In [84]:
# Assemble the table one column at a time so every variable keeps its own dtype.
# Stacking the arrays with np.array([...]) first forces the timestamp strings and
# the float arrays into a single object-dtype array, which leaves every DataFrame
# column as `object` and discards any mask carried by the netCDF arrays.
# np.ma.filled turns masked entries into NaN and passes plain arrays through.
# The integer labels 0-4 match the column labels the stacked version produced.
final = pd.DataFrame({
    0: time,
    1: np.ma.filled(lat, np.nan),
    2: np.ma.filled(lon, np.nan),
    3: np.ma.filled(xco2, np.nan),
    4: np.ma.filled(xco2_uncert, np.nan),
})

In [85]:
# Display the assembled table; the columns are still labelled 0-4 at this point
final

Unnamed: 0,0,1,2,3,4
0,20140907191130,25.681,-87.8345,0.000386026,1.36572e-06
1,20140907191131,25.7147,-87.8555,0.00039089,1.14309e-06
2,20140907191131,25.7079,-87.8474,0.000391056,1.36782e-06
3,20140907191131,25.7011,-87.8393,0.000390469,1.35486e-06
4,20140907191132,25.7613,-87.854,0.000382264,1.15966e-06
...,...,...,...,...,...
3991,20140922183058,32.1627,-81.4596,0.000393531,6.3686e-07
3992,20140922183058,32.1575,-81.4472,0.000387889,5.66249e-07
3993,20140922183058,32.1859,-81.478,0.000390775,6.79616e-07
3994,20140922183058,32.1809,-81.4655,0.000388468,6.28063e-07


In [86]:
# Replace the positional column labels with descriptive names
column_names = ['Time', 'Lat', 'Long', 'xCO2', 'xCO2_uncert']
final.columns = column_names

In [93]:
# Preview the parsed timestamps without modifying `final`.
# The strings follow the 'YYYYMMDDHHMMSS' layout documented on the time variable,
# so the format is given explicitly instead of relying on pandas' inference;
# utc=True makes the result timezone-aware UTC.
pd.to_datetime(final['Time'], format='%Y%m%d%H%M%S', utc=True)

0      2014-09-07 19:11:30+00:00
1      2014-09-07 19:11:31+00:00
2      2014-09-07 19:11:31+00:00
3      2014-09-07 19:11:31+00:00
4      2014-09-07 19:11:32+00:00
                  ...           
3991   2014-09-22 18:30:58+00:00
3992   2014-09-22 18:30:58+00:00
3993   2014-09-22 18:30:58+00:00
3994   2014-09-22 18:30:58+00:00
3995   2014-09-22 18:30:58+00:00
Name: Time, Length: 3996, dtype: datetime64[ns, UTC]

In [96]:
# Convert the 'Time' column from 'YYYYMMDDHHMMSS' strings to timezone-aware UTC
# datetimes. The format is given explicitly (it is documented on the time
# variable) so malformed values raise instead of being guessed at by inference.
final['Time'] = pd.to_datetime(final['Time'], format='%Y%m%d%H%M%S', utc=True)

In [97]:
# Confirm the column now holds timezone-aware UTC datetimes
final['Time'].dtype

datetime64[ns, UTC]

In [98]:
# Display the table again, now with named columns and parsed timestamps
final

Unnamed: 0,Time,Lat,Long,xCO2,xCO2_uncert
0,2014-09-07 19:11:30+00:00,25.681,-87.8345,0.000386026,1.36572e-06
1,2014-09-07 19:11:31+00:00,25.7147,-87.8555,0.00039089,1.14309e-06
2,2014-09-07 19:11:31+00:00,25.7079,-87.8474,0.000391056,1.36782e-06
3,2014-09-07 19:11:31+00:00,25.7011,-87.8393,0.000390469,1.35486e-06
4,2014-09-07 19:11:32+00:00,25.7613,-87.854,0.000382264,1.15966e-06
...,...,...,...,...,...
3991,2014-09-22 18:30:58+00:00,32.1627,-81.4596,0.000393531,6.3686e-07
3992,2014-09-22 18:30:58+00:00,32.1575,-81.4472,0.000387889,5.66249e-07
3993,2014-09-22 18:30:58+00:00,32.1859,-81.478,0.000390775,6.79616e-07
3994,2014-09-22 18:30:58+00:00,32.1809,-81.4655,0.000388468,6.28063e-07


In [99]:
# Save the table as CSV in the working directory; the row index is written
# as well, as a leading unnamed column
final.to_csv('xco2_final.csv', index=True)