## 5 Importing Standard Earth Science Datasets
## 5.1 Text

In [None]:
import pandas as pd
# Path to the comma-separated trace-gas file used throughout section 5.1.
fname = 'data/campfire-gases-2018-11.csv'
# Parse the CSV file into a pandas DataFrame.
trace_gases = pd.read_csv(fname)

In [None]:
# Preview the first rows of the DataFrame that was just read.
trace_gases.head()

In [None]:
# Select a single column by its label.
trace_gases['H2O_MR_500mb']

In [None]:
# Names of every column from position 5 onward; these are the columns that
# get removed from the DataFrame in the next step.
drops = trace_gases.columns[5:].tolist()
print(drops)

In [None]:
# Remove the listed columns; inplace=True modifies trace_gases itself
# instead of returning a new DataFrame.
trace_gases.drop(columns=drops, inplace=True)

In [None]:
# Preview again to confirm which columns remain after the drop.
trace_gases.head()

In [None]:
# Split the first line of the CSV file into fields by hand, one character at
# a time, to show what a CSV reader has to do under the hood.
with open('data/campfire-gases-2018-11.csv') as data:
    for row in data:
        # Show the raw line first (it still carries its trailing newline).
        print(row)
        column = ''
        for character in row:
            if character != ',':
                # Still inside a field: keep accumulating characters.
                column = column + character
            else:
                # A comma ends the current field: emit it and start a new one.
                print(column)
                column = ''
        # The last field is not followed by a comma, so it must be emitted
        # explicitly; strip the line terminator that was accumulated with it.
        print(column.rstrip('\n'))
        # Only the first line is needed for the demonstration.
        break

In [None]:
# The character-by-character loop can be replaced by str.split,
# e.g. something like ' '.join(row.split(',')).

with open('data/campfire-gases-2018-11.csv') as data:
    # Read just the first line and show its comma-separated fields as a list.
    row = data.readline()
    if row:
        print(row.split(','))


## 5.2 NetCDF

In [None]:
from netCDF4 import Dataset

In [None]:
# Path to the thinned JRR-AOD NetCDF file.
fname='data/aod/JRR-AOD_v1r1_npp_s201808091955538_e201808091957180_c201808092049460_thinned.nc'
# Open the file with netCDF4; file_id is the handle used by the cells below.
file_id = Dataset(fname)

In [None]:
# Print the dataset object to see its summary.
print(file_id)

In [None]:
# List the names of the variables stored in the file.
file_id.variables.keys()

In [None]:
# Look up the 'AOD550' variable object (no explicit slicing yet) ...
AOD_550 = file_id.variables['AOD550']
# ... and check what kind of object was returned.
type(AOD_550)

In [None]:
import numpy as np
# Convert the variable to a plain NumPy array.
# NOTE(review): presumably this keeps fill values as ordinary numbers,
# which is why they are filtered out by hand further down - confirm.
AOD_550 = np.array(AOD_550)
# Report the array dimensions.
AOD_550.shape

In [None]:
# Display the array contents.
AOD_550

In [None]:
# Mean over every element of the array; nothing has been filtered out yet.
avgAOD = AOD_550.mean()
print(avgAOD)

In [None]:
# Print the 'AOD550' variable object itself (rather than its data values).
print(file_id.variables['AOD550'])

In [None]:
# Read the variable's _FillValue attribute, kept here as the marker for
# missing data.
missing = file_id.variables['AOD550']._FillValue
missing

In [None]:
# Boolean array: True wherever the value differs from the fill value.
keep_rows = AOD_550 != missing

# Compare a small slice of the data with the same slice of the boolean array.
AOD_550[50:60, 100], keep_rows[50:60, 100]

In [None]:
# Boolean indexing keeps only the elements flagged True in keep_rows.
AOD_550_filtered = AOD_550[keep_rows]
AOD_550_filtered

In [None]:
# Re-compute the mean using only the filtered values; the result looks more
# reasonable than the mean over the unfiltered array.
avgAOD = AOD_550_filtered.mean()
avgAOD

In [None]:
# Element counts before and after filtering.
AOD_550.size, AOD_550_filtered.size

In [None]:
# Read the data by slicing the variable directly instead of wrapping it in
# np.array, then check the type of the result.
# NOTE(review): netCDF4 presumably returns a masked array here - confirm.
AOD_550 = file_id.variables['AOD550'][:,:]
type(AOD_550)

In [None]:
# Same slice that was inspected earlier, for comparison.
AOD_550[50:60, 100]

In [None]:
# Mean of the data obtained by slicing the variable.
avgAOD = AOD_550.mean()
avgAOD

In [None]:
# Read the variable's valid_range attribute.
file_id.variables['AOD550'].valid_range

## 5.3 HDF

**Errata:** The book originally imports the operational file using the path ```data/3B-HHR.MS.MRG.3IMERG.20170827-S120000-E122959.0720.V06B.HDF5```. This path should be changed to ```data/3B-HHR.MS.MRG.3IMERG.20170827-S120000-E122959.0720.V06B_thinned.HDF5```, which is a smaller version of the operational file. The correct code is shown below.

In [None]:
import h5py

# Path to the thinned HDF5 file (see the errata note for this section).
fname = 'data/3B-HHR.MS.MRG.3IMERG.20170827-S120000-E122959.0720.V06B_thinned.HDF5' 
# Open read-only; note that this rebinds file_id, which previously referred
# to the NetCDF dataset.
file_id = h5py.File(fname, 'r')
file_id

In [None]:
# Names found at the top level of the file.
list(file_id)

In [None]:
# Names found directly under the "Grid" entry.
print(list(file_id["Grid"].keys()))

In [None]:
# Walk the file hierarchy, passing each visited name to print.
file_id.visit(print)

In [None]:
# Read the whole dataset along all three of its dimensions.
precip = file_id["Grid/precipitationCal"][:,:,:]
precip

In [None]:
# Names of the attributes attached to the dataset.
print(list(file_id["Grid/precipitationCal"].attrs))

In [None]:
# Read the dataset's '_FillValue' attribute, kept as the marker for missing
# data. This rebinds the `missing` name used in the NetCDF section.
missing = file_id["Grid/precipitationCal"].attrs['_FillValue']
missing

In [None]:
# True wherever the data equals the fill value.
PrecipMask = (precip == missing)
# Wrap the data in a masked array so the flagged elements are treated as
# missing. `np` comes from the `import numpy as np` in the NetCDF section.
precip = np.ma.masked_array(precip, mask=PrecipMask)
precip

In [None]:
# Mean of the masked array built from the precipitation data.
precip.mean()

## 5.4 GRIB2

NOTE: The ```pygrib``` package was originally left out of the environment.yml file. If you downloaded an older version of this repository, please install this package into the ```earthsciviz``` environment using:

```
conda install -c conda-forge pygrib
```
The environment.yml has since been updated.

In [None]:
import pygrib

In [None]:
# Open the GFS analysis GRIB2 file and build a list holding the string
# description of every message it contains.
filename = 'data/gfsanl_3_20200501_0000_000.grb2'
gfs_grb2 = pygrib.open(filename)
records = list(map(str, gfs_grb2))

In [None]:
# Show the string description stored at list index 12.
records[12]

In [None]:
# Collect the messages whose name is 'Temperature'.
temps = gfs_grb2.select(name='Temperature')

In [None]:
# Fetch a single message by number.
# NOTE(review): pygrib message numbers presumably start at 1, unlike the
# 0-based positions in the `records` list - confirm.
temp = gfs_grb2[315]

In [None]:
# Display the messages returned by the 'Temperature' selection.
temps

In [None]:
# List the keys available on this message.
temp.keys()

In [None]:
# Pull coordinate and metadata values off the selected message by key.
lat = temp.latitudes
lon = temp.longitudes
level = temp.level
units = temp.units
# Analysis date and forecast time recorded on the message.
analysis_date = temp.analDate
fcst_time = temp.forecastTime

## 5.5 Importing Data using xarray
### 5.5.1 netCDF

In [None]:
import xarray as xr

In [None]:
# Path to the NUCAPS EDR NetCDF file.
fname = 'data/NUCAPS-EDR_v2r0_npp_s201903031848390_e201903031849090_c201903031941100.nc'
# Open it as an xarray Dataset; decode_times=False turns off xarray's
# decoding of time values.
nucaps = xr.open_dataset(fname, decode_times=False)
print(nucaps)

In [None]:
# Select the entry labelled 0 along the Number_of_CrIS_FORs dimension.
profile = nucaps.sel(Number_of_CrIS_FORs=0)
print(profile)

In [None]:
# Pull the Temperature variable out of the selected profile.
temp_profile = profile.Temperature
print(temp_profile)

In [None]:
# Extract the profile's latitude and longitude as plain Python scalars.
lat = profile.Latitude.item()
lon = profile.Longitude.item()
print(lat, lon)

In [None]:
# True at the level(s) of this profile whose rounded pressure equals 300.
mask = profile.Pressure.round() == 300
# Use the boolean mask to select along Number_of_P_Levels in the full dataset.
gradient = nucaps.sel(Number_of_P_Levels = mask)
print(gradient)

In [None]:
# Map each pressure value of at least 100 (truncated to an integer) to its
# position within the profile.
pres_dict = {}
for i, p in enumerate(profile.Pressure):
    if not p >= 100:
        continue
    pres_dict[int(p)] = i

In [None]:
# Look up the position stored under the key 300.
pres_dict[300]

In [None]:
# Boolean mask over the full dataset: True where rounded pressure equals 300.
mask = (nucaps.Pressure.round() == 300)
# Index the underlying Temperature values with that mask.
nucaps.Temperature.values[mask]

In [None]:
# Keep handles to the dataset's Latitude and Longitude variables.
lats, lons = nucaps.Latitude, nucaps.Longitude

### 5.5.2 GRIB2

NOTE: The ```cfgrib``` package was originally left out of the environment.yml file. If you downloaded an older version of this repository, please install this package into the ```earthsciviz``` environment using:

```
conda install -c conda-forge cfgrib
```
The environment.yml has since been updated.

In [None]:
import cfgrib

In [None]:
# Backend options handed to cfgrib when opening the file: keep only
# messages on isobaric (hPa) levels whose name is 'Temperature'.
filter_keys = {
    'filter_by_keys': {
        'typeOfLevel': 'isobaricInhPa',
        'name': 'Temperature',
    },
}

In [None]:
# Open the GFS GRIB2 file through xarray's cfgrib engine, passing the
# filter keys so that only the matching messages are loaded.
filename = 'data/gfsanl_3_20200501_0000_000.grb2'
gfs = xr.open_dataset(filename, engine='cfgrib', backend_kwargs=filter_keys)
print(gfs)

In [None]:
# NOTE(review): this cell repeats the previous cell verbatim - confirm
# whether a different filter or file was intended here.
filename = 'data/gfsanl_3_20200501_0000_000.grb2'
gfs = xr.open_dataset(filename, engine='cfgrib', backend_kwargs=filter_keys)
print(gfs)

### 5.5.3 Accessing datasets using OPeNDAP

In [None]:
# Server root and catalog path of the remote ERSST v5 monthly-mean file.
# NOTE(review): NOAA ESRL PSD has presumably since been renamed PSL; if this
# address no longer resolves, check for the equivalent psl.noaa.gov URL.
baseURL = 'http://www.esrl.noaa.gov'
catalogURL = '/psd/thredds/dodsC/Datasets/noaa.ersst.v5/sst.mnmean.nc'
# Open the remote dataset from the joined URL.
sst = xr.open_dataset(f'{baseURL}{catalogURL}')
print(sst)