In [55]:
"""
Name: extract_grid_center_indices
Author: Tania Lopez-Cantu
Date: 06/13/2020
-------------------------
The following notebook writes into a csv file the coordinates of the gridcell centers of
a NetCDF file. It assigns an id to each gridcell for creating a mesh for later easy mapping 
using ArcGIS, QGIS or geopandas.
"""

from netCDF4 import Dataset
import numpy as np
import pandas as pd
import os
import itertools

In [31]:
"""
Variables to modify:
file_path --> path to where .nc file is stored
save_path --> directory where to store the output of this notebook
"""
file_path = "cheswx_prcp_1948_2015.experimental.2017-08-14.nc"
save_path = "output/"

## code below is to avoid errors with user-specified paths, note that
## "save_path" finishes with "/", but if user does not include, we need
## to handle such case otherwise there will be an error in saving

if save_path[-1] != "/":
    save_path = f"{save_path}/"

In [32]:
# Load ncfile into notebook
nc_file = Dataset(file_path, "r")

In [33]:
# Look into nc_file contents/info. 
nc_file

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    title: Daily Interpolated Precipitation 19480101-20151231
    institution: Pennsylvania State University
    history: Created on: 2017-08-14 , experimental version 2
    comment: The ChesWx (Chesapeake Weather) gridded precipitation dataset contains interpolations of daily total precipitation at ~4km spatial resolution for a region surrounding the U.S. Mid-Atlantic.
    license: Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (http://creativecommons.org/licenses/by-nc-sa/4.0/)
    Conventions: CF-1.6
    dimensions(sizes): time(24837), lon(207), lat(222), nv(2)
    variables(dimensions): int64 time(time), float64 lon(lon), float64 lat(lat), float64 time_bnds(time,nv), int16 crs(), float32 prcp(time,lat,lon)
    groups: 

In [34]:
"""
# Access variables. Each variable is stored in a dictionary. 
# Important: Look at the comment under "prcp": Daily rainfall is based on
# morning time of observation. When we get the AMS from these series, we will have F-Maxima
# (fixed), not S-Maxima (sliding). Fixed maxima are defined exactly as the comment says, daily
# rainfall is equal to the amount from one record time to another. Sliding maxima, however,
# take into account that a rainstorm can produce rainfall that starts in the middle of a day
# and continues over the next day. The total amount of rainfall delivered by the storm will
# be broken down into two days according to the fixed recording time. This is documented
# here: https://doi.org/10.1016/j.jhydrol.2016.09.058
"""
nc_file.variables

{'time': <class 'netCDF4._netCDF4.Variable'>
 int64 time(time)
     long_name: time
     standard_name: time
     bounds: time_bnds
     comments: ending hour of daily total
     units: hours since 1948-01-01
     calendar: standard
 unlimited dimensions: 
 current shape = (24837,)
 filling on, default _FillValue of -9223372036854775806 used,
 'lon': <class 'netCDF4._netCDF4.Variable'>
 float64 lon(lon)
     _FillValue: nan
     units: degrees_east
     long_name: longitude
     standard_name: longitude
 unlimited dimensions: 
 current shape = (207,)
 filling on,
 'lat': <class 'netCDF4._netCDF4.Variable'>
 float64 lat(lat)
     _FillValue: nan
     units: degrees_north
     long_name: latitude
     standard_name: latitude
 unlimited dimensions: 
 current shape = (222,)
 filling on,
 'time_bnds': <class 'netCDF4._netCDF4.Variable'>
 float64 time_bnds(time, nv)
     _FillValue: nan
 unlimited dimensions: 
 current shape = (24837, 2)
 filling on,
 'crs': <class 'netCDF4._netCDF4.Variable

In [41]:
"""
The information contained in the crs variable (This is the map projection)
0corresponds to the WGS 1984 datum. Go to example 5.9 -> http://cfconventions.org/Data/cf-conventions/cf-conventions-1.3/build/ch05s06.html
"""
crs = nc_file.variables["crs"]
crs

<class 'netCDF4._netCDF4.Variable'>
int16 crs()
    semi_major_axis: 6378137.0
    longitude_of_prime_meridian: 0.0
    grid_mapping_name: latitude_longitude
    inverse_flattening: 298.257223563
unlimited dimensions: 
current shape = ()
filling on, default _FillValue of -32767 used

In [51]:
"""
Latitude and Longitude variables are 1 dimensional, meaning that the grids are equally spaced.
Note that this is not always the case because the Earth is curved and GCMs that simulate 
climate over a large area need to use a map projection that maintain a true ratio between the various 
areas represented on the map, leading to curved surfaces. When this is the case, the code in
this notebook will not work.
"""
lat = np.ma.getdata(nc_file.variables["lat"][:])
lon = np.ma.getdata(nc_file.variables["lon"][:])

In [62]:
# Get horizontal spacing 
horizontal_spacing = lat[1] - lat[0]

# Get vertical spacing

vertical_spacing = lon[1] - lon[0]

# Almost the same as expected
print(f"Horizontal spacing: {horizontal_spacing}")
print(f"Vertical spacing: {vertical_spacing}")



Horizontal spacing: -0.0416666666666643
Vertical spacing: 0.04166666666689878


In [64]:
# Get coordinates of gridcell centers
coords = [f"{i},{j}" for i,j in itertools.product(lat,lon)]
indeces = [f"id_{i}_{j}" for i,j in itertools.product(np.arange(len(lat)),np.arange(len(lon)))]

In [69]:
# Create pandas dataframe that will be saved later.
df = pd.DataFrame()
df["grid_id"] = indeces
df["lat"] = [x.split(",")[0] for x in coords] # index 0 is lat
df["lon"] = [x.split(",")[1] for x in coords] # index 1 is long

In [70]:
# Result of above operations
df

Unnamed: 0,grid_id,lat,lon
0,id_0_0,43.25,-81.9999999994217
1,id_0_1,43.25,-81.9583333327548
2,id_0_2,43.25,-81.9166666660879
3,id_0_3,43.25,-81.874999999421
4,id_0_4,43.25,-81.8333333327541
...,...,...,...
45949,id_221_202,34.04166666666667,-73.58333333270711
45950,id_221_203,34.04166666666667,-73.54166666604021
45951,id_221_204,34.04166666666667,-73.4999999993733
45952,id_221_205,34.04166666666667,-73.4583333327064


In [71]:
# save df to path specified at the beginning of notebook
name_csv = "historical_ches_gridcells"
df.to_csv(f"{save_path}{name_csv}.csv")