# Read .nc and .grib files
**Coder:** Nadia Sae-Lim <br>
**Date:** &nbsp; 2024-11-22 <br>
**Goal:** Export CERES and ERA5-Land variables from .nc and .grib files to .csv files. <br>

## 1. CERES (.nc file)

### Set up

In [2]:
from netCDF4 import Dataset

def ncdump(nc_fid, verb=True):
    '''
    Summarise the global attributes, dimensions and variables of an open
    NetCDF dataset, in the spirit of NCAR's ncdump utility.

    Parameters
    ----------
    nc_fid : netCDF4.Dataset
        An open netCDF4 dataset object
    verb : Boolean
        When true, the attributes, dimensions and variables are also printed

    Returns
    -------
    nc_attrs : list
        Names of the dataset's global attributes
    nc_dims : list
        Names of the dataset's dimensions
    nc_vars : list
        Names of the dataset's variables
    '''
    def print_ncattr(key):
        """
        Print the data type and every attribute of one variable.

        A warning line is printed instead when a KeyError is raised, e.g.
        when `key` names a dimension that has no variable of the same name.

        Parameters
        ----------
        key : unicode
            a key into nc_fid.variables
        """
        try:
            variable = nc_fid.variables[key]
            print("\t\ttype:", repr(variable.dtype))
            for attr_name in variable.ncattrs():
                print('\t\t%s:' % attr_name, repr(variable.getncattr(attr_name)))
        except KeyError:
            print("\t\tWARNING: %s does not contain variable attributes" % key)

    nc_attrs = nc_fid.ncattrs()
    nc_dims = list(nc_fid.dimensions)
    nc_vars = list(nc_fid.variables)

    # Quiet mode: hand back the three name lists without printing anything.
    if not verb:
        return nc_attrs, nc_dims, nc_vars

    # Global attributes.
    print("NetCDF Global Attributes:")
    for attr_name in nc_attrs:
        print('\t%s:' % attr_name, repr(nc_fid.getncattr(attr_name)))

    # Dimensions: length plus the attributes of the same-named variable.
    print("NetCDF dimension information:")
    for dim_name in nc_dims:
        print("\tName:", dim_name)
        print("\t\tsize:", len(nc_fid.dimensions[dim_name]))
        print_ncattr(dim_name)

    # Variables that share a name with a dimension were already reported above.
    print("NetCDF variable information:")
    for var_name in nc_vars:
        if var_name in nc_dims:
            continue
        variable = nc_fid.variables[var_name]
        print('\tName:', var_name)
        print("\t\tdimensions:", variable.dimensions)
        print("\t\tsize:", variable.size)
        print_ncattr(var_name)

    return nc_attrs, nc_dims, nc_vars

## Courtesy: Dr. Carrie Morrill (NOAA)

### CERES

In [142]:
file_name = r"C:\Users\nadia\PhD\lakemodeling\Updated_analysis\CERES\CERES_SYN1deg-1H_Terra-Aqua-MODIS_Ed4.1_Subset_20000301-20231231.nc"

# Print the file's metadata. The context manager closes the file even if
# ncdump raises, which the previous manual close() did not guarantee.
with Dataset(file_name) as nc_fid:
    nc_attrs, nc_dims, nc_vars = ncdump(nc_fid)

# Read the time axis, the coordinates and the two downward surface fluxes.
with Dataset(file_name, mode='r') as f:
    dt = f.variables['time'][:]
    lons = f.variables['lon'][:]
    lats = f.variables['lat'][:]
    # Index 0 is taken on the third axis and squeeze() drops length-1 axes.
    # NOTE(review): assumes the axes are (time, lat, lon), so that index 0
    # selects the first longitude cell -- confirm against the ncdump output.
    ssrd = f.variables['adj_atmos_sw_down_all_surface_1h'][:, :, 0].squeeze()
    strd = f.variables['adj_atmos_lw_down_all_surface_1h'][:, :, 0].squeeze()

NetCDF Global Attributes:
	title: 'CERES SYN1deg 1-Hourly data'
	institution: 'NASA Langley Research Center'
	Conventions: 'CF-1.4'
	comment: 'Satellites used: Terra 3/2000 - 6/2002; Terra+Aqua 7/2002 - 3/2022; Terra+NOAA20 4/2022 - present'
	Version: 'Edition 4.1: Release Date August 22, 2019'
	DOI: '10.5067/Terra-MODIS/CERES/SYN1deg-1Hour_L3.004A;10.5067/Terra+Aqua/CERES/SYN1deg-1Hour_L3.004A; 10.5067/Terra-NOAA20/CERES/SYN1deg-1Hour_L3.004A'
	Fill_Value: 'Fill Value is -999.0'
NetCDF dimension information:
	Name: time
		size: 208944
		type: dtype('float32')
		long_name: 'Time'
		standard_name: 'time'
		units: 'days since 2000-03-01 00:00:00'
		delta_t: '0000-00-01 00:00:00'
	Name: lon
		size: 2
		type: dtype('float32')
		long_name: 'Longitude'
		standard_name: 'longitude'
		units: 'degrees_east'
		valid_range: array([-180.,  360.], dtype=float32)
	Name: lat
		size: 1
		type: dtype('float32')
		long_name: 'Latitude'
		standard_name: 'latitude'
		units: 'degrees_north'
		valid_range: 

cannot be safely cast to variable data type
  ssrd = f.variables['adj_atmos_sw_down_all_surface_1h'][:,:,0].squeeze()
cannot be safely cast to variable data type
  ssrd = f.variables['adj_atmos_sw_down_all_surface_1h'][:,:,0].squeeze()
cannot be safely cast to variable data type
  strd = f.variables['adj_atmos_lw_down_all_surface_1h'][:,:,0].squeeze()
cannot be safely cast to variable data type
  strd = f.variables['adj_atmos_lw_down_all_surface_1h'][:,:,0].squeeze()


In [19]:
import numpy as np
import pandas as pd
# Convert the two flux series to plain arrays via filled().
# NOTE(review): presumably ssrd/strd are numpy masked arrays read from the
# CERES file, so masked entries become the fill value -- confirm.
h1, h2 = ssrd.filled(), strd.filled()

# One column per flux.
data = dict(ssrd=h1, strd=h2)
df = pd.DataFrame(data)
df

Unnamed: 0,ssrd,strd
0,0.000000,303.000000
1,0.000000,308.924988
2,0.000000,321.450012
3,0.000000,334.174988
4,0.000000,331.424988
...,...,...
208939,187.850006,361.774994
208940,107.974998,353.674988
208941,63.825001,353.950012
208942,43.375000,328.000000


In [23]:
# Write the CERES table to disk; with pandas' defaults the row index is
# written as the first column.
df.to_csv(path_or_buf=r"C:\Users\nadia\PhD\lakemodeling\Updated_analysis\CERES\CERES_west_grid.csv")

## 2. ERA5-Land

### ERA5-Land (.nc and .grib files)

In [69]:
# Code used in the original submission (1981 - 2019)
# Imports are repeated here so this cell runs on its own: num2date was not
# imported anywhere else, and glob/os were only imported in a later cell.
import glob
import os

from netCDF4 import Dataset, num2date

parent = "C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly/"
# Sort the matches so files are processed in file-name order instead of
# whatever order the filesystem happens to return.
all_files = sorted(glob.glob(os.path.join(parent, "*.nc")))

# Variables copied from every file into the output table.
era5_variables = ['t2m', 'd2m', 'ssrd', 'strd', 'u10', 'v10', 'sp']
# Index taken on the third axis of every variable.
# NOTE(review): presumably the longitude index of the target grid cell -- confirm.
grid_index = 49

# Collect one frame per file and concatenate once at the end. This avoids
# re-copying the growing table on every iteration and removes the need for
# a dummy first row that later has to be dropped.
frames = []
for file_name in all_files:
    print(file_name)
    with Dataset(file_name, mode='r') as f:
        time_var = f.variables['time']
        # Convert the numeric time axis to datetime objects using the
        # units attribute stored with the variable.
        data = {'datetime': num2date(time_var[:], time_var.units)}
        for name in era5_variables:
            # squeeze() drops length-1 axes; filled() turns the masked
            # array into a plain ndarray.
            data[name] = f.variables[name][:, :, grid_index].squeeze().filled()
    df = pd.DataFrame(data)
    frames.append(df)

if frames:
    all_df = pd.concat(frames)
else:
    # No input files matched: keep writing a header-only CSV instead of
    # failing inside pd.concat.
    all_df = pd.DataFrame(columns=['datetime'] + era5_variables)
all_df.to_csv("ERA5_Land_daily_-13.8_-71.1_hourly.csv")

C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly\ERA5_1981.nc
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly\ERA5_1982.nc
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly\ERA5_1983.nc
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly\ERA5_1984.nc
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly\ERA5_1985.nc
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly\ERA5_1986.nc
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly\ERA5_1987.nc
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly\ERA5_1988.nc
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly\ERA5_1989.nc
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly\ERA5_1990.nc
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly\ERA5_1991.nc
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly\ERA5_1992.nc
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA

In [26]:
# Read .grib files
import xarray as xr
import cfgrib
import os
import glob
import sys

parent = "C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Daily/"
# Sort the matches so files are processed in file-name order instead of
# whatever order the filesystem happens to return.
all_files = sorted(glob.glob(os.path.join(parent, "*.grib")))

# Variables copied from every file into the output table.
era5_variables = ['t2m', 'd2m', 'ssrd', 'strd', 'u10', 'v10', 'sp']

# Collect one frame per file and concatenate once at the end. The previous
# version seeded the table with a row of zeros and never removed it, so the
# exported CSV started with a bogus all-zero record.
frames = []
for f in all_files:
    print(f)
    # The context manager closes the GRIB file once its values are loaded.
    with xr.open_dataset(f, engine="cfgrib") as d:
        # .values loads the data into memory before the file is closed.
        data = {'datetime': d.time.values}
        for name in era5_variables:
            # Index 0 is taken on the second and third axes.
            # NOTE(review): presumably the single lat/lon cell -- confirm.
            data[name] = d[name][:, 0, 0].values
    df = pd.DataFrame(data)
    frames.append(df)

if frames:
    all_df = pd.concat(frames)
else:
    # No input files matched: write a header-only CSV instead of failing
    # inside pd.concat.
    all_df = pd.DataFrame(columns=['datetime'] + era5_variables)
all_df.to_csv("ERA5_Land_daily_-13.8_-71.1.csv")

C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Daily\1950-1953.grib
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Daily\1954-1957.grib
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Daily\1958-1961.grib
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Daily\1962-1965.grib
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Daily\1966-1969.grib
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Daily\1970-1973.grib
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Daily\1974-1977.grib
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Daily\1978-1980.grib


Ignoring index file 'C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Daily\\1981-1984.grib.5b7b6.idx' incompatible with GRIB file


C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Daily\1981-1984.grib
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Daily\1985-1988.grib
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Daily\1989-1992.grib
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Daily\1993-1996.grib
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Daily\2020-2023.grib


In [140]:
# Read .grib files (export individual variable, for example, surface pressure (sp))
import numpy as np
parent = "C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly/"
# Short name of the variable to export. It selects the input files
# (<variable>_*.grib), the data variable read from each file, the output
# column name and the output file (<variable>.csv).
# NOTE(review): other variables (t2m, d2m, ssrd, strd, u10, v10) presumably
# follow the same file naming -- confirm before changing this.
variable = "sp"
# Sort the matches so files are processed in file-name order instead of
# whatever order the filesystem happens to return.
all_files = sorted(glob.glob(os.path.join(parent, variable + "_*.grib")))

# Collect the per-hour frames and concatenate once at the end, instead of
# re-copying the growing table 24 times per file.
frames = []
for f in all_files:
    print(f)
    # The context manager closes the GRIB file once its values are loaded.
    with xr.open_dataset(f, engine="cfgrib") as d:
        dates = d.time.values
        # Index 0 is taken on the last two axes; the result is indexed as
        # [time, i] below.
        # NOTE(review): presumably the axes are (time, step, lat, lon) -- confirm.
        values = d[variable][:, :, 0, 0].values
    for i in range(0, 24):
        # 'hour' is the index along the second axis.
        # NOTE(review): confirm that this index corresponds to hour of day.
        df = pd.DataFrame({'date': dates,
                           'hour': i,
                           variable: values[:, i]})
        frames.append(df)

if frames:
    all_df = pd.concat(frames)
else:
    # No input files matched: write a header-only CSV instead of failing
    # inside pd.concat.
    all_df = pd.DataFrame(columns=['date', 'hour', variable])
all_df.to_csv(variable + ".csv")

C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly\sp_2020.grib
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly\sp_2021.grib
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly\sp_2022.grib
C:/Users/nadia/PhD/lakemodeling/Updated_analysis/ERA5-Land/Hourly\sp_2023.grib
