# Scan MERRA-2 atmospheric properties during one month
------------------------------------------------------------------

- author: Sylvie Dagoret-Campagne
- creation: November 26, 2016
- update April 25th 2018

Link:

http://disc.sci.gsfc.nasa.gov/datareleases/merra_2_data_release

### purpose:

Scan one month of MERRA-2 predictions from the dataset inst1_2d_asm_Nx_M2I1NXASM.
Extract the relevant atmospheric variables.
Build the corresponding time series and dataset in pandas.
Plot the variables. Save the pandas dataset into a file.
Convert the pandas dataset into an astropy FITS table and save it into a FITS file as well.


## 1) python libraries
---------------------------

In [None]:
# Set up matplotlib and use a nicer set of plot parameters
%config InlineBackend.rc = {}
import matplotlib
import matplotlib as mpl
matplotlib.rc_file("templates/matplotlibrc")
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import datetime

In [None]:
from matplotlib.dates import MonthLocator, WeekdayLocator,DateFormatter
from matplotlib.dates import MONDAY

In [None]:
# Tick helpers for date axes: a "Jun '17"-style label formatter, a locator on
# the first day of every month, and a locator on every Monday.
monthsFmt = DateFormatter("%b '%y")
months = MonthLocator(bymonth=range(1, 13), bymonthday=1, interval=1)
mondays = WeekdayLocator(byweekday=MONDAY)

In [None]:
import os
import re
import numpy as np
from mpl_toolkits.basemap import Basemap
from matplotlib import colors
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd

In [None]:
from astropy import units as u
from astropy.coordinates import SkyCoord

from astropy.table import Table

In [None]:
import h5py

In [None]:
import libGMAOMERRA2Data as merra2  # My own library

In [None]:
############################################################################
def ensure_dir(f):
    """Create the directory ``f`` (and any missing parents) if it does not exist.

    Nothing is done when ``f`` is empty or when the path already exists.

    Parameters
    ----------
    f : str
        Path of the directory to create.

    Raises
    ------
    OSError
        If the directory cannot be created for a reason other than it having
        been created concurrently by someone else.
    """
    # An empty path cannot be created; os.makedirs('') would raise.
    if not f or os.path.exists(f):
        return
    try:
        os.makedirs(f)
    except OSError:
        # The directory may have appeared between the existence check and the
        # creation call; only re-raise when it is really missing.
        if not os.path.isdir(f):
            raise
#########################################################################

## 2)  Configuration
-------------------------

In [None]:
# Month of 2017 to process, as a two-digit string; it is inserted into the
# file-selection pattern and into the output file names.
MONTH_NAME='06' # June

In [None]:
# Observatory key, passed to merra2.observatory_location() and used in the
# output file names and plot titles.
OBS_NAME='ctio'

In [None]:
# Directory that holds the MERRA-2 input files (.nc4).
# Earlier data locations, kept for reference:
#HDFEOS_ZOO_DIR="/Volumes/DAGORETBACK/MERRA-2/inst1_2d_asm_Nx_M2I1NXASM/2016"
#HDFEOS_ZOO_DIR="/Volumes/LaCie2/DATA/MERRA-2/inst1_2d_asm_Nx_M2I1NXASM"
HDFEOS_ZOO_DIR="/sps/lsst/data/AtmosphericCalibration/MERRA-2/May-Jun-2017/subset_M2I1NXASM_V5.12.4_20180424_201411"

In [None]:
# Short alias for the input directory, used when listing and joining file names
path=HDFEOS_ZOO_DIR

In [None]:
# Name of the MERRA-2 data collection; it is only used to build the output file names
DATA_NAME =  'inst1_2d_asm_Nx_M2I1NXASM'   # collection name (ozone, PWV and pressure are all read from it)

In [None]:
# Name of the CSV output file, built from the collection, observatory and month
pandas_filename = 'MERRA2_2017_{}_{}_{}.csv'.format(DATA_NAME, OBS_NAME, MONTH_NAME)

In [None]:
# Name of the FITS output file, built from the collection, observatory and month
fits_filename = 'MERRA2_2017_{}_{}_{}.fits'.format(DATA_NAME, OBS_NAME, MONTH_NAME)

In [None]:
# Names of the saved figures (one per plotted quantity), sharing a common stem
_figfile_stem = 'GMAO_MERRA2_2017_{}_{}_{}'.format(DATA_NAME, OBS_NAME, MONTH_NAME)
figfile_ozone = _figfile_stem + '_ozone.jpg'
figfile_pwv = _figfile_stem + '_pwv.jpg'
figfile_ps = _figfile_stem + '_ps.jpg'

In [None]:
# Look up the location of the selected observatory in the helper library.
# loc[0] and loc[1] are later passed to merra2.GetBinIndex — presumably
# longitude and latitude; TODO confirm against libGMAOMERRA2Data.
loc=merra2.observatory_location(OBS_NAME)

In [None]:
loc

### 2.2) Getting the list of the files
------------------------------

In [None]:
# Keep only the entries of the input directory whose name ends with '.nc4'
nc4_files = [entry for entry in os.listdir(path) if entry.endswith('.nc4')]

In [None]:
nc4_files[:5]

### 2.3) Select files of a given month

In [None]:
# Regular expression selecting the files of the chosen month of 2017.
# The dots are escaped so that they match only a literal '.' in the file name
# instead of any character.
keysel_filename = r'^MERRA2_400\.inst1_2d_asm_Nx\.2017{}.*'.format(MONTH_NAME)

In [None]:
# Show the selection pattern; the single-argument call prints the same text
# under both Python 2 and Python 3.
print('Selection key {}'.format(keysel_filename))

In [None]:
# Keep the file names in which the month-selection pattern is found,
# and store them as a numpy array.
_month_pattern = re.compile(keysel_filename)
nc4_files2 = np.array(
    [candidate for candidate in nc4_files if _month_pattern.search(candidate)]
)

In [None]:
nc4_files2

### 2.4) Sort files by increasing time

In [None]:
# Sort the selected file names.
# NOTE(review): this presumably yields chronological order because the date is
# embedded in each file name — confirm against the actual names.
nc4_files=np.sort(nc4_files2)

### 2.5) Build the full filename before reading

In [None]:
# Prefix every selected file name with the input directory
full_nc4files = [os.path.join(path, name) for name in nc4_files]
NBFILES = len(full_nc4files)  # number of files to read

## 3)  Extract data and write them into pandas dataset and time series
-------------------------------------------------------------------------------------

- This is probably the simplest and most reliable way to extract the data while avoiding errors.

In [None]:
# Read every selected file, extract ozone (TO3), precipitable water vapor (TQV)
# and pressure (PS) at the grid cell of the observatory, and collect them as
# per-file time series and dataframes indexed by hourly timestamps.
to3_timeseries = []  # per-file time series for ozone
pwv_timeseries = []  # per-file time series for precipitable water vapor
ps_timeseries = []   # per-file time series for pressure

df_inst1_2d_asm_Nx = []  # per-file dataframes holding all three quantities

for nc4_file in full_nc4files:

    # Retrieve the 1D coordinate axes; each call returns (array, unit, name)
    (m_lat, m_un_lat, m_nm_lat) = merra2.Get1DData(nc4_file, 'lat')  # latitude
    m_latitude = m_lat[:]
    (m_lon, m_un_lon, m_nm_lon) = merra2.Get1DData(nc4_file, 'lon')  # longitude
    m_longitude = m_lon[:]
    (m_tim, m_un_tim, m_nm_tim) = merra2.Get1DData(nc4_file, 'time')  # time
    m_time = m_tim[:]

    NbDataPerFile = m_time.shape[0]  # number of data samples per file

    # The time unit has the form "minutes since <start date/time>"; the
    # captured remainder is used as the first timestamp of the file.
    start_time = re.findall(r"^minutes since[ ]([0-9.].+[0-9.].+[0-9.].+)", m_un_tim)
    if not start_time:
        # Fail with an explicit message instead of an IndexError on start_time[0]
        raise ValueError(
            "Cannot parse the start time from time unit {!r} in file {}".format(
                m_un_tim, nc4_file))

    time_rng = pd.date_range(start_time[0], periods=NbDataPerFile, freq='H')  # one sample per hour

    # Single-argument print calls give the same output under Python 2 and 3
    print('---------------------------------------------')
    print('start_time =  {}'.format(start_time))
    print('time_rng   =  {}'.format(time_rng[:5]))

    # Build the longitude/latitude mesh grid and find the bin of the site
    m_X, m_Y = np.meshgrid(m_longitude, m_latitude)
    (sel_long, sel_lat) = merra2.GetBinIndex(m_X, m_Y, loc[0], loc[1])

    # Retrieve ozone. The 3D array is indexed below as
    # (time, latitude bin, longitude bin).
    (m_to3_data, m_to3_unit, m_to3_longname) = merra2.GetGeoRefData(nc4_file, 'TO3')
    to3 = m_to3_data[:, sel_lat, sel_long]
    ts = pd.Series(to3, index=time_rng)
    to3_timeseries.append(ts)

    # Retrieve PWV
    (m_pwv_data, m_pwv_unit, m_pwv_longname) = merra2.GetGeoRefData(nc4_file, 'TQV')
    pwv = m_pwv_data[:, sel_lat, sel_long]
    ts = pd.Series(pwv, index=time_rng)
    pwv_timeseries.append(ts)

    # Retrieve pressure
    (m_ps_data, m_ps_unit, m_ps_longname) = merra2.GetGeoRefData(nc4_file, 'PS')
    ps = m_ps_data[:, sel_lat, sel_long]
    ts = pd.Series(ps, index=time_rng)
    ps_timeseries.append(ts)

    # Create the dataframe for this file
    df = pd.DataFrame({'ozone': to3, 'pwv': pwv, 'ps': ps}, index=time_rng)
    df_inst1_2d_asm_Nx.append(df)
    

## 4) Plot the time dependence of time series and dataset

####  Fill information on years and months from the filenames

In [None]:
# Concatenate the per-file ozone series into a single series
result = pd.concat(to3_timeseries)

In [None]:
# Quick-look plot of the concatenated ozone series
fig_to3 = plt.figure(figsize=(20, 3))
ax_to3 = result.plot(grid=True, color='blue', linewidth=2)
ax_to3.set_xlabel('time')
ax_to3.set_ylabel('ozone')

In [None]:
# Concatenate the per-file PWV series into a single series (reuses the name `result`)
result = pd.concat(pwv_timeseries)

In [None]:
# Quick-look plot of the concatenated PWV series
fig_pwv = plt.figure(figsize=(20, 3))
ax_pwv = result.plot(color='blue', linewidth=2)
ax_pwv.set_xlabel('time')
ax_pwv.set_ylabel('pwv')
ax_pwv.grid(True)

In [None]:
# Concatenate the per-file dataframes into one dataframe with columns 'ozone', 'pwv' and 'ps'
df_inst1_2d_asm_Nx_tot=pd.concat(df_inst1_2d_asm_Nx)

In [None]:
df_inst1_2d_asm_Nx_tot.info()

In [None]:
# Plot the hourly ozone column at the observatory and save it to figfile_ozone
plt.figure(figsize=(20,5))
df_inst1_2d_asm_Nx_tot['ozone'].plot(color='blue',linewidth=2)
plt.xlabel('time')
plt.ylabel('ozone (Dobsons)')
title= "Hourly ozone column depth at {} in 2017/{}".format(OBS_NAME,MONTH_NAME)
plt.title(title,fontsize=15)
plt.suptitle('NASA GMAO GES (MERRA-2)', y=1.02, fontsize=13)
plt.grid(True)
# The suptitle is placed at y=1.02, i.e. above the figure canvas; a tight
# bounding box is required so that it is not cropped out of the saved image.
plt.savefig(figfile_ozone, bbox_inches='tight')


In [None]:
# Plot the hourly pressure at the observatory and save it to figfile_ps
plt.figure(figsize=(20,5))
df_inst1_2d_asm_Nx_tot['ps'].plot(color='blue',linewidth=2)
plt.xlabel('time')
plt.ylabel('pressure (Pa)')
title= "Hourly Pressure  at {} in 2017/{}".format(OBS_NAME,MONTH_NAME)
plt.title(title,fontsize=15)
plt.suptitle('NASA GMAO GES (MERRA-2)', y=1.02, fontsize=13)
plt.grid(True)
# The suptitle is placed at y=1.02, i.e. above the figure canvas; a tight
# bounding box is required so that it is not cropped out of the saved image.
plt.savefig(figfile_ps, bbox_inches='tight')

In [None]:
# Plot the hourly precipitable water vapor at the observatory and save it to figfile_pwv
plt.figure(figsize=(20,5))
df_inst1_2d_asm_Nx_tot['pwv'].plot(color='blue',linewidth=2)
plt.xlabel('time')
plt.ylabel('pwv (kg/m2)')
title= "Hourly PWV column depth at {} in 2017/{}".format(OBS_NAME,MONTH_NAME)
plt.title(title,fontsize=15)
plt.suptitle('NASA GMAO GES (MERRA-2)', y=1.02, fontsize=13)
plt.grid(True)
# The suptitle is placed at y=1.02, i.e. above the figure canvas; a tight
# bounding box is required so that it is not cropped out of the saved image.
plt.savefig(figfile_pwv, bbox_inches='tight')

In [None]:
# Label the timestamp index 'time', then show summary statistics of the columns
df_inst1_2d_asm_Nx_tot.index.name='time'
df_inst1_2d_asm_Nx_tot.describe()

## 5)  Save dataset  in file pandas (csv)
----------------------------------------

In [None]:
# Short alias for the full dataframe (same object, not a copy)
dataset=df_inst1_2d_asm_Nx_tot

In [None]:
# Label the index 'time' (the same label was already set on this object in section 4)
dataset.index.name='time'

In [None]:
dataset.describe()

In [None]:
dataset.head()

In [None]:
# Write the dataframe, including its 'time' index, to the CSV file
dataset.to_csv(pandas_filename)

In [None]:
# Read the CSV back. No index_col is given, so 'time' comes back as an ordinary
# column and the dataframe gets a default integer index.
saved_dataset=pd.read_csv(pandas_filename)

In [None]:
saved_dataset.head()

## 6) Convert dataset into a table and then save in a fits file
--------------------------------------------------------------------------

In [None]:
# Convert the re-read dataframe (where 'time' is a regular column) into an astropy Table
table = Table.from_pandas(saved_dataset)

In [None]:
table

In [None]:
# Write the table to the FITS file, replacing any existing file of that name
table.write(fits_filename,format='fits',overwrite=True)