In [1]:
####################################################################################################
# CR.Screening.AOD_WV_Precip_Tau.ipynb
# Karl Lapo September/2015
####################################################################################################
# Aggregating data according to AOD, WV, Precip, and Tau. Figures for CalRad Paper
####################################################################################################

# must insert this statement to render the plots within the notebook
# this is specific to the ipython notebook
%matplotlib inline

## Import statements
# netcdf/numpy/xray/stats
import numpy as np
from datetime import datetime, timedelta
import pandas as pd
import xray
import kray
from scipy import interpolate as interp
from scipy.stats.stats import pearsonr
from scipy import stats

# OS interaction
import sys, pickle, os

# import subplots function for plotting
import seaborn as sns
import matplotlib
from matplotlib.pyplot import subplots
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.basemap import Basemap
import kgraph

# Solargeometry
import solargeo

## Directory listing
# Root directories default to the original author's layout. Each one can be
# overridden with an environment variable so the notebook can run on another
# machine without editing this cell.
dir_data = os.environ.get(
    'CALRAD_DIR_DATA',
    '/Users/karllapo/gdrive/SnowHydrology/proj/CloudClimatology/data')
dir_print = os.environ.get(
    'CALRAD_DIR_PRINT',
    '/Users/karllapo/gdrive/SnowHydrology/proj/CloudClimatology/Graphics')

# List of sub-directory names for each data set.
# NOTE: each name starts with '/' and is appended to dir_data with '+',
# so do not combine them with os.path.join (a leading '/' would discard dir_data).
dir_NLDAS = '/NLDAS'
dir_SYN = '/CERES_SYN'
dir_grobs = '/GroundObs'
dir_VIC = '/VIC_MTCLIM'
dir_WRF = '/WRF'
dir_AN = '/aeronet'

# Directory for basemap pickle files (lives under dir_data unless overridden)
dir_bmap = os.environ.get('CALRAD_DIR_BMAP', dir_data + '/basemap')

In [2]:
##### Load daily data
# Every product sits in its own sub-directory of dir_data. The working
# directory is moved there and the file is then opened by its bare name.

## NLDAS
os.chdir(dir_data + dir_NLDAS)
nldas = xray.open_dataset('CA.NLDAS.irrad.daily.nc')
nldas = nldas.rename({'DLWRF_110_SFC': 'LWdwn', 'DSWRF_110_SFC': 'SWdwn'})
# Shortwave values above 2000 are replaced by NaN, in place
# (presumably a fill/missing-value sentinel -- TODO confirm).
sw_nldas = nldas.SWdwn.values
sw_nldas[sw_nldas > 2000] = np.nan

## SYN
os.chdir(dir_data + dir_SYN)
syn = xray.open_dataset('CA.SYN.irrad.daily.nc')
# No flipping -- SYN reformatted on 08/20/15 for CR.Composite.
# OTHER SCRIPTS WILL NEED THIS FIX

## VIC (MTCLIM)
os.chdir(dir_data + dir_VIC)
mtclim = xray.open_dataset('CA.MTCLIM.irrad.daily.nc')

## WRF
os.chdir(dir_data + dir_WRF)
# Coordinate names are aligned with the other products on load.
wrf = xray.open_dataset('CA.WRF.irrad.daily.nc').rename(
    {'lon': 'longitude', 'lat': 'latitude'})

In [3]:
##### Process daily ground observations
os.chdir(dir_data+dir_grobs)
grobs = xray.open_dataset('CA.grobs_combined.daily.xray.nc')

# Reformat names/coordinates
# Daily means; a daily-mean shortwave of exactly 0 is treated as missing.
grobs = grobs.resample(freq='D', dim='time', how='mean')
grobs.SWdwn.values[grobs.SWdwn.values == 0] = np.nan
grobs = grobs.rename({'lon':'longitude','lat':'latitude'})
# Flip the longitude sign (presumably stored as positive degrees west -- TODO confirm)
grobs.longitude.values = -grobs.longitude.values

# Conversion from python 2.7 to 3.5 caused the xarray strings to become byte literals.
# Decode only values that really are bytes, so this cell also works when the
# labels are already str (an unconditional .decode would raise AttributeError).
grobs.network.values = [x.decode('utf-8') if isinstance(x, bytes) else x
                        for x in grobs.network.values]
grobs.station.values = [x.decode('utf-8') if isinstance(x, bytes) else x
                        for x in grobs.station.values]

## Remove stations outside study domain
# Corners of the ragged (slanted-sided) bounding box, each given as [lon, lat]
LL_rag = [-120,34.5]
LR_rag = [-115,34.5]
UR_rag = [-118.5,41]
UL_rag = [-123.5,41]

# Station lat/lon
stat_lat = grobs.latitude.values
stat_lon = grobs.longitude.values

# Western and eastern edges as straight lines, lat = m*lon + b
line_west_m = (UL_rag[1]-LL_rag[1])/(UL_rag[0]-LL_rag[0])
line_west_b = LL_rag[1]-line_west_m*LL_rag[0]
line_east_m = (UR_rag[1]-LR_rag[1])/(UR_rag[0]-LR_rag[0])
line_east_b = LR_rag[1]-line_east_m*LR_rag[0]

# Longitude of each edge at every station's latitude
west_edge_lon = (stat_lat - line_west_b)/line_west_m
east_edge_lon = (stat_lat - line_east_b)/line_east_m

# A station is kept when it lies strictly between the two edges and
# strictly between the southern and northern latitudes of the box.
inside_domain = (stat_lon > west_edge_lon) & \
                (stat_lon < east_edge_lon) & \
                (stat_lat > LR_rag[1]) & (stat_lat < UL_rag[1])
ind = np.nonzero(inside_domain)

# Reindex onto the retained stations only
stat_to_keep = grobs.station[ind]
grobs = grobs.reindex(station=stat_to_keep)

## Append station group information
# Supporting station summary table, indexed by its first column.
# The file name is relative to the current working directory.
stdat = pd.read_csv('All_StationSummary.v2.csv', sep=',',
                    index_col=0, na_values=[-9999, 'NaN'])

# station data -> xray structure holding only coordinates
station_coords = {'station': (['station'], stdat.index),
                  'Grouping': (['station'], stdat.Grouping)}
stdat = xray.Dataset(coords=station_coords)

# Merge into the ground observation structure (inner join),
# then rename the observed shortwave variable to 'grobs'.
grobs = grobs.merge(stdat, join='inner')
grobs = grobs.rename({'SWdwn': 'grobs'})

# Drop the stations flagged as bad.
# (Original note: "Removed bad stations, but netcdf no longer contains these stations")
bad_stations = ['smj','Pacific_Grove','Woodland']
grobs = grobs.drop(bad_stations, dim='station')

In [4]:
#### NaN out months in which fewer than 90% of days have an observation
# True where a daily observation exists
obs_present = ~np.isnan(grobs.grobs)
# Monthly mean of that flag, i.e. the fraction of days observed in each month
numdays_permonth = obs_present.resample(freq='M', dim='time', how='mean', label='right')
# Put the monthly fraction back on the daily time axis (back-filled)
numdays_permonth = numdays_permonth.reindex(time=grobs.time, method='bfill')
# Blank every day whose monthly fraction is below 0.9
too_sparse = numdays_permonth.values < .9
grobs.grobs.values[too_sparse] = np.nan

In [5]:
#### Combine
## Dictionary holding every irradiance dataset, keyed by short product name
daily_mean = {
    'syn': syn,
    'nldas': nldas,
    'mtclim': mtclim,
    'wrf': wrf,
    'grobs': grobs,
}

## Names
# Gridded product names (the ground observations are deliberately not listed)
pr_names = ['mtclim','nldas','syn','wrf']
num_products = len(pr_names)
# Station group labels
grouping = ['north cv','south cv','west of crest','foothills','east of crest']

In [6]:
####################################################
## Find grid point containing each ground station ##
####################################################
# Station lat and lon
lon_stat = grobs.longitude.values
lat_stat = grobs.latitude.values

for pr in pr_names:
    # The ground observations themselves are never matched against a grid
    if pr == 'grobs':
        continue

    # Product grid coordinates, expanded to 2-D (rows follow lat_rad, columns lon_rad)
    lon_rad = daily_mean[pr].longitude.values
    lat_rad = daily_mean[pr].latitude.values
    lonm, latm = np.meshgrid(lon_rad,lat_rad)

    # Product shortwave array, indexed below as [time, row, column]
    sw_grid = daily_mean[pr].SWdwn.values

    # Container for one column per station; every column is filled below
    to_merge = np.empty((daily_mean[pr].time.size,grobs.station.size))

    ## Product values in each grid cell containing a station
    for stat in grobs.station.values:

        # Position of this station along the station axis
        stat_ind = np.where(stat == grobs.station.values)
        # Squared lat/lon separation from the station to every grid cell
        d = (latm-lat_stat[stat_ind])**2 + (lonm-lon_stat[stat_ind])**2
        # Closest grid cell (the first one when several tie)
        dind = np.where(d==np.amin(d))
        row, col = dind[0][0], dind[1][0]

        # If the closest cell is all zero or all NaN, use the cell one row
        # further along the first grid axis instead.
        nearest_sw = sw_grid[:,row,col]
        if (nearest_sw == 0).all() | np.isnan(nearest_sw).all():
            row = row+1
        to_merge[:,stat_ind[0]] = sw_grid[:,row,col,np.newaxis]

    ## Merge products w/ grobs xray structure
    to_merge_ds = xray.Dataset({pr:(('time','station'),to_merge), \
                                    'time':daily_mean[pr].time.values,\
                                    'station':grobs.station.values})
    grobs = grobs.merge(to_merge_ds)

In [7]:
#### Mean bias for each group
grouping_var = 'Grouping'
grobs_diff = xray.Dataset()
for pr in pr_names:
    # Product minus ground observation at every station
    station_bias = grobs[pr]-grobs['grobs']
    # Index by group label instead of station name, then average within each group
    bias_by_group = station_bias.swap_dims({'station':'Grouping'})
    grdiff = kray.group_mean(bias_by_group,grouping_var,grouping)
    # Stack the per-group results along a 'Grouping' dimension
    grobs_diff[pr] = kray.combinevars(grdiff,grouping,new_dim_name='Grouping',combinevarname=pr)

##   Transmissivity (products and ground obs.)

In [9]:
#### Elevation Angle
# Time axis of the ground observations as pandas datetimes
d = pd.to_datetime(grobs.time.values)

# One variable per station, each computed by solargeo.avg_el for that
# station's latitude and longitude.
el = xray.Dataset()
for stat in grobs.station.values:
    stat_obs = grobs.loc[{'station':stat}]
    el[stat] = ('time', solargeo.avg_el(d,
                                        stat_obs.latitude.values,
                                        stat_obs.longitude.values,
                                        ref='END'))

# Collapse the per-station variables into one array with a 'station' dimension
el = kray.combinevars(el,el.data_vars,new_dim_name='station',combinevarname='el')
el.coords['time'] = d

# Add to xray Dataset
grobs['el'] = (('station','time'),el)

FutureWarning: how in .resample() is deprecated; the new syntax is .resample(...).mean()


In [10]:
## Transmissivity
# Observed shortwave divided by 1365 times the sine of the elevation angle.
# The elevation angle is converted from degrees to radians first.
# (1365 is presumably the solar constant in W m-2 -- TODO confirm.)
sin_el = np.sin(grobs.el*np.pi/180)
tau = grobs.grobs/(sin_el*1365)

In [11]:
#### Monthly anomaly
# Settings passed to kray.group_anom
grouping_var = 'Grouping'
var = 'tau'
period = 'month'
# Transmissivity indexed by group label rather than station name
ds_in = tau.swap_dims({'station':'Grouping'})

# Group anomalies, then averaged onto a monthly time axis
anom_mon = kray.group_anom(ds_in,grouping_var,grouping,var,period)
anom_mon = anom_mon.resample(freq='M',dim='time',how='mean')

In [13]:
#### Daily anomaly
# Settings passed to kray.group_anom
grouping_var = 'Grouping'
var = 'tau'
period = 'day'
# Transmissivity indexed by group label rather than station name
ds_in = tau.swap_dims({'station':'Grouping'})

# Group anomalies, left on the daily time axis
anom_day = kray.group_anom(ds_in,grouping_var,grouping,var,period)

##     Auxiliary data @ Ground Observations

In [18]:
#### Precip obs @ CIMIS stations
# Load
os.chdir(dir_data+dir_grobs)
precip = xray.open_dataset('CA.CIMIS.Precip.daily.xray.nc')

# Conversion from python 2.7 to 3.5 caused the xarray strings to become byte literals.
# Decode only values that really are bytes, so this cell also works when the
# labels are already str (an unconditional .decode would raise AttributeError).
precip.network.values = [x.decode('utf-8') if isinstance(x, bytes) else x
                         for x in precip.network.values]
precip.station.values = [x.decode('utf-8') if isinstance(x, bytes) else x
                         for x in precip.station.values]

# Format: keep only the Precip variable, as daily means
precip = precip.Precip
precip = precip.resample(freq='D', dim='time', how='mean')

# Add to xray Dataset (aligned to the coordinates of grobs first)
precip = precip.reindex_like(grobs)
grobs['precip'] = (('station','time'),precip)

#### Aggregate to grouping variables
ds_in = grobs.precip.swap_dims({'station':'Grouping'})
grouping_var = 'Grouping'

# NOTE: 'precip' is reused here for the group-mean result
precip = kray.group_mean(ds_in,grouping_var,grouping)
precip = kray.combinevars(precip,grouping,new_dim_name='Grouping',combinevarname='precip')

## Add to anom xray data structure
anom_day['precip'] = (('Grouping','time'),precip)
grobs_diff['precip'] = (('Grouping','time'),precip)

In [19]:
##### total aerosol optical depth
def parse(str1, str2):
    """Build a datetime from separate date and time strings.

    Parameters
    ----------
    str1 : str
        Date formatted as ``dd:mm:yyyy``.
    str2 : str
        Time formatted as ``HH:MM:SS``.

    Returns
    -------
    datetime.datetime
        The combined timestamp.

    Raises
    ------
    ValueError
        If the joined string does not match ``%d:%m:%Y_%H:%M:%S``.
    """
    # The two fields are joined with an underscore so one format string parses both
    stamp = str1 + '_' + str2
    return datetime.strptime(stamp, "%d:%m:%Y_%H:%M:%S")

# Read the AERONET file holding total aerosol optical depth
os.chdir(dir_data+dir_AN)
fid = '020101_121231_Fresno.ONEILL_20'
header_prefix = 'Date(dd:mm:yyyy)'
with open(fid, 'r') as datafile:
    # Skip the header of arbitrary size and stop on the column-name row.
    # readline() returns '' at end of file, so test for that as well;
    # otherwise a file without the expected row would loop forever.
    line = datafile.readline()
    while line and not line.startswith(header_prefix):
        line = datafile.readline()
    if not line:
        raise ValueError("No column-name row starting with '%s' found in %s"
                         % (header_prefix, fid))

    # Column names: the last header field is renamed and four more names are
    # appended (presumably the data rows carry more fields than the header
    # names -- TODO confirm against the file).
    col_names = line.replace('\n','').split(',')
    col_names[-1] = 'wavelength-1'
    col_names.extend(['wavelength-2', 'wavelength-3', 'wavelength-4', 'wavelength-5'])

    # The rest of the file is the data table. The first two columns (date, time)
    # are combined by parse() into the 'Datetime' index.
    data = pd.read_csv(datafile, names=col_names, sep= ',',\
                       parse_dates={'Datetime' : [0,1]},date_parser=parse,\
                       index_col='Datetime')
aod = data['Total_AOD_500nm[tau_a]']

## Add to xray Dataset
# Align to the ground-observation time axis; times without a record become NaN
aod = aod.reindex(grobs.time.values,fill_value=np.nan)
grobs['aod'] = (('time'),aod)
anom_day['aod'] = (('time'),aod)
grobs_diff['aod'] = (('time'),aod)

In [20]:
##### water vapor
# Set the working directory explicitly so this cell does not depend on the
# directory left behind by the aerosol-optical-depth cell.
os.chdir(dir_data+dir_AN)
fid = '020101_121231_Fresno.lev20'
header_prefix = 'Date(dd-mm-yy)'
with open(fid, 'r') as datafile:
    # Skip the header of arbitrary size and stop on the column-name row.
    # readline() returns '' at end of file. A missing row is reported here
    # rather than letting an arbitrary line be used as the column names.
    line = datafile.readline()
    while line and not line.startswith(header_prefix):
        line = datafile.readline()
    if not line:
        raise ValueError("No column-name row starting with '%s' found in %s"
                         % (header_prefix, fid))
    col_names = line.replace('\n','').split(',')

    # The rest of the file is the data table. The first two columns (date, time)
    # are combined by parse() into the 'Datetime' index.
    data = pd.read_csv(datafile, names=col_names, sep= ',',\
                       parse_dates={'Datetime' : [0,1]},date_parser=parse,\
                       index_col='Datetime')
wv = data['Water(cm)']

## Add to xray Dataset
# Align to the ground-observation time axis; times without a record become NaN
wv = wv.reindex(grobs.time.values,fill_value=np.nan)
grobs['wv'] = (('time'),wv)
anom_day['wv'] = (('time'),wv)
grobs_diff['wv'] = (('time'),wv)

In [26]:
#### Save formatted data for later use in other scripts
# Everything is written into dir_data under fixed file names.
os.chdir(dir_data)
outputs = [
    (grobs, 'grobs.daily.nc'),
    (grobs_diff, 'grobs_bias.daily.nc'),
    (anom_day, 'tau_anom.daily.nc'),
    (anom_mon, 'tau_anom.monthly.nc'),
    # tau is a single array, so it is wrapped in a Dataset before writing
    (tau.to_dataset(name='tau'), 'tau.daily.nc'),
]
for out_ds, out_name in outputs:
    out_ds.to_netcdf(out_name)