In [1]:
## Networks for import -- CIMIS and SIO/CDWR
# Processing-mode switch read by the ground-observation cell further down:
#   0 -> read the raw station files, combine them and write the netCDF output
#   1 -> take the "load previously formatted data" branch, which currently
#        only prints a placeholder message
flag_xray_proc = 0

## Import statements
import numpy as np
import xray
import pandas as pd
from netCDF4 import Dataset
from netCDF4 import num2date, date2num
from datetime import datetime, timedelta
import pytz

# OS interaction
import sys
import os

In [2]:
## Directory Lists
# Every directory below hangs off one Google Drive root. The root can be
# overridden with the CLOUDCLIM_GDRIVE_ROOT environment variable so the
# notebook can run on another machine; when the variable is unset the
# original absolute paths are produced unchanged.
dir_gdrive_root = os.environ.get('CLOUDCLIM_GDRIVE_ROOT', '/Users/karllapo/gdrive')
dir_project = os.path.join(dir_gdrive_root, 'SnowHydrology/proj/CloudClimatology')
dir_cv_raw = os.path.join(dir_gdrive_root, 'GroundObs/CA_CentralValley.UCDavis/RAW')

# General directories
dir_data_out = os.path.join(dir_project, 'data/GroundObs')  # station summary csv + netCDF output
dir_print = os.path.join(dir_project, 'Graphics')

# Individual data directories
dir_cv_cimis_ucipm = os.path.join(dir_cv_raw, 'CIMIS/UCDavis_IPM.Daily')
dir_cv_cimis = os.path.join(dir_cv_raw, 'CIMIS/CIMIS.Hourly')
dir_sio_cdwr = os.path.join(dir_data_out, 'YOS.SIO.Obs')
# dir_cv_pestcast = os.path.join(dir_cv_raw, 'PestCast')

In [5]:
###########################################
## Ground Obs - CIMIS, UC IPM & SIO-CDWR ##
###########################################
# Builds `grobs` (dict: station name -> one-column 'SWdwn' DataFrame) from the
# CIMIS/UC IPM daily files and the SIO/CDWR QC'd radiation files, stacks every
# station that has an entry in the station summary into the xray Dataset `ds`,
# and writes `ds` to netCDF in `dir_data_out`.
#
# Input files are opened by full path instead of relying on the current working
# directory, so the order of the directory changes can no longer make a read
# resolve against the wrong folder.
if flag_xray_proc == 1:
    # Load previously formatted data (in xray format)
#     os.chdir(dir_data_out)
#     with xray.open_dataset('cimis_ucipm.obs.daily.xray.nc') as cimis_ucipm_daily:
#         print(cimis_ucipm_daily.keys())
    print('to be updated!')
elif flag_xray_proc == 0:

    # Per-station metadata, appended in processing order (parallel lists)
    stations = []
    lat = []
    lon = []
    elev = []
    network = []
    # Station name -> DataFrame with a single 'SWdwn' column
    grobs = {}

    # time zone variables
    tz_pst = pytz.timezone('US/Pacific')

    # Supporting station information, read once and split by network below
    stdat_all = pd.read_csv(os.path.join(dir_data_out, 'All_StationSummary.v2.csv'), sep=',',
                            index_col=0, na_values=[-9999, 'NaN'])

    ###########################
    ##### READ CIMIS DATA #####
    ###########################
    stdat = stdat_all.groupby('Network').get_group('CIMIS')

    # Files to read
    content = os.listdir(dir_cv_cimis_ucipm)
    # Not used in this cell; kept in case a later cell reads it.
    num_files = len([name for name in content
                     if os.path.isfile(os.path.join(dir_cv_cimis_ucipm, name))])

    for files in content:
        # Only read .txt files
        if not files.endswith('.txt'):
            continue

        with open(os.path.join(dir_cv_cimis_ucipm, files), 'r') as datafile:
            # Skip the header of arbitrary size and read the column names.
            # readline() returns '' at end of file, so stop there instead of
            # looping forever on a file that has no "Station" header line.
            line = datafile.readline()
            while line and not line.startswith('"Station"'):
                line = datafile.readline()
            if not line:
                print(("File: "+files+" has no \"Station\" header line, skipped"))
                continue

            ## format the header line for passing to 'read_csv'
            col_names = line.replace('\n', '').replace('"', '').split(',')
            # Columns 1 and 2 are merged into the 'Datetime' index; the 'Time'
            # column is overwritten with '2359' for every row by the converter.
            data = pd.read_csv(datafile, names=col_names, sep= ',', parse_dates={'Datetime' : [1,2]},
                               index_col='Datetime',skipinitialspace=True,
                               converters={'Time': lambda x: str('2359')})

        if data.empty:
            print(("File: "+files+" contains no data rows, skipped"))
            continue
        data.index = data.index.tz_localize(tz_pst)

        ## Read SW data on the US/Pacific index (no QC filtering is applied here)
        # The station name is the first 'Station' entry minus its last two
        # characters; .iloc makes the positional lookup explicit on the
        # datetime-indexed column.
        sitename = data['Station'].iloc[0][0:-2]
        grobs[sitename] = pd.DataFrame(data['Solar'])
        grobs[sitename].columns = ['SWdwn']

        ## Fill in elevation/lat/lon
        if sitename in stdat.index:
            print(("Processed Site: "+sitename))
            site_info = stdat.loc[sitename]
            elev.append(site_info['elevation (m)'])
            lat.append(site_info['lat'])
            lon.append(site_info['lon'])
            stations.append(sitename)
            network.append('CIMIS_IPM')
        else:
            # The site stays in `grobs` but is left out of `stations`, so it is
            # not part of the combined dataset.
            print(("Site: "+sitename+" is missing from master list"))

    #########################
    ##### READ SIO DATA #####
    #########################
    stdat = stdat_all.groupby('Network').get_group('CDWR')

    content = os.listdir(dir_sio_cdwr)
    # Not used in this cell; kept in case a later cell reads it.
    num_files = len([name for name in content
                     if os.path.isfile(os.path.join(dir_sio_cdwr, name))])

    na_value = ['   NaN']
    for files in content:
        # Only read QC formatted files
        if not files.endswith('Rad.QC.txt'):
            continue
        sitename = files.split('.')[0]

        # Read SW data (index localized as UTC, then converted to US/Pacific)
        # and keep the SW values whose QC flag is 0
        grobs_yos = pd.read_csv(os.path.join(dir_sio_cdwr, files), sep= '\t', parse_dates=True,
                                index_col=0, na_values=na_value)
        grobs_yos.index = grobs_yos.index.tz_localize(pytz.utc).tz_convert(tz_pst)
        grobs_yos['SWdwn_QC'] = grobs_yos['SWdwn_Wm^-2'].where(grobs_yos['QCFlag'] == 0)

        # New data frame w/ daily means
        # NOTE(review): `how=` is the legacy pandas resample API; newer pandas
        # releases need `.resample('D').mean()` instead -- confirm the target version.
        grobs_yos_daily = grobs_yos['SWdwn_Wm^-2'].resample('D', how='mean').to_frame(name='SWdwn_D')
        grobs_yos_daily['SWdwn_D_QC'] = grobs_yos['SWdwn_QC'].resample('D', how='mean')
        # 'SWdwn_proc' is not created in this cell; presumably it is a column
        # of the QC file -- TODO confirm.
        grobs_yos_daily['SWdwn_D_proc'] = grobs_yos['SWdwn_proc'].resample('D', how='mean')

        # Dictionary entry holds the daily, processed series only
        grobs[sitename] = pd.DataFrame(grobs_yos_daily['SWdwn_D_proc'])
        grobs[sitename].columns = ['SWdwn']

        # Fill in elevation/lat/lon/network. All five lists are appended
        # together, and only for sites present in the station summary (same
        # rule as the CIMIS branch), so they can never get out of step.
        if sitename in stdat.index:
            site_info = stdat.loc[sitename]
            elev.append(site_info['elevation (m)'])
            lat.append(site_info['lat'])
            lon.append(site_info['lon'])
            stations.append(sitename)
            network.append('CDWR')
            print(("Processed Site: "+sitename))
        else:
            print(("Site: "+sitename+" is missing from master list"))

    ########################
    ##### COMBINE DATA #####
    ########################
    # Concatenate (only the stations listed in `stations` are pulled from `grobs`)
    grobs_daily = pd.concat(grobs,axis=0,keys=stations)
    grobs_daily = pd.DataFrame(grobs_daily)

    # Convert to xray
    ds = xray.Dataset.from_dataframe(grobs_daily)
    ds = ds.rename({'level_0':'station','level_1':'time'})

    # Fill in descriptive variables.
    # NOTE(review): the order of the 'station' coordinate is taken from the
    # dataset itself rather than assumed to equal the append order of
    # `stations`; the metadata is therefore looked up by station name.
    site_meta = {}
    for name, site_lat, site_lon, site_elev, site_net in zip(stations, lat, lon, elev, network):
        site_meta[name] = {'lat': site_lat, 'lon': site_lon, 'elev': site_elev, 'network': site_net}
    station_order = list(ds['station'].values)
    for field in ('lat', 'lon', 'elev', 'network'):
        ds.coords[field] = ('station', [site_meta[name][field] for name in station_order])

    ## Output to netcdf
    # The working directory is still switched to the output directory, as the
    # original cell did, in case later cells depend on it.
    os.chdir(dir_data_out)
    ds.to_netcdf('CA.grobs_combined.daily.xray.nc')

Processed Site: Alpaugh
Processed Site: ALTURAS
Processed Site: ARVIN
Processed Site: ARYOSECO
Processed Site: ATASCADERO
Processed Site: Auburn
Processed Site: BENNETT_VALLEY
Processed Site: Big_Bear_Lake
Processed Site: BISHOP
Processed Site: Black_Point
Processed Site: BLACKWLL
Processed Site: BLYTHE_NE
Processed Site: Borrego_Springs
Processed Site: BRNTWOOD
Processed Site: BRWNSVLY
Processed Site: BRYTE
Processed Site: BUNTNGVL
Processed Site: CAMARILLO
Processed Site: CAMINO
Processed Site: Carmel
Processed Site: CASTROVL
Processed Site: Coalinga
Processed Site: COLUSA
Processed Site: CONCORD
Processed Site: CUYAMA
Processed Site: DAVIS
Processed Site: Delano
Processed Site: Denair_II
Processed Site: Diamond_Springs
Processed Site: DIXON
Processed Site: DURHAM
Processed Site: Esparto
Processed Site: FAIR_OAKS
Processed Site: FAMOSO
Processed Site: FIREBAGH
Processed Site: Five_Points_SW
Processed Site: FIVE_PTS
Processed Site: FRESNO
Processed Site: GERBER
Processed Site: Gilroy
