In [5]:
%matplotlib inline

import os
import re
import csv
import math
import gzip
import glob
import pickle
import datetime
import warnings
import numpy as np
import netCDF4
import matplotlib.pyplot as pl
from dateutil import parser
from datetime import datetime, timedelta
from matplotlib.dates import MonthLocator, DateFormatter, WeekdayLocator

from plotly import tools
import plotly.offline as py
import plotly.graph_objs as go

warnings.filterwarnings('ignore')

In [6]:
def f_nbin(t, step=1):
    """Return how many bins of width ``step`` span the NaN-ignoring range of ``t``.

    The range (max minus min, NaNs skipped) is divided by ``step``, rounded
    with ``np.round`` and returned as a plain ``int``.
    """
    span = np.nanmax(t) - np.nanmin(t)
    n_bins = np.round(span / step)
    return int(n_bins)

def get_datetime_from_filename(fname):
    """Parse the first run of eight digits in ``fname`` as a ``%Y%m%d`` date.

    Returns a ``datetime`` at midnight of that day. An ``IndexError`` is
    raised when ``fname`` contains no eight-digit run.
    """
    date_tokens = re.findall('[0-9]{8}', fname)
    return datetime.strptime(date_tokens[0], '%Y%m%d')

In [7]:
def msgr_read(flist, max_value):
    """Gather per-file GR minus SR reflectivity difference statistics.

    Each path in ``flist`` is opened with netCDF4 and the variables ``z``,
    ``zbb``, ``bbwidth``, ``ref2``, ``ref3``, ``nrej1``, ``nrej2``, ``ntot1``
    and ``ntot2`` are read. Files whose fourth-from-last path character is
    ``'2'`` (second pass) are skipped. Samples are masked, the differences are
    trimmed to the 5th-95th percentile range, and files whose mean difference
    exceeds ``max_value`` in magnitude are dropped.

    Parameters
    ----------
    flist : iterable of str
        Paths to the NetCDF files; each path must contain an 8-digit date.
    max_value : number
        Largest absolute mean difference that is still accepted.

    Returns
    -------
    tuple
        ``(refl_offset, refl_diff_list, dtime, refl_std, sample_size)`` where
        ``refl_diff_list`` is a list of per-file difference arrays and the
        other four entries are NumPy arrays with one element per kept file.
    """
    # Variable key noted by the original author:
    #   ref1 PR reflectivity
    #   ref2 GR reflectivity
    #   ref3 PR reflec S-band, snow
    #   ref4 PR reflec S-band, hail
    #   ref5 GR reflectivity Ku-band

    offsets = []
    diff_samples = []
    spreads = []
    stamps = []
    counts = []

    for fd in flist:

        # Only the first pass is used; a second pass complicates collating
        # the reflectivity difference samples.
        if fd[-4] == '2':
            continue

        with netCDF4.Dataset(fd, "r") as ncid:
            # date comes from the path, everything else from the file
            date     = parser.parse(re.findall("[0-9]{8}", fd)[0])
            z        = ncid['z'][:]
            zbb      = ncid['zbb'][:]
            zbbw     = ncid['bbwidth'][:]
            ref_GR   = ncid['ref2'][:]  # GR in dB
            ref_SR_S = ncid['ref3'][:]  # SR (Sband/Snow) in dB
            nrej_GR  = ncid['nrej1'][:]
            nrej_SR  = ncid['nrej2'][:]
            ntot_GR  = ncid['ntot1'][:]
            ntot_SR  = ncid['ntot2'][:]

        # Rebuild the matchvol-style rejection mask one criterion at a time
        # (same criteria, same order as a single chained expression).
        pos = np.logical_and(z > (zbb - zbbw/2), z < (zbb + zbbw/2))  # inside zbb +/- half bbwidth
        pos = pos | (np.isnan(ref_GR))
        pos = pos | (np.isnan(ref_SR_S))
        pos = pos | (ref_GR < 21)
        pos = pos | (ref_SR_S < 21)
        pos = pos | (nrej_GR/ntot_GR > 0.3)
        pos = pos | (nrej_SR/ntot_SR > 0.3)

        # nothing survives the mask -> nothing to contribute
        any_kept = np.any(~pos)
        if not any_kept:
            continue

        ref_GR[pos]   = np.nan
        ref_SR_S[pos] = np.nan

        # difference, trimmed to the central 5th-95th percentile band
        ref_diff = ref_GR - ref_SR_S
        upper_cut = np.nanpercentile(ref_diff, 95)
        lower_cut = np.nanpercentile(ref_diff, 5)
        in_band = (ref_diff <= upper_cut) & (ref_diff >= lower_cut)
        ref_diff = ref_diff[in_band]

        # generate stats; skip the file when the offset exceeds the limit
        offset_val = np.nanmean(ref_diff)
        if abs(offset_val) > max_value:
            continue

        offsets.append(offset_val)
        diff_samples.append(ref_diff)
        spreads.append(np.nanstd(ref_diff))
        counts.append(len(ref_diff))
        stamps.append(date)

    # lists -> arrays (the raw difference samples stay a list of arrays)
    return (
        np.array(offsets),
        diff_samples,
        np.array(stamps),
        np.array(spreads),
        np.array(counts),
    )

In [11]:
# Driver cell: for every radar id in `rid_list`, load the manual calibration
# periods, the MSGR comparison statistics and the yearly RCA statistics, then
# write a 5-row plotly HTML figure (row 1: MSGR, rows 2-5: RCA metrics).
msgr_root_path = '/g/data/rq0/level_1a/msgr_out'
rca_root_path  = '/g/data/rq0/level_1a/rca_out'
cal_date_path  = '/g/data/rq0/level_1a/dbz_cal'
out_root_path  = '/g/data/rq0/level_1a/combined_plotly/all'
start_yr       = 1997
end_yr         = 2018
rid_list       = [3,50]#list(range(1,79+1))
max_msgr_value = 10 #used to filter msgr


def _axis_upper(values, floor):
    """Return the largest finite entry of `values`, but never less than `floor`.

    Falls back to `floor` when `values` is empty or has no finite entries, so
    an axis range never ends up with a NaN/inf bound.
    """
    values = np.asarray(values, dtype=float)
    finite = values[np.isfinite(values)]
    if finite.size == 0:
        return floor
    return max(float(finite.max()), floor)


def _rca_daily_trace(name, x, y, samples, colorscale, reversescale, colorbar_y):
    """Build the marker trace for one daily RCA metric, coloured by sample count."""
    return go.Scatter(
        name=name,
        x=x,
        y=y,
        mode='markers',
        marker=dict(
            color=samples,
            colorscale=colorscale,
            cmin=0,
            cmax=100,
            cauto=False,
            reversescale=reversescale,
            colorbar=dict(len=0.2, y=colorbar_y, titleside='right', title='No. Samples'),
            showscale=True,
            size=3
            ),
        )


def _rca_weekly_trace(name, x, y, color):
    """Build the thin line trace for one weekly RCA metric."""
    return go.Scatter(
        name=name,
        x=x,
        y=y,
        line=dict(
            color=color,
            width=1,)
        )


for rid_num in rid_list:

    rid = str(rid_num).zfill(2)

    ###########################################################################
    # CAL DATA LOAD
    ###########################################################################
    print('loading manual calibration data lists')
    #read date change file
    date_fn   = '_'.join([rid, 'caldata.txt'])
    date_ffn  = '/'.join([cal_date_path, date_fn])
    cal_date_plot = os.path.isfile(date_ffn)
    if cal_date_plot:
        cal_starts = []
        cal_ends   = []
        cal_means  = []
        with open(date_ffn, 'r') as f:
            for row in csv.reader(f, delimiter=','):
                #blank or truncated lines carry no calibration period
                if len(row) < 3:
                    continue
                try:
                    period_start = datetime.strptime(row[0].strip(), '%Y%m%d')
                    period_end   = datetime.strptime(row[1].strip(), '%Y%m%d')
                    #keep the mean numeric so it is plotted as a number, not text
                    period_mean  = float(row[2])
                except ValueError:
                    print('skipping malformed calibration row: ', row)
                    continue
                cal_starts.append(period_start)
                cal_ends.append(period_end)
                cal_means.append(period_mean)

        cal_subset_start_list = np.array(cal_starts)
        cal_subset_end_list   = np.array(cal_ends)
        cal_subset_mean       = np.array(cal_means, dtype=float)

    ###########################################################################
    # MSGR DATA LOAD
    ###########################################################################
    print('loading msgr data for', rid)
    #read msgr_data
    msgr_path       = '/'.join([msgr_root_path, rid])
    msgr_file_list  = sorted(glob.glob(msgr_path + '/*.nc'))
    #reset per radar so results from a previous radar can never leak through
    msgr_plot       = False
    msgr_total_mean = np.nan
    if len(msgr_file_list) > 0:
        #process data
        msgr_refl_offset, msgr_diff_list, msgr_dtime, msgr_refl_std, msgr_sample_size = msgr_read(msgr_file_list, max_msgr_value)
        #only plot when at least one file survived the filtering in msgr_read
        msgr_plot = len(msgr_refl_offset) > 0
        if not msgr_plot:
            print('no usable msgr samples found')
    else:
        print('no msgr files found')

    #calc total mean (only meaningful when msgr data was loaded for this radar)
    if msgr_plot:
        msgr_total_mean = np.round(np.nanmean(msgr_refl_offset), decimals = 2)

    ###########################################################################
    # Process MSGR into subsets using cal dates
    ###########################################################################
    #set up plotting arrays; NaN entries separate the line segments
    if msgr_plot and cal_date_plot:
        mean_dt   = []
        mean_db   = []
        breaks_dt = []
        breaks_db = []
        for i in range(len(cal_subset_start_list)):
            #horizontal segment at the calibration mean over the period
            mean_dt.extend([cal_subset_start_list[i], cal_subset_end_list[i], np.nan])
            mean_db.extend([cal_subset_mean[i], cal_subset_mean[i], np.nan])
            #vertical lines at the start and end of the period
            breaks_dt.extend([cal_subset_start_list[i], cal_subset_start_list[i], np.nan, cal_subset_end_list[i], cal_subset_end_list[i], np.nan])
            breaks_db.extend([-100, 100, np.nan, -100, 100, np.nan])
        plt_mean_cal_dt    = np.array(mean_dt, dtype=object)
        plt_mean_cal_db    = np.array(mean_db, dtype=float)
        plt_mean_breaks_dt = np.array(breaks_dt, dtype=object)
        plt_mean_breaks_db = np.array(breaks_db, dtype=float)

    ###########################################################################
    # LOAD RCA AND GENERATE YEAR STATS
    ###########################################################################
    rca_daily_dt         = np.array([])
    rca_daily_median     = np.array([])
    rca_daily_upqrtl     = np.array([])
    rca_daily_95perc     = np.array([])
    rca_daily_smpl       = np.array([])
    rca_daily_vol_95perc = np.array([])

    rca_wk_dt            = np.array([])
    rca_wk_median        = np.array([])
    rca_wk_upqrtl        = np.array([])
    rca_wk_95perc        = np.array([])
    rca_wk_vol_95perc    = np.array([])

    #loop through each year
    for yr in range(start_yr, end_yr+1):

        #read rca
        rca_fn   = '_'.join([rid, str(yr), 'rca_daily.nc'])
        rca_ffn  = '/'.join([rca_root_path, rid, rca_fn])

        if os.path.isfile(rca_ffn):

            with netCDF4.Dataset(rca_ffn, "r") as ncid:
                #load time
                daily_time_var = ncid.variables['daily_time']
                rca_daily_dt   = np.append(rca_daily_dt, netCDF4.num2date(daily_time_var[:], daily_time_var.units))
                wk_time_var    = ncid.variables['weekly_time']
                rca_wk_dt      = np.append(rca_wk_dt, netCDF4.num2date(wk_time_var[:], wk_time_var.units))

                #load daily stats
                rca_daily_median     = np.append(rca_daily_median, ncid['daily_median'][:])
                rca_daily_upqrtl     = np.append(rca_daily_upqrtl, ncid['daily_upqrtl'][:])
                rca_daily_95perc     = np.append(rca_daily_95perc, ncid['daily_95perc'][:])
                rca_daily_smpl       = np.append(rca_daily_smpl, ncid['daily_sample'][:])
                rca_daily_vol_95perc = np.append(rca_daily_vol_95perc, ncid['daily_vol_95perc'][:])

                #load weekly stats
                rca_wk_median     = np.append(rca_wk_median, ncid['wk_median'][:])
                rca_wk_upqrtl     = np.append(rca_wk_upqrtl, ncid['wk_upqrtl'][:])
                rca_wk_95perc     = np.append(rca_wk_95perc, ncid['wk_95perc'][:])
                rca_wk_vol_95perc = np.append(rca_wk_vol_95perc, ncid['wk_vol_95perc'][:])
        else:
            print('no rca file found for year ', yr)
    #check if any data has been loaded
    rca_plot = len(rca_wk_dt) > 0

    ###########################################################################
    # PLOTLY
    ###########################################################################

    #check if there's something to plot (both data sets are required)
    if rca_plot and msgr_plot:
        #update user
        display('Plotting RCA/MSGR/CAL for ' + rid)

        #init plotly
        fig      = tools.make_subplots(rows=5, cols=1, shared_xaxes=True)
        datemin  = np.datetime64(str(start_yr), 'Y')
        datemax  = np.datetime64(str(end_yr), 'Y') + np.timedelta64(1, 'Y')

        ###########################################################################
        # MSGR PLOTLY
        ###########################################################################

        trace_msgr = go.Scatter(
                                name='MSGR plot',
                                x=msgr_dtime,
                                y=np.round(msgr_refl_offset, decimals = 2),
                                mode='markers',
                                marker=dict(
                                    color = msgr_sample_size,
                                    colorscale='YlOrRd',
                                    cmax=1000,
                                    cauto=False,
                                    colorbar=dict(len=0.2, y=0.95, titleside='right',title='No. Samples'),
                                    reversescale=True,
                                    showscale=True,
                                    size=8
                                    ),
                                error_y=dict(
                                    type='data',
                                    symmetric=True,
                                    array=np.round(msgr_refl_std, decimals = 2),
                                    color='#000000',
                                    thickness=1
                                    )
                                )
        if cal_date_plot:
            #per calibration period means and period boundaries
            trace_msgr_mean = go.Scatter(
                    name='MSGR Mean',
                    x=plt_mean_cal_dt,
                    y=plt_mean_cal_db,
                    line = dict(
                        color = ('rgb(22, 96, 167)'),
                        width = 2,
                        dash = 'solid')
                        )
            trace_msgr_breaks = go.Scatter(
                    name='MSGR Mean',
                    x=plt_mean_breaks_dt,
                    y=plt_mean_breaks_db,
                    line = dict(
                        color = ('rgb(22, 96, 167)'),
                        width = 1,
                        dash = 'solid')
                        )

        else:
            #no calibration periods: one dashed line at the overall MSGR mean
            trace_msgr_mean = go.Scatter(
                name='MSGR Mean',
                x=[datemin, datemax],
                y=[msgr_total_mean, msgr_total_mean],
                line = dict(
                    color = ('rgb(22, 96, 167)'),
                    width = 2,
                    dash = 'dash')
                    )
            trace_msgr_breaks = go.Scatter(
                name='MSGR Mean',
                x=[datemin, datemin, np.nan, datemax, datemax, np.nan],
                y=[-100, 100, np.nan, -100, 100, np.nan],
                line = dict(
                    color = ('rgb(22, 96, 167)'),
                    width = 1,
                    dash = 'solid')
                    )

        fig.append_trace(trace_msgr, 1, 1)
        fig.append_trace(trace_msgr_mean, 1, 1)
        fig.append_trace(trace_msgr_breaks, 1, 1)

        #symmetric y range: at least +/-5, widened when offsets reach the limit
        y_lim       = 5
        msgr_absmax = math.ceil(np.nanmax(np.abs(msgr_refl_offset)))
        if msgr_absmax >= y_lim:
            y_lim = msgr_absmax + 1

        fig['layout']['xaxis1'].update(title='',
                                range=[datemin, datemax],
                                )
        fig['layout']['yaxis1'].update(title='MSGR Difference (dB)',
                                range=[-y_lim, y_lim],
                                )

        ###########################################################################
        # RCA PLOTLY
        ###########################################################################

        #cap sample size at 100 to match the colour scale range (cmax=100)
        rca_daily_smpl[rca_daily_smpl>100] = 100

        #one entry per subplot row: daily markers + weekly line for one metric
        rca_panels = [
            dict(row=2,
                 daily_name='RCA Median Daily', weekly_name='RCA Median Weekly',
                 daily_y=rca_daily_median, weekly_y=rca_wk_median,
                 colorscale='Blues', reversescale=True, colorbar_y=0.725,
                 line_color='blue', y_title='RCA Median (dB)'),
            dict(row=3,
                 daily_name='RCA U. Quartile Daily', weekly_name='RCA U. Quartile  Weekly',
                 daily_y=rca_daily_upqrtl, weekly_y=rca_wk_upqrtl,
                 colorscale='Greens', reversescale=True, colorbar_y=0.5,
                 line_color='green', y_title='RCA U-Q. (dB)'),
            dict(row=4,
                 daily_name='RCA 95th Perc. Daily', weekly_name='RCA 95th Perc. Weekly',
                 daily_y=rca_daily_95perc, weekly_y=rca_wk_95perc,
                 colorscale='Reds', reversescale=False, colorbar_y=0.275,
                 line_color='red', y_title='RCA 95th Perc. (dB)'),
            dict(row=5,
                 daily_name='RCA Vol. 95Perc Daily', weekly_name='RCA Vol. 95Perc Weekly',
                 daily_y=rca_daily_vol_95perc, weekly_y=rca_wk_vol_95perc,
                 colorscale='Greys', reversescale=True, colorbar_y=0.05,
                 line_color='purple', y_title='RCA Vol. 95Perc. (dB)'),
        ]

        #y range for every RCA row: fixed lower bound, upper bound at least base_max
        base_min    = 44
        base_max    = 55

        for panel in rca_panels:
            fig.append_trace(
                _rca_daily_trace(panel['daily_name'], rca_daily_dt, panel['daily_y'],
                                 rca_daily_smpl, panel['colorscale'],
                                 panel['reversescale'], panel['colorbar_y']),
                panel['row'], 1)
            fig.append_trace(
                _rca_weekly_trace(panel['weekly_name'], rca_wk_dt, panel['weekly_y'],
                                  panel['line_color']),
                panel['row'], 1)

        for panel in rca_panels:
            fig['layout']['xaxis%d' % panel['row']].update(title='',
                                    range=[datemin, datemax],
                                    )
            fig['layout']['yaxis%d' % panel['row']].update(title=panel['y_title'],
                                    range=[base_min, _axis_upper(panel['daily_y'], base_max)],
                                    )

        #legend
        fig['layout']['legend'].update(orientation="h")

        #output
        out_fn   = '_'.join(['cal_rca_msgr', rid, str(start_yr), str(end_yr)]) + '.html'
        plotly_ffn = '/'.join([out_root_path, out_fn])
        #save; keep going with the next radar on failure, but report the reason
        try:
            py.plot(fig, filename = plotly_ffn)
            print('processed: ', out_fn)
        except Exception as err:
            print('FAILED FOR: ', out_fn, '-', repr(err))


print('done')
        

loading manual calibration data lists
loading msgr data for 03


'Plotting RCA/MSGR/CAL for 03'

This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x1,y2 ]
[ (3,1) x1,y3 ]
[ (4,1) x1,y4 ]
[ (5,1) x1,y5 ]

processed:  cal_rca_msgr_03_1997_2018.html
loading manual calibration data lists
loading msgr data for 50


'Plotting RCA/MSGR/CAL for 50'

This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x1,y2 ]
[ (3,1) x1,y3 ]
[ (4,1) x1,y4 ]
[ (5,1) x1,y5 ]

processed:  cal_rca_msgr_50_1997_2018.html
done
