# Historical Full Basins: Extract Other Weather

The purpose of this notebook is to extract the annual (day-of-year) temperature time series, conditioned on wet and dry precipitation state, in the format needed for weather generator formulation.

## Imports and Parameters

In [1]:
%matplotlib inline

In [2]:
import os
import pickle
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import datetime as dt
from IPython.display import display, HTML, Image
from math import sqrt

In [3]:
# Analysis window: 1 January 1991 through the last minute of 31 December 2020.
StartTS = pd.Timestamp( year=1991, month=1, day=1 )
EndTS = pd.Timestamp( year=2020, month=12, day=31, hour=23, minute=59 )

In [4]:
# Input and output directories. The defaults are the original workstation
#   paths; set the EAA_HCP_IN_DIR / EAA_HCP_OUT_DIR environment variables to
#   run the notebook on another machine without editing this cell.
IN_DIR1 = os.environ.get( "EAA_HCP_IN_DIR",
                          r'C:\Users\nmartin\Documents\EAA_HCP\Data\SwRI_Processed\Processed_Historical' )
OUT_DIR = os.environ.get( "EAA_HCP_OUT_DIR",
                          r'C:\Users\nmartin\Documents\EAA_HCP\Data\SwRI_Processed\Processed_Historical\OtherWeather' )

In [5]:
# Cutoff applied to the "Pre_mm" column: days at or above this value are
#   counted as wet and days below it as dry.
CutThresh = 0.255
# Length of the day-of-year arrays. Years with more than 365 rows fill every
#   slot; all other years leave the final slot untouched.
TOTYRDAYS = 366

## Load Dictionary of DataFrames

In [6]:
# Read the pickled dictionary of basin DataFrames and keep a sorted list of
#   its keys so that every later loop visits the basins in the same order.
# NOTE: pickle.load executes whatever the file encodes; only open trusted files.
InFiler = os.path.normpath( os.path.join( IN_DIR1, "FBas_MetTS_1980through2020_Dict.pkl" ) )
with open( InFiler, mode='rb' ) as pklIn:
    BasinsDFDict = pickle.load( pklIn )
# end with
basKeys = sorted( BasinsDFDict )

## Get the Averages DataFrames by Basin

In [7]:
# first and last calendar years covered by the analysis window
StartYr, StopYr = StartTS.year, EndTS.year
( StartYr, StopYr )

(1991, 2020)

In [8]:
# day-of-year average tables for dry and wet days, keyed by basin key
OWDryAvesDict = {}
OWWetAvesDict = {}

In [9]:
def _basinStateAverages( basinDF ):
    """Build the wet-day and dry-day day-of-year average tables for one basin.

    For every year from StartYr through StopYr the daily rows are flagged wet
    ("Pre_mm" >= CutThresh) or dry ("Pre_mm" < CutThresh). The "MaxT_C",
    "AveT_C", and "MinT_C" values are summed per day-of-year slot and per
    state, then divided by the number of days counted in that state.

    Args:
        basinDF (pd.DataFrame): daily record for one basin; it is sliced with
            Timestamp labels and must provide the "Pre_mm", "MaxT_C",
            "AveT_C", and "MinT_C" columns.

    Returns:
        tuple: ( WetDF, DryDF ), each indexed 1 through TOTYRDAYS with a
            counts column and "AMaxT_C", "AAveT_C", "AMinT_C" columns. Slots
            with a zero count hold NaN averages.
    """
    tempCols = ( "MaxT_C", "AveT_C", "MinT_C", )
    wetCnt = np.zeros( TOTYRDAYS, dtype=np.int32 )
    dryCnt = np.zeros( TOTYRDAYS, dtype=np.int32 )
    wetSum = { tCol : np.zeros( TOTYRDAYS, dtype=np.float32 ) for tCol in tempCols }
    drySum = { tCol : np.zeros( TOTYRDAYS, dtype=np.float32 ) for tCol in tempCols }
    for curYr in range( StartYr, StopYr + 1 ):
        yrDF = basinDF.loc[ pd.Timestamp( curYr, 1, 1, 0 ) : pd.Timestamp( curYr, 12, 31, 23, 59 ) ]
        # years with more than 365 rows fill every slot, others skip the last one
        nUse = TOTYRDAYS if len( yrDF ) > 365 else ( TOTYRDAYS - 1 )
        wetFlag = np.zeros( TOTYRDAYS, dtype=np.int32 )
        dryFlag = np.zeros( TOTYRDAYS, dtype=np.int32 )
        wetFlag[:nUse] = np.where( yrDF["Pre_mm"] >= CutThresh, 1, 0 )
        dryFlag[:nUse] = np.where( yrDF["Pre_mm"] < CutThresh, 1, 0 )
        # flags past nUse are zero so whole-array adds only touch used slots
        wetCnt += wetFlag
        dryCnt += dryFlag
        for tCol in tempCols:
            obsT = yrDF[tCol].to_numpy( dtype=np.float32 )
            wetSum[tCol][:nUse] += ( obsT * wetFlag[:nUse] )
            drySum[tCol][:nUse] += ( obsT * dryFlag[:nUse] )
        # end for
    # end for
    # reciprocal of the counts; NaN where a state never occurred in a slot
    wetRecip = 1.0 / np.where( wetCnt > 0, np.array( wetCnt, dtype=np.float32 ), np.nan )
    dryRecip = 1.0 / np.where( dryCnt > 0, np.array( dryCnt, dtype=np.float32 ), np.nan )
    dayIndex = list( range( 1, TOTYRDAYS + 1 ) )
    wetDF = pd.DataFrame( index=dayIndex,
                          data={ "Wet Counts" : wetCnt,
                                 "AMaxT_C" : ( wetSum["MaxT_C"] * wetRecip ),
                                 "AAveT_C" : ( wetSum["AveT_C"] * wetRecip ),
                                 "AMinT_C" : ( wetSum["MinT_C"] * wetRecip ), } )
    dryDF = pd.DataFrame( index=dayIndex,
                          data={ "Dry Counts" : dryCnt,
                                 "AMaxT_C" : ( drySum["MaxT_C"] * dryRecip ),
                                 "AAveT_C" : ( drySum["AveT_C"] * dryRecip ),
                                 "AMinT_C" : ( drySum["MinT_C"] * dryRecip ), } )
    return wetDF, dryDF


# fill the per-basin tracking dictionaries
for bKey in basKeys:
    OWWetAvesDict[bKey], OWDryAvesDict[bKey] = _basinStateAverages( BasinsDFDict[bKey] )
# end for

Serialize these dictionaries for later use.

In [10]:
# pickle the wet-day averages dictionary; the file name carries the year range
pklName = "OWeathWet_Ave_%s-%s_DFDict.pkl" % (StartYr, StopYr)
OutFiler = os.path.normpath( os.path.join( OUT_DIR, pklName ) )
with open( OutFiler, mode='wb' ) as pklOut:
    pickle.dump( OWWetAvesDict, pklOut, protocol=pickle.HIGHEST_PROTOCOL )
# end with

In [11]:
# pickle the dry-day averages dictionary; the file name carries the year range
pklName = "OWeathDry_Ave_%s-%s_DFDict.pkl" % (StartYr, StopYr)
OutFiler = os.path.normpath( os.path.join( OUT_DIR, pklName ) )
with open( OutFiler, mode='wb' ) as pklOut:
    pickle.dump( OWDryAvesDict, pklOut, protocol=pickle.HIGHEST_PROTOCOL )
# end with

## Calculate Standard Deviation DataFrames by Basin

In [12]:
# day-of-year standard deviation tables for dry and wet days, keyed by basin key
OWDryStdDict = {}
OWWetStdDict = {}

In [13]:
def _basinStateStdDevs( basinDF, wetAveDF, dryAveDF ):
    """Build the wet-day and dry-day day-of-year standard deviation tables.

    Squared departures of "MaxT_C", "AveT_C", and "MinT_C" from the matching
    state average are summed per day-of-year slot over StartYr through StopYr.
    The square root of the sum divided by the state count is taken, and gaps
    are then filled with DataFrame.interpolate().

    Args:
        basinDF (pd.DataFrame): daily record for one basin with "Pre_mm",
            "MaxT_C", "AveT_C", and "MinT_C" columns.
        wetAveDF (pd.DataFrame): wet-day averages with "AMaxT_C", "AAveT_C",
            and "AMinT_C" columns, indexed 1 through TOTYRDAYS.
        dryAveDF (pd.DataFrame): dry-day averages with the same layout.

    Returns:
        tuple: ( WetDF, DryDF ) with a counts column and "SDMaxT_C",
            "SDAveT_C", "SDMinT_C" columns, indexed 1 through TOTYRDAYS.
    """
    # ( observed column, average column, output column )
    colTriples = ( ( "MaxT_C", "AMaxT_C", "SDMaxT_C" ),
                   ( "AveT_C", "AAveT_C", "SDAveT_C" ),
                   ( "MinT_C", "AMinT_C", "SDMinT_C" ), )
    wetCnt = np.zeros( TOTYRDAYS, dtype=np.int32 )
    dryCnt = np.zeros( TOTYRDAYS, dtype=np.int32 )
    wetSq = { oCol : np.zeros( TOTYRDAYS, dtype=np.float32 ) for oCol, aCol, sCol in colTriples }
    drySq = { oCol : np.zeros( TOTYRDAYS, dtype=np.float32 ) for oCol, aCol, sCol in colTriples }
    # the averages do not change from year to year, so convert them once
    wetMean = { aCol : wetAveDF[aCol].to_numpy( dtype=np.float32 ) for oCol, aCol, sCol in colTriples }
    dryMean = { aCol : dryAveDF[aCol].to_numpy( dtype=np.float32 ) for oCol, aCol, sCol in colTriples }
    for curYr in range( StartYr, StopYr + 1 ):
        yrDF = basinDF.loc[ pd.Timestamp( curYr, 1, 1, 0 ) : pd.Timestamp( curYr, 12, 31, 23, 59 ) ]
        # years with more than 365 rows fill every slot, others skip the last one
        nUse = TOTYRDAYS if len( yrDF ) > 365 else ( TOTYRDAYS - 1 )
        wetFlag = np.zeros( TOTYRDAYS, dtype=np.int32 )
        dryFlag = np.zeros( TOTYRDAYS, dtype=np.int32 )
        wetFlag[:nUse] = np.where( yrDF["Pre_mm"] >= CutThresh, 1, 0 )
        dryFlag[:nUse] = np.where( yrDF["Pre_mm"] < CutThresh, 1, 0 )
        wetCnt += wetFlag
        dryCnt += dryFlag
        # accumulate the squared departures from the state averages
        for oCol, aCol, sCol in colTriples:
            obsT = yrDF[oCol].to_numpy( dtype=np.float32 )
            wetSq[oCol][:nUse] += ( wetFlag[:nUse] * ( obsT - wetMean[aCol][:nUse] )**2.0 )
            drySq[oCol][:nUse] += ( dryFlag[:nUse] * ( obsT - dryMean[aCol][:nUse] )**2.0 )
        # end for
    # end for
    # reciprocal of the counts; NaN where a state never occurred in a slot
    wetRecip = 1.0 / np.where( wetCnt > 0, np.array( wetCnt, dtype=np.float32 ), np.nan )
    dryRecip = 1.0 / np.where( dryCnt > 0, np.array( dryCnt, dtype=np.float32 ), np.nan )
    wetData = { "Wet Counts" : wetCnt }
    dryData = { "Dry Counts" : dryCnt }
    for oCol, aCol, sCol in colTriples:
        wetData[sCol] = np.sqrt( ( wetSq[oCol] * wetRecip ) )
        dryData[sCol] = np.sqrt( ( drySq[oCol] * dryRecip ) )
    # end for
    dayIndex = list( range( 1, TOTYRDAYS + 1 ) )
    # interpolate() fills the NaN entries left by slots with a zero count
    wetDF = pd.DataFrame( index=dayIndex, data=wetData ).interpolate()
    dryDF = pd.DataFrame( index=dayIndex, data=dryData ).interpolate()
    return wetDF, dryDF


# fill the per-basin tracking dictionaries
for bKey in basKeys:
    OWWetStdDict[bKey], OWDryStdDict[bKey] = _basinStateStdDevs( BasinsDFDict[bKey],
                                                                 OWWetAvesDict[bKey],
                                                                 OWDryAvesDict[bKey] )
# end for

Serialize these dictionaries for later use.

In [14]:
# pickle the wet-day standard deviation dictionary; the file name carries the year range
pklName = "OWeathWet_Std_%s-%s_DFDict.pkl" % (StartYr, StopYr)
OutFiler = os.path.normpath( os.path.join( OUT_DIR, pklName ) )
with open( OutFiler, mode='wb' ) as pklOut:
    pickle.dump( OWWetStdDict, pklOut, protocol=pickle.HIGHEST_PROTOCOL )
# end with

In [15]:
# pickle the dry-day standard deviation dictionary; the file name carries the year range
pklName = "OWeathDry_Std_%s-%s_DFDict.pkl" % (StartYr, StopYr)
OutFiler = os.path.normpath( os.path.join( OUT_DIR, pklName ) )
with open( OutFiler, mode='wb' ) as pklOut:
    pickle.dump( OWDryStdDict, pklOut, protocol=pickle.HIGHEST_PROTOCOL )
# end with

## Calculate Z-Score by Basin

In [16]:
# per-basin result containers filled by the Z-score loop in the next cell
ZTAveDict, ZTMaxDict, ZTMinDict = {}, {}, {}
TrMaxDict, TrMinDict = {}, {}
ZTRho0Dict, ZTRho1Dict = {}, {}

In [17]:
# ( tag, observed column, average column, standard deviation column )
_Z_VARS = ( ( "Max", "MaxT_C", "AMaxT_C", "SDMaxT_C" ),
            ( "Ave", "AveT_C", "AAveT_C", "SDAveT_C" ),
            ( "Min", "MinT_C", "AMinT_C", "SDMinT_C" ), )


def _stateZScores( obsT, wetFlag, dryFlag, wetAve, dryAve, wetStd, dryStd ):
    """Standardize one year of daily values with the statistics of each day's state.

    A wet day uses the wet-day average and standard deviation, a dry day the
    dry-day ones. A standard deviation that is not greater than 0.01 (NaN
    included) gives a Z-score of zero, as does a day flagged neither wet nor
    dry.

    Args:
        obsT (np.ndarray): float32 observed values for the year.
        wetFlag (np.ndarray): 1 where the day is wet, otherwise 0.
        dryFlag (np.ndarray): 1 where the day is dry, otherwise 0.
        wetAve (np.ndarray): float32 wet-day averages for the same slots.
        dryAve (np.ndarray): float32 dry-day averages for the same slots.
        wetStd (np.ndarray): float32 wet-day standard deviations.
        dryStd (np.ndarray): float32 dry-day standard deviations.

    Returns:
        np.ndarray: float64 Z-scores, one per day.
    """
    wetInv = np.zeros( wetStd.shape, dtype=np.float32 )
    dryInv = np.zeros( dryStd.shape, dtype=np.float32 )
    np.divide( np.ones( wetStd.shape, dtype=np.float32 ), wetStd, out=wetInv, where=( wetStd > 0.01 ) )
    np.divide( np.ones( dryStd.shape, dtype=np.float32 ), dryStd, out=dryInv, where=( dryStd > 0.01 ) )
    # Select by state instead of multiplying by the 0/1 flag. The average of
    #   the state that did not occur is NaN when that state never occurs in
    #   the slot, and 0 * NaN would turn the whole Z-score into NaN.
    wetZ = np.where( wetFlag == 1, ( obsT - wetAve ) * wetInv, 0.0 )
    dryZ = np.where( dryFlag == 1, ( obsT - dryAve ) * dryInv, 0.0 )
    # later sums and dot products are carried in double precision
    return ( wetZ + dryZ ).astype( np.float64 )


def _nanToZero( vals ):
    """Return a copy of vals with every NaN replaced by 0.0."""
    return np.where( np.isnan( vals ), 0.0, vals )


for bKey in basKeys:
    cBasDF = BasinsDFDict[bKey]
    # state statistics as full-length float32 arrays, converted once per basin
    wetAve = dict()
    dryAve = dict()
    wetStd = dict()
    dryStd = dict()
    # running sums, extremes, and the per-year Z-scores kept for the second pass
    zSum = dict()
    zHi = dict()
    zLo = dict()
    tHi = dict()
    tLo = dict()
    yearZ = dict()
    for tag, oCol, aCol, sCol in _Z_VARS:
        wetAve[tag] = OWWetAvesDict[bKey][aCol].to_numpy( dtype=np.float32 )
        dryAve[tag] = OWDryAvesDict[bKey][aCol].to_numpy( dtype=np.float32 )
        wetStd[tag] = OWWetStdDict[bKey][sCol].to_numpy( dtype=np.float32 )
        dryStd[tag] = OWDryStdDict[bKey][sCol].to_numpy( dtype=np.float32 )
        zSum[tag] = np.zeros( TOTYRDAYS, dtype=np.float32 )
        # maxima start at -999 and minima at +999 so that the first value seen
        #   always replaces the sentinel; slots with no data keep the sentinel
        zHi[tag] = -999.0 * np.ones( TOTYRDAYS, dtype=np.float32 )
        zLo[tag] = 999.0 * np.ones( TOTYRDAYS, dtype=np.float32 )
        tHi[tag] = -999.0 * np.ones( TOTYRDAYS, dtype=np.float32 )
        tLo[tag] = 999.0 * np.ones( TOTYRDAYS, dtype=np.float32 )
        yearZ[tag] = list()
    # end for
    WStateCnt = np.zeros( TOTYRDAYS, dtype=np.int32 )
    DStateCnt = np.zeros( TOTYRDAYS, dtype=np.int32 )
    # first pass: Z-scores, their daily sums, and the daily extremes
    for yY in range(StartYr, StopYr+1):
        curStart = pd.Timestamp( yY, 1, 1, 0, )
        curStop = pd.Timestamp( yY, 12, 31, 23, 59, )
        curYrDF = cBasDF.loc[curStart:curStop]
        # years with more than 365 rows fill every slot, others skip the last one
        if len( curYrDF ) > 365:
            LastDay = TOTYRDAYS
        else:
            LastDay = ( TOTYRDAYS - 1 )
        # end if
        IsWet = np.zeros( TOTYRDAYS, dtype=np.int32 )
        IsDry = np.zeros( TOTYRDAYS, dtype=np.int32 )
        IsWet[:LastDay] = np.where( curYrDF["Pre_mm"] >= CutThresh, 1, 0 )
        IsDry[:LastDay] = np.where( curYrDF["Pre_mm"] < CutThresh, 1, 0 )
        WStateCnt[:LastDay] += IsWet[:LastDay]
        DStateCnt[:LastDay] += IsDry[:LastDay]
        for tag, oCol, aCol, sCol in _Z_VARS:
            ctNew = curYrDF[oCol].to_numpy(dtype=np.float32)
            allZ = _stateZScores( ctNew, IsWet[:LastDay], IsDry[:LastDay],
                                  wetAve[tag][:LastDay], dryAve[tag][:LastDay],
                                  wetStd[tag][:LastDay], dryStd[tag][:LastDay] )
            yearZ[tag].append( allZ )
            zSum[tag][:LastDay] += allZ
            # np.where keeps the current extreme when the new value is NaN
            zHi[tag][:LastDay] = np.where( allZ > zHi[tag][:LastDay], allZ, zHi[tag][:LastDay] )
            zLo[tag][:LastDay] = np.where( allZ < zLo[tag][:LastDay], allZ, zLo[tag][:LastDay] )
            tHi[tag][:LastDay] = np.where( ctNew > tHi[tag][:LastDay], ctNew, tHi[tag][:LastDay] )
            tLo[tag][:LastDay] = np.where( ctNew < tLo[tag][:LastDay], ctNew, tLo[tag][:LastDay] )
        # end for
    # end of year for
    AllStateCnt = WStateCnt + DStateCnt
    ZMaxDict = { "All Counts" : AllStateCnt,
                 "MxZMaxT_C" : zHi["Max"],
                 "MxZAveT_C" : zHi["Ave"],
                 "MxZMinT_C" : zHi["Min"], }
    MaxDict = { "AllCounts" : AllStateCnt,
                "MxMaxT_C" : tHi["Max"],
                "MxAveT_C" : tHi["Ave"],
                "MxMinT_C" : tHi["Min"], }
    ZMinDict = { "All Counts" : AllStateCnt,
                 "MnZMaxT_C" : zLo["Max"],
                 "MnZAveT_C" : zLo["Ave"],
                 "MnZMinT_C" : zLo["Min"], }
    MinDict = { "AllCounts" : AllStateCnt,
                "MnMaxT_C" : tLo["Max"],
                "MnAveT_C" : tLo["Ave"],
                "MnMinT_C" : tLo["Min"], }
    # average daily Z-score; NaN where a slot has neither wet nor dry days
    zDenom = np.where( AllStateCnt > 0, np.array( AllStateCnt, dtype=np.float32), np.nan )
    zMulti = 1.0 / zDenom
    ZDDict = { "All Counts" : AllStateCnt,
               "ZMaxT_C" : (zSum["Max"] * zMulti),
               "ZAveT_C" : (zSum["Ave"] * zMulti),
               "ZMinT_C" : (zSum["Min"] * zMulti), }
    # build our data frames
    DaysIndexer = [ x for x in range(1, (TOTYRDAYS + 1), 1)]
    ZAveDF = pd.DataFrame( index=DaysIndexer, data=ZDDict )
    ZMaxDF = pd.DataFrame( index=DaysIndexer, data=ZMaxDict )
    ZMinDF = pd.DataFrame( index=DaysIndexer, data=ZMinDict )
    MaxDF = pd.DataFrame( index=DaysIndexer, data=MaxDict )
    MinDF = pd.DataFrame( index=DaysIndexer, data=MinDict )
    ZTAveDict[bKey] = ZAveDF
    ZTMaxDict[bKey] = ZMaxDF
    ZTMinDict[bKey] = ZMinDF
    TrMaxDict[bKey] = MaxDF
    TrMinDict[bKey] = MinDF
    # second pass: correlation coefficients between the max T Z-scores
    #   (variable 1) and the min T Z-scores (variable 2), at lag 0 and lag 1.
    #   The Z-scores stored during the first pass are reused.
    aveZ1 = ZAveDF["ZMaxT_C"].to_numpy(dtype=np.float32)
    aveZ2 = ZAveDF["ZMinT_C"].to_numpy(dtype=np.float32)
    r0_12_numer = 0.0
    r1_11_numer = 0.0
    r1_12_numer = 0.0
    r1_21_numer = 0.0
    r1_22_numer = 0.0
    # every coefficient is normalized by the lag 0 sums of squares
    sumSq1 = 0.0
    sumSq2 = 0.0
    for z1, z2 in zip( yearZ["Max"], yearZ["Min"] ):
        nUse = len( z1 )
        # departures from the average daily Z-score, NaN treated as zero
        mDiffZ1_0 = _nanToZero( z1 - aveZ1[:nUse] )
        mDiffZ2_0 = _nanToZero( z2 - aveZ2[:nUse] )
        # np.roll moves each value one slot later and wraps the last day of
        #   this same year into the first slot
        mDiffZ1_1 = _nanToZero( np.roll( z1, 1 ) - aveZ1[:nUse] )
        mDiffZ2_1 = _nanToZero( np.roll( z2, 1 ) - aveZ2[:nUse] )
        # now do our running sums
        sumSq1 += ( mDiffZ1_0**2.0 ).sum()
        sumSq2 += ( mDiffZ2_0**2.0 ).sum()
        r0_12_numer += np.dot( mDiffZ1_0, mDiffZ2_0 )
        r1_11_numer += np.dot( mDiffZ1_0, mDiffZ1_1 )
        r1_12_numer += np.dot( mDiffZ1_0, mDiffZ2_1 )
        r1_21_numer += np.dot( mDiffZ2_0, mDiffZ1_1 )
        r1_22_numer += np.dot( mDiffZ2_0, mDiffZ2_1 )
    # end of year for
    # calculate our coefficients
    rootSq1 = sqrt( sumSq1 )
    rootSq2 = sqrt( sumSq2 )
    r0_12 = r0_12_numer / ( rootSq1 * rootSq2 )
    r1_11 = r1_11_numer / ( rootSq1 * rootSq1 )
    r1_22 = r1_22_numer / ( rootSq2 * rootSq2 )
    r1_12 = r1_12_numer / ( rootSq1 * rootSq2 )
    r1_21 = r1_21_numer / ( rootSq2 * rootSq1 )
    # now make our data frames
    # rho 0 correlation
    r0dpDDict = { "rho_X1" : [ 1.0, r0_12 ],
                  "rho_X2" : [ r0_12, 1.0 ], }
    r0Ind = [ "rho_1X", "rho_2X", ]
    r0dpDF = pd.DataFrame( index=r0Ind, data=r0dpDDict )
    # rho 1 cross-correlation
    # NOTE(review): r1_12 lands in row "rho_2X" of column "rho_X1_L1" and r1_21
    #   in row "rho_1X" of column "rho_X2_L1"; confirm that this orientation is
    #   the one the weather generator expects.
    r1dpDDict = { "rho_X1_L1" : [ r1_11, r1_12,],
                  "rho_X2_L1" : [ r1_21, r1_22,], }
    r1dpDF = pd.DataFrame( index=r0Ind, data=r1dpDDict )
    # assign to basin tracking dictionaries
    ZTRho0Dict[bKey] = r0dpDF
    ZTRho1Dict[bKey] = r1dpDF
# end for

Serialize the tracking dictionaries for later use.

In [18]:
# pickle the average Z-score dictionary; the file name carries the year range
pklName = "OWeath_ZAve_%s-%s_DFDict.pkl" % (StartYr, StopYr)
OutFiler = os.path.normpath( os.path.join( OUT_DIR, pklName ) )
with open( OutFiler, mode='wb' ) as pklOut:
    pickle.dump( ZTAveDict, pklOut, protocol=pickle.HIGHEST_PROTOCOL )
# end with

In [19]:
# pickle the maximum Z-score dictionary; the file name carries the year range
pklName = "OWeath_ZMax_%s-%s_DFDict.pkl" % (StartYr, StopYr)
OutFiler = os.path.normpath( os.path.join( OUT_DIR, pklName ) )
with open( OutFiler, mode='wb' ) as pklOut:
    pickle.dump( ZTMaxDict, pklOut, protocol=pickle.HIGHEST_PROTOCOL )
# end with

In [20]:
# pickle the minimum Z-score dictionary; the file name carries the year range
pklName = "OWeath_ZMin_%s-%s_DFDict.pkl" % (StartYr, StopYr)
OutFiler = os.path.normpath( os.path.join( OUT_DIR, pklName ) )
with open( OutFiler, mode='wb' ) as pklOut:
    pickle.dump( ZTMinDict, pklOut, protocol=pickle.HIGHEST_PROTOCOL )
# end with

In [21]:
# pickle the maximum observed temperature dictionary; the file name carries the year range
pklName = "OWeath_TempMax_%s-%s_DFDict.pkl" % (StartYr, StopYr)
OutFiler = os.path.normpath( os.path.join( OUT_DIR, pklName ) )
with open( OutFiler, mode='wb' ) as pklOut:
    pickle.dump( TrMaxDict, pklOut, protocol=pickle.HIGHEST_PROTOCOL )
# end with

In [22]:
# pickle the minimum observed temperature dictionary; the file name carries the year range
pklName = "OWeath_TempMin_%s-%s_DFDict.pkl" % (StartYr, StopYr)
OutFiler = os.path.normpath( os.path.join( OUT_DIR, pklName ) )
with open( OutFiler, mode='wb' ) as pklOut:
    pickle.dump( TrMinDict, pklOut, protocol=pickle.HIGHEST_PROTOCOL )
# end with

In [23]:
# pickle the lag 0 correlation dictionary; the file name carries the year range
pklName = "OWeath_Rho0_%s-%s_DFDict.pkl" % (StartYr, StopYr)
OutFiler = os.path.normpath( os.path.join( OUT_DIR, pklName ) )
with open( OutFiler, mode='wb' ) as pklOut:
    pickle.dump( ZTRho0Dict, pklOut, protocol=pickle.HIGHEST_PROTOCOL )
# end with

In [24]:
# pickle the lag 1 correlation dictionary; the file name carries the year range
pklName = "OWeath_Rho1_%s-%s_DFDict.pkl" % (StartYr, StopYr)
OutFiler = os.path.normpath( os.path.join( OUT_DIR, pklName ) )
with open( OutFiler, mode='wb' ) as pklOut:
    pickle.dump( ZTRho1Dict, pklOut, protocol=pickle.HIGHEST_PROTOCOL )
# end with

Output the correlation DataFrames to spreadsheets for easy examination.

In [25]:
# Write one "<basin>_rho0" sheet and one "<basin>_rho1" sheet per basin.
outXLSX = os.path.normpath( os.path.join( OUT_DIR, "OWeath_Rho0and1_byBasin.xlsx" ) )
# add_format() and set_column() are XlsxWriter calls, so that engine is
#   requested explicitly instead of relying on which engine pandas picks.
# The with block closes the workbook even if a sheet fails to write.
with pd.ExcelWriter( outXLSX, engine="xlsxwriter" ) as writer:
    workbook  = writer.book
    format1 = workbook.add_format({'num_format': '#,##0.000000'})
    for bKey in basKeys:
        # the lag 0 sheet is written first, then the lag 1 sheet
        for labelFmt, rhoDict in ( ( "%s_rho0", ZTRho0Dict ), ( "%s_rho1", ZTRho1Dict ), ):
            curDF = rhoDict[bKey]
            cLabel = labelFmt % bKey
            curDF.to_excel( writer, sheet_name=cLabel, )
            # adjust columns: fixed width for the index column, then size each
            #   data column to its longest entry or its header plus padding
            writer.sheets[cLabel].set_column( 0, 0, 15 )
            for column in curDF:
                column_width = max(curDF[column].astype(str).map(len).max()+6, len(column)+6)
                col_idx = curDF.columns.get_loc(column)
                writer.sheets[cLabel].set_column(col_idx+1, col_idx+1, column_width, format1)
            # end for
        # end for
    # end for
# end with