# This notebook investigates brightness temperature (TB) and surface temperature data at the Hunza met station locations

## Load in all the modules needed

In [1]:
%pylab notebook
import cartopy.crs as ccrs
import cartopy.io.shapereader as shpreader
import shapely.geometry as sgeom
import matplotlib.pyplot as plt
from netCDF4 import Dataset, num2date
import numpy as np
import pandas as pd
import rasterio
from cetbtools.ease2conv import Ease2Transform
import time

Populating the interactive namespace from numpy and matplotlib


In [2]:
# navigate to where scripts are saved
%cd /projects/brodzik/ipynb_melt_onset/scripts/
%ls

/projects/brodzik/ipynb_melt_onset/scripts
CETB_algorithms.py  CETB_analysis.py~       [0m[01;34m__pycache__[0m/
CETB_analysis.py    CETB_read_functions.py


In [3]:
# load the custom functions
from CETB_read_functions import read_Tb
from CETB_read_functions import calc_DAV
from CETB_analysis import MOD_array_year

In [4]:
# CETB pixel lookup for the Khunjerab met station
# ref: /Users/brodzik/Desktop/GIS_data/Pakistan/Hunza/wapda_met_stations_CETBlocs.v4.xlsx
Khunjerab = {
    "name": "Khunjerab",
    "lat": 36.8411,
    "lon": 75.4192,
    "elevation": 4440,  # presumably meters -- TODO confirm against the ref spreadsheet
    # row/col of the station's pixel; the 3km pair indexes the SIR cube,
    # the 25km pair indexes the GRD cube
    "row3km": 220, "col3km": 41,
    "row25km": 27, "col25km": 5,
}

In [5]:
# CETB pixel lookup for the Ziarat met station
# ref: /Users/brodzik/Desktop/GIS_data/Pakistan/Hunza/wapda_met_stations_CETBlocs.v4.xlsx
Ziarat = {
    "name": "Ziarat",
    "lat": 36.798,
    "lon": 74.482,
    "elevation": 3020,  # presumably meters -- TODO confirm against the ref spreadsheet
    # row/col of the station's pixel; the 3km pair indexes the SIR cube,
    # the 25km pair indexes the GRD cube
    "row3km": 249, "col3km": 34,
    "row25km": 31, "col25km": 4,
}

In [6]:
# CETB pixel lookup for the Naltar met station
# (this is the only station entry that also carries a 6km row/col pair)
# ref: /Users/brodzik/Desktop/GIS_data/Pakistan/Hunza/wapda_met_stations_CETBlocs.v4.xlsx
Naltar = {
    "name": "Naltar",
    "lat": 36.1667,
    "lon": 74.1833,
    "elevation": 2898,  # presumably meters -- TODO confirm against the ref spreadsheet
    # row/col of the station's pixel at each grid resolution
    "row3km": 264, "col3km": 51,
    "row6km": 132, "col6km": 26,
    "row25km": 33, "col25km": 6,
}

## Read TB data

In [7]:
# Specify region, satellite, sensor, channel, and image reconstruction algorithm of interest in file name.
# This notebook reads 2 CETB datasets (GRD and SIR) so that channels/algorithms/sensors can be compared.
region = 'UIB'      # make this the same syntax as cubefilenames and sub-directory
platform = 'AQUA'   # 'AQUA' for AMSRE, 'F13','F14','F15'... for SSMI
sensor = 'AMSRE'    # 'AMSRE', 'SSMI', etc.
channel = '36V'     # '36V','36H', '18V','18H', etc. ('19V','19H' and '37V','37H' for SSMI)
version = 'v1.3'
proj = 'N'

# Provider code that appears in the cube file names, selected by sensor
if sensor == 'SSMI':
    provider = 'CSU'
elif sensor == 'AMSRE':
    provider = 'RSS'
else:
    # Fail here with a clear message instead of a NameError on `provider` further down
    raise ValueError("No provider configured for sensor %r" % sensor)

cubeDir = '/work/PMESDR/CETB_v1.3/%s_%s/%s/cubes_%s/' % (platform, sensor, proj, region)

# File name prefixes for the GRD and SIR cubes (year and variable suffix are added by the reader)
prefix_GRD = 'CETB.cubefile.%s.%s_%s-%s-GRD-%s-%s' % (region, platform, sensor, channel, provider, version)
prefix_SIR = 'CETB.cubefile.%s.%s_%s-%s-SIR-%s-%s' % (region, platform, sensor, channel, provider, version)

# Years to process for each platform
if platform == 'F13':
    # F13, May 95 - Nov 09
    Years = list(range(2002, 2010))
elif platform == 'F14':
    # F14, May 97 - Aug 08
    Years = list(range(2002, 2009))
elif platform == 'F15':
    # F15, Feb 00 - Jun 17
    Years = list(range(2002, 2012))
elif platform == 'F17':
    # F17, Mar 08 - Jun 17
    Years = list(range(2009, 2018))
elif platform == 'AQUA':
    # AQUA AMSR-E: Jun 02 - Oct 11
    # (for a quick test run, shorten this to e.g. [2003, 2004])
    Years = list(range(2003, 2012))
else:
    # Fail here with a clear message instead of a NameError on `Years` in later cells
    raise ValueError("No year range configured for platform %r" % platform)
    #Years=[2003,2004]

In [8]:
# Read the complete SIR cube first, then the GRD cube, for all configured years
data_SIR = read_Tb(cubeDir, prefix_SIR, Years)
data_GRD = read_Tb(cubeDir, prefix_GRD, Years)

# Pull out the 3-D Tb time-series arrays from each result
CETB_SIR, CETB_GRD = data_SIR['TB'], data_GRD['TB']

No subset specified, fetching complete cube...
Next filename=/work/PMESDR/CETB_v1.3/AQUA_AMSRE/N/cubes_UIB/CETB.cubefile.UIB.AQUA_AMSRE-36V-SIR-RSS-v1.3.2003.TB.nc...
Next filename=/work/PMESDR/CETB_v1.3/AQUA_AMSRE/N/cubes_UIB/CETB.cubefile.UIB.AQUA_AMSRE-36V-SIR-RSS-v1.3.2004.TB.nc...
Next filename=/work/PMESDR/CETB_v1.3/AQUA_AMSRE/N/cubes_UIB/CETB.cubefile.UIB.AQUA_AMSRE-36V-SIR-RSS-v1.3.2005.TB.nc...
Next filename=/work/PMESDR/CETB_v1.3/AQUA_AMSRE/N/cubes_UIB/CETB.cubefile.UIB.AQUA_AMSRE-36V-SIR-RSS-v1.3.2006.TB.nc...
Next filename=/work/PMESDR/CETB_v1.3/AQUA_AMSRE/N/cubes_UIB/CETB.cubefile.UIB.AQUA_AMSRE-36V-SIR-RSS-v1.3.2007.TB.nc...
Next filename=/work/PMESDR/CETB_v1.3/AQUA_AMSRE/N/cubes_UIB/CETB.cubefile.UIB.AQUA_AMSRE-36V-SIR-RSS-v1.3.2008.TB.nc...
Next filename=/work/PMESDR/CETB_v1.3/AQUA_AMSRE/N/cubes_UIB/CETB.cubefile.UIB.AQUA_AMSRE-36V-SIR-RSS-v1.3.2009.TB.nc...
Next filename=/work/PMESDR/CETB_v1.3/AQUA_AMSRE/N/cubes_UIB/CETB.cubefile.UIB.AQUA_AMSRE-36V-SIR-RSS-v1.3.2010.TB

In [9]:
# DAV for each imported Tb cube (SIR first, then GRD)
DAV_SIR, DAV_GRD = [calc_DAV(tb_cube) for tb_cube in (CETB_SIR, CETB_GRD)]

In [10]:
# MOD algorithm parameters, chosen by sensor:
#   window        - window for MOD algorithm, in measurements
#                   ('10' would be 5 days at 2 measurements per day)
#   DAV_threshold - DAV threshold handed to the MOD algorithm
#   Tb_threshold  - Tb threshold handed to the MOD algorithm
if sensor == 'AMSRE':
    window, DAV_threshold, Tb_threshold = 10, 40, 252
elif sensor == 'SSMI':
    # use 7 days for SSMI?
    window, DAV_threshold, Tb_threshold = 14, 18, 240
else:
    # not sure about SSMIS
    window, DAV_threshold, Tb_threshold = 10, 25, 252

# number of Tb/DAV exceedances to trigger MOD
count = 3

In [11]:
def calculate_station_MOD_for_year(station, data_SIR, DAV_SIR, data_GRD, DAV_GRD,
                                   year, window, count, DAV_threshold, Tb_threshold):
    """Run the MOD calculation for the single SIR pixel and single GRD pixel of one station.

    Reads the notebook-level names cubeDir, prefix_SIR and prefix_GRD, and prints
    the two one-pixel subsets that it uses.

    Parameters
    ----------
    station : dict with 'row3km', 'col3km', 'row25km' and 'col25km' entries
    data_SIR, data_GRD : mappings with 'TB' (indexed [time, row, col]) and 'cal_date' entries
    DAV_SIR, DAV_GRD : arrays indexed the same way as the matching 'TB' array
    year, window, count, DAV_threshold, Tb_threshold : passed through to MOD_array_year

    Returns
    -------
    dict with "SIR_MOD" and "GRD_MOD", each holding whatever MOD_array_year returned
    for that pixel (MOD will be in day of year, DOY).
    """
    def one_pixel_subset(row, col):
        # rows_cols subsets should be [row, row+1, col, col+1]
        return [row, row + 1, col, col + 1]

    def mod_for_pixel(prefix, data, dav, rows_cols):
        # Slice TB and DAV down to the one pixel, then hand everything to MOD_array_year
        r0, r1, c0, c1 = rows_cols
        return MOD_array_year(cubeDir, prefix,
                              data['TB'][:, r0:r1, c0:c1],
                              dav[:, r0:r1, c0:c1],
                              rows_cols, data['cal_date'],
                              year, window, count, DAV_threshold, Tb_threshold)

    # Subsets for the one pixel that contains the current station, at each resolution
    rows_cols_env = one_pixel_subset(station['row3km'], station['col3km'])
    rows_cols_GRD = one_pixel_subset(station['row25km'], station['col25km'])
    print("rows_cols:")
    print(rows_cols_env)
    print(rows_cols_GRD)

    # SIR is evaluated before GRD, as dict entries are built in order
    return {"SIR_MOD": mod_for_pixel(prefix_SIR, data_SIR, DAV_SIR, rows_cols_env),
            "GRD_MOD": mod_for_pixel(prefix_GRD, data_GRD, DAV_GRD, rows_cols_GRD)}

In [12]:
# Display the years and MOD parameters that the following cells will use
Years, window, count, DAV_threshold, Tb_threshold

([2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011], 10, 3, 40, 252)

In [15]:
# Choose the station whose melt onset dates (MOD) are calculated
station = Khunjerab
#station = Ziarat

# One Timestamp per year in which a MOD was detected, for SIR and GRD respectively
station_SIR_MOD = []
station_GRD_MOD = []
for year in Years:
    out = calculate_station_MOD_for_year(station, data_SIR, DAV_SIR, data_GRD, DAV_GRD,
                                         year, window, count, DAV_threshold, Tb_threshold)
    for key, mod_dates in (("SIR_MOD", station_SIR_MOD), ("GRD_MOD", station_GRD_MOD)):
        print(out[key])
        # The result is a 1-element sequence holding the day of year, or NaN when
        # nothing was detected. Test the scalar with numpy: `math` is never imported
        # here and passing a whole array to math.isnan relies on deprecated
        # array-to-scalar conversion.
        mod_doy = out[key][0]
        if np.isnan(float(mod_doy)):
            print("no MOD detected")
        else:
            # year*1000 + DOY gives YYYYDDD, which '%Y%j' parses to a date
            mod_dates.append(pd.to_datetime(year*1000 + int(mod_doy), format='%Y%j'))

print("SIR_MOD")
print(station_SIR_MOD)
print("GRD_MOD")
print(station_GRD_MOD)

rows_cols:
[220, 221, 41, 42]
[27, 28, 5, 6]
[143.0]
[142.0]
rows_cols:
[220, 221, 41, 42]
[27, 28, 5, 6]
[136.0]
[134.0]
rows_cols:
[220, 221, 41, 42]
[27, 28, 5, 6]
[113.0]
[113.0]
rows_cols:
[220, 221, 41, 42]
[27, 28, 5, 6]
[117.0]
[114.0]
rows_cols:
[220, 221, 41, 42]
[27, 28, 5, 6]
[104.0]
[104.0]
rows_cols:
[220, 221, 41, 42]
[27, 28, 5, 6]
[124.0]
[123.0]
rows_cols:
[220, 221, 41, 42]
[27, 28, 5, 6]
[149.0]
[136.0]
rows_cols:
[220, 221, 41, 42]
[27, 28, 5, 6]
[118.0]
[118.0]
rows_cols:
[220, 221, 41, 42]
[27, 28, 5, 6]
[116.0]
[116.0]
SIR_MOD
[Timestamp('2003-05-23 00:00:00'), Timestamp('2004-05-15 00:00:00'), Timestamp('2005-04-23 00:00:00'), Timestamp('2006-04-27 00:00:00'), Timestamp('2007-04-14 00:00:00'), Timestamp('2008-05-03 00:00:00'), Timestamp('2009-05-29 00:00:00'), Timestamp('2010-04-28 00:00:00'), Timestamp('2011-04-26 00:00:00')]
GRD_MOD
[Timestamp('2003-05-22 00:00:00'), Timestamp('2004-05-13 00:00:00'), Timestamp('2005-04-23 00:00:00'), Timestamp('2006-04-24 00:

In [None]:
def read_station_year(station_name, year,
                      base_dir="/work/charis/ti_model/surface_met_data/WAPDA/v1"):
    """Read one year of met station data into a date-indexed DataFrame.

    The file read is <base_dir>/<name>/<name>_<year>.qc1.txt, where <name> is
    station_name in lower case. The first line of the file is skipped and the
    second line supplies the column names.

    Parameters
    ----------
    station_name : str, station name (lower-cased to build the path)
    year : int, year to read
    base_dir : str, directory holding the per-station sub-directories

    Returns
    -------
    DataFrame with every column from the file ('#YYYY', 'MM', 'DD' renamed to
    'year', 'month', 'day') indexed by a 'yyyy-mm-dd' datetime index.

    Raises
    ------
    FileNotFoundError if the file for this station and year does not exist.
    """
    name = station_name.lower()
    stationFile = "%s/%s/%s_%d.qc1.txt" % (base_dir, name, name, year)
    print("Reading station data from %s" % stationFile)

    # Raw string: '\s' in a normal string literal is an invalid escape sequence
    station_df = pd.read_csv(stationFile, sep=r'\s+', skiprows=[0])

    station_df = station_df.rename(columns={'#YYYY': 'year',
                                            'MM': 'month',
                                            'DD': 'day'})

    # Assemble the date from its year/month/day columns and use it as the index
    station_df['yyyy-mm-dd'] = pd.to_datetime(station_df[['year', 'month', 'day']])
    return station_df.set_index('yyyy-mm-dd')

In [None]:

# Read every configured year that has a station file. Some stations have no data
# for some years (Khunjerab and Ziarat have no station data for 2011), so a
# missing file is reported and skipped instead of hard-coding which years to drop.
frames = []
for year in Years:
    try:
        frames.append(read_station_year(station['name'], year))
    except FileNotFoundError:
        print("No station data for %s in %d, skipping" % (station['name'], year))

if not frames:
    raise ValueError("No station data found for %s in years %s" % (station['name'], Years))

# -9999.0 marks missing values in the station files; turn them into NaN
station_df = pd.concat(frames).replace(-9999.0, np.nan)
station_df

## Read discharge data for Hunza at Dainyor Bridge


In [None]:
def read_discharge_for_year(year,
                            base_dir='/work/charis/ti_model/streamflow/Pakistan/Hunza'):
    """Read one single-year Hunza discharge file into a date-indexed DataFrame.

    The file read is <base_dir>/hunza_at_dainyor_bridge_daily_timeseries_<year>.txt.
    It has no header line and six whitespace-separated columns: year, month, day,
    doy, Q and one trailing column that is discarded.

    Parameters
    ----------
    year : int, year to read
    base_dir : str, directory holding the single-year discharge files

    Returns
    -------
    DataFrame with the single column 'Q', indexed by a 'Date' datetime index.

    Raises
    ------
    FileNotFoundError if the file for this year does not exist.
    """
    # The individual years are in a slightly different format from the multi-year file
    dischargeFile = '%s/hunza_at_dainyor_bridge_daily_timeseries_%d.txt' % (base_dir, year)

    # Raw string: '\s' in a normal string literal is an invalid escape sequence
    df = pd.read_csv(dischargeFile, header=None, sep=r'\s+',
                     names=['year', 'month', 'day', 'doy', 'Q', 'dummy'])

    # Build YYYYMMDD integers from the date columns and parse them as dates
    df['Date'] = pd.to_datetime(df.year*10000 + df.month*100 + df.day, format='%Y%m%d')
    df = df.set_index('Date', drop=True)

    # Only the discharge column is kept
    return df.drop(['year', 'month', 'day', 'doy', 'dummy'], axis=1)

In [None]:
# Multi-year discharge record for 1966-2000.
# NOTE(review): this file name spells "danyor" while the single-year files read by
# read_discharge_for_year spell "dainyor" -- confirm both match the files on disk.
dischargeFile = '/work/charis/ti_model/streamflow/Pakistan/Hunza/hunza_at_danyor_bridge_daily_timeseries_1966to2000.txt'

# Raw string: '\s' in a normal string literal is an invalid escape sequence
hunza = pd.read_csv(dischargeFile, header=None, sep=r'\s+',
                    names=['year', 'month', 'day', 'doy', 'Q'])

# Build YYYYMMDD integers from the date columns, parse them, and index by date
hunza['Date'] = pd.to_datetime(hunza.year*10000 + hunza.month*100 + hunza.day, format='%Y%m%d')
hunza = hunza.set_index('Date', drop=True)
hunza = hunza.drop(['year', 'month', 'day', 'doy'], axis=1)

# Keep only dates before 2001 from the multi-year file
hunza = hunza.loc[hunza.index < pd.to_datetime('2001-01-01')]

# Fill in the data for later years
# Sometimes this was missing in the original file, but sometimes it was there,
# but we have higher confidence in the individual year files per Andy's notes
frames = [read_discharge_for_year(year) for year in [2001, 2002, 2003, 2004, 2008, 2009, 2010]]
discharge_df = pd.concat(frames)
hunza = pd.concat([hunza, discharge_df])

# -9999.99 marks missing discharge values; turn them into NaN
hunza = hunza.replace(-9999.99, np.nan)

# Reindex, with nans for any missing dates so they don't get plotted with connecting lines
idx = pd.date_range(hunza.index[0], hunza.index[-1])
hunza = hunza.reindex(idx, fill_value=np.nan)

In [None]:
# Keep only the discharge record from Jan 1 of the first plotted year
# through Dec 31 of the last plotted year (both ends inclusive)
first_day = pd.to_datetime('%s-01-01' % Years[0])
last_day = pd.to_datetime('%s-12-31' % Years[-1])
hunza = hunza.loc[(hunza.index >= first_day) & (hunza.index <= last_day)]

In [None]:
# Quick-look plot of the discharge series for the selected years
fig, ax = plt.subplots(nrows=1, ncols=1)
hunza.loc[:, 'Q'].plot(ax=ax, color='dimgray', label='$Q$')

## For each station, fetch the data there and plot it as time series and histogram

In [None]:
do_MOD_lines = True
stations = [Khunjerab, Ziarat, Naltar]


def _pixel_series(cube, row, col):
    """Return the series along the first axis of a 3-D cube at one (row, col) pixel, as 1-D."""
    return cube[:, row:row+1, col:col+1].flatten()


def _add_MOD_lines(ax, mod_dates, label):
    """Draw a vertical line at each melt onset date; only the first line gets the legend label."""
    for i, mod in enumerate(mod_dates):
        if i == 0:
            ax.axvline(x=mod, color='orangered', linestyle="-", label=label)
        else:
            ax.axvline(x=mod, color='orangered', linestyle="-")


def _add_MOD_arrows(ax, mod_dates, tip_y, first_tail_y, other_tail_y, first_label, fontsize):
    """Draw an arrow ending at (date, tip_y) for each melt onset date.

    The first arrow starts at first_tail_y and carries first_label as its text;
    all later arrows start at other_tail_y and carry no text.
    """
    for i, mod in enumerate(mod_dates):
        ax.annotate(first_label if i == 0 else "",
                    xy=(mod, tip_y),
                    xytext=(mod, first_tail_y if i == 0 else other_tail_y),
                    arrowprops=dict(facecolor='orangered',
                                    headwidth=6,
                                    width=3,
                                    shrink=0.05),
                    fontsize=fontsize,
                    color='orangered',
                    horizontalalignment='center')


# NOTE(review): station_SIR_MOD, station_GRD_MOD and station_df come from earlier cells
# and belong to whichever station was selected there, while this loop picks its own
# station(s) from `stations` -- confirm that the two selections match before trusting
# the melt onset lines and the temperature panel.
for station in stations[1:2]:
    print(station)

    # Time series at the station's pixel: 25 km pixel for GRD, 3 km pixel for SIR
    station_GRD = _pixel_series(data_GRD['TB'], station['row25km'], station['col25km'])
    station_SIR = _pixel_series(data_SIR['TB'], station['row3km'], station['col3km'])
    station_GRD_DAV = _pixel_series(DAV_GRD, station['row25km'], station['col25km'])
    station_SIR_DAV = _pixel_series(DAV_SIR, station['row3km'], station['col3km'])

    # Collect the four series in one frame indexed by the GRD calendar dates
    grd_col_name = "%s_GRD" % channel
    sir_col_name = "%s_SIR" % channel
    grd_dav_col_name = "%s_GRD DAV" % channel
    sir_dav_col_name = "%s_SIR DAV" % channel
    df = pd.DataFrame(data=station_GRD, index=data_GRD['cal_date'], columns=[grd_col_name])
    df[sir_col_name] = station_SIR
    df[grd_dav_col_name] = station_GRD_DAV
    df[sir_dav_col_name] = station_SIR_DAV

    # Six stacked panels: SIR Tb, SIR DAV, GRD Tb, GRD DAV, station temperatures, discharge
    fig, axes = plt.subplots(6, 1, sharex=True, figsize=(12, 16))
    df[sir_col_name].plot(ax=axes[0], label=sir_col_name, color='skyblue')
    df[sir_dav_col_name].plot(ax=axes[1], label=sir_dav_col_name, color='skyblue')
    df[grd_col_name].plot(ax=axes[2], label=grd_col_name, color='dodgerblue')
    df[grd_dav_col_name].plot(ax=axes[3], label=grd_dav_col_name, color='dodgerblue')
    station_df['T_max(C)'].plot(ax=axes[4], label='$T_{max} (^oC)$', color='dimgray')
    station_df['T_min(C)'].plot(ax=axes[4], label='$T_{min} (^oC)$', color='darkgray')
    hunza['Q'].plot(ax=axes[5], label='Dainyor Bridge Discharge', color='black')

    # Set y axis labels
    axes[0].set_ylabel('$T_B$ ($K$)')
    axes[1].set_ylabel('DAV ($K$)')
    axes[2].set_ylabel('$T_B (K)$')
    axes[3].set_ylabel('DAV ($K$)')
    axes[4].set_ylabel('Temperature ($^oC$)')
    axes[5].set_ylabel('Q ($m^3 s^{-1}$)')

    axes[0].set_title("%s %s at %s" % (platform, sensor, station['name']))

    if do_MOD_lines:
        # SIR MODs on the SIR Tb and DAV panels, with arrows pointing at each line
        _add_MOD_lines(axes[0], station_SIR_MOD, "SIR Melt Onset")
        _add_MOD_lines(axes[1], station_SIR_MOD, "SIR Melt Onset")
        _add_MOD_arrows(axes[0], station_SIR_MOD, 200., 162., 162., " Melt Onset", 14)
        _add_MOD_arrows(axes[1], station_SIR_MOD, 70., 100., 110., "", 12)

        # GRD MODs on the GRD Tb and DAV panels, with arrows pointing at each line
        _add_MOD_lines(axes[2], station_GRD_MOD, "GRD Melt Onset")
        _add_MOD_lines(axes[3], station_GRD_MOD, "GRD Melt Onset")
        _add_MOD_arrows(axes[2], station_GRD_MOD, 200., 169., 169., " Melt Onset", 14)
        _add_MOD_arrows(axes[3], station_GRD_MOD, 70., 85., 95., "", 12)

        # SIR MODs on the station temperature and discharge panels
        _add_MOD_lines(axes[4], station_SIR_MOD, "SIR Melt Onset")
        _add_MOD_lines(axes[5], station_SIR_MOD, "SIR Melt Onset")

    # Add lines for Tb and DAV thresholds
    for ax in [axes[0], axes[2]]:
        ax.axhline(y=Tb_threshold, color='gray',
                   linestyle=":",
                   label="$Tb$ = %d $K$" % Tb_threshold)

    for ax in [axes[1], axes[3]]:
        ax.axhline(y=DAV_threshold, color='gray',
                   linestyle=":",
                   label="$DAV$ = %d $K$" % DAV_threshold)

    # Zero-degree reference line; this panel is in degrees C, so label it as such
    axes[4].axhline(y=0., color='gray',
                    linestyle=":",
                    label="$T$ = %d $^oC$" % 0.)

    for ax in axes:
        ax.legend(framealpha=0.5, loc='upper right')

    plt.tight_layout()
    outfile = '/projects/brodzik/pmesdr_melt_onset/data/%s_%s_%s_%d-%d_%s_rSIR_vs_GRD_TB_tseries.png' % (
        station['name'], platform, sensor, Years[0], Years[-1], channel)
    fig.savefig(outfile, dpi=300)
    print("Saved plot to %s" % outfile)
