# tidal_classification

**Summary**: Iterates through ITEMv2 polygons, extracts the Landsat observations for each polygon, and calculates the offsets and spread of the observed tide heights compared to the entire OTPS tidal model. Results are exported to a .csv output file.

**Issues**: Currently fails after polygon 220 when producing annotated plots.

**Notes**: Tidal model code based on original by Claire Phillips.

In [1]:
# Import all modules
import datacube
import xarray as xr
import matplotlib.dates
import fiona
import shapely
import shapely.geometry
import rasterio
import rasterio.features
import matplotlib.pyplot as plt
import numpy as np
import itertools
import otps
import sys
import pandas as pd
import matplotlib as mpl

from __future__ import print_function
from shapely.geometry import Point, Polygon, MultiPolygon, mapping, shape
from fiona import collection
from datacube.utils import geometry
from datacube.api.query import query_group_by
from otps import TimePoint, predict_tide
from dateutil.relativedelta import relativedelta
from operator import itemgetter
from datetime import date, datetime, timedelta
from matplotlib import cm
from datetime import datetime as datetime
from IPython.display import display

# Datacube handle used further down to search for datasets
# (dc.find_datasets) and to group them (dc.group_datasets)
dc = datacube.Datacube(app='Tidal offsets')

# User to define primary variables - Location

In [113]:
from datetime import datetime as datetime

def date_range(start_date, end_date, increment, period):
    
    """
    Generate dates separated by a given time increment/period.

    start_date: first date of the series (included in the result)
    end_date: last permitted date; included if a step lands exactly on it
    increment: number of `period` units between consecutive dates
    period: name of a relativedelta keyword argument, e.g. 'hours'

    Returns a list of dates from start_date up to end_date. The list is
    empty if start_date is later than end_date.

    Raises ValueError if the requested step does not move forward in time,
    because the series could then never pass end_date.
    """
    
    delta = relativedelta(**{period: increment})

    # A zero or negative step previously looped forever while appending to
    # the result list; fail loudly instead. Only checked when the loop
    # would actually run, so an empty range still returns []
    if start_date <= end_date and start_date + delta <= start_date:
        raise ValueError("increment/period must advance the date: " +
                         str(increment) + " " + str(period))

    result = []
    nxt = start_date
    while nxt <= end_date:
        result.append(nxt)
        nxt += delta
    return result


def datetime_slicing(input_date):
    
    """
    Faster equivalent of datetime.strptime: uses slices to extract a
    datetime object from a date string in format %Y-%m-%d %H:%M:%S

    input_date: date string; only the fixed character positions of each 
    field are read, so the separator characters themselves are not checked

    Returns a datetime, or None if a field is not a valid integer or the 
    values do not form a valid date/time. Seconds are optional: strings 
    that stop at the minute give a datetime with seconds set to 0.
    """
    
    try:
        year = int(input_date[:4])
        month = int(input_date[5:7])
        day = int(input_date[8:10])
        hour = int(input_date[11:13])
        minute = int(input_date[14:16])

        # Seconds used to be dropped silently. Only read them when they
        # are really present (':' separator followed by digits) so that
        # minute-precision strings keep parsing exactly as before
        second_text = input_date[17:19]
        has_seconds = input_date[16:17] == ':' and second_text.isdigit()
        second = int(second_text) if has_seconds else 0

        return datetime(year, month, day, hour, minute, second)

    except ValueError:
        # Unparseable or out-of-range field
        return None

# Shapefile opened with fiona further down to read each polygon's
# ID, lon, lat and geometry
filepath = '/g/data/r78/intertidal/GA_native_tidal_model.shp'

# Product names queried from the datacube for every polygon
products = [
    'ls5_pq_albers',
    'ls7_pq_albers',
    'ls8_pq_albers',
]

# Global time range as (start, end) date strings
time_period = ('1986-01-01', '2018-01-01')

# Removes SLC failure
LS7_SLC_DT = datetime(2003, 5, 1)

# Create empty list for offset values
offset_spread = []

# Iterate through polygons
#
# For every ITEM polygon ID this loop:
#   1. predicts hourly tide heights at the polygon's lon/lat for the whole
#      of time_period,
#   2. finds the acquisition times of the datasets intersecting the polygon
#      and predicts the tide height at each of those times,
#   3. compares the observed min/max heights against the modelled min/max
#      and appends [polygon, offset, offset_low, offset_high, spread] to
#      offset_spread.

# Read the polygon shapefile once. It is the same file for every polygon, 
# so there is no need to re-open and re-scan it twice per iteration
with fiona.open(filepath) as Input:
    crs = geometry.CRS(str(Input.crs_wkt))
    item_features = dict()
    for feature in Input:
        # Keep the first feature found for each ID
        item_features.setdefault(feature['properties']['ID'], feature)

# Reformat time strings for start and end of period
start = datetime.strptime(time_period[0] + " 00:00:00", "%Y-%m-%d %H:%M:%S")
end = datetime.strptime(time_period[1] + " 00:00:00", "%Y-%m-%d %H:%M:%S")

# Hourly time steps between start and end of time period. These are 
# identical for every polygon, so they are generated a single time
model_times = date_range(start, end, 1, 'hours')

# LS7 is only used up to the SLC failure date
ls7_time_range = (time_period[0], LS7_SLC_DT.strftime("%Y-%m-%d"))

# Grouping rule applied to the datasets found for each product
group_by = query_group_by(group_by='solar_day')

for polygon in range(1, 307):

    print("Processing polygon " + str(polygon))

    # Without this check a missing ID would silently reuse the lon/lat of
    # another feature
    feature = item_features.get(polygon)
    if feature is None:
        print("Skipping polygon " + str(polygon) + 
              ": ID not found in " + filepath)
        continue

    try:

        lon = feature['properties']['lon']
        lat = feature['properties']['lat']

        #########################################################
        # modelled tide heights and dates for global time range #
        #########################################################

        print("Processing modelled tide heights")

        # Convert each hourly time step to a timepoint for the model
        tp_model = [TimePoint(lon, lat, dt) for dt in model_times]

        # For each timestep in list, predict tide and add to list
        tides_model = predict_tide(tp_model)

        # NOTE(review): only the date part of each timestamp is kept, so
        # sorting orders the values within a day by height rather than by
        # time. The min/max statistics below are unaffected - confirm 
        # before relying on this order for plotting
        tide_det = sorted([tt.timepoint.timestamp.isoformat()[0:10], tt.tide_m]
                          for tt in tides_model)

        # Create pd dataframe of dates and heights
        df2_data = {'Model_height': [x[1] for x in tide_det]}
        df2_index = [x[0] for x in tide_det]
        df2_model = pd.DataFrame(df2_data, index=df2_index) 


        #########################################################
        # Observed tide heights and dates for global time range #
        #########################################################

        print("Processing observed tide heights")

        # Area covered by the polygon, for input to dc call
        first_geometry = feature['geometry']
        geom = geometry.Geometry(first_geometry, crs=crs)

        all_times = list()

        # For each product:
        for source in products:
            time_range = ls7_time_range if source == 'ls7_pq_albers' else time_period

            # Determine matching datasets for geom area grouped by solar day
            ds = dc.find_datasets(product=source, 
                                  time=time_range, 
                                  geopolygon=geom, 
                                  group_by='solar_day')
            sources = dc.group_datasets(ds, group_by)

            # If data is found, add its times to the list
            if ds:
                all_times.extend(sources.time.data.astype('M8[s]').astype('O').tolist())

        # Sort once, after all products have been collected
        all_times.sort()

        # Calculate tide data from X-Y-time location 
        tp = [TimePoint(lon, lat, dt) for dt in all_times]
        tides = predict_tide(tp)

        # Add resulting data to dict (one height per distinct time)
        tide_dic = dict()
        for tt in tides:
            tt_time = datetime_slicing(tt.timepoint.timestamp.isoformat()[0:19])
            tide_dic[tt_time] = tt.tide_m

        # Dict keys are unique, so this orders the pairs by date/time
        tide_data = sorted(tide_dic.items())

        # Create dataframe of tide heights, indexed by date (UTC)
        df1_data = {'Tide_height': [x[1] for x in tide_data]}
        df1_index = [x[0].strftime('%Y-%m-%d') for x in tide_data]
        df1_obs = pd.DataFrame(df1_data, index=df1_index)


        ########################################
        # Tide observations offsets and spread #
        ########################################

        print("Computing offsets and spread")

        # Modelled data stats
        model_low = df2_model.Model_height.min()
        model_high = df2_model.Model_height.max()
        model_range = model_high - model_low

        # Observed data stats
        obs_low = df1_obs.Tide_height.min()
        obs_high = df1_obs.Tide_height.max()
        obs_range = obs_high - obs_low

        # Calculate offsets and spreads, all expressed as a fraction of 
        # the modelled tidal range:
        #   offset      - observed mid-range relative to the modelled mid-range
        #   offset_low  - gap between modelled and observed lowest tide
        #   offset_high - gap between observed and modelled highest tide
        #   spread      - observed range
        offset = (((obs_low + obs_high) / 2) - model_low) / model_range - 0.5
        offset_low = (obs_low - model_low) / model_range
        offset_high = (model_high - obs_high) / model_range
        spread = obs_range / model_range

        # Append to list
        print("    Offset, low tide offset, high tide offset and spread:")
        print("   ", offset, offset_low, offset_high, spread)    
        offset_spread.append([polygon, offset, offset_low, offset_high, spread])


#         ########
#         # Plot #
#         ########

#         try:

#             # Plot setup
#             fig = plt.figure(figsize=(9, 3))
#             mpl.rcParams['agg.path.chunksize'] = 10000
#             plt.ylabel('Tide height (m)')
#             plt.title('Available data for polygon ' + str(polygon) + 
#                       ' (offset = ' + str(round(offset, 2)) + ', spread = ' + 
#                       str(round(spread, 2)) + ')')

#             # Data series, pepare x-axis
#             obs_index = pd.DatetimeIndex(df1_index)
#             obs_index = obs_index.to_datetime()
#             obs_index = np.array(obs_index.to_pydatetime(), dtype = numpy.datetime64)

#             # Plot observations
#             scatter(obs_index, df1_obs.Tide_height, s=10, color='black', 
#                     marker='o', zorder=2, label = 'observations') # All observations

#             ## Model values, prepare x-axis
#             model_index = pd.DatetimeIndex(df2_index)
#             model_index = model_index.to_datetime()
#             model_index = np.array(model_index.to_pydatetime(), dtype = numpy.datetime64)

#             # Plot model values
#             plot(model_index, df2_model.Model_height, color='lightgray', 
#                  linewidth=0.5, zorder=1, label = 'OTPS model')

#             # Create percentage lines
#             per10_list = []
#             for PERC in [0, offset_low, offset + 0.5, 1-offset_high, 1.0]:
#                 lmr = PERC * model_range + model_low  # low tide max range
#                 per10_list.append([round(PERC, 2), lmr]) 

#             per10_data = {'PERC_tide_height': list(x[1] for x in sorted(per10_list))}
#             per10_index = list(x[0] for x in sorted(per10_list))
#             df_per10 = pd.DataFrame(per10_data, per10_index)

#             # Add lines to plot
#             for i in per10_list:
#                 plot([model_index[0], 
#                       model_index[-1]], 
#                      [df_per10.PERC_tide_height[i],                                             
#                       df_per10.PERC_tide_height[i]], 'black', linewidth=0.4)
#                 plt.text(model_index[-1] + 100, i[1] - 0.025, str(i[0]), fontsize=8)

#             # Force space for y axis by adding data to x axis
#             plot([(model_index[0]), 
#                   (model_index[-1] + 1000)], 
#                  [(df_per10.PERC_tide_height[0] - 0.3),
#                   df_per10.PERC_tide_height[0]], 'none', linewidth=0)

#             plt.savefig('figures/tidal_polygons/' + str(polygon) + '_tidal_data.jpg')        
#             %matplotlib inline
#             display(fig)

#         except:
#             print("Figure failed")

        # Drop references to the large per-polygon objects
        tides_model = None
        tides = None
        tide_data = None
        tp_model = None
        df2_data = None
        df2_model = None
        df1_data = None
        df1_obs = None

    except Exception as err:

        # Best effort: carry on with the next polygon, but report why this
        # one failed. Catching Exception rather than using a bare except 
        # lets KeyboardInterrupt stop the run
        print("Skipping polygon " + str(polygon) + ": " + repr(err))

# Create dataframe
# Each row of offset_spread is [ID, offset, offset_low, offset_high, spread].
# Sort the rows themselves and take the index from the sorted rows, so that
# the index labels always line up with the rows they describe
offset_spread_rows = sorted(offset_spread)
offset_spread_index = [row[0] for row in offset_spread_rows]
offset_spread_df = pd.DataFrame(offset_spread_rows,
                                columns=["ID", "offset", 
                                         "offset_low", "offset_high", "spread"],
                                index=offset_spread_index)

# The ID is already a column, so the index is not written out
offset_spread_df.to_csv("item_offset_spread_1-306.csv", index=False)


Processing polygon 221
Processing modelled tide heights
Processing observed tide heights
Computing offsets and spread
    Offset, low tide offset, high tide offset and spread:
    0.0520951302378 0.14892412231 0.0447338618347 0.806342015855
Processing polygon 222
Processing modelled tide heights
Processing observed tide heights
Computing offsets and spread
    Offset, low tide offset, high tide offset and spread:
    0.066000377145 0.222326984726 0.0903262304356 0.687346784839
Processing polygon 223
Processing modelled tide heights
Processing observed tide heights
Computing offsets and spread
    Offset, low tide offset, high tide offset and spread:
    0.0710676995621 0.210845402492 0.0687100033681 0.720444594139
Processing polygon 224
Processing modelled tide heights
Processing observed tide heights
Computing offsets and spread
    Offset, low tide offset, high tide offset and spread:
    0.0649466192171 0.139383155397 0.00948991696323 0.851126927639
Processing polygon 225
Processing

Processing polygon 255
Processing modelled tide heights
Processing observed tide heights
Computing offsets and spread
    Offset, low tide offset, high tide offset and spread:
    0.0065180102916 0.0905660377358 0.0775300171527 0.831903945111
Processing polygon 256
Processing modelled tide heights
Processing observed tide heights
Computing offsets and spread
    Offset, low tide offset, high tide offset and spread:
    0.0862469171623 0.196286087335 0.0237922530103 0.779921659655
Processing polygon 257
Processing modelled tide heights
Processing observed tide heights
Computing offsets and spread
    Offset, low tide offset, high tide offset and spread:
    0.00773036487322 0.170686456401 0.155225726654 0.674087816945
Processing polygon 258
Processing modelled tide heights
Processing observed tide heights
Computing offsets and spread
    Offset, low tide offset, high tide offset and spread:
    0.1094466524 0.243656374197 0.0247630693977 0.731580556405
Processing polygon 259
Processing 

Processing observed tide heights
Computing offsets and spread
    Offset, low tide offset, high tide offset and spread:
    -0.00534963698892 0.0206343141001 0.031333588078 0.948032097822
Processing polygon 290
Processing modelled tide heights
Processing observed tide heights
Computing offsets and spread
    Offset, low tide offset, high tide offset and spread:
    -0.0746896275531 0.046856227473 0.196235482579 0.756908289948
Processing polygon 291
Processing modelled tide heights
Processing observed tide heights
Computing offsets and spread
    Offset, low tide offset, high tide offset and spread:
    0.0184753661784 0.109853528628 0.0729027962716 0.8172436751
Processing polygon 292
Processing modelled tide heights
Processing observed tide heights
Computing offsets and spread
    Offset, low tide offset, high tide offset and spread:
    0.118195564516 0.264364919355 0.0279737903226 0.707661290323
Processing polygon 293
Processing modelled tide heights
Processing observed tide heights
