In [1]:
!date

Mon Mar 22 13:49:10 EDT 2021


# Backward time loop to track coast-crossing vapor lakes (CCVLs)

------------


## each new assigned CCVL creates a folder called `tag/` 

### tag format is `yyyymmddhh_at_coast.meanlat`
    meanlat is the mean latitude of the overlap with a coastline or (simplest case) meridian
    
## tag/shapefiles/
    contains a set of files yymmddhh.fiona with a PolyList = [polygon,polygon,...] in each file

## tag/scratch.txt 
    used during loop (or can be an overwriteable memory object, but a file feels clearer right now)

## tag/times.txt    
    one line for each time level when CCVL `tag` exists: [time, nsegments, total-area, bbox]

In [1]:
import sys
import os
import xarray as xr
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors

from datetime import datetime, timedelta
import cartopy.crs as ccrs
import cartopy.feature as cfeat
from cartopy.util import add_cyclic_point
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

In [None]:
# Collect the MERRA-2 files whose names contain 201401 or 201501 (presumably yyyymm).
# A glob character class lists its members with no separator: '[4,5]' would also
# match a literal comma, so the class is written '[45]'.
# Sorting makes the file order deterministic; glob() itself returns files in arbitrary order.
# NOTE(review): MERRA2_dir is not defined in any visible cell -- set it before running.
files = sorted(glob(MERRA2_dir+'*201[45]01*'))
cwv_data = xr.open_mfdataset(files) # merging data files in one
cwv_data

----------
# Time loop over hours, backward in time


In [None]:
# Walk the time levels from last to first. `reversed` is the Python builtin for
# back-to-front iteration; `reverse` is not a defined name.
# NOTE(review): alltimes is not defined in any visible cell.
for time in reversed(alltimes):
    # NOTE(review): format() with no format spec behaves like str(time); an explicit
    # pattern is presumably needed to obtain the yymmddhh layout used in file names.
    yymmddhh = format(time)
    

## For each polygon in the "now" array, CWV_now: 

A **polygon** is a domain-enclosed patch which may contain holes. The Polygon constructor takes two positional parameters. The first is an ordered sequence of (x, y[, z]) point tuples. The second is an optional unordered sequence of ring-like sequences specifying the interior boundaries or “holes” of the feature. Rings of a valid Polygon may not cross each other, but may touch at a single point only.

In [None]:
    CWV_now = read(data)
    
# Need a function called **segment** that returns all the segments, and their maximum longitude (perhaps other properties too)
    cs = plt.contour(lon, lat, CWV_now, [55.])
    polygons = geometrize(cs)
    
# "inbounds" = just the ones not touching the edge of the domain   
    inbounds_polygons_now = polygons.where(convex_hull.lon.max() lt 100)

# Put them in a GeoPandas frame as the geometry column
    GeoFramePresent = gp.GeoSeries(inbounds_polygons_now)

## Memory items available: 

In working memory within the loop are always 3 [GeoPandas dataframes](https://geopandas.org/getting_started/introduction.html): 

0. GPdf_coast with the polygons of coastline, df_coast, no data columns 
1. GPdf_destined with the polygons of destiny (future in time, prior in backward time loop) 
2. GPdf_now with the polygons of the present

The geometry column contains all the holey polygons where CWV>55. The data columns are the tags of the active CCVLs, of which each holey polygon is a part. A holey polygon can be tagged as part of multiple CCVLs (non-null entry in multiple data columns), because physically it may be destined to split into parts that can each cross the coast. The data columns probably have a lot of empty or null entries, for polygons that are never destined to cross the coast. 

![geopandas dataframe tableau](https://geopandas.org/_images/dataframe.svg "GeoPandas dataframe schema")

# Loop over the active CCVLs 
(the rows in GPdf_destined with at least one non-null data column entry) 

In [None]:
# "All pandas DataFrame methods are also available, although they may not operate in a meaningful way on the geometry column.
    in_active_CCVLs = Gdf_destined.notnull().any(axis=1)
    
# For *EACH* of the polygons that is in any activeCCVL, test the overlap with *ALL* the GPdf_now.GeoSeries polygons

    for polygon in in_active_CCVLs.GeoColumn
        indices_1, indices_0 = CCVL.sindex.query_bulk(GPdf_0.GeoSeries, predicate='overlaps')
        overlapping = numpy.unique(indices_0)  # integer indeces of overlapping



In [None]:
# "All pandas DataFrame methods are also available, although they may not operate in a meaningful way on the geometry column.
# For *ALL* of the polygons that is in any activeCCVL, test the overlap with *ALL* the GPdf_now.GeoSeries polygons

    indices_destined, indices_now = Gdf_destined.notnull().any(axis=1).sindex.query_bulk(GPdf_0.GeoSeries, predicate='overlaps')

# Add data columns in Gfd_now which are identical with the data columns in Gdf_destined
    Gdf_now[indices_now] = Gdf_destined[indices_destined]
    
# But now we will have to rejigger the dataframe to assemble the CCVLs from all their parts. Hmm. 


## OK, we have iterates seg0, and seg1
### Do they overlap? 
### If so, then CCVL_next contains seg0

In [None]:
# from https://shapely.readthedocs.io/en/stable/manual.html
# object.overlaps(other)
# Returns True if the geometries have more than one but not all points in common, have the same dimension, and the intersection of the interiors of the geometries has the same dimension as the geometries themselves.

# Or, "If you want to check if the polygon overlaps with any other, you should check for that using geopandas spatial indexing capability."
# https://geopandas.org/getting_started/introduction.html#Geometry-relations
# https://geopandas.org/docs/reference/api/geopandas.sindex.SpatialIndex.query_bulk.html

# Query every geometry in `shapes` against the spatial index of the same frame.
# query_bulk returns (input indices, tree indices).
# NOTE(review): `shapes` is not defined in any visible cell.
input_indices, result_indices = shapes.sindex.query_bulk(shapes.geometry, predicate='overlaps')
# The notebook imports numpy as `np`; the bare name `numpy` is not bound.
overlapping = np.unique(result_indices)  # integer indices of overlapping


# Faster is PyGEOS: 
# https://pygeos.readthedocs.io/en/latest/
#
# from_shapely(geometry, **kwargs) # Creates geometries from shapely Geometry objects.
# prepare(geometry, **kwargs)      # Prepare a geometry, improving performance of other operations.
#overlaps(a, b, **kwargs) #Returns True if A and B spatially overlap.
#Compute the area of all possible intersections of two lists of polygons:
#>>> area(intersection(polygons_x[:, np.newaxis], polygons_y[np.newaxis, :]))
#array([[100.,  90.,  80.,  70.,  60.],
#     [ 90.,  81.,  72.,  63.,  54.], ...
# geometrycollections(geometries, indices=None, **kwargs)
    

In [None]:
                if overlap(seg1, seg0):
                    # consequence 1: this CCVL remains active (so CCVLkey belongs in the CCVL_new dictionary for next time iteration)
                    # that dictionary's value is a list of segments within CWV0: setdefault starts an
                    # empty list the first time CCVLkey is seen, and the segment is appended to it
                    # (a bare append(seg0) is not a defined function).
                    CCVL_new.setdefault(CCVLkey, []).append(seg0)
                    
                    # consequence 2: seg0 is not eligible to pioneer a new CCVL. Need to mark it somehow. 
                    segflags[iseg] = True

-------------
----------------
## Strategize the desired outputs, so we can write results as they are obtained:

----------------
## The use of the output will be to 

    * screen whole-lifetime CCVL events based on lines in `CClakes.txt`
    * For selected events, further screen the hourly objects, based on lines in `tag/times.txt`
    * Visualize lakes at selected times by placing shapefile contours on a geographical map, perhaps color-coded by time or by tag
-----------------
-----------------

### Therefore, at this step, we need to append seg0 as a shape in `tag/shapefiles/yymmddhh.contours`
### and also tally up seg0's contribution to the bulk statistics of CCVLkey at this time zero 



In [None]:
                    # Append seg0 as a shape to this tag's shapefile for the current time level,
                    # then tally its contribution to the CCVL's statistics at this time.
                    # NOTE(review): contour_append and update_1timetagstats are not defined in any visible cell.
                    # NOTE(review): the '.contours' extension differs from the '.fiona' files described in the notebook header -- confirm which is intended.
                    contour_append(tag + '/shapefiles/' + yymmddhh + '.contours', seg0)  
                    update_1timetagstats( tagstats, seg0 ) # nsegs, npixels, kgwater, max/min lat/lon

#### End of loop over all seg0, and all seg1 for a given CCVL. If tagstats_t0[0] is still at its initial value of zero, the CCVL quietly ends. 

In [None]:

                # end for seg0 in inbounds            
            # end for seg1 in CCVLlist
            # Write a stats line only when the first stats entry is non-zero
            # (presumably the segment count for this time level).
            # NOTE(review): the test reads tagstats_t0 but the write passes tagstats -- confirm they are meant to be the same object.
            if (tagstats_t0[0] > 0): 
                # The time string is yymmddhh, as assigned at the top of the time loop; yymmhhdd was never defined.
                append_1timetagstats(tagstats, tag+'/'+yymmddhh+'.txt')

# What if some seg0 overlaps the coast, but does not overlap any segment of an already-active CCVL? 
# A new CCVL! 

In [None]:
# Any coast-touching segment that was not claimed by an active CCVL pioneers a new one.
# NOTE(review): inbounds, coastline, segflags, does_overlap and meanlat_of_overlap are not defined in any visible cell.
for iseg, seg0 in enumerate(inbounds):
    if does_overlap(coastline, seg0) and not segflags[iseg]:
        # Create a new 'tag' based on time and the latitude of the coast-crossing segment
        lala = meanlat_of_overlap(coastline,seg0)
        newtag = yymmddhh + '_' + str(lala)
        # Register the segment under the tag just built. The original stored it under the
        # stale CCVLkey, which would attach the new lake to an unrelated CCVL.
        # setdefault keeps earlier segments if two segments produce the same tag.
        CCVL_new.setdefault(newtag, []).append(seg0)


### end of time zero (t0). Close the time loop and iterate. 

In [None]:
# end for time in reverse(alltimes): 
# Overwrite the dictionary of active CCVLs for the next iteration 
# The dictionary filled during the loop is named CCVL_new; CCVLs_new is never assigned.
# NOTE(review): for the hand-over to happen every time step, this assignment presumably
# belongs inside the time loop, followed by a fresh empty CCVL_new.
CCVLs_active = CCVL_new
# END CODE: ITERATION BACKWARD OVER TIME NOW REPEATS

-----------------
-----------------
-----------------

# a SEPARATE code can then glob over all the tag/yymmddhh.txt files and construct `CClakes.txt`
## with one line per tag, comprising the stats over the whole lifetime of each CCVL
### summing up start and end date, lifetime in hours, total (area x hours), total (vapor x hours), bounding box (convex hull), centroid, etc. etc. 

#### All times (and thus tags and filenames) are based on the time of *last* contact with the coastline (first encountered, in the reverse-time flow of the algorithm's time loop). 

In [None]:
# GeoPandas 
# https://geopandas.org/getting_started/introduction.html#Geometry-relations

# convex hull 

# gdf["convex_hull"] = gdf.convex_hull

# Open the data file of all time slices 

In [6]:
!ls -atlh /data2/brian/WEIO_30-100_20S-20N_2014-8.nc

-rw-rw-r-- 1 bmapes bmapes 755M Mar 10 18:55 /data2/brian/WEIO_30-100_20S-20N_2014-8.nc


In [7]:
# Open the single NetCDF file that holds all the time slices, then display its summary.
weio_path = '/data2/brian/WEIO_30-100_20S-20N_2014-8.nc'
TQV_WEIO = xr.open_dataset(weio_path)
TQV_WEIO

In [4]:
# playing with dictionaries for syntax test 
str1 = '2020030123'
str2 = 'dog'

# Use a name other than `dict`: rebinding the builtin breaks every later dict(...) call
# and isinstance(x, dict) check in this kernel.
tag_demo = {}
tag_demo.update({0.34 : str2})

tag_demo
# print(tag_demo[str1])


{0.34: 'dog'}