```
conda install shapely rtree
```

In [47]:
import os
import sys
import arcgis
from shapely.wkb import loads
from shapely.geometry import LineString
from spark_esri import spark_start, spark_stop

In [48]:
# Stop Spark before it is (re)started in the next cell.
# NOTE(review): presumably this clears a session left over from an earlier
# run and is harmless when none exists - confirm against spark_esri.
spark_stop()

In [49]:
# Spark settings handed to spark_start(); spark.driver.memory sets the
# driver memory to 2 GB.
config = {"spark.driver.memory":"2G"}
# `spark` is used by the following cells as the Spark session
# (sparkContext, createDataFrame, sql).
spark = spark_start(config=config)

In [50]:
# Spatial reference WKID 3857 (WGS 84 / Web Mercator, meter units). It is
# passed to both search cursors below so that gates and positions are read
# in the same planar coordinate system, and reused for the output points.
sp_ref = arcpy.SpatialReference(3857)

In [51]:
# Read the id and the geometry (as well-known binary) of every row in
# "Gates", in sp_ref coordinates, and broadcast the resulting list so that
# Spark tasks can read it through bv.value.
fields = ["GateID","SHAPE@WKB"]
with arcpy.da.SearchCursor("Gates", fields, spatial_reference=sp_ref) as rows:
    # list(rows) drains the cursor while it is still open.
    bv = spark.sparkContext.broadcast(list(rows))

In [52]:
# Load the position reports from "Broadcast" (x/y in sp_ref coordinates,
# MMSI identifier, timestamp) into Spark and register them as temp view "v0".
fields = ["SHAPE@X","SHAPE@Y","MMSI","BaseDateTime"]
with arcpy.da.SearchCursor("Broadcast", fields, spatial_reference=sp_ref) as rows:
    # unix_timestamp(t) turns the timestamp into whole seconds since the
    # epoch, so the time differences computed later are in seconds.
    spark\
        .createDataFrame(rows, "x double,y double,mmsi string,t timestamp")\
        .selectExpr("mmsi","x","y","unix_timestamp(t) t")\
        .createOrReplaceTempView("v0")

In [53]:
# v1: pair every position (x1, y1, t1) with the next position of the same
# mmsi in time order (x2, y2, t2). The last position of each mmsi has no
# successor and receives the lead() defaults 0.0 / 0 instead.
spark\
    .sql("""
select mmsi,
x x1,
y y1,
t t1,
lead(x,1,0.0) over (partition by mmsi order by t) x2,
lead(y,1,0.0) over (partition by mmsi order by t) y2,
lead(t,1,0) over (partition by mmsi order by t) t2
from v0
""")\
    .createOrReplaceTempView("v1")

In [54]:
# v2: add the coordinate deltas (dx, dy) and the elapsed time (dt) of each
# pair. Only pairs whose second timestamp is strictly later are kept, so
# dt > 0; this also discards the rows where t2 is the lead() default of 0
# (assuming positive epoch seconds in t1).
spark.sql("select *,(x2-x1) dx,(y2-y1) dy,(t2-t1) dt from v1 where t1 < t2").createOrReplaceTempView("v2")

In [55]:
# v3: keep the endpoints and dt, and replace dx/dy with the straight-line
# distance dd between the two positions (in sp_ref coordinate units).
spark.sql("select mmsi,x1,y1,x2,y2,sqrt(dx*dx+dy*dy) dd,dt from v2").createOrReplaceTempView("v3")

In [56]:
# v4: add the average speed over the segment, i.e. distance divided by
# elapsed seconds (dt > 0 is guaranteed by the filter that builds v2).
spark.sql("select *,dd/dt mps from v3").createOrReplaceTempView("v4")

In [57]:
# Keep only the segments with a length between 1 and 1500 (inclusive), a
# speed below 25 and a time gap below 130 seconds; only the two endpoints
# are carried forward.
df1 = spark.sql("""
select x1,y1,x2,y2
from v4
where dd between 1 and 1500
and mps < 25
and dt < 130
""")

In [58]:
def func(partition):
    """Yield the gate crossings of the path segments in one partition.

    Every row of ``partition`` must expose ``x1``, ``y1``, ``x2`` and ``y2``
    (the two endpoints of a path segment). Each segment is intersected with
    every gate held in the broadcast variable ``bv``, whose value is a
    sequence of ``(gate_id, wkb)`` pairs.

    Yields:
        ``(x, y, gate_id, travel_dir)`` tuples, one per crossing point.
        ``travel_dir`` is ``"RL"`` when the 2D cross product of the path
        vector and the gate vector is negative and ``"LR"`` otherwise.

    A segment that meets a gate in several points (for example a gate with
    more than two vertices) yields one tuple per point. A segment that only
    overlaps a gate along a line has no single crossing point and yields
    nothing for that gate.
    """
    # Local imports keep the function body self-contained.
    from shapely.wkb import loads
    from shapely.geometry import LineString

    def name_geom(_g):
        """Convert a ``(gate_id, wkb)`` pair to ``(gate_id, geom, vx, vy)``.

        ``(vx, vy)`` is the vector from the first to the last vertex of the
        gate line.
        """
        geom = loads(bytes(_g[1]))
        coords = geom.coords
        head = coords[0]
        last = coords[-1]
        vx = last[0] - head[0]
        vy = last[1] - head[1]
        return _g[0], geom, vx, vy

    def iter_points(geom):
        """Yield every non-empty Point contained in an intersection result."""
        if geom.is_empty:
            return
        if geom.geom_type == "Point":
            yield geom
        elif hasattr(geom, "geoms"):
            # MultiPoint / GeometryCollection: descend into the members.
            for part in geom.geoms:
                yield from iter_points(part)
        # Any other type (a linear overlap) exposes no x/y and is skipped.

    # Decode the gates once per partition from the broadcast variable.
    gates = [name_geom(v) for v in bv.value]

    # Perform cartesian product of paths and gates.
    for row in partition:
        x1 = row["x1"]
        y1 = row["y1"]
        x2 = row["x2"]
        y2 = row["y2"]
        px = x2 - x1
        py = y2 - y1
        path = LineString([(x1, y1), (x2, y2)])
        for gate_id, gate_geom, gx, gy in gates:
            hit = gate_geom.intersection(path)
            if hit.is_empty:
                continue
            # The sign of the cross product tells on which side of the gate
            # vector the path is heading.
            cross = px * gy - py * gx
            lr_rl = "RL" if cross < 0.0 else "LR"
            for point in iter_points(hit):
                yield point.x, point.y, gate_id, lr_rl

In [59]:
# Intersect every path segment with the broadcast gates, one partition at a
# time. The result is cached because it feeds more than one action below
# (the collect() of the points and the per-gate statistics query on "v5").
#
# An explicit schema is supplied instead of bare column names: with names
# only, Spark infers the column types from the first row and raises
# "RDD is empty" when no segment crosses any gate.
# NOTE(review): gate_id is typed bigint to match the LONG GATE_ID output
# field - confirm that GateID is an integer field in "Gates".
df2 = df1\
    .rdd\
    .mapPartitions(func)\
    .toDF("gate_x double,gate_y double,gate_id bigint,travel_dir string")\
    .cache()

df2.createOrReplaceTempView("v5")

In [60]:
# Bring all crossing points to the driver as a list of rows; they are
# written out with an arcpy insert cursor in the next cell.
rows = df2.collect()

In [61]:
# Write the collected crossing points to the point feature class
# "GatePoints" in the "memory" workspace.
ws = "memory"
nm = "GatePoints"

fc = os.path.join(ws,nm)

# Delete the feature class first so the cell can be re-run.
arcpy.management.Delete(fc)

arcpy.management.CreateFeatureclass(ws, nm, "POINT", spatial_reference=sp_ref)
arcpy.management.AddField(fc, "GATE_ID", "LONG")
arcpy.management.AddField(fc, "TRAVEL_DIR", "TEXT")

# Each collected row is (gate_x, gate_y, gate_id, travel_dir), which is the
# same order as the cursor fields.
with arcpy.da.InsertCursor(fc, ["SHAPE@X","SHAPE@Y","GATE_ID","TRAVEL_DIR"]) as cursor:
    for row in rows:
        cursor.insertRow(row)

In [64]:
# Count the crossings per gate and travel direction, sorted by gate and
# direction, and collect the result on the driver.
rows = spark.sql("""
select gate_id,travel_dir,count(1) cnt
from v5
group by gate_id,travel_dir
order by gate_id,travel_dir
""")\
    .collect()

In [65]:
# Write the per-gate / per-direction counts to the table "GateStats" in the
# "memory" workspace.
ws = "memory"
nm = "GateStats"

fc = os.path.join(ws,nm)

# Delete the table first so the cell can be re-run.
arcpy.management.Delete(fc)

arcpy.management.CreateTable(ws, nm)
arcpy.management.AddField(fc, "GATE_ID", "LONG")
arcpy.management.AddField(fc, "TRAVEL_DIR", "TEXT")
arcpy.management.AddField(fc, "TRAVEL_CNT", "LONG")

# Each collected row is (gate_id, travel_dir, cnt), which is the same order
# as the cursor fields.
with arcpy.da.InsertCursor(fc, ["GATE_ID","TRAVEL_DIR","TRAVEL_CNT"]) as cursor:
    for row in rows:
        cursor.insertRow(row)

In [None]:
# Shut Spark down; the results have already been collected to the driver
# and written out with arcpy.
spark_stop()