# Sandbox for playing around with heading-based segmentation, etc.

From Katy, with cosmetic tweaks by Emilio

In [1]:
import movingpandas as mpd
import geopandas as gpd
from matplotlib import pyplot as plt
from shapely.geometry import Point
import pandas as pd
import numpy as np
import hvplot.pandas
import geojson

In [2]:
# Display the installed MovingPandas version so the run can be reproduced
mpd.__version__

'0.2.rc2'

In [3]:
# GeoJSON file with the track points; a bare filename, so it is resolved against the working directory
file = 'ooinhdeep_points.geojson'

In [4]:
# Parse the GeoJSON file; the context manager closes the file handle once loading is done
with open(file) as f:
    gj = geojson.load(f)
# First feature only (a single feature, despite the plural name).
# NOTE(review): `features` is not referenced again in the visible cells — confirm whether it is still needed.
features = gj['features'][0]

In [5]:
#Extract lats and lons from each feature's properties (not from its geometry)
feature_props = [feat['properties'] for feat in gj['features']]
lats = [props['latitude'] for props in feature_props]
lons = [props['longitude'] for props in feature_props]

In [6]:
# Tabulate the coordinates, one row per point, in the order they appear in the file
df = pd.DataFrame({'lat': lats, 'lon': lons})
# Quick-look plot of the raw track (longitude on x, latitude on y)
df.hvplot(x='lon', y='lat')

In [7]:
#Create a GeoDataFrame with one point geometry per row
# points_from_xy builds the whole geometry array in a single vectorized call
# (x = longitude, y = latitude) instead of constructing Point objects one by one.
geom = gpd.points_from_xy(df['lon'], df['lat'])
# Declare the coordinates as EPSG:4326 (this only labels the CRS; nothing is reprojected)
gdf = gpd.GeoDataFrame(df, geometry=geom).set_crs(epsg=4326)

In [8]:
#Row-to-row differences in lat and lon, used to calculate the angle with arctan2.
# Only the numeric coordinate columns are differenced: diffing the whole GeoDataFrame would also
# push the geometry column through the subtraction, which is not needed for the angle.
# periods=-1 subtracts the NEXT row from the current one, so the last row is NaN.
# TODO: think about what to do with the last point.
gdf_diff = gdf[['lat', 'lon']].diff(periods=-1)

  gdf_diff = gdf.diff(periods=-1)


In [9]:
# Angle (radians, as returned by arctan2) of each row's (lon, lat) difference vector.
# NOTE(review): the differences are current-minus-next and are taken in raw degree space, so this is
# a relative direction measure rather than a true compass bearing — confirm that is sufficient.
gdf['theta'] = np.arctan2(gdf_diff['lat'], gdf_diff['lon'])

In [10]:
# Tolerance on the segmentation criterion (same value that is passed to iterative_segmentation below)
tol = .3
# NOTE(review): n is not referenced in any of the visible cells — confirm whether it is still needed
n = 500

In [11]:
def iterative_segmentation(gdf, criterion, tol):
    """
    Label each row with a trajectory id, starting a new trajectory at every large step in `criterion`.

    Every row is compared with the row immediately before it (the comparison value is updated on
    every row). This method is more robust to changes in curvature that may not represent a new
    trajectory, because a slow drift never accumulates into a break.

    gdf is the GeoDataFrame with tracks that you're interested in
    criterion is a column name in the gdf that you're using to segment — for example, angle if you're looking
        at changes in heading, or speed
    tol is the tolerance to a given criterion. For example, a tolerance of .3 means that a change of
        less than .3 in criterion value between two rows will be allowed; a change of .3 or more
        starts a new trajectory

    Returns the same gdf object, modified in place, with an integer 'traj_id' column
    (0 for the first trajectory, increasing by one at each break).
    """
    # Absolute change relative to the previous row, by position rather than by index label,
    # so the frame does not need a 0-based index.
    step = gdf[criterion].diff().abs()
    # Comparisons against NaN are False, so the first row and rows next to missing values
    # never start a new trajectory. The running count of breaks is the trajectory id.
    # to_numpy() assigns positionally and avoids any index alignment.
    gdf['traj_id'] = (step >= tol).cumsum().to_numpy()
    return gdf

In [12]:
def direction_segmentation(gdf, criterion, tol):
    """
    Label each row with a trajectory id, starting a new trajectory when `criterion` has moved
    at least `tol` away from the value at the start of the current trajectory.

    Unlike a row-to-row comparison, the reference value only changes when a new trajectory
    starts, so a slow cumulative drift eventually triggers a break.

    gdf is the GeoDataFrame with tracks that you're interested in
    criterion is a column name in the gdf that you're using to segment — for example, angle if you're looking
        at changes in heading, or speed
    tol is the tolerance to a given criterion. For example, a tolerance of .3 means that a change of
        less than .3 in criterion value relative to the start of the trajectory will be allowed

    Returns the same gdf object, modified in place, with an integer 'traj_id' column
    (0 for the first trajectory, increasing by one at each break).
    """
    trajectory_label = []
    j = 0 #i.e., the first trajectory
    start_value = None
    # Iterate by position so the frame does not need a 0-based index
    for position, value in enumerate(gdf[criterion]):
        if position == 0:
            #Assign starting value for the criterion
            start_value = value
        elif abs(value - start_value) >= tol:
            # A NaN difference compares False, so missing values never start a new trajectory
            start_value = value #only change starting value if we're starting a new trajectory
            j += 1 #Start a new trajectory label
        trajectory_label.append(j)
    gdf['traj_id'] = trajectory_label
    return gdf

In [13]:
#Apply the iterative segmentation approach on angle, using the tolerance configured above (tol = 0.3)
# iterative_segmentation labels gdf in place and returns it, so seg_gdf is the same object as gdf
seg_gdf = iterative_segmentation(gdf, 'theta', tol)

In [14]:
# Group the labelled rows into a collection of trajectories keyed by the 'traj_id' column
traj_collection = mpd.TrajectoryCollection(seg_gdf, 'traj_id')

In [15]:
#For comparison with original gdf: treat the whole track as a single trajectory
# Label a copy, so the constant id does not overwrite the segmentation labels that
# iterative_segmentation wrote into gdf (seg_gdf is that same object).
gdf_single = gdf.copy()
gdf_single['traj_id'] = 1
traj_collection1 = mpd.TrajectoryCollection(gdf_single, 'traj_id')

In [16]:
# Size of the segmented collection and its type
len(traj_collection), type(traj_collection)

(585, movingpandas.trajectory_collection.TrajectoryCollection)

In [17]:
# Type of a single element of the collection
type(traj_collection.trajectories[1])

movingpandas.trajectory.Trajectory

In [18]:
# Plot the segmented collection
plot = traj_collection.hvplot()
# Optional export of the plot to an HTML file (left disabled)
# hvplot.save(plot, 'gliders.html')

# Last expression of the cell, so the plot is rendered as the cell output
plot

## Comparison with original tracks

In [19]:
# Attribute table of the first segmented trajectory, without the geometry column
first_segment = traj_collection.trajectories[0]
trajcoll_0 = first_segment.df.drop(columns='geometry')

In [20]:
# Preview the first rows of the first segmented trajectory
trajcoll_0.head()

Unnamed: 0,lat,lon,theta,traj_id
1,44.65065,-124.30634,0.311621,1
2,44.64663,-124.31882,0.169528,1
3,44.64606,-124.32215,0.188073,1
4,44.64547,-124.32525,0.187153,1
5,44.6449,-124.32826,0.183943,1


In [21]:
# Overlay (`*`) a line and a scatter of the same segment so the individual points are visible along the path
trajcoll_0.hvplot.line(x='lon', y='lat') * trajcoll_0.hvplot.scatter(x='lon', y='lat')

In [22]:
# Attribute table of the first trajectory in the comparison collection, without the geometry column
first_unsegmented = traj_collection1.trajectories[0]
trajcoll1_0 = first_unsegmented.df.drop(columns='geometry')

In [23]:
# Plot only the first 200 rows of the comparison track
trajcoll1_0.head(200).hvplot(x='lon', y='lat')