To-do:
- Create generic point and linear sample data sets for testing
- Review existing features for expected behavior, standard terminology, and ease of use
- Unit testing of basic functionality of `events` module
  - Initialization, validation, dtype
  - Modification and analysis features
- Review and refine the documentation of the `events` module


# Dependencies

In [1]:
from linref.events import Rangel
import numpy as np
import pandas as pd
import scipy.sparse as sparse
import geopandas as gpd
import os

# Sample Data

In [2]:
# All sample event tables are read from testing/data, relative to the
# current working directory.
sample_dir = os.path.join('testing', 'data')

# The two linear-event tables are indexed by 'id'; the point-event table
# is indexed by 'crash_id'.
df_linear_1 = pd.read_csv(
    os.path.join(sample_dir, 'linear-events-1.csv'),
    index_col='id',
)
df_linear_2 = pd.read_csv(
    os.path.join(sample_dir, 'linear-events-2.csv'),
    index_col='id',
)
df_point_1 = pd.read_csv(
    os.path.join(sample_dir, 'point-events-1.csv'),
    index_col='crash_id',
)

In [3]:
# Every event collection is grouped by the (county, road_name) pair, handed
# to Rangel as a NumPy record array built from those two columns.
group_cols = ['county', 'road_name']
groups_linear_1 = df_linear_1[group_cols].to_records(index=False)
groups_linear_2 = df_linear_2[group_cols].to_records(index=False)
groups_point_1 = df_point_1[group_cols].to_records(index=False)

# Linear events: bounded by the begin_mp / end_mp columns, closed='left_mod'.
rng_linear_1 = Rangel(
    index=df_linear_1.index,
    groups=groups_linear_1,
    begs=df_linear_1['begin_mp'],
    ends=df_linear_1['end_mp'],
    closed='left_mod',
)
rng_linear_2 = Rangel(
    index=df_linear_2.index,
    groups=groups_linear_2,
    begs=df_linear_2['begin_mp'],
    ends=df_linear_2['end_mp'],
    closed='left_mod',
)

# Point events: a single location per event taken from the mp column;
# no `closed` argument is passed, so the Rangel default applies.
rng_point_1 = Rangel(
    index=df_point_1.index,
    groups=groups_point_1,
    locs=df_point_1['mp'],
)

In [4]:
# Scratch check: size in bytes of a single (county, road_name) record in the
# record array that is passed to Rangel as `groups`.
df_linear_2[['county', 'road_name']].to_records(index=False).itemsize

16

In [5]:
# Scratch check: casting one (county, road_name) tuple to the dtype of
# rng_linear_1.groups and reading its number of dimensions.
np.array(('Cook', 'LA'), dtype=rng_linear_1.groups.dtype).ndim

0

# Example Features

In [6]:
# Display the Rangel repr: one line per event (index, group, bounds) followed
# by a summary line.
rng_linear_1

0, group(('Los Angeles', '...) [0.000, 1.200]
1, group(('Los Angeles', '...) [1.400, 2.500)
2, group(('Los Angeles', '...) [2.500, 3.700]
3, group(('Los Angeles', '...) [3.900, 4.800)
4, group(('Los Angeles', '...) [4.800, 5.900]
...90 records...
95, group(('King', '4th Ave') ) [0.000, 1.200]
96, group(('King', '4th Ave') ) [1.400, 2.400)
97, group(('King', '4th Ave') ) [2.400, 3.500]
98, group(('King', '4th Ave') ) [3.700, 4.700)
99, group(('King', '4th Ave') ) [4.700, 5.800]
Rangel(100 grouped, monotonic linear events, closed=left_mod)

## Selection

In [7]:
# Positional slicing: take the first five events as a new Rangel.
rng_linear_1[:5]

0, group(('Los Angeles', '...) [0.000, 1.200]
1, group(('Los Angeles', '...) [1.400, 2.500)
2, group(('Los Angeles', '...) [2.500, 3.700]
3, group(('Los Angeles', '...) [3.900, 4.800)
4, group(('Los Angeles', '...) [4.800, 5.900]
Rangel(5 grouped, monotonic linear events, closed=left_mod)

In [8]:
# Select events by group: pass a list of (county, road_name) tuples to
# select_group.
rng_linear_1.select_group([('King', '4th Ave'), ('Los Angeles', 'Main St')])

0, group(('Los Angeles', '...) [0.000, 1.200]
1, group(('Los Angeles', '...) [1.400, 2.500)
2, group(('Los Angeles', '...) [2.500, 3.700]
3, group(('Los Angeles', '...) [3.900, 4.800)
4, group(('Los Angeles', '...) [4.800, 5.900]
...4 records...
95, group(('King', '4th Ave') ) [0.000, 1.200]
96, group(('King', '4th Ave') ) [1.400, 2.400)
97, group(('King', '4th Ave') ) [2.400, 3.500]
98, group(('King', '4th Ave') ) [3.700, 4.700)
99, group(('King', '4th Ave') ) [4.700, 5.800]
Rangel(14 grouped, monotonic linear events, closed=left_mod)

In [9]:
# Boolean-mask selection: index with a mask built from the `begs` attribute
# (here, events whose begin value is at least 10).
rng_linear_2[rng_linear_2.begs >= 10]

5, group(('Los Angeles', '...) [10.000, 11.100)
6, group(('Los Angeles', '...) [11.000, 12.300]
7, group(('Los Angeles', '...) [12.400, 13.400)
8, group(('Los Angeles', '...) [13.300, 14.000]
15, group(('Los Angeles', '...) [11.000, 12.000)
...5 records....
33, group(('Maricopa', 'Cen...) [12.100, 13.200)
34, group(('Maricopa', 'Cen...) [13.100, 14.300]
66, group(('Dallas', 'Elm St')) [10.800, 12.000]
73, group(('King', '1st Ave') ) [10.900, 12.100]
74, group(('King', '1st Ave') ) [12.200, 13.300]
Rangel(15 grouped, monotonic linear events, closed=left_mod)

## Sorting

In [10]:
# Sort events using the keys 'groups' and 'begs', in that listed order.
rng_linear_1.sort(by=['groups', 'begs'])

75, group(('Clark', '2nd St') ) [0.000, 1.100]
76, group(('Clark', '2nd St') ) [1.300, 2.300)
77, group(('Clark', '2nd St') ) [2.300, 3.400]
78, group(('Clark', '2nd St') ) [3.600, 4.600)
79, group(('Clark', '2nd St') ) [4.600, 5.700]
...90 records...
38, group(('San Diego', 'Ba...) [3.500, 4.600]
39, group(('San Diego', 'Ba...) [4.800, 5.800)
40, group(('San Diego', 'Ba...) [5.800, 7.000]
41, group(('San Diego', 'Ba...) [7.200, 8.100)
42, group(('San Diego', 'Ba...) [8.100, 9.300]
Rangel(100 grouped, monotonic linear events, closed=left_mod)

## Event modification

In [12]:
# Shift events by 1000; in the displayed result both bounds of each event
# appear offset by that amount.
rng_linear_2.shift(1000)

0, group(('Los Angeles', '...) [1003.900, 1005.200]
1, group(('Los Angeles', '...) [1005.300, 1006.500)
2, group(('Los Angeles', '...) [1006.400, 1007.700]
3, group(('Los Angeles', '...) [1007.800, 1008.800)
4, group(('Los Angeles', '...) [1008.700, 1009.900]
.....45 records.....
95, group(('King', '4th Ave') ) [1003.900, 1005.200]
96, group(('King', '4th Ave') ) [1005.300, 1006.400)
97, group(('King', '4th Ave') ) [1006.300, 1007.500]
98, group(('King', '4th Ave') ) [1007.600, 1008.700)
99, group(('King', '4th Ave') ) [1008.600, 1009.800]
Rangel(55 grouped, monotonic linear events, closed=left_mod)

In [13]:
# Extend events with two amounts (100, 1000); judging by the displayed
# result, the first is subtracted from the begin bound and the second is
# added to the end bound -- confirm against the Rangel.extend documentation.
rng_linear_2.extend(100, 1000)

0, group(('Los Angeles', '...) [ -96.100, 1005.200)
1, group(('Los Angeles', '...) [ -94.700, 1006.500)
2, group(('Los Angeles', '...) [ -93.600, 1007.700)
3, group(('Los Angeles', '...) [ -92.200, 1008.800)
4, group(('Los Angeles', '...) [ -91.300, 1009.900)
.....45 records.....
95, group(('King', '4th Ave') ) [ -96.100, 1005.200)
96, group(('King', '4th Ave') ) [ -94.700, 1006.400)
97, group(('King', '4th Ave') ) [ -93.700, 1007.500)
98, group(('King', '4th Ave') ) [ -92.400, 1008.700)
99, group(('King', '4th Ave') ) [ -91.400, 1009.800]
Rangel(55 grouped, monotonic linear events, closed=left_mod)

## Event relation

In [14]:
# Relate linear events to point events with Rangel.intersecting, which
# returns a boolean sparse result here.
# The result is bound to `point_hits` instead of `sparse`, because `sparse`
# is the alias of the `scipy.sparse` module imported at the top of the
# notebook; reassigning it would leave the module unreachable for any cell
# run afterwards.
point_hits = rng_linear_1[:10].intersecting(rng_point_1[:5])
dense = point_hits.todense()
# Show the sparse object, its dense form, and the per-row count of matches.
point_hits, dense, point_hits.sum(axis=1)

(<Compressed Sparse Column sparse array of dtype 'bool'
 	with 5 stored elements and shape (9, 5)>,
 array([[ True, False, False, False, False],
        [False,  True, False, False, False],
        [False, False,  True, False, False],
        [False, False, False,  True, False],
        [False, False, False, False,  True],
        [False, False, False, False, False],
        [False, False, False, False, False],
        [False, False, False, False, False],
        [False, False, False, False, False]]),
 array([1, 1, 1, 1, 1, 0, 0, 0, 0]))

In [15]:
# Relate two sets of linear events with Rangel.intersecting, which returns a
# boolean sparse result here.
# The result is bound to `linear_hits` instead of `sparse`, because `sparse`
# is the alias of the `scipy.sparse` module imported at the top of the
# notebook; reassigning it would leave the module unreachable for any cell
# run afterwards.
linear_hits = rng_linear_1[:10].intersecting(rng_linear_2[:5])
dense = linear_hits.todense()
# Show the sparse object, its dense form, and the per-row count of matches.
linear_hits, dense, linear_hits.sum(axis=1)

(<Compressed Sparse Row sparse array of dtype 'bool'
 	with 10 stored elements and shape (9, 5)>,
 array([[False, False, False, False, False],
        [False, False, False, False, False],
        [False, False, False, False, False],
        [ True, False, False, False, False],
        [ True,  True, False, False, False],
        [False,  True,  True, False, False],
        [False, False,  True,  True, False],
        [False, False, False,  True,  True],
        [False, False, False, False,  True]]),
 array([0, 0, 0, 1, 2, 2, 2, 2, 1]))

In [16]:
# Overlay two sets of linear events with Rangel.overlay, requesting
# normalized weights (normalize=True, norm_by='right'); see the overlay
# documentation for the exact normalization semantics.
# The result is bound to `overlay_weights` instead of `sparse`, because
# `sparse` is the alias of the `scipy.sparse` module imported at the top of
# the notebook; reassigning it would leave the module unreachable for any
# cell run afterwards.
overlay_weights = rng_linear_1[:10].overlay(rng_linear_2[:5], normalize=True, norm_by='right')
dense = overlay_weights.todense()
# Show the sparse object, its dense form rounded to 2 decimals, and the
# rounded per-row totals.
overlay_weights, dense.round(2), overlay_weights.sum(axis=1).round(2)

(<Compressed Sparse Row sparse array of dtype 'float64'
 	with 10 stored elements and shape (9, 5)>,
 array([[0.  , 0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  , 0.  ],
        [0.69, 0.  , 0.  , 0.  , 0.  ],
        [0.31, 0.5 , 0.  , 0.  , 0.  ],
        [0.  , 0.33, 0.54, 0.  , 0.  ],
        [0.  , 0.  , 0.46, 0.5 , 0.  ],
        [0.  , 0.  , 0.  , 0.3 , 0.58],
        [0.  , 0.  , 0.  , 0.  , 0.42]]),
 array([0.  , 0.  , 0.  , 0.69, 0.81, 0.87, 0.96, 0.88, 0.42]))