To-do:
- Create generic point and linear sample data sets for testing
- Review existing features for expected behavior, standard terminology, ease of use
- Unit testing of basic functionality of `events` module
  - Initialization, validation, dtype
  - Modification and analysis features
- Review, refine documentation of `events` module


# Dependencies

In [1]:
from linref.events import Rangel
import numpy as np
import pandas as pd
import scipy.sparse as sparse
import geopandas as gpd
import os

# Sample Data

In [2]:
df_linear_1 = pd.read_csv(os.path.join('testing', 'data', 'linear-events-1.csv'), index_col='id')
df_linear_2 = pd.read_csv(os.path.join('testing', 'data', 'linear-events-2.csv'), index_col='id')
df_point_1 = pd.read_csv(os.path.join('testing', 'data', 'point-events-1.csv'), index_col='crash_id')

In [3]:
rng_linear_1 = Rangel(
    index=df_linear_1.index,
    groups=df_linear_1[['county', 'road_name']].to_records(index=False),
    begs=df_linear_1['begin_mp'],
    ends=df_linear_1['end_mp'],
    closed='left_mod'
)
rng_linear_2 = Rangel(
    index=df_linear_2.index,
    groups=df_linear_2[['county', 'road_name']].to_records(index=False),
    begs=df_linear_2['begin_mp'],
    ends=df_linear_2['end_mp'],
    closed='left_mod'
)
rng_point_1 = Rangel(
    index=df_point_1.index,
    groups=df_point_1[['county', 'road_name']].to_records(index=False),
    locs=df_point_1['mp']
)

In [4]:
df_linear_2[['county', 'road_name']].to_records(index=False).itemsize

16

In [5]:
np.array(('Cook', 'LA'), dtype=rng_linear_1.groups.dtype).ndim

0

# Example Features

In [6]:
rng_linear_1

0, group(('Los Angeles', 'Mai) [0.000, 1.200]
1, group(('Los Angeles', 'Mai) [1.400, 2.500)
2, group(('Los Angeles', 'Mai) [2.500, 3.700]
3, group(('Los Angeles', 'Mai) [3.900, 4.800)
4, group(('Los Angeles', 'Mai) [4.800, 5.900]
...90 records...
95, group(('King', '4th Ave')) [0.000, 1.200]
96, group(('King', '4th Ave')) [1.400, 2.400)
97, group(('King', '4th Ave')) [2.400, 3.500]
98, group(('King', '4th Ave')) [3.700, 4.700)
99, group(('King', '4th Ave')) [4.700, 5.800]
Rangel(100 grouped, monotonic linear events, closed=left_mod)

## Selection

In [7]:
rng_linear_1[:5]

0, group(('Los Angeles', 'Mai) [0.000, 1.200]
1, group(('Los Angeles', 'Mai) [1.400, 2.500)
2, group(('Los Angeles', 'Mai) [2.500, 3.700]
3, group(('Los Angeles', 'Mai) [3.900, 4.800)
4, group(('Los Angeles', 'Mai) [4.800, 5.900]
Rangel(5 grouped, monotonic linear events, closed=left_mod)

In [10]:
rng_linear_1.select_group([('King', '4th Ave'), ('Los Angeles', 'Main St')])

0, group(('Los Angeles', 'Mai) [0.000, 1.200]
1, group(('Los Angeles', 'Mai) [1.400, 2.500)
2, group(('Los Angeles', 'Mai) [2.500, 3.700]
3, group(('Los Angeles', 'Mai) [3.900, 4.800)
4, group(('Los Angeles', 'Mai) [4.800, 5.900]
...4 records...
95, group(('King', '4th Ave')) [0.000, 1.200]
96, group(('King', '4th Ave')) [1.400, 2.400)
97, group(('King', '4th Ave')) [2.400, 3.500]
98, group(('King', '4th Ave')) [3.700, 4.700)
99, group(('King', '4th Ave')) [4.700, 5.800]
Rangel(14 grouped, monotonic linear events, closed=left_mod)

In [9]:
rng_linear_2[rng_linear_2.begs >= 10]

5, group(Main St      ) [10.000, 11.100)
6, group(Main St      ) [11.000, 12.300]
7, group(Main St      ) [12.400, 13.400)
8, group(Main St      ) [13.300, 14.000]
15, group(State St     ) [11.000, 12.000)
...5 records....
33, group(Central Ave  ) [12.100, 13.200)
34, group(Central Ave  ) [13.100, 14.300]
66, group(Elm St       ) [10.800, 12.000]
73, group(1st Ave      ) [10.900, 12.100]
74, group(1st Ave      ) [12.200, 13.300]
Rangel(15 grouped, monotonic linear events, closed=left_mod)

## Sorting

In [10]:
rng_linear_1.sort(by='groups')

ValueError: Input selector must be a 1D array-like object.

In [25]:
arr = np.array([['a', 'b'], ['a', 'c'], ['c', 'b'], ['d', 'a'], ['a', 'b'], ['z', 'a']])
arr

array([['a', 'b'],
       ['a', 'c'],
       ['c', 'b'],
       ['d', 'a'],
       ['a', 'b'],
       ['z', 'a']], dtype='<U1')

In [21]:
np.argsort(arr, axis=0)

array([[0, 3],
       [1, 5],
       [4, 0],
       [2, 2],
       [3, 4],
       [5, 1]])

In [28]:
np.lexsort(arr.T, axis=0)

array([3, 5, 0, 4, 2, 1])

In [31]:
np.concatenate([arr, arr, arr[:, 0]], axis=1)

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 2 has 1 dimension(s)

## Event modification

In [6]:
rng_linear_2.shift(1000)

0, group(      Main St) [1003.900, 1005.200]
1, group(      Main St) [1005.300, 1006.500)
2, group(      Main St) [1006.400, 1007.700]
3, group(      Main St) [1007.800, 1008.800)
4, group(      Main St) [1008.700, 1009.900]
.....45 records.....
95, group(      4th Ave) [1003.900, 1005.200]
96, group(      4th Ave) [1005.300, 1006.400)
97, group(      4th Ave) [1006.300, 1007.500]
98, group(      4th Ave) [1007.600, 1008.700)
99, group(      4th Ave) [1008.600, 1009.800]
Rangel(55 grouped, monotonic linear events, closed=left_mod)

In [7]:
rng_linear_2.extend(100, 1000)

0, group(      Main St) [ -96.100, 1005.200)
1, group(      Main St) [ -94.700, 1006.500)
2, group(      Main St) [ -93.600, 1007.700)
3, group(      Main St) [ -92.200, 1008.800)
4, group(      Main St) [ -91.300, 1009.900)
.....45 records.....
95, group(      4th Ave) [ -96.100, 1005.200)
96, group(      4th Ave) [ -94.700, 1006.400)
97, group(      4th Ave) [ -93.700, 1007.500)
98, group(      4th Ave) [ -92.400, 1008.700)
99, group(      4th Ave) [ -91.400, 1009.800]
Rangel(55 grouped, monotonic linear events, closed=left_mod)

## Event relation

In [29]:
sparse = rng_linear_1[:10].intersecting(rng_point_1[:5])
dense = sparse.todense()
sparse, dense, sparse.sum(axis=1)

(<Compressed Sparse Column sparse array of dtype 'bool'
 	with 5 stored elements and shape (9, 5)>,
 array([[ True, False, False, False, False],
        [False,  True, False, False, False],
        [False, False,  True, False, False],
        [False, False, False,  True, False],
        [False, False, False, False,  True],
        [False, False, False, False, False],
        [False, False, False, False, False],
        [False, False, False, False, False],
        [False, False, False, False, False]]),
 array([1, 1, 1, 1, 1, 0, 0, 0, 0]))

In [31]:
sparse = rng_linear_1[:10].intersecting(rng_linear_2[:5])
dense = sparse.todense()
sparse, dense, sparse.sum(axis=1)

(<Compressed Sparse Row sparse array of dtype 'bool'
 	with 10 stored elements and shape (9, 5)>,
 array([[False, False, False, False, False],
        [False, False, False, False, False],
        [False, False, False, False, False],
        [ True, False, False, False, False],
        [ True,  True, False, False, False],
        [False,  True,  True, False, False],
        [False, False,  True,  True, False],
        [False, False, False,  True,  True],
        [False, False, False, False,  True]]),
 array([0, 0, 0, 1, 2, 2, 2, 2, 1]))

In [30]:
sparse = rng_linear_1[:10].overlay(rng_linear_2[:5], normalize=True, norm_by='right')
dense = sparse.todense()
sparse, dense.round(2), sparse.sum(axis=1).round(2)

(<Compressed Sparse Row sparse array of dtype 'float64'
 	with 10 stored elements and shape (9, 5)>,
 array([[0.  , 0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  , 0.  ],
        [0.69, 0.  , 0.  , 0.  , 0.  ],
        [0.31, 0.5 , 0.  , 0.  , 0.  ],
        [0.  , 0.33, 0.54, 0.  , 0.  ],
        [0.  , 0.  , 0.46, 0.5 , 0.  ],
        [0.  , 0.  , 0.  , 0.3 , 0.58],
        [0.  , 0.  , 0.  , 0.  , 0.42]]),
 array([0.  , 0.  , 0.  , 0.69, 0.81, 0.87, 0.96, 0.88, 0.42]))