# Creating a custom Index in Pandas

The `pandas.DatetimeIndex` is the jam because of its extra properties that access portions of the time.  I'd really like a `pathlib` version of this, `...`, and a `luigi` target version.

In [1]:
from pathlib import Path, os
from pandas import Index, Series, DataFrame; import pandas as pd
from toolz.curried.operator import *; from toolz.curried import *

## Using typing information as data

`pathlib` has an annotated type file in `typeshed`.  The script below makes the annotations importable.

In [2]:
%%file pathlib_types.py
# Executes a `pathlib*` file found under ../../typeshed/stdlib/ (relative to the
# current working directory) inside this module's globals, so the names that file
# defines become attributes of `pathlib_types`.
from pathlib import Path, PurePath
# `next` takes the first match yielded by `rglob` and raises StopIteration when
# nothing matches.
# NOTE(review): `exec` runs whatever that file contains -- only point this at a
# trusted typeshed checkout.
exec(next(
    Path(Path.cwd().parent.parent/'typeshed/stdlib/').rglob('pathlib*')).read_text(), globals())

Overwriting pathlib_types.py


In [3]:
import pathlib_types as types
# `target` is the annotated `Path` taken from the generated module; it is the
# default value of the `target` parameter of `method`.
target = types.Path

The `__annotations__` for `Path`'s attributes are held in `target`; _`target` becomes a function parameter later_.

In [4]:
def method(x, target=target):
    """Look up the annotated return type of an attribute on ``target``.

    ``x`` is an attribute object: either a property or something that may
    carry a ``__name__``.

    * For a property, the getter's name is used to fetch the matching property
      from ``target`` and the ``'return'`` entry of that getter's
      ``__annotations__`` is returned.
    * Otherwise, when ``x.__name__`` appears in ``dir(target)``, the
      ``'return'`` entry of the matching attribute's ``__annotations__`` is
      returned, or ``None`` when there is no such entry.
    * In every other case the result is ``None``.
    """
    if isinstance(x, property):
        annotated = getattr(target, x.fget.__name__)
        return annotated.fget.__annotations__['return']

    name = getattr(x, '__name__', None)
    if name not in dir(target):
        return None

    annotations = getattr(getattr(target, name), '__annotations__', {})
    return annotations.get('return', None)

`returns` is a dictionary that maps each attribute name to its annotated return `type`.

In [5]:
# Map every attribute of `target` whose name starts with a letter to the return
# annotation reported by `method`.
returns = {
    name: method(getattr(target, name))
    for name in dir(target)
    if name[0].isalpha()
}

# Show ten randomly sampled attributes, indexed by the string form of their type.
(
    Series(returns)
    .astype(str)
    .to_frame()
    .reset_index()
    .set_index(0)
    .sort_index()
    .sample(10)
    .T
)

Unnamed: 0,<class 'str'>,None,<class 'int'>,None.1,<class 'bool'>,<class 'os.stat_result'>,None.2,<class 'bool'>.1,~_P,None.3
index,read_text,rmdir,write_bytes,name,is_socket,stat,lchmod,is_reserved,joinpath,replace


> A small sample of the type to attribute mapping.

In [6]:
from functools import wraps

class PathIndex(Index):
    """An ``Index`` whose elements are ``pathlib.Path`` objects.

    Attributes that the index itself does not have are looked up on ``Path``
    and applied to every element.  The container used for the results is
    chosen by looking up the attribute's annotated return type (as reported by
    the module-level ``method``) in the module-level ``type_mapper``:

    * a callable entry wraps the mapped values (e.g. ``Index``, ``Series``);
    * ``None`` returns the index itself;
    * ``False`` marks a return type that cannot be represented and raises
      ``NotImplementedError``.
    """

    def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs):
        """Build the index, converting each element of an iterable ``data`` to ``Path``.

        ``data`` that is already an ``Index`` (or is not iterable) is passed
        through unchanged.  The remaining arguments are forwarded to
        ``_simple_new``.
        """
        if isiterable(data) and not isinstance(data, Index):
            data = cls._coerce_to_ndarray(map(Path, data))
        return cls._simple_new(
            data, name, dtype, copy=copy, fastpath=fastpath,
            tupleize_cols=tupleize_cols, **kwargs)

    def __getattribute__(self, name):
        """Resolve ``name`` on the index first, then fall back to ``Path``.

        Returns the regular attribute when the index has one.  Otherwise, for a
        ``Path`` property the mapped values are returned directly, and for any
        other ``Path`` attribute a wrapper is returned that applies it to every
        element when called.

        Raises:
            AttributeError: neither the index nor ``Path`` has a truthy
                attribute called ``name``.
            NotImplementedError: ``type_mapper`` marks the attribute's return
                type as ``False``.
        """
        try:
            return super().__getattribute__(name)
        except AttributeError:
            attr = getattr(Path, name, None)

            # The name is wrong: re-raise the original AttributeError.
            if not attr:
                raise

            cls = type_mapper[method(attr)]

            # There is no way to interpret the attribute.  ``NotImplemented`` is a
            # sentinel value, not an exception, so raising it would itself fail
            # with a TypeError; ``NotImplementedError`` is the exception to use.
            if cls is False:
                raise NotImplementedError(
                    "PathIndex cannot represent the return type of Path.{}".format(name))

            # Properties return computed values, not a callable.
            if isinstance(attr, property):
                return self if cls is None else cls(self.map(attrgetter(name)))

            # Wrap a callable for later.
            @wraps(attr)
            def mapper(*args, **kwargs):
                if cls is None:
                    return self
                result = cls(self.map(lambda path: attr(path, *args, **kwargs)))
                # Label Series/DataFrame results with the paths that produced them.
                if isinstance(result, (Series, DataFrame)):
                    result.index = self
                return result
            return mapper

    def __truediv__(self, value):
        """Return a new index of ``path / value`` for every element."""
        return type(self)(self.map(flip(Path.__truediv__)(value)))

    def __rtruediv__(self, value):
        """Return a new index of ``value / path`` for every element."""
        return type(self)(self.map(flip(Path.__rtruediv__)(value)))

    def __dir__(self):
        """List the index's own attributes followed by those of ``Path``."""
        return list(super().__dir__()) + dir(Path)

In [7]:
# Lookup table used by `PathIndex.__getattribute__`: the key is the return
# annotation of a `Path` attribute, the value decides how the element-wise
# results are wrapped.
#   * a callable is applied to the mapped values,
#   * None makes `PathIndex` return itself,
#   * False makes `PathIndex` refuse the attribute.
type_mapper = {
    bool: Index,
    types._P: PathIndex,
    None: None, 
    types.IO[types.Any]: False,
    # Each element yields several paths; `concat` flattens them into one index.
    types.Generator[Path, None, None]: compose(PathIndex, list, concat),
    types.Sequence[types._P]: compose(PathIndex, list, concat),
    int: Index, os.stat_result: Index,
    str: Series, bytes: Series,
}

## A test drive of `PathIndex`


In [8]:
# Every return annotation collected in `returns` needs an entry in `type_mapper`.
assert all(annotation in type_mapper.keys() for annotation in returns.values())

if __name__ == '__main__':
    # `i` is reused by the next cell.
    i = PathIndex(['nbd', 'fidget', '.'])
    print(i.exists(), ('../..'/i).exists())

Index([False, False, True], dtype='object') Index([True, True, True], dtype='object')


In [9]:
if __name__ == '__main__':
    # Take the first two directories, glob their notebooks, read each file's text,
    # parse it with nbformat (positional argument 4 -- presumably the notebook
    # format version; confirm against nbformat.reads), turn every notebook's
    # cells into a DataFrame, then concatenate the frames keyed by path.
    # `Series.items` replaces `Series.iteritems`, which was deprecated in
    # pandas 1.5 and removed in pandas 2.0; both yield (index, value) pairs.
    df = ('../..'/i)[:2].glob('*.ipynb').read_text().apply(
        __import__('nbformat').reads, args=[4]
    ).apply(
        compose(DataFrame, get('cells'))
    ).pipe(compose(pd.concat, dict, Series.items))
    print(i.exists(), ('../..'/i).exists())
# Display `df` as the cell result only when run as a script/notebook.
__name__ == '__main__' and df

Index([False, False, True], dtype='object') Index([True, True, True], dtype='object')


Unnamed: 0,Unnamed: 1,cell_type,execution_count,metadata,outputs,source
../../fidget/readme.ipynb,0,markdown,,{},,# `fidget` - A literate syntax for functional ...
../../fidget/readme.ipynb,1,code,17.0,{},"[{'metadata': {}, 'data': {'text/plain': '[100...",from fidget import _x as __x__\n\n(__x__\n * r...
../../fidget/readme.ipynb,2,markdown,,{},,`fidget` includes _most_ of the Python model a...
../../fidget/readme.ipynb,3,code,47.0,{},"[{'metadata': {}, 'data': {'text/plain': '""['d...",__x__[dir].filter(-__x__.first().eq('_')).rand...
../../fidget/readme.ipynb,4,code,53.0,{},"[{'name': 'stdout', 'text': '<class 'str'> ', ...",_x(10) >> (_x << str >> type >> print) >> __x_...
../../fidget/readme.ipynb,5,markdown,,{},,## [Examples](https://github.com/tonyfast/fidg...
../../fidget/readme.ipynb,6,markdown,,{},,---\n\n## More\n\nThis project is developed fo...
../../fidget/readme.ipynb,7,code,64.0,{},"[{'name': 'stderr', 'text': '[NbConvertApp] Co...",%%bash \njupyter nbconvert --to custom --Expor...
../../fidget/readme.ipynb,8,code,65.0,{},"[{'name': 'stdout', 'text': '<class 'NoneType'...",%%bash\ncd test\ncoverage erase\ncoverage run ...
../../fidget/readme.ipynb,9,code,1.0,{},"[{'name': 'stdout', 'text': '[NbConvertApp] Co...",\n!jupyter nbconvert --to markdown readme.ipynb


### Notes

* Logic works.