# Profiling different methods for selecting rows by column or index values.

In [None]:
import pandas as pd
import obsplus
import obspy
import obspy.core.event as ev
from obsplus.utils.pd import loc_by_name

# Load the 'bingham_test' dataset through obsplus and fetch its events
# from the dataset's event client.
ds = obsplus.load_dataset('bingham_test')
cat = ds.event_client.get_events()

# Repeat the catalog's events 1000 times to get a larger catalog to time against.
new_cat = obspy.Catalog(cat.events * 1000)

# Flatten the enlarged catalog into a table (used as a DataFrame below).
df = obsplus.events_to_df(new_cat)

In [None]:
# Stack 100 copies of the frame row-wise; ignore_index=True discards the
# original row labels and assigns a fresh 0..n-1 index.
df = pd.concat([df] * 100, axis=0, ignore_index=True)

In [None]:
import numpy as np
# Add the columns that later cells promote to index levels.
n_rows = len(df)

# Three id columns, each filled with one freshly generated
# ResourceIdentifier string per row.
for id_col in ('resource_id', 'parent_id', 'scope_id'):
    df[id_col] = [str(ev.ResourceIdentifier()) for _ in range(n_rows)]

# Random integers in [1, 10) for the 'index' column, and a single constant
# value for the 'attr' column.
df['index'] = np.random.randint(1, 10, size=n_rows)
df['attr'] = 'something'

# Profile indexing

In [None]:
# Take every 20th resource_id as the set of ids to look up in the benchmarks.
sub_query = df['resource_id'].values[::20]

In [None]:
# no index: build a boolean mask with isin on the plain 'resource_id'
# column and use it to filter the frame.
%timeit df[df['resource_id'].isin(sub_query)]

In [None]:
# single index: move 'resource_id' out of the columns and into the index
# (set_index returns a new frame; df itself is left unchanged).
df_ = df.set_index('resource_id')

In [None]:
# Label-based lookup of the sampled ids through .loc.
# NOTE: expects df_ to be the frame indexed by 'resource_id' alone from the
# cell above; df_ is rebound to a five-level MultiIndex frame further down,
# so re-run that cell first if cells were executed out of order.
%timeit df_.loc[sub_query]

In [None]:
# multiindex, resource_id first: three index levels, left unsorted
# (no sort_index call here).
df__ = df.set_index(['resource_id', 'attr', 'index'])

In [None]:
# Select on the first level ('resource_id') only; slice(None) leaves the
# 'attr' and 'index' levels unconstrained.
%timeit df__.loc[(sub_query, slice(None), slice(None))]

In [None]:
# multiindex, resource_id last: five index levels, sorted with sort_index().
# NOTE: this rebinds df_, replacing the frame indexed by 'resource_id' alone
# that was created earlier in the notebook.
df_ = df.set_index(['scope_id', 'parent_id', 'index', 'attr', 'resource_id']).sort_index()

In [None]:
# Set of the sampled ids.
# NOTE(review): `something` is not referenced by any other cell visible here;
# confirm whether it is still needed.
something = set(sub_query)

In [None]:
# Filter with a query string on 'attr' and 'index'.
# NOTE: unlike the other benchmarks this one has no resource_id condition,
# so its timing is not directly comparable with theirs.
# `index` here is expected to resolve to the column named 'index' (pandas
# gives a column precedence over the frame's index when the names clash).
%timeit df.query("attr=='something' & index==1")

In [None]:
# no index, three conditions: AND together isin masks on 'resource_id',
# 'attr' and 'index', then filter the frame with the combined mask.
%timeit df[(df['resource_id'].isin(sub_query)) & (df['attr'].isin(['something'])) & (df['index'].isin([1]))]

In [None]:
# Load the snakeviz IPython extension so that the %%snakeviz cell magic
# used below is available.
%load_ext snakeviz


In [None]:
%%snakeviz
# Profile one loc_by_name call on the five-level MultiIndex frame (df_),
# passing the sampled ids as the `resource_id` keyword.
# (The cell magic must stay on the first line of the cell, so this comment
# sits below it.)
loc_by_name(df_, resource_id=sub_query)