In [1]:
import itertools
import numpy as np
import pandas as pd

In [2]:
itertools.product(np.arange(0, 10), repeat=2)

<itertools.product at 0x7fa424b4e8b8>

In [3]:
agg_df = pd.DataFrame(itertools.product(np.arange(0, 10), repeat=2))
agg_df.columns = ['f1', 'f2']
agg_df['value'] = np.random.randint(1, 1001, size=agg_df.shape[0])
agg_df

Unnamed: 0,f1,f2,value
0,0,0,329
1,0,1,735
2,0,2,968
3,0,3,884
4,0,4,340
...,...,...,...
95,9,5,109
96,9,6,89
97,9,7,891
98,9,8,116


In [4]:
def inverse_agg_count(series):
    '''
    Inverse count agregation.
    
    Parameters
    ----------
    series : pd.Series with 1d index
    Count aggregation result.
    
    Returns
    -------
    index : 1d index
    Index values repeated series.values times.
    index.to_series().groupby(index).count() returns series.
    '''
    return np.repeat(series.index, series)

In [5]:
def repeat_range(repeats, range_step=1):
    '''
    Repeat range.
    
    Parameters
    ----------
    repeats : 1d np.ndarray, dtype=int
    Nonzero number of repetitions for each element in range.
    
    range_step : int
    Step of sequence of integers from 0 to len(repeats).
    
    Returns
    -------
    res : 1d np.ndarray, dtype : int
    '''
    idx = repeats.cumsum()
    res = np.zeros(idx[-1], dtype=int)
    res[idx[:-1]] = range_step
    res = res.cumsum()
    return res

In [6]:
def inv_agg_count(series):
    res = []
    for i, v in series.iteritems():
        res.extend([i]*v)
    return res

In [7]:
res1 = pd.Series(inv_agg_count(agg_df['value']))
res3 = pd.Series(inverse_agg_count(agg_df['value']))
res2 = pd.Series(repeat_range(agg_df['value'].values))

In [8]:
res = pd.concat([res1, res2, res3], axis=1)
mask = res == res1.values.reshape(-1, 1)
mask.values.all()

True

In [9]:
%timeit inv_agg_count(agg_df['value'])

323 µs ± 807 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [10]:
%timeit inverse_agg_count(agg_df['value'])

133 µs ± 3.9 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [11]:
%timeit repeat_range(agg_df['value'].values)

79.8 µs ± 194 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
