In [1]:
%load_ext Cython

In [2]:
import numpy as np
import pandas as pd

y = np.random.randint(2,size=(5000,1))
x = np.random.randint(10,size=(5000,1))
data = pd.DataFrame(np.concatenate([y, x], axis=1), columns=['y', 'x'])

In [3]:
def target_mean_v1(data, y_name, x_name):
    result = np.zeros(data.shape[0])
    for i in range(data.shape[0]):
        groupby_result = data[data.index != i].groupby([x_name], as_index=False).agg(['mean', 'count'])
        result[i] = groupby_result.loc[groupby_result.index == data.loc[i, x_name], (y_name, 'mean')]
    return result

In [4]:
%%timeit

target_mean_v1(data, 'y', 'x')

1 loop, best of 3: 26.1 s per loop


In [5]:
def target_mean_v2(data,y_name,x_name):
  result = np.zeros(data.shape[0])
  v_value  = data.shape[0]
  value_dict = dict()
  count_dict = dict()
  for i in range(v_value):
    if data.loc[i,x_name] not in value_dict.keys():
      value_dict[data.loc[i,x_name]] = data.loc[i,y_name]
      count_dict[data.loc[i,x_name]] = 1
    else:
      value_dict[data.loc[i,x_name]] += data.loc[i,y_name]
      count_dict[data.loc[i,x_name]] +=1
  for i in range(v_value):
    result[i] = (value_dict[data.loc[i, x_name]] - data.loc[i, y_name]) / (count_dict[data.loc[i, x_name]] - 1)
  return result

In [6]:
%%timeit

target_mean_v2(data, 'y', 'x')

1 loop, best of 3: 263 ms per loop


In [9]:
def target_mean_v3(data,y_name,x_name):
  result = np.zeros(data.shape[0])
  v_value  = data.shape[0]
  value_dict = dict()
  count_dict = dict()
  for i in range(v_value):
    x_value = data.loc[i, x_name]
    y_value = data.loc[i, y_name]
    if x_value not in value_dict.keys():
        value_dict[x_value] = y_value
        count_dict[x_value] = 1
    else:
        value_dict[x_value] += y_value
        count_dict[x_value] += 1
  for i in range(v_value):
      x_value_ = data.loc[i, x_name]
      result[i] = (value_dict[x_value_] - data.loc[i, y_name]) / (count_dict[x_value_] - 1)
  return result

In [10]:
%%timeit

target_mean_v3(data, 'y', 'x')

10 loops, best of 3: 153 ms per loop


In [16]:
def target_mean_v4(data,y_name,x_name):
  result = np.zeros(data.shape[0])
  v_value  = data.shape[0]
  value_list = np.zeros(10)
  count_list = np.zeros(10)
  for i in range(v_value):
    x_value = data.loc[i, x_name]
    y_value = data.loc[i, y_name]
    value_list[x_value] += y_value
    count_list[x_value] += 1
  for i in range(v_value):
      x_value_ = data.loc[i, x_name]
      result[i] = (value_list[x_value_] - data.loc[i, y_name]) / (count_list[x_value_] - 1)
  return result

In [17]:
%%timeit

target_mean_v4(data, 'y', 'x')

10 loops, best of 3: 160 ms per loop


In [23]:
%%cython -a
import cython
import numpy as np
cimport numpy as cnp


cpdef cnp.ndarray[double] target_mean_v5(cnp.ndarray[long, ndim=2] data):
    cdef int[:] x = data[:,1].astype(np.intc)
    cdef int[:] y = data[:,0].astype(np.intc)
    cdef int v_value = data.shape[0]
    cdef cnp.ndarray[double] result = np.zeros(v_value)
    cdef int[:] value_dict = np.zeros(v_value).astype(np.intc)
    cdef int[:] count_dict = np.zeros(v_value).astype(np.intc)
    cdef unsigned int i
    for i in range(v_value):
        value_dict[x[i]] += y[i]

        count_dict[x[i]] += 1
    for i in range(v_value):
        result[i] = (value_dict[x[i]] - y[i]) / (count_dict[x[i]] - 1)
    return result

In [24]:
data_ = np.concatenate([y, x], axis=1)

In [25]:
%%timeit

target_mean_v5(data_)

The slowest run took 19.03 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 56.4 µs per loop


In [31]:
%%cython -a
import cython
import numpy as np
cimport numpy as cnp


cpdef cnp.ndarray[double] target_mean_v6(cnp.ndarray[long, ndim=2] data):
    cdef cnp.ndarray[long] x = data[:,1].astype(np.int_)
    cdef cnp.ndarray[long] y = data[:,0].astype(np.int_)
    cdef int v_value = data.shape[0]
    cdef cnp.ndarray[double] result = np.zeros(v_value, dtype=np.double)
    cdef cnp.ndarray[long] value_dict = np.zeros(v_value, dtype=np.int_)
    cdef cnp.ndarray[long] count_dict = np.zeros(v_value, dtype=np.int_)
    cdef unsigned int i
    for i in range(v_value):
        value_dict[x[i]] += y[i]

        count_dict[x[i]] += 1
    for i in range(v_value):
        result[i] = (value_dict[x[i]] - y[i]) / (count_dict[x[i]] - 1)
    return result

In [32]:
%%timeit

target_mean_v6(data_)

The slowest run took 18.65 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 50.8 µs per loop


In [33]:
%%cython -a
import cython
import numpy as np
cimport numpy as cnp
from cython.parallel import prange

@cython.boundscheck(False)
@cython.wraparound(False)

cpdef cnp.ndarray[double] target_mean_v7(cnp.ndarray[long, ndim=2] data):
    cdef cnp.ndarray[long] x = data[:,1].astype(np.int_)
    cdef cnp.ndarray[long] y = data[:,0].astype(np.int_)
    cdef int v_value = data.shape[0]
    cdef cnp.ndarray[double] result = np.zeros(v_value, dtype=np.double)
    cdef cnp.ndarray[long] value_dict = np.zeros(v_value, dtype=np.int_)
    cdef cnp.ndarray[long] count_dict = np.zeros(v_value, dtype=np.int_)
    cdef unsigned int i
    for i in prange(v_value, nogil=True):
        value_dict[x[i]] += y[i]

        count_dict[x[i]] += 1
    for i in prange(v_value, nogil=True):
        result[i] = (value_dict[x[i]] - y[i]) / (count_dict[x[i]] - 1)
    return result



In [34]:
%%timeit

target_mean_v7(data_)

The slowest run took 17.72 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 42.6 µs per loop
