In [None]:
import numpy as np

# Fixed seed (42) makes the generated data reproducible across runs.
rng = np.random.default_rng(42)

# 50_000 x 10_000 matrix of standard-normal draws. At NumPy's default float64
# that is 5e8 values, i.e. roughly 4 GB of RAM.
X = rng.standard_normal(size=(50_000, 10_000))

# np.sum

In [None]:
%%timeit -n 20 -r 5

# NumPy reduction along axis 1: one sum per row of X.
np.sum(X, axis=1)

# Matrix multiplication (`@`)

In [None]:
# Right-hand operand for the matrix product below: shape (10_000, 2_000).
Y = rng.standard_normal(size=(10_000, 2_000))

In [None]:
# Matrix product. With X of shape (50_000, 10_000) as created in the first
# cell, Z has shape (50_000, 2_000). This cell is not timed.
Z = X @ Y

## How to run the row-wise sum in parallel?

### Using Numba

In [None]:
from numba import njit, prange

In [None]:
@njit(parallel=True)
def numba_sum_axis_1(X):
    """Sum each row of a 2-D array, with the rows spread over a ``prange`` loop.

    Parameters
    ----------
    X : 2-D array
        Indexed as ``X[i, j]``; ``X.shape`` must unpack into ``(rows, cols)``.

    Returns
    -------
    out : 1-D array of length ``rows`` (``np.zeros`` default dtype, float64)
        ``out[i]`` is the sum of ``X[i, :]``. A row with zero columns sums
        to 0.0.
    """
    rows, cols = X.shape
    out = np.zeros(rows)

    # Each row is summed independently, so the outer loop is the prange loop.
    for i in prange(rows):
        # Accumulate in a loop-local scalar and store once per row instead of
        # read-modify-writing the shared output array for every element.
        # The additions happen in the same order, so the result is unchanged.
        acc = 0.0
        for j in range(cols):
            # Tuple indexing reads the element directly; X[i][j] would go
            # through an intermediate row view for every element.
            acc += X[i, j]
        out[i] = acc
    return out

In [None]:
%%timeit -n 20 -r 5

numba_sum_axis_1(X)

## PyTorch

In [None]:
import torch

In [None]:
import numpy as np

# Same seed and same draw as the notebook's first cell, so this rebuilds an
# identical X (presumably so this section can be run on its own — confirm).
rng = np.random.default_rng(42)
X = rng.standard_normal(size=(50_000, 10_000))

In [None]:
def torch_sum(X, axis):
    """Sum ``X`` along ``axis`` with PyTorch and return the result as NumPy.

    Parameters
    ----------
    X : array-like
        Anything ``torch.asarray`` accepts (e.g. a NumPy array).
    axis : int
        Forwarded as ``dim`` to the tensor reduction.

    Returns
    -------
    numpy.ndarray
        The reduced values, converted back with ``np.asarray``.
    """
    reduced = torch.asarray(X).sum(dim=axis)
    return np.asarray(reduced)

In [None]:
%%timeit -n 20 -r 5

# The timing covers the whole torch_sum call: the NumPy -> tensor conversion,
# the reduction, and the conversion back to NumPy.
torch_sum(X, axis=1)

## Pandas!

In [None]:
import pandas as pd
# NOTE: this rebinds `rng` and `X`. From here on X is the Poisson matrix
# below, not the standard-normal matrix used in the earlier sections.
rng = np.random.default_rng(0)
# 1_000_000 rows x 10 columns of Poisson(lam=3.0) draws.
X = rng.poisson(lam=3.0, size=(1_000_000, 10))

In [None]:
# Wrap the Poisson matrix in a DataFrame.
data = pd.DataFrame(X)
# Rolling object with a window of 100; the timing cells below all call
# .mean() on this same object.
roll = data.rolling(100)

### Default Pandas

In [None]:
%%timeit -n 10 -r 1
# Rolling mean with no `engine` argument, i.e. pandas' default implementation.
_ = roll.mean()

### Numba only

In [None]:
%%timeit -n 10 -r 1
_ = roll.mean(engine="numba")

### Numba with Parallel

In [None]:
%%timeit -n 30 -r 10
_ = roll.mean(engine="numba", engine_kwargs={"parallel":True})