### boolean indexing

In [3]:
import numpy as np
import pandas as pd

In [4]:
x_np = np.array([-1,2,10,-5])
x_pd = pd.DataFrame({'x': x_np})
mymatrix = np.arange(1,13).reshape(3,4)

In [5]:
idx = x_np > 2
x_np[idx]

array([10])

#### flexindex

In [6]:
# np
x_np[x_np > 2]
x_np[x_np != 3]
x_np[np.where((x_np >= 2) & (x_np != 10))]  

# abstaining from others as they aren't things one should do and would be ugly if even possible here

# pd
# x_pd[x_pd['x'] > 2]

array([2])

#### loop

In [7]:
x_pd['y'] = ''
for i in np.arange(x_pd.shape[0]):
    check = x_pd.loc[i,'x'] > 2
    
    if check == True:
        x_pd.loc[i,'y'] = 'Yes'
    else:
        x_pd.loc[i,'y'] = 'No'

In [8]:
x_pd

Unnamed: 0,x,y
0,-1,No
1,2,No
2,10,Yes
3,-5,No


#### boolnoloop

In [9]:
x_pd['y'] = np.where(x_pd['x'] > 2, 'Yes', 'No')

In [10]:
x_pd

Unnamed: 0,x,y
0,-1,No
1,2,No
2,10,Yes
3,-5,No


##### vecmatrixop

In [11]:
mymatrix_log = np.log(mymatrix)

#### loopsvec

In [12]:
# in what follows I duplicate the R code, but np apply is not necessarily comparable to R's apply but a similar result occurs
mymatrix = np.random.uniform(size=100).reshape(10,10)

In [13]:
mymatrix2 = np.apply_along_axis(np.log, 0, mymatrix)
np.array_equal(np.log(mymatrix), mymatrix2)

True

In [14]:
%timeit np.log(mymatrix)
%timeit np.apply_along_axis(np.log, 1, mymatrix)

858 ns ± 8.65 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
40.2 µs ± 581 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


#### timings

In [15]:
mymat = np.random.normal(size=100000).reshape(100,1000)

In [16]:
# won't be doing parallel comparison

def stdize(x):
    return (x-x.mean())/x.std()

def doubleloop(matrix):
    mat = matrix.copy()
    for i in range(matrix.shape[1]):
        x = mat[:,i]
        for j in range(len(x)):
            mat[j,i] = (x[j]- x.mean())/x.std()
    return mat

def singleloop(matrix):
    mat = matrix.copy()
    for i in range(matrix.shape[1]):
        x = matrix[:,i]
        mat[:,i] = (x-x.mean())/x.std()
    return matrix

In [17]:
mymat_stdize = np.apply_along_axis(stdize, 0, mymat)

In [18]:
mymat_dl = doubleloop(mymat)

In [19]:
mymat_sl = singleloop(mymat)

In [20]:
np.array_equal(mymat_stdize, mymat_sl)
np.array_equal(mymat_stdize, mymat_dl)

False

In [21]:
# in order of slowest first
%timeit doubleloop(mymat)
%timeit np.apply_along_axis(stdize, 0, mymat)
%timeit singleloop(mymat)
%timeit (mymat - mymat.mean(axis=0)) / mymat.std(axis=0)

179 µs ± 2.96 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
111 µs ± 1.69 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
72.9 µs ± 5.04 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
24.9 µs ± 129 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
