In [2]:
import numpy as np

In [6]:
# Reproducible inputs: two arrays of one million random samples each,
# drawn one after the other from a single generator seeded with 42.
rng = np.random.RandomState(seed=42)
x, y = (rng.rand(1_000_000) for _ in range(2))
# Benchmark: time the vectorized element-wise addition of the two arrays.
%timeit x + y

2.1 ms ± 90.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
# Benchmark for comparison: the same sum, but each pair is added in a Python
# generator expression and the results are collected with np.fromiter.
%timeit np.fromiter((xi + yi for xi, yi in zip(x, y)), dtype=x.dtype, count=len(x))

169 ms ± 14.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
# True where x is above 0.5 and y is below 0.5.
mask = np.logical_and(x > 0.5, y < 0.5)

In [10]:
# Equivalent to the NumPy mask but faster: numexpr evaluates the whole
# compound expression from a string.
import numexpr
mask_numexpr = numexpr.evaluate('(x > 0.5) & (y < 0.5)')
# Both masks are boolean, so check exact element-wise equality instead of
# using np.allclose, which is a floating-point tolerance comparison.
np.array_equal(mask, mask_numexpr)

True

In [11]:
import pandas as pd

In [12]:
# Four equally shaped frames of random values, drawn in order from one
# generator seeded with 42, used as operands for the timing comparison.
nrows = 100_000
ncols = 100
rng = np.random.RandomState(seed=42)
df1, df2, df3, df4 = [pd.DataFrame(rng.rand(nrows, ncols)) for _ in range(4)]

In [13]:
# Baseline benchmark: sum the four frames with ordinary pandas operators.
%timeit df1 + df2 + df3 + df4

41.3 ms ± 1.29 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [17]:
# Benchmark: the same four-frame sum, passed to pd.eval as a string expression.
%timeit pd.eval('df1 + df2 + df3 + df4')

19.9 ms ± 921 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


# That's cool: pd.eval is roughly twice as fast here (19.9 ms vs. 41.3 ms).

In [30]:
# Small demo frame: 1000 rows of random values in columns A, B and C.
df = pd.DataFrame(data=rng.rand(1000, 3), columns=list("ABC"))
df

Unnamed: 0,A,B,C
0,0.099772,0.365763,0.422644
1,0.941930,0.160890,0.832411
2,0.692094,0.255150,0.668258
3,0.901389,0.718069,0.577554
4,0.944688,0.354645,0.494979
...,...,...,...
995,0.248944,0.873735,0.666527
996,0.586569,0.881918,0.674439
997,0.787831,0.906578,0.053096
998,0.157618,0.393238,0.154573


In [35]:
# DataFrame.eval evaluates a string expression in which column names are used directly.
df.eval("A + B")
# Equivalent plain-pandas spelling: df["A"] + df["B"]

0      0.465535
1      1.102820
2      0.947244
3      1.619458
4      1.299333
         ...   
995    1.122679
996    1.468487
997    1.694410
998    0.550856
999    1.511344
Length: 1000, dtype: float64

In [37]:
# eval() accepts an assignment too: with inplace=True the new column D
# (the sum of A and B) is written into df itself.
df.eval(expr="D = A + B", inplace=True)
df.head(5)

Unnamed: 0,A,B,C,D
0,0.099772,0.365763,0.422644,0.465535
1,0.94193,0.16089,0.832411,1.10282
2,0.692094,0.25515,0.668258,0.947244
3,0.901389,0.718069,0.577554,1.619458
4,0.944688,0.354645,0.494979,1.299333


In [47]:
# Local variables, step 1: inspect the mean of one column, of every column,
# and of every row. Each report is followed by a blank line.
print("df[A]", df["A"].mean(), end="\n\n")
print("df.mean()", df.mean(), end="\n\n")
print("df.mean(axis=1)", df.mean(axis=1))

df[A] 0.5050982408367849

df.mean() A    0.505098
B    0.487818
C    0.493934
D    0.992916
dtype: float64

df.mean(axis=1) 0      0.338429
1      0.759513
2      0.640686
3      0.954118
4      0.773411
         ...   
995    0.727971
996    0.902853
997    0.860479
998    0.314071
999    0.961770
Length: 1000, dtype: float64


In [48]:
# A Python variable can be used inside an eval expression by prefixing it with "@".
# NOTE: despite its name, column_mean holds one mean per row (axis=1 averages
# across the columns of each row).
column_mean = df.mean(axis=1)
result1 = df.A + column_mean
result2 = df.eval(expr="A + @column_mean")
np.allclose(result1, result2)

True

In [54]:
# Row filter written as a boolean mask, then the identical filter evaluated
# by pd.eval from a string; the two results are compared at the end.
result1 = df[(df.A < 0.5) & (df.B < 0.5)]
print(result1.shape)

result2 = pd.eval('df[(df.A < 0.5) & (df.B < 0.5)]')

np.allclose(result1, result2)

(239, 4)


True

In [56]:
# DataFrame.query takes only the filter condition, written as a string over
# the column names, and returns the matching rows.
result2 = df.query(expr='A < 0.5 & B < 0.5')
np.allclose(result1, result2)

True