In [1]:
import numpy as np 
import pandas as pd 

# pandas.eval() for Efficient Operations

The eval() function in Pandas uses string expressions to efficiently compute operations
using DataFrames. For example, consider the following DataFrame:

In [2]:
# Size constants; note that the frame built in this cell does not use them.
nrows, ncols = 100000, 100

# Fix NumPy's global random state so the generated values are reproducible.
np.random.seed(42)

# 1000 rows of uniform random numbers spread over three columns: A, B and C.
df = pd.DataFrame(data=np.random.rand(1000, 3), columns=["A", "B", "C"])

Using pd.eval(), we can compute expressions with the three columns as shown below;
DataFrame.eval() evaluates the same expression with the columns referenced directly by name:

In [3]:
# Top-level pd.eval(): the expression names the frame explicitly (df.A, df.B, df.C).
via_pd_eval = pd.eval("df.A + df.B / df.C -1")
print(via_pd_eval)

# DataFrame.eval(): the same arithmetic, with the columns written as bare names.
via_df_eval = df.eval("A+B/C-1")
print(via_df_eval)

0      0.673341
1      0.598813
2      0.499033
3     -0.270704
4      1.000264
         ...   
995    0.069391
996    4.530802
997    1.193780
998    4.633579
999   -0.357367
Length: 1000, dtype: float64
0      0.673341
1      0.598813
2      0.499033
3     -0.270704
4      1.000264
         ...   
995    0.069391
996    4.530802
997    1.193780
998    4.633579
999   -0.357367
Length: 1000, dtype: float64


### Assignment in DataFrame.eval()

In [4]:
# Preview the first five rows of the frame (5 is head()'s default, spelled out here).
df.head(n=5)

Unnamed: 0,A,B,C
0,0.37454,0.950714,0.731994
1,0.598658,0.156019,0.155995
2,0.058084,0.866176,0.601115
3,0.708073,0.020584,0.96991
4,0.832443,0.212339,0.181825


In [5]:
# An assignment inside the eval expression creates column D from A + B / C.
# inplace=True adds the column to the existing df instead of returning a new
# frame, so every later cell that uses df sees column D as well.
df.eval("D = A+B/C", inplace=True)
# Display the updated frame, which now has columns A, B, C and D.
df

Unnamed: 0,A,B,C,D
0,0.374540,0.950714,0.731994,1.673341
1,0.598658,0.156019,0.155995,1.598813
2,0.058084,0.866176,0.601115,1.499033
3,0.708073,0.020584,0.969910,0.729296
4,0.832443,0.212339,0.181825,2.000264
...,...,...,...,...
995,0.967035,0.051669,0.504796,1.069391
996,0.718454,0.862640,0.179256,5.530802
997,0.800003,0.552707,0.396554,2.193780
998,0.131715,0.865296,0.157273,5.633579


### Local variables in DataFrame.eval()

In [6]:
# Despite the name, this averages across the columns (axis="columns" is the
# named form of axis=1), producing one mean value per row of df.
column_mean = df.mean(axis="columns")

column_mean

0      0.932647
1      0.627371
2      0.756102
3      0.606966
4      0.806718
         ...   
995    0.648223
996    1.822788
997    0.985761
998    1.696966
999    0.528470
Length: 1000, dtype: float64

In [9]:
# Reference result: ordinary pandas arithmetic, column A plus the column_mean Series.
result1 = df["A"].add(column_mean)

# Same sum through eval(); the "@" prefix marks column_mean as a Python
# variable rather than a column of df.
result2 = df.eval("A + @column_mean")

# True when both approaches agree within floating-point tolerance.
np.allclose(result1, result2)

True

# DataFrame.query() Method
The DataFrame has another method based on evaluated strings, called the query()
method. Consider the following, which filters rows with a boolean mask and then with an equivalent query string:

In [11]:
# Boolean-mask selection: keep the rows where both A and B are below 0.5.
result1 = df[(df.A < 0.5) & (df.B < 0.5)]

# The same filter written as a query string, with columns referenced by name.
result2 = df.query("(A < 0.5) & (B < 0.5)")

# result1 was previously computed but never used; check that both selections
# really return the same rows before displaying one of them.
pd.testing.assert_frame_equal(result1, result2)

result2

Unnamed: 0,A,B,C,D
5,0.183405,0.304242,0.524756,0.763183
6,0.431945,0.291229,0.611853,0.907924
7,0.139494,0.292145,0.366362,0.936915
12,0.304614,0.097672,0.684233,0.447361
13,0.440152,0.122038,0.495177,0.686606
...,...,...,...,...
970,0.230417,0.001474,0.729345,0.232437
973,0.302931,0.325295,0.712621,0.759408
987,0.345342,0.335610,0.978525,0.688318
990,0.380518,0.163035,0.786206,0.587888
