# Pandas - Eval and Query

In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
# A fixed seed makes the generator emit the same sequence of random numbers on every run.
rng = np.random.RandomState(seed=10)

In [4]:
rng  # echo the generator object's repr to confirm it was created

<mtrand.RandomState at 0x1fe9299510>

In [5]:
x = rng.rand(1000000)
y = rng.rand(1000000)
%timeit x + y

100 loops, best of 3: 4.26 ms per loop


# pandas eval


### `pd.eval` relies on the Numexpr package, which evaluates compound expressions without the costly allocation of intermediate placeholder arrays.
### Supports all arithmetic operations
### Comparison Operations
### Bitwise operations
### Object attributes indexing

In [6]:
# Dimensions shared by the four benchmark frames.
nrows = 100000
ncols = 100

# Fresh generator with a fixed seed so the frames are reproducible.
rng = np.random.RandomState(seed=42)

# Build the four frames one after another from the same generator.
df1, df2, df3, df4 = (
    pd.DataFrame(rng.rand(nrows, ncols)) for _frame in range(4)
)

In [7]:
# Preview only the first ten rows instead of rendering the whole 100000-row frame.
df1.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,0.374540,0.950714,0.731994,0.598658,0.156019,0.155995,0.058084,0.866176,0.601115,0.708073,...,0.119594,0.713245,0.760785,0.561277,0.770967,0.493796,0.522733,0.427541,0.025419,0.107891
1,0.031429,0.636410,0.314356,0.508571,0.907566,0.249292,0.410383,0.755551,0.228798,0.076980,...,0.093103,0.897216,0.900418,0.633101,0.339030,0.349210,0.725956,0.897110,0.887086,0.779876
2,0.642032,0.084140,0.161629,0.898554,0.606429,0.009197,0.101472,0.663502,0.005062,0.160808,...,0.030500,0.037348,0.822601,0.360191,0.127061,0.522243,0.769994,0.215821,0.622890,0.085347
3,0.051682,0.531355,0.540635,0.637430,0.726091,0.975852,0.516300,0.322956,0.795186,0.270832,...,0.990505,0.412618,0.372018,0.776413,0.340804,0.930757,0.858413,0.428994,0.750871,0.754543
4,0.103124,0.902553,0.505252,0.826457,0.320050,0.895523,0.389202,0.010838,0.905382,0.091287,...,0.455657,0.620133,0.277381,0.188121,0.463698,0.353352,0.583656,0.077735,0.974395,0.986211
5,0.698162,0.536096,0.309528,0.813795,0.684731,0.162617,0.910927,0.822537,0.949800,0.725720,...,0.138827,0.640875,0.181880,0.345667,0.896788,0.473962,0.667558,0.172320,0.192289,0.040869
6,0.168935,0.278590,0.177010,0.088703,0.120636,0.460779,0.206334,0.364270,0.503417,0.690395,...,0.323679,0.425436,0.507610,0.242410,0.114837,0.610620,0.288631,0.581238,0.154363,0.481140
7,0.532589,0.051824,0.336604,0.134415,0.063375,0.989960,0.322354,0.809874,0.254641,0.681503,...,0.153351,0.586230,0.505889,0.611454,0.018110,0.872124,0.932118,0.565133,0.696651,0.922499
8,0.707239,0.152539,0.576288,0.606715,0.424131,0.736444,0.934367,0.925569,0.450839,0.113238,...,0.722267,0.855696,0.830220,0.397184,0.668085,0.204984,0.293148,0.896336,0.013002,0.085509
9,0.207886,0.026532,0.181435,0.583042,0.421425,0.892672,0.817444,0.341817,0.259423,0.379692,...,0.799416,0.694696,0.272145,0.590231,0.360974,0.091582,0.917314,0.136819,0.950237,0.446006


In [8]:
# Baseline: sum the four frames with ordinary pandas operators.
%timeit df1 + df2 + df3 + df4  

10 loops, best of 3: 113 ms per loop


In [9]:
%timeit pd.eval('df1 + df2 + df3 + df4')   ### pd.eval takes roughly half the time of the plain pandas sum timed in the previous cell (51.5 ms vs 113 ms)

10 loops, best of 3: 51.5 ms per loop


In [10]:
%timeit(df1 < df2) & (df2 <= df3) & (df3 != df4)

1 loop, best of 3: 451 ms per loop


In [11]:
# The same three-way comparison, written as a chained comparison string for pd.eval.
%timeit pd.eval('df1 < df2 <= df3 != df4')

10 loops, best of 3: 67.2 ms per loop


In [12]:
# Same boolean mask computed two ways: plain pandas first, then pd.eval on the expression string.
# `&` binds tighter than `|` in Python, so this reads as ((df1 < 0.5) & (df2 < 0.5)) | (df3 < df4).
%timeit (df1 < 0.5) & (df2 < 0.5) | (df3 < df4)
%timeit pd.eval('(df1 < 0.5) & (df2 < 0.5) | (df3 < df4)')

1 loop, best of 3: 355 ms per loop
10 loops, best of 3: 77.4 ms per loop


# DataFrame.eval() for Column-Wise Operations

In [13]:
# Small frame of 1000 rows and three named columns for the column-wise examples.
df = pd.DataFrame(
    data=rng.rand(1000, 3),
    columns=['A', 'B', 'C'],
)
df.head()

Unnamed: 0,A,B,C
0,0.615875,0.525167,0.047354
1,0.330858,0.412879,0.441564
2,0.689047,0.559068,0.23035
3,0.290486,0.695479,0.852587
4,0.42428,0.534344,0.245216


In [14]:
# Column-wise arithmetic with ordinary pandas operations: (A + B) / (C - 1).
res = (df['A'] + df['B']).div(df['C'] - 1)

In [15]:
res  # show the resulting Series; pandas abbreviates the long output with "..."

0       -1.197761
1       -1.331822
2       -1.621667
3       -6.688481
4       -1.270064
5       -1.264495
6       -1.097798
7       -1.532708
8       -1.665774
9       -0.842406
10     -10.182075
11      -0.356140
12     -45.154825
13      -1.450962
14      -7.670347
15      -1.159972
16      -2.383616
17      -4.441900
18      -0.435063
19      -0.885392
20      -0.583735
21      -1.138005
22      -1.256148
23      -2.401427
24      -4.932573
25      -1.597291
26      -1.147111
27      -0.338727
28      -1.019897
29      -1.335178
          ...    
970    -11.786311
971     -3.200094
972     -1.728647
973     -3.198657
974     -2.626197
975     -2.581219
976     -5.364230
977     -2.557397
978     -0.894009
979     -1.896405
980     -0.767815
981     -1.669858
982     -1.626391
983     -1.072541
984     -0.654248
985     -2.302684
986     -1.929089
987     -1.522145
988     -2.616053
989     -3.554885
990     -5.241483
991     -1.534723
992   -462.962881
993     -1.774656
994     -1

In [16]:
# Same expression as `res`, handed to pd.eval as a string; columns are reached via attribute access on df.
res2 = pd.eval("(df.A+ df.B) / (df.C - 1)")

In [17]:
res2  # show the pd.eval result so it can be compared with `res`

0       -1.197761
1       -1.331822
2       -1.621667
3       -6.688481
4       -1.270064
5       -1.264495
6       -1.097798
7       -1.532708
8       -1.665774
9       -0.842406
10     -10.182075
11      -0.356140
12     -45.154825
13      -1.450962
14      -7.670347
15      -1.159972
16      -2.383616
17      -4.441900
18      -0.435063
19      -0.885392
20      -0.583735
21      -1.138005
22      -1.256148
23      -2.401427
24      -4.932573
25      -1.597291
26      -1.147111
27      -0.338727
28      -1.019897
29      -1.335178
          ...    
970    -11.786311
971     -3.200094
972     -1.728647
973     -3.198657
974     -2.626197
975     -2.581219
976     -5.364230
977     -2.557397
978     -0.894009
979     -1.896405
980     -0.767815
981     -1.669858
982     -1.626391
983     -1.072541
984     -0.654248
985     -2.302684
986     -1.929089
987     -1.522145
988     -2.616053
989     -3.554885
990     -5.241483
991     -1.534723
992   -462.962881
993     -1.774656
994     -1

In [18]:
%timeit res

10000000 loops, best of 3: 35.3 ns per loop


In [19]:
%timeit res2

10000000 loops, best of 3: 35.4 ns per loop


# DataFrame Query Method

In [20]:
# Keep only the rows where column A is below 0.5 and column B is above 0.5.
df.query('A < 0.5 and B > 0.5')

Unnamed: 0,A,B,C
3,0.290486,0.695479,0.852587
4,0.424280,0.534344,0.245216
5,0.111085,0.566276,0.464323
6,0.181763,0.511544,0.368457
7,0.048330,0.779839,0.459669
16,0.448659,0.874896,0.444728
21,0.142193,0.585808,0.360283
22,0.369002,0.869797,0.013812
23,0.239374,0.816387,0.560361
24,0.013722,0.813868,0.832219
