#### query() 

To query the rows or the columns based on the expression (single or multiple column conditions) provided and returns a new DataFrame

Columns names must not have empty spaces.

`expr` – expression takes conditions to query rows

In [115]:
import pandas as pd
df = pd.DataFrame({'A': range(1, 6),
                   'B': range(10, 0, -2),
                   'C_C': range(10, 5, -1)})
df

Unnamed: 0,A,B,C_C
0,1,10,10
1,2,8,9
2,3,6,8
3,4,4,7
4,5,2,6


In [116]:
# Querying rows with a specific value
df.query('A > 2 ')

Unnamed: 0,A,B,C_C
2,3,6,8
3,4,4,7
4,5,2,6


In [117]:
# selecting rows based on column values passed in a list
_list = [2,5,6,8]
df.query('A in @_list')
# Equivalent to df.query('A in (2,5,6,8)')

Unnamed: 0,A,B,C_C
1,2,8,9
4,5,2,6


In [118]:
# Multiple conditions
df.query('A > 2 and C_C > 7')

Unnamed: 0,A,B,C_C
2,3,6,8


In [119]:
# 'not' Operator 
df.query('~(A > 2)')
# equivalent to df.query('not(A > 2)')

Unnamed: 0,A,B,C_C
0,1,10,10
1,2,8,9


In [120]:
df.query('A != 2 and B != 4')

Unnamed: 0,A,B,C_C
0,1,10,10
2,3,6,8
4,5,2,6


In [121]:
# selecting rows based on column values passed in a list that is not in df
_list = [2,5,6,8]
df.query('A not in @_list and B not in @_list')
# Equivalent to df.query('~ (A in (2,5,6,8) and (B in (2,5,6,8)))')

Unnamed: 0,A,B,C_C
0,1,10,10
3,4,4,7


In [122]:
# math operations 
df.query('B/2 > 3')

Unnamed: 0,A,B,C_C
0,1,10,10
1,2,8,9


In [123]:
# Use of inbuilt funcitons like sqrt(), abs()
df.query('sqrt(C_C) > 2.85')

Unnamed: 0,A,B,C_C
0,1,10,10
1,2,8,9


In [124]:
# Using index as condtions. 
df.query('index > 2')

# Similarly we can use index name in place of index keyword
# Equivalent to df.query('a > 2'), a is the name of index

Unnamed: 0,A,B,C_C
3,4,4,7
4,5,2,6


In [125]:
# looking through date_time data
df["Date"] = pd.date_range(start="2021-04-05", periods=5, freq="M")

df.query('Date.dt.month >= 6')

Unnamed: 0,A,B,C_C,Date
2,3,6,8,2021-06-30
3,4,4,7,2021-07-31
4,5,2,6,2021-08-31


In [126]:
# Querying based on conditions of comparing between two columns or more columns
df.query('A + B < C_C')

Unnamed: 0,A,B,C_C,Date


In [127]:
df

Unnamed: 0,A,B,C_C,Date
0,1,10,10,2021-04-30
1,2,8,9,2021-05-31
2,3,6,8,2021-06-30
3,4,4,7,2021-07-31
4,5,2,6,2021-08-31


In [128]:
# querying row using index as variables
var = 2
df.query('@var')

A                         3
B                         6
C_C                       8
Date    2021-06-30 00:00:00
Name: 2, dtype: object

In [129]:
# Multple conditions
df.query('A in @_list ' and 'B in @_list')

Unnamed: 0,A,B,C_C,Date
1,2,8,9,2021-05-31
2,3,6,8,2021-06-30
4,5,2,6,2021-08-31


In [130]:
# apply() method and lambda expression
_list = [2,5]
df.apply( lambda val : val[df['A'].isin(_list)]) # pure python

# Equivalent to df.query('A in @_list')          # using query() method

Unnamed: 0,A,B,C_C,Date
1,2,8,9,2021-05-31
4,5,2,6,2021-08-31
