## Example of using boolean filters

In [1]:
import pandas as pd
from pathlib import Path

In [2]:
# Location of the sales workbook, resolved against the current working directory.
src_file = Path.cwd().joinpath('data', 'raw', 'sample_sales_details.xlsx')

In [3]:
# Read the Excel workbook at src_file into a DataFrame.
df = pd.read_excel(src_file)
# A bare name as the last expression renders the frame as the cell output.
df

Unnamed: 0,invoice,company,purchase_date,sku,product,quantity,price,extended amount,shipping_cost
0,ZN-870-29,Realcube,2019-03-05,L-Y-71,shirt,19,17,323,6.46
1,JQ-501-63,Zooxo,2019-07-09,E-532,book,30,14,420,8.40
2,FI-165-58,Dabtype,2019-08-12,S-909,poster,7,23,161,3.22
3,XP-005-55,Skipfire,2019-11-18,B-B-5,pen,7,29,203,4.06
4,NB-917-18,Bluezoom,2019-04-18,L-477,poster,36,19,684,13.68
...,...,...,...,...,...,...,...,...,...
995,ZM-628-88,Viva,2019-09-11,B-B-5,pen,-5,34,-170,0.00
996,DQ-810-46,Viva,2019-09-05,M-B-5,pen,17,32,544,10.88
997,RA-147-40,Dabfeed,2019-03-24,M-809,poster,17,34,578,11.56
998,VT-754-54,Photobean,2019-12-30,L-G-31,shirt,15,18,270,5.40


In [4]:
# Element-wise comparison: produces one boolean per row, True where the
# company is exactly 'Viva'.
df['company'] == 'Viva'

0      False
1      False
2      False
3      False
4      False
       ...  
995     True
996     True
997    False
998    False
999    False
Name: company, Length: 1000, dtype: bool

In [5]:
# Keep the boolean mask under a name so it can be reused and combined later.
viva = df['company'] == 'Viva'

In [6]:
# Select the rows flagged True in the `viva` mask; `:` keeps every column.
df.loc[viva, :]

Unnamed: 0,invoice,company,purchase_date,sku,product,quantity,price,extended amount,shipping_cost
186,CM-205-86,Viva,2019-05-19,E-327,book,8,14,112,2.24
705,QU-986-45,Viva,2019-06-09,H-966,book,6,35,210,4.2
840,RF-796-61,Viva,2019-02-19,S-393,poster,46,16,736,14.72
995,ZM-628-88,Viva,2019-09-11,B-B-5,pen,-5,34,-170,0.0
996,DQ-810-46,Viva,2019-09-05,M-B-5,pen,17,32,544,10.88


In [7]:
# Shorthand row filter: indexing the frame directly with a boolean mask
# returns the same rows as `df.loc[viva, :]`.
df[viva]

Unnamed: 0,invoice,company,purchase_date,sku,product,quantity,price,extended amount,shipping_cost
186,CM-205-86,Viva,2019-05-19,E-327,book,8,14,112,2.24
705,QU-986-45,Viva,2019-06-09,H-966,book,6,35,210,4.2
840,RF-796-61,Viva,2019-02-19,S-393,poster,46,16,736,14.72
995,ZM-628-88,Viva,2019-09-11,B-B-5,pen,-5,34,-170,0.0
996,DQ-810-46,Viva,2019-09-05,M-B-5,pen,17,32,544,10.88


In [8]:
# Boolean mask: True for orders of at least 10 units.
qty_10 = df['quantity'].ge(10)

In [9]:
# Display the mask: one boolean per row, True where quantity >= 10.
qty_10

0       True
1       True
2      False
3      False
4       True
       ...  
995    False
996     True
997     True
998     True
999     True
Name: quantity, Length: 1000, dtype: bool

In [10]:
# Rows where both masks are True (`&` is an element-wise AND), all columns.
df.loc[qty_10 & viva, :]

Unnamed: 0,invoice,company,purchase_date,sku,product,quantity,price,extended amount,shipping_cost
840,RF-796-61,Viva,2019-02-19,S-393,poster,46,16,736,14.72
996,DQ-810-46,Viva,2019-09-05,M-B-5,pen,17,32,544,10.88


In [11]:
# Same row filter, restricted to a label slice of columns. Label slices in
# `.loc` include both endpoints, so 'price' is part of the result.
df.loc[qty_10 & viva, 'purchase_date':'price']

Unnamed: 0,purchase_date,sku,product,quantity,price
840,2019-02-19,S-393,poster,46,16
996,2019-09-05,M-B-5,pen,17,32


In [12]:
# Case-sensitive substring test on the company name.
# regex=False treats 'buzz' as plain text rather than a regular expression;
# na=False maps a missing company name to False so the result is always a
# pure boolean mask.
df['company'].str.contains('buzz', regex=False, na=False)

0      False
1      False
2      False
3      False
4      False
       ...  
995    False
996    False
997    False
998    False
999     True
Name: company, Length: 1000, dtype: bool

In [13]:
# Rows whose company name contains the literal text 'buzz'.
# na=False is needed for safety: `.loc` rejects a mask that contains missing
# values, which is what `str.contains` yields for a missing company name.
# The match is case-sensitive, so a name such as 'Buzzbean' is not selected;
# pass case=False if those rows should be included.
df.loc[df['company'].str.contains('buzz', regex=False, na=False), :]

Unnamed: 0,invoice,company,purchase_date,sku,product,quantity,price,extended amount,shipping_cost
23,NE-854-88,Realbuzz,2019-05-08,M-271,poster,11,14,154,3.08
48,NM-907-40,Mybuzz,2019-11-29,S-500,poster,11,26,286,5.72
153,KW-456-55,Realbuzz,2019-05-21,S-393,poster,38,22,836,16.72
206,GV-236-09,Linkbuzz,2019-06-25,M-809,poster,49,18,882,17.64
326,EY-555-15,Realbuzz,2019-04-18,XL-B-12,shirt,0,12,0,0.0
470,SW-903-99,Realbuzz,2019-07-30,L-477,poster,6,29,174,3.48
589,HS-879-34,Mybuzz,2019-02-15,L-743,poster,36,30,1080,21.6
687,LU-384-08,Linkbuzz,2019-01-15,S-B-41,shirt,35,35,1225,24.5
947,YW-362-87,Linkbuzz,2019-12-11,P-529,book,45,17,765,15.3
999,LS-463-74,Mybuzz,2019-11-12,S-G-73,shirt,24,34,816,16.32


In [14]:
# Count the products whose SKU begins with 'S'.
# na=False treats a missing SKU as a non-match so the mask handed to `.loc`
# stays strictly boolean instead of raising on missing values.
df.loc[df['sku'].str.startswith('S', na=False), 'product'].value_counts()

product
shirt     122
poster    107
Name: count, dtype: int64

## Working with dates

In [15]:
# Orders placed on or after 1 December 2019.
# The ISO 8601 literal (YYYY-MM-DD) avoids the month-first / day-first
# ambiguity of a string such as '12-1-2019'.
df[df['purchase_date'] >= '2019-12-01']

Unnamed: 0,invoice,company,purchase_date,sku,product,quantity,price,extended amount,shipping_cost
32,MX-599-18,Vimbo,2019-12-12,P-393,book,36,33,1188,23.76
33,NN-163-23,Skyble,2019-12-27,M-B-5,pen,45,12,540,10.80
34,FP-453-25,Zava,2019-12-05,S-393,poster,0,13,0,0.00
54,SI-328-34,Babbleblab,2019-12-13,M-G-21,shirt,3,34,102,2.04
57,JF-213-00,Centimia,2019-12-22,B-B-5,pen,6,17,102,2.04
...,...,...,...,...,...,...,...,...,...
951,HY-069-99,Oba,2019-12-12,M-271,poster,18,25,450,9.00
956,WO-091-32,Eire,2019-12-05,M-G-21,shirt,15,20,300,6.00
966,FO-483-27,Cogibox,2019-12-10,S-909,poster,25,15,375,7.50
985,DX-716-83,Aimbo,2019-12-17,M-B-5,pen,38,15,570,11.40


In [16]:
# Two independent row masks: purchases made in November (the year is not
# checked) and orders for books.
# NOTE: `purchase_date` below is a boolean mask, not the column of that name.
purchase_date = df['purchase_date'].dt.month.eq(11)
product_type = df['product'].eq('book')

In [17]:
# Rows that satisfy both masks at once (element-wise AND).
df.loc[purchase_date & product_type]

Unnamed: 0,invoice,company,purchase_date,sku,product,quantity,price,extended amount,shipping_cost
16,IG-811-84,Yadel,2019-11-15,H-475,book,46,28,1288,25.76
51,MM-603-72,Snaptags,2019-11-14,P-944,book,13,25,325,6.5
67,DB-217-08,Dynabox,2019-11-17,E-327,book,17,26,442,8.84
77,TL-625-84,Snaptags,2019-11-02,H-966,book,28,33,924,18.48
148,UE-108-04,Yodo,2019-11-24,P-343,book,29,27,783,15.66
194,CP-917-92,Kamba,2019-11-13,E-777,book,41,16,656,13.12
248,LZ-428-00,Photojam,2019-11-22,H-966,book,0,25,0,0.0
303,PE-226-53,Gabvine,2019-11-22,P-393,book,19,18,342,6.84
338,EP-683-72,Npath,2019-11-22,E-961,book,1,15,15,0.3
528,QP-747-38,Ainyx,2019-11-19,P-293,book,9,31,279,5.58


In [18]:
# Orders with strictly more than 12 units.
df.loc[df['quantity'].gt(12)]

Unnamed: 0,invoice,company,purchase_date,sku,product,quantity,price,extended amount,shipping_cost
0,ZN-870-29,Realcube,2019-03-05,L-Y-71,shirt,19,17,323,6.46
1,JQ-501-63,Zooxo,2019-07-09,E-532,book,30,14,420,8.40
4,NB-917-18,Bluezoom,2019-04-18,L-477,poster,36,19,684,13.68
6,MQ-907-02,Babbleset,2019-10-27,L-579,poster,30,21,630,12.60
7,NX-102-26,Fliptune,2019-10-16,E-201,book,40,28,1120,22.40
...,...,...,...,...,...,...,...,...,...
994,ON-247-90,Photobug,2019-09-29,L-G-13,shirt,40,31,1240,24.80
996,DQ-810-46,Viva,2019-09-05,M-B-5,pen,17,32,544,10.88
997,RA-147-40,Dabfeed,2019-03-24,M-809,poster,17,34,578,11.56
998,VT-754-54,Photobean,2019-12-30,L-G-31,shirt,15,18,270,5.40


In [19]:
# Add a constant column; the scalar 5 is broadcast to every row.
# NOTE: this mutates `df` in place, so earlier cell outputs that displayed
# `df` no longer show its full set of columns.
df['min_order_size'] = 5

In [20]:
# Preview the first five rows to confirm the new column is present.
df.head(n=5)

Unnamed: 0,invoice,company,purchase_date,sku,product,quantity,price,extended amount,shipping_cost,min_order_size
0,ZN-870-29,Realcube,2019-03-05,L-Y-71,shirt,19,17,323,6.46,5
1,JQ-501-63,Zooxo,2019-07-09,E-532,book,30,14,420,8.4,5
2,FI-165-58,Dabtype,2019-08-12,S-909,poster,7,23,161,3.22,5
3,XP-005-55,Skipfire,2019-11-18,B-B-5,pen,7,29,203,4.06,5
4,NB-917-18,Bluezoom,2019-04-18,L-477,poster,36,19,684,13.68,5


In [21]:
# Column-to-column comparison: True where the ordered quantity is below the
# row's minimum order size.
small_orders = df['quantity'].lt(df['min_order_size'])

In [22]:
# Rows flagged by `small_orders`, limited to the columns from 'company'
# through 'product' (label slices include both endpoints).
df.loc[small_orders, 'company':'product']

Unnamed: 0,company,purchase_date,sku,product
5,Zooveo,2019-10-17,B-BK-5,pen
8,Buzzbean,2019-06-17,S-393,poster
19,Babbleblab,2019-10-23,M-G-21,shirt
24,Mynte,2019-11-25,M-BK-5,pen
27,DabZ,2019-01-23,M-B-5,pen
...,...,...,...,...
980,Roomm,2019-09-23,B-B-1,pen
981,Dabfeed,2019-03-25,M-271,poster
984,Reallinks,2019-06-24,M-BK-5,pen
986,Jamia,2019-11-06,L-G-13,shirt


In [23]:
# `query` expresses a row filter as a string evaluated against the column
# names; here it keeps rows whose quantity is greater than 10.
df.query('quantity > 10')

Unnamed: 0,invoice,company,purchase_date,sku,product,quantity,price,extended amount,shipping_cost,min_order_size
0,ZN-870-29,Realcube,2019-03-05,L-Y-71,shirt,19,17,323,6.46,5
1,JQ-501-63,Zooxo,2019-07-09,E-532,book,30,14,420,8.40,5
4,NB-917-18,Bluezoom,2019-04-18,L-477,poster,36,19,684,13.68,5
6,MQ-907-02,Babbleset,2019-10-27,L-579,poster,30,21,630,12.60,5
7,NX-102-26,Fliptune,2019-10-16,E-201,book,40,28,1120,22.40,5
...,...,...,...,...,...,...,...,...,...,...
994,ON-247-90,Photobug,2019-09-29,L-G-13,shirt,40,31,1240,24.80,5
996,DQ-810-46,Viva,2019-09-05,M-B-5,pen,17,32,544,10.88,5
997,RA-147-40,Dabfeed,2019-03-24,M-809,poster,17,34,578,11.56,5
998,VT-754-54,Photobean,2019-12-30,L-G-31,shirt,15,18,270,5.40,5
