## Pandas Dataframe Filter Operations

In [1]:
import pandas as pd

In [2]:
employees = pd.read_csv('./data/employees.csv')

In [3]:
employees.head()

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,8/6/1993,12:42 PM,97308,6.945,True,Marketing
1,Thomas,Male,3/31/1996,6:53 AM,61933,4.17,True,
2,Maria,Female,4/23/1993,11:17 AM,130590,11.858,False,Finance
3,Jerry,Male,3/4/2005,1:00 PM,138705,9.34,True,Finance
4,Larry,Male,1/24/1998,4:47 PM,101004,1.389,True,Client Services


In [4]:
genderFilter = (employees["Gender"] == "Male")
employees[genderFilter].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,8/6/1993,12:42 PM,97308,6.945,True,Marketing
1,Thomas,Male,3/31/1996,6:53 AM,61933,4.17,True,
3,Jerry,Male,3/4/2005,1:00 PM,138705,9.34,True,Finance


In [5]:
teamFilter = (employees["Team"] != "Finance")
employees[teamFilter].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,8/6/1993,12:42 PM,97308,6.945,True,Marketing
1,Thomas,Male,3/31/1996,6:53 AM,61933,4.17,True,
4,Larry,Male,1/24/1998,4:47 PM,101004,1.389,True,Client Services


In [6]:
salaryFilter = employees["Salary"] > 20000
employees[salaryFilter].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,8/6/1993,12:42 PM,97308,6.945,True,Marketing
1,Thomas,Male,3/31/1996,6:53 AM,61933,4.17,True,
2,Maria,Female,4/23/1993,11:17 AM,130590,11.858,False,Finance


In [7]:
startDateFilter = employees["Start Date"] <= '1990-05-03'
employees[startDateFilter].head()

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
4,Larry,Male,1/24/1998,4:47 PM,101004,1.389,True,Client Services
8,Angela,Female,11/22/2005,6:29 AM,95570,18.523,True,Engineering
11,Julie,Female,10/26/1997,3:19 PM,102508,12.637,True,Legal
12,Brandon,Male,12/1/1980,1:08 AM,112807,17.492,True,Human Resources
13,Gary,Male,1/27/2008,11:40 PM,109831,5.831,False,Sales


In [8]:
# `&` and `|` operations
employees[startDateFilter & salaryFilter].head(2)
employees[startDateFilter |  salaryFilter].head(2)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,8/6/1993,12:42 PM,97308,6.945,True,Marketing
1,Thomas,Male,3/31/1996,6:53 AM,61933,4.17,True,


In [9]:
# isin() function
mask = employees["Team"].isin(["Legal","Scales","Product"])
employees[mask].head()

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
5,Dennis,Male,4/18/1987,1:35 AM,115163,10.125,False,Legal
6,Ruby,Female,8/17/1987,4:20 PM,65476,10.012,True,Product
11,Julie,Female,10/26/1997,3:19 PM,102508,12.637,True,Legal
15,Lillian,Female,6/5/2016,6:09 AM,59414,1.256,False,Product
17,Shawn,Male,12/7/1986,7:45 PM,111737,6.414,False,Product


In [10]:
employees.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   First Name         933 non-null    object 
 1   Gender             855 non-null    object 
 2   Start Date         1000 non-null   object 
 3   Last Login Time    1000 non-null   object 
 4   Salary             1000 non-null   int64  
 5   Bonus %            1000 non-null   float64
 6   Senior Management  933 non-null    object 
 7   Team               957 non-null    object 
dtypes: float64(1), int64(1), object(6)
memory usage: 62.6+ KB


In [11]:
# isnull() and notnull() functions
employees[employees['Team'].notnull() & employees["Senior Management"].isnull()].head()

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
7,,Female,7/20/2015,10:43 AM,45906,11.598,,Finance
25,,Male,10/8/2012,1:12 AM,37076,18.576,,Client Services
39,,Male,1/29/2016,2:33 AM,122173,7.797,,Client Services
51,,,12/17/2011,8:29 AM,41126,14.009,,Sales
62,,Female,6/12/2007,5:25 PM,58112,19.414,,Marketing


In [12]:
# between() function
employees[employees["Start Date"].between("7/01/2015", "7/20/2020")].head()

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
7,,Female,7/20/2015,10:43 AM,45906,11.598,,Finance
24,John,Male,7/1/1992,10:08 PM,97950,13.873,False,Client Services
27,Scott,,7/11/1991,6:58 PM,122367,5.218,False,Legal
69,Irene,,7/14/2015,4:31 PM,100863,4.382,True,Finance
75,Bonnie,Female,7/2/1991,1:27 AM,104897,5.118,True,Human Resources
