# WORKING WITH PANDAS

In [32]:
import numpy as np
import pandas as pd

In [33]:
employees = pd.read_csv("./data/employees.csv", parse_dates=["Start Date"])
employees

  employees = pd.read_csv("./data/employees.csv", parse_dates=["Start Date"])


Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,,True,Marketing
1,Thomas,Male,1996-03-31,61933.0,True,
2,Maria,Female,NaT,130590.0,False,Finance
3,Jerry,,2005-03-04,138705.0,True,Finance
4,Larry,Male,1998-01-24,101004.0,True,IT
...,...,...,...,...,...,...
996,Phillip,Male,1984-01-31,42392.0,False,Finance
997,Russell,Male,2013-05-20,96914.0,False,Product
998,Larry,Male,2013-04-20,60500.0,False,Business Dev
999,Albert,Male,2012-05-15,129949.0,True,Sales


In [34]:
# Inspect your data
employees.info()        # info() method gives information about your data

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1001 entries, 0 to 1000
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   First Name  933 non-null    object        
 1   Gender      854 non-null    object        
 2   Start Date  999 non-null    datetime64[ns]
 3   Salary      999 non-null    float64       
 4   Mgmt        933 non-null    object        
 5   Team        957 non-null    object        
dtypes: datetime64[ns](1), float64(1), object(4)
memory usage: 47.1+ KB


### Converting the data types using `astype()` method

In [35]:
employees["Mgmt"] = employees["Mgmt"].astype(bool)
employees["Mgmt"].dtype

dtype('bool')

In [36]:
employees["Salary"] = employees["Salary"].fillna(0).astype(int)
employees["Salary"].dtype

dtype('int64')

In [37]:
employees["Gender"] = employees["Gender"].astype("category")
employees["Team"] = employees["Team"].astype("category")
employees["Team"].dtype

CategoricalDtype(categories=['Business Dev', 'Distribution', 'Engineering', 'Finance',
                  'HR', 'IT', 'Legal', 'Marketing', 'Product', 'Sales'],
, ordered=False, categories_dtype=object)

### Filtering by a Single Condition

In [38]:
# Creating a condition
marias = employees["First Name"] == "Maria"
# filtering based on the condition / criteria
employees[marias]       # employees[employees["First Name"] == "Maria"]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
2,Maria,Female,NaT,130590,False,Finance
198,Maria,Female,1990-12-27,36067,True,Product
815,Maria,,1986-01-18,106562,False,HR
844,Maria,,1985-06-19,148857,False,Legal
936,Maria,Female,2003-03-14,96250,False,Business Dev
984,Maria,Female,2011-10-15,43455,False,Engineering


In [39]:
# Creating a condition
finance = employees["Team"] == "Finance"
non_finance = employees["Team"] != "Finance"
employees[finance]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
2,Maria,Female,NaT,130590,False,Finance
3,Jerry,,2005-03-04,138705,True,Finance
7,,Female,2015-07-20,45906,True,Finance
14,Kimberly,Female,1999-01-14,41426,True,Finance
46,Bruce,Male,2009-11-28,114796,False,Finance
...,...,...,...,...,...,...
907,Elizabeth,Female,1998-07-27,137144,False,Finance
954,Joe,Male,1980-01-19,119667,True,Finance
987,Gloria,Female,2014-12-08,136709,True,Finance
992,Anthony,Male,2011-10-16,112769,True,Finance


In [40]:
employees[employees["Mgmt"]]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
1,Thomas,Male,1996-03-31,61933,True,
3,Jerry,,2005-03-04,138705,True,Finance
4,Larry,Male,1998-01-24,101004,True,IT
6,Ruby,Female,1987-08-17,65476,True,Product
...,...,...,...,...,...,...
992,Anthony,Male,2011-10-16,112769,True,Finance
993,Tina,Female,1997-05-15,56450,True,Engineering
994,George,Male,2013-06-21,98874,True,Marketing
999,Albert,Male,2012-05-15,129949,True,Sales


In [41]:
high_salaries = employees["Salary"] > 100000
employees[high_salaries]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
2,Maria,Female,NaT,130590,False,Finance
3,Jerry,,2005-03-04,138705,True,Finance
4,Larry,Male,1998-01-24,101004,True,IT
5,Dennis,Male,1987-04-18,115163,False,Legal
9,Frances,Female,2002-08-08,139852,True,Business Dev
...,...,...,...,...,...,...
990,Robin,Female,1987-07-24,100765,True,IT
991,Rose,Female,2002-08-25,134505,True,Marketing
992,Anthony,Male,2011-10-16,112769,True,Finance
995,Henry,,2014-11-23,132483,False,Distribution


### Filtering by Multiple Conditions

In [42]:

'''
& - for logical and operation
| - for logical or operation
'''

'\n& - for logical and operation\n| - for logical or operation\n'

In [43]:
# Creating two conditions
is_female = employees["Gender"] == "Female"
is_biz_dev = employees["Team"] == "Business Dev"
# aplly both conditions using & (and) operator
employees[is_female & is_biz_dev]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
9,Frances,Female,2002-08-08,139852,True,Business Dev
33,Jean,Female,1993-12-18,119082,False,Business Dev
36,Rachel,Female,2009-02-16,142032,False,Business Dev
38,Stephanie,Female,1986-09-13,36844,True,Business Dev
61,Denise,Female,2001-11-06,106862,False,Business Dev
66,Nancy,Female,2012-12-15,125250,True,Business Dev
92,Linda,Female,2000-05-25,119009,True,Business Dev
111,Bonnie,Female,1999-12-17,42153,True,Business Dev
114,Ashley,Female,2002-08-04,58698,True,Business Dev
118,Andrea,Female,2012-01-12,120204,False,Business Dev


In [44]:
# two criteria
salary_below_40k = employees["Salary"] < 40000
started_after_2015 = employees["Start Date"] > "2015-01-01"
employees[salary_below_40k | started_after_2015]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
7,,Female,2015-07-20,45906,True,Finance
15,Lillian,Female,2016-06-05,59414,False,Product
25,,Male,2012-10-08,37076,True,IT
26,Craig,Male,2000-02-27,37598,True,Marketing
...,...,...,...,...,...,...
958,Gloria,Female,1987-10-24,39833,False,Engineering
964,Bruce,Male,1980-05-07,35802,True,Sales
967,Thomas,Male,2016-03-12,105681,False,Engineering
989,Justin,,1991-02-10,38344,False,Legal


### Inversion with `~`

In [45]:
myseries = pd.Series([True, False, True])
~myseries

0    False
1     True
2    False
dtype: bool

In [46]:
below_100k_salaries = employees["Salary"] < 100000
employees[~below_100k_salaries]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
2,Maria,Female,NaT,130590,False,Finance
3,Jerry,,2005-03-04,138705,True,Finance
4,Larry,Male,1998-01-24,101004,True,IT
5,Dennis,Male,1987-04-18,115163,False,Legal
9,Frances,Female,2002-08-08,139852,True,Business Dev
...,...,...,...,...,...,...
990,Robin,Female,1987-07-24,100765,True,IT
991,Rose,Female,2002-08-25,134505,True,Marketing
992,Anthony,Male,2011-10-16,112769,True,Finance
995,Henry,,2014-11-23,132483,False,Distribution


### Methods for Booleans

In [47]:
all_star_teams = ["Legal", "Sales", "Marketing"]
# the isin() is a boolean method for filter a column for more than a single value
on_star_teams = employees["Team"].isin(all_star_teams)
employees[on_star_teams]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
5,Dennis,Male,1987-04-18,115163,False,Legal
11,Julie,Female,1997-10-26,102508,True,Legal
13,Gary,Male,2008-01-27,109831,False,Sales
20,Lois,,1995-04-22,64714,True,Legal
...,...,...,...,...,...,...
986,Donna,Female,1982-11-26,82871,False,Marketing
989,Justin,,1991-02-10,38344,False,Legal
991,Rose,Female,2002-08-25,134505,True,Marketing
994,George,Male,2013-06-21,98874,True,Marketing


In [48]:
# between() is a boolean method for filter values based on a range
between_80k_to_100k = employees["Salary"].between(80000, 100000)
employees[between_80k_to_100k]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
8,Angela,Female,2005-11-22,95570,True,Engineering
16,Jeremy,Male,2010-09-21,90370,False,HR
19,Donna,Female,2010-07-22,81014,False,Product
22,Joshua,,2012-03-08,90816,True,IT
24,John,Male,1992-07-01,97950,False,IT
...,...,...,...,...,...,...
982,Rose,Female,1982-04-06,91411,True,HR
985,Stephen,,1983-07-10,85668,False,Legal
986,Donna,Female,1982-11-26,82871,False,Marketing
994,George,Male,2013-06-21,98874,True,Marketing


In [49]:
# apply between method on dates
folks_80s = employees["Start Date"].between(
    left="1980-01-01",
    right="1990-01-01"
)
employees[folks_80s]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
5,Dennis,Male,1987-04-18,115163,False,Legal
6,Ruby,Female,1987-08-17,65476,True,Product
10,Louise,Female,1980-08-12,63241,True,
12,Brandon,Male,1980-12-01,112807,True,HR
17,Shawn,Male,1986-12-07,111737,False,Product
...,...,...,...,...,...,...
983,John,Male,1982-12-23,146907,False,Engineering
985,Stephen,,1983-07-10,85668,False,Legal
986,Donna,Female,1982-11-26,82871,False,Marketing
990,Robin,Female,1987-07-24,100765,True,IT


In [50]:
# between() method on text values
names_starting_with_r = employees["First Name"].between("R", "S")
employees[names_starting_with_r]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
6,Ruby,Female,1987-08-17,65476,True,Product
36,Rachel,Female,2009-02-16,142032,False,Business Dev
45,Roger,Male,1980-04-17,88010,True,Sales
67,Rachel,Female,1999-08-16,51178,True,Finance
78,Robin,Female,1983-06-04,114797,True,Sales
...,...,...,...,...,...,...
973,Russell,Male,2013-05-10,137359,False,Business Dev
982,Rose,Female,1982-04-06,91411,True,HR
990,Robin,Female,1987-07-24,100765,True,IT
991,Rose,Female,2002-08-25,134505,True,Marketing


### The `isnull()` and `notnull()` methods

In [51]:
# no team is specified, means missing
no_team = employees["Team"].isnull()
employees[no_team]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
1,Thomas,Male,1996-03-31,61933,True,
10,Louise,Female,1980-08-12,63241,True,
23,,Male,2012-06-14,125792,True,
32,,Male,1998-08-21,122340,True,
91,James,,2005-01-26,128771,False,
109,Christopher,Male,2000-04-22,37919,False,
139,,Female,1990-10-03,132373,True,
199,Jonathan,Male,2009-07-17,130581,True,
258,Michael,Male,2002-01-24,43586,False,
290,Jeremy,Male,1988-06-14,129460,True,


In [52]:
# rows with non-missing values for Team column
yes_team = employees["Team"].notnull()
employees[yes_team]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
2,Maria,Female,NaT,130590,False,Finance
3,Jerry,,2005-03-04,138705,True,Finance
4,Larry,Male,1998-01-24,101004,True,IT
5,Dennis,Male,1987-04-18,115163,False,Legal
...,...,...,...,...,...,...
995,Henry,,2014-11-23,132483,False,Distribution
996,Phillip,Male,1984-01-31,42392,False,Finance
997,Russell,Male,2013-05-20,96914,False,Product
998,Larry,Male,2013-04-20,60500,False,Business Dev


In [53]:
employees[~no_team]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
2,Maria,Female,NaT,130590,False,Finance
3,Jerry,,2005-03-04,138705,True,Finance
4,Larry,Male,1998-01-24,101004,True,IT
5,Dennis,Male,1987-04-18,115163,False,Legal
...,...,...,...,...,...,...
995,Henry,,2014-11-23,132483,False,Distribution
996,Phillip,Male,1984-01-31,42392,False,Finance
997,Russell,Male,2013-05-20,96914,False,Product
998,Larry,Male,2013-04-20,60500,False,Business Dev


In [54]:
employees[~yes_team]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
1,Thomas,Male,1996-03-31,61933,True,
10,Louise,Female,1980-08-12,63241,True,
23,,Male,2012-06-14,125792,True,
32,,Male,1998-08-21,122340,True,
91,James,,2005-01-26,128771,False,
109,Christopher,Male,2000-04-22,37919,False,
139,,Female,1990-10-03,132373,True,
199,Jonathan,Male,2009-07-17,130581,True,
258,Michael,Male,2002-01-24,43586,False,
290,Jeremy,Male,1988-06-14,129460,True,


### Dealing with Null values `dropna()`

In [55]:
# drop rows with missing values
employees.dropna()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
4,Larry,Male,1998-01-24,101004,True,IT
5,Dennis,Male,1987-04-18,115163,False,Legal
6,Ruby,Female,1987-08-17,65476,True,Product
8,Angela,Female,2005-11-22,95570,True,Engineering
...,...,...,...,...,...,...
994,George,Male,2013-06-21,98874,True,Marketing
996,Phillip,Male,1984-01-31,42392,False,Finance
997,Russell,Male,2013-05-20,96914,False,Product
998,Larry,Male,2013-04-20,60500,False,Business Dev


In [56]:
employees.dropna(thresh=1)   # keep those rows with at least one non-missing value

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
1,Thomas,Male,1996-03-31,61933,True,
2,Maria,Female,NaT,130590,False,Finance
3,Jerry,,2005-03-04,138705,True,Finance
4,Larry,Male,1998-01-24,101004,True,IT
...,...,...,...,...,...,...
996,Phillip,Male,1984-01-31,42392,False,Finance
997,Russell,Male,2013-05-20,96914,False,Product
998,Larry,Male,2013-04-20,60500,False,Business Dev
999,Albert,Male,2012-05-15,129949,True,Sales


In [57]:
employees.dropna(thresh=2)   # keep those rows with at least two non-missing value

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
1,Thomas,Male,1996-03-31,61933,True,
2,Maria,Female,NaT,130590,False,Finance
3,Jerry,,2005-03-04,138705,True,Finance
4,Larry,Male,1998-01-24,101004,True,IT
...,...,...,...,...,...,...
996,Phillip,Male,1984-01-31,42392,False,Finance
997,Russell,Male,2013-05-20,96914,False,Product
998,Larry,Male,2013-04-20,60500,False,Business Dev
999,Albert,Male,2012-05-15,129949,True,Sales


In [58]:
employees.dropna(thresh=3)   # keep those rows with at least three non-missing value

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
1,Thomas,Male,1996-03-31,61933,True,
2,Maria,Female,NaT,130590,False,Finance
3,Jerry,,2005-03-04,138705,True,Finance
4,Larry,Male,1998-01-24,101004,True,IT
...,...,...,...,...,...,...
995,Henry,,2014-11-23,132483,False,Distribution
996,Phillip,Male,1984-01-31,42392,False,Finance
997,Russell,Male,2013-05-20,96914,False,Product
998,Larry,Male,2013-04-20,60500,False,Business Dev


In [59]:
employees.dropna(thresh=4)   # keep those rows with at least four non-missing value

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
1,Thomas,Male,1996-03-31,61933,True,
2,Maria,Female,NaT,130590,False,Finance
3,Jerry,,2005-03-04,138705,True,Finance
4,Larry,Male,1998-01-24,101004,True,IT
...,...,...,...,...,...,...
995,Henry,,2014-11-23,132483,False,Distribution
996,Phillip,Male,1984-01-31,42392,False,Finance
997,Russell,Male,2013-05-20,96914,False,Product
998,Larry,Male,2013-04-20,60500,False,Business Dev


In [60]:
employees.dropna(thresh=5)   # keep those rows with at least five non-missing value

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
1,Thomas,Male,1996-03-31,61933,True,
2,Maria,Female,NaT,130590,False,Finance
3,Jerry,,2005-03-04,138705,True,Finance
4,Larry,Male,1998-01-24,101004,True,IT
...,...,...,...,...,...,...
995,Henry,,2014-11-23,132483,False,Distribution
996,Phillip,Male,1984-01-31,42392,False,Finance
997,Russell,Male,2013-05-20,96914,False,Product
998,Larry,Male,2013-04-20,60500,False,Business Dev


In [61]:
# how parameter
employees.dropna(how="any")     # if there is 'any' missing value is a row then it drops it

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
4,Larry,Male,1998-01-24,101004,True,IT
5,Dennis,Male,1987-04-18,115163,False,Legal
6,Ruby,Female,1987-08-17,65476,True,Product
8,Angela,Female,2005-11-22,95570,True,Engineering
...,...,...,...,...,...,...
994,George,Male,2013-06-21,98874,True,Marketing
996,Phillip,Male,1984-01-31,42392,False,Finance
997,Russell,Male,2013-05-20,96914,False,Product
998,Larry,Male,2013-04-20,60500,False,Business Dev


In [62]:
# how parameter
employees.dropna(how="all")   # if contains 'all' missing values then drop such rows

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
1,Thomas,Male,1996-03-31,61933,True,
2,Maria,Female,NaT,130590,False,Finance
3,Jerry,,2005-03-04,138705,True,Finance
4,Larry,Male,1998-01-24,101004,True,IT
...,...,...,...,...,...,...
996,Phillip,Male,1984-01-31,42392,False,Finance
997,Russell,Male,2013-05-20,96914,False,Product
998,Larry,Male,2013-04-20,60500,False,Business Dev
999,Albert,Male,2012-05-15,129949,True,Sales


### Using duplicated() method to identify duplicates based on Boolean outcome

- The following two commands are equivalent
    - employees["Team"].duplicated()
    - employees["Team"].duplicated(keep="first")
    - Returns False for first occurance and True for all the other occurances
- The following
    - employees["Team"].duplicated(keep="last")
    - Returns False for last occurance and True for all the other occurances

In [63]:
employees["Team"].head(20)

0        Marketing
1              NaN
2          Finance
3          Finance
4               IT
5            Legal
6          Product
7          Finance
8      Engineering
9     Business Dev
10             NaN
11           Legal
12              HR
13           Sales
14         Finance
15         Product
16              HR
17         Product
18              IT
19         Product
Name: Team, dtype: category
Categories (10, object): ['Business Dev', 'Distribution', 'Engineering', 'Finance', ..., 'Legal', 'Marketing', 'Product', 'Sales']

In [None]:
employees["Team"].duplicated().head(20)

0     False
1     False
2     False
3      True
4     False
5     False
6     False
7      True
8     False
9     False
10     True
11     True
12    False
13    False
14     True
15     True
16     True
17     True
18     True
19     True
Name: Team, dtype: bool

In [65]:
~employees["Team"].duplicated()

0        True
1        True
2        True
3       False
4        True
        ...  
996     False
997     False
998     False
999     False
1000    False
Name: Team, Length: 1001, dtype: bool

In [75]:
employees[~employees["Team"].duplicated()]

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
1,Thomas,Male,1996-03-31,61933,True,
2,Maria,Female,NaT,130590,False,Finance
4,Larry,Male,1998-01-24,101004,True,IT
5,Dennis,Male,1987-04-18,115163,False,Legal
6,Ruby,Female,1987-08-17,65476,True,Product
8,Angela,Female,2005-11-22,95570,True,Engineering
9,Frances,Female,2002-08-08,139852,True,Business Dev
12,Brandon,Male,1980-12-01,112807,True,HR
13,Gary,Male,2008-01-27,109831,False,Sales


In [74]:
employees["Team"].duplicated(keep="first")  # this is default, first occurance are False, duplicates are True

0       False
1       False
2       False
3        True
4       False
        ...  
996      True
997      True
998      True
999      True
1000     True
Name: Team, Length: 1001, dtype: bool

In [73]:
employees["Team"].duplicated(keep="last")      # all the occurances are True, except the last occurance

0        True
1        True
2        True
3        True
4        True
        ...  
996     False
997     False
998     False
999     False
1000    False
Name: Team, Length: 1001, dtype: bool

### The `drop_duplicates()` method to drop the duplicates

- The following two commands are equivalent
    - employees.drop_duplicates()
    - employees.drop_duplicates(keep="first")
    - Drops the rows by keeps the first occurances

In [76]:
employees.drop_duplicates()

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
1,Thomas,Male,1996-03-31,61933,True,
2,Maria,Female,NaT,130590,False,Finance
3,Jerry,,2005-03-04,138705,True,Finance
4,Larry,Male,1998-01-24,101004,True,IT
...,...,...,...,...,...,...
996,Phillip,Male,1984-01-31,42392,False,Finance
997,Russell,Male,2013-05-20,96914,False,Product
998,Larry,Male,2013-04-20,60500,False,Business Dev
999,Albert,Male,2012-05-15,129949,True,Sales


#### Use the `subset` parameter to specify based on what columns you want to drop the rows

In [78]:
# The following to statements are equivalent
employees.drop_duplicates(subset=["Team"], keep="first")
employees.drop_duplicates(subset=["Team"])  # be default, it keeps the first occurance, the drops all the other occurances

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
1,Thomas,Male,1996-03-31,61933,True,
2,Maria,Female,NaT,130590,False,Finance
4,Larry,Male,1998-01-24,101004,True,IT
5,Dennis,Male,1987-04-18,115163,False,Legal
6,Ruby,Female,1987-08-17,65476,True,Product
8,Angela,Female,2005-11-22,95570,True,Engineering
9,Frances,Female,2002-08-08,139852,True,Business Dev
12,Brandon,Male,1980-12-01,112807,True,HR
13,Gary,Male,2008-01-27,109831,False,Sales


In [80]:
# dropping rows based on more than one column
employees.drop_duplicates(subset=["Team", "Mgmt"])

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
0,Douglas,Male,1993-08-06,0,True,Marketing
1,Thomas,Male,1996-03-31,61933,True,
2,Maria,Female,NaT,130590,False,Finance
3,Jerry,,2005-03-04,138705,True,Finance
4,Larry,Male,1998-01-24,101004,True,IT
5,Dennis,Male,1987-04-18,115163,False,Legal
6,Ruby,Female,1987-08-17,65476,True,Product
8,Angela,Female,2005-11-22,95570,True,Engineering
9,Frances,Female,2002-08-08,139852,True,Business Dev
11,Julie,Female,1997-10-26,102508,True,Legal


In [81]:
# last non-duplicates rows, by drop all the earlier rows
employees.drop_duplicates(subset=["Team", "Mgmt"], keep="last")

Unnamed: 0,First Name,Gender,Start Date,Salary,Mgmt,Team
912,Joe,Male,1998-12-08,126120,False,
946,,Female,1985-09-15,133472,True,Distribution
965,Catherine,Female,1989-09-25,68164,False,IT
971,Patrick,Male,2002-12-30,75423,True,Business Dev
975,Susan,Female,1995-04-07,92436,False,Sales
976,Denise,Female,1992-10-19,137954,True,Legal
979,Ernest,Male,2013-07-20,142935,True,Product
982,Rose,Female,1982-04-06,91411,True,HR
984,Maria,Female,2011-10-15,43455,False,Engineering
986,Donna,Female,1982-11-26,82871,False,Marketing
