In [2]:
import pandas as pd

## Memory Optimization

In [9]:
df = pd.read_csv("employees.csv")
df.head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,8/6/1993,12:42 PM,97308,6.945,True,Marketing
1,Thomas,Male,3/31/1996,6:53 AM,61933,4.17,True,
2,Maria,Female,4/23/1993,11:17 AM,130590,11.858,False,Finance


In [13]:
df.dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   First Name         933 non-null    object 
 1   Gender             855 non-null    object 
 2   Start Date         1000 non-null   object 
 3   Last Login Time    1000 non-null   object 
 4   Salary             1000 non-null   int64  
 5   Bonus %            1000 non-null   float64
 6   Senior Management  933 non-null    object 
 7   Team               957 non-null    object 
dtypes: float64(1), int64(1), object(6)
memory usage: 62.6+ KB


In [17]:
df["Start Date"].head()
# read as objects/strings
# datetime operations can't work on strings

0     8/6/1993
1    3/31/1996
2    4/23/1993
3     3/4/2005
4    1/24/1998
Name: Start Date, dtype: object

In [25]:
df["Start Date"] = pd.to_datetime(df["Start Date"]) # changes date dtype
df.head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-17 12:42:00,97308,6.945,True,Marketing
1,Thomas,Male,1996-03-31,2020-12-17 06:53:00,61933,4.17,True,
2,Maria,Female,1993-04-23,2020-12-17 11:17:00,130590,11.858,False,Finance


In [26]:
df["Last Login Time"] = pd.to_datetime(df["Last Login Time"])
df.head(3) # no specified date will make it the date today

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-17 12:42:00,97308,6.945,True,Marketing
1,Thomas,Male,1996-03-31,2020-12-17 06:53:00,61933,4.17,True,
2,Maria,Female,1993-04-23,2020-12-17 11:17:00,130590,11.858,False,Finance


In [35]:
df["Senior Management"] = df["Senior Management"].astype("bool")
df.head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-17 12:42:00,97308,6.945,True,Marketing
1,Thomas,Male,1996-03-31,2020-12-17 06:53:00,61933,4.17,True,
2,Maria,Female,1993-04-23,2020-12-17 11:17:00,130590,11.858,False,Finance


In [38]:
df["Gender"] = df["Gender"].astype("category")
df["Team"] = df["Team"].astype("category")
df.info() # memory usage is lower
df.head(3)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   First Name         933 non-null    object        
 1   Gender             855 non-null    category      
 2   Start Date         5 non-null      datetime64[ns]
 3   Last Login Time    1000 non-null   datetime64[ns]
 4   Salary             1000 non-null   int64         
 5   Bonus %            1000 non-null   float64       
 6   Senior Management  1000 non-null   bool          
 7   Team               957 non-null    category      
dtypes: bool(1), category(2), datetime64[ns](2), float64(1), int64(1), object(1)
memory usage: 42.6+ KB


Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-17 12:42:00,97308,6.945,True,Marketing
1,Thomas,Male,1996-03-31,2020-12-17 06:53:00,61933,4.17,True,
2,Maria,Female,1993-04-23,2020-12-17 11:17:00,130590,11.858,False,Finance


In [44]:
# import code for this module (can ignore the lines with hashtag)
df = pd.read_csv("employees.csv", parse_dates = ["Start Date","Last Login Time"])
# df["Start Date"] = pd.to_datetime(df["Start Date"])
# df["Last Login Time"] = pd.to_datetime(df["Last Login Time"])
df["Senior Management"] = df["Senior Management"].astype("bool")
df["Gender"] = df["Gender"].astype("category")
df["Team"] = df["Team"].astype("category")
df.head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-17 12:42:00,97308,6.945,True,Marketing
1,Thomas,Male,1996-03-31,2020-12-17 06:53:00,61933,4.17,True,
2,Maria,Female,1993-04-23,2020-12-17 11:17:00,130590,11.858,False,Finance


## Filter A `DataFrame` Based On A Condition

- making subsets

### Based on one condition

In [3]:
df = pd.read_csv("employees.csv", parse_dates = ["Start Date","Last Login Time"])
df["Senior Management"] = df["Senior Management"].astype("bool")
df["Gender"] = df["Gender"].astype("category")
df["Team"] = df["Team"].astype("category")
df.head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-18 12:42:00,97308,6.945,True,Marketing
1,Thomas,Male,1996-03-31,2020-12-18 06:53:00,61933,4.17,True,
2,Maria,Female,1993-04-23,2020-12-18 11:17:00,130590,11.858,False,Finance


In [5]:
df[df["Gender"] == "Male"]
df.head(3)
# Note: double equal sign is to compare, single equal sign is to assign a value
# pandas knows to extract only the ones with a "True" value

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-18 12:42:00,97308,6.945,True,Marketing
1,Thomas,Male,1996-03-31,2020-12-18 06:53:00,61933,4.17,True,
2,Maria,Female,1993-04-23,2020-12-18 11:17:00,130590,11.858,False,Finance


In [12]:
# option 1: a bit confusing because of the df[df[]]
#df[df["Team"] == "Finance"]

# option 2: much cleaner
mask = df["Team"] == "Finance" 
df[mask].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
2,Maria,Female,1993-04-23,2020-12-18 11:17:00,130590,11.858,False,Finance
3,Jerry,Male,2005-03-04,2020-12-18 13:00:00,138705,9.34,True,Finance
7,,Female,2015-07-20,2020-12-18 10:43:00,45906,11.598,True,Finance


In [21]:
df[df["Senior Management"]].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-18 12:42:00,97308,6.945,True,Marketing
1,Thomas,Male,1996-03-31,2020-12-18 06:53:00,61933,4.17,True,
3,Jerry,Male,2005-03-04,2020-12-18 13:00:00,138705,9.34,True,Finance


In [22]:
mask = df["Team"] != "Marketing"
df[mask].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
2,Maria,Female,1993-04-23,2020-12-18 11:17:00,130590,11.858,False,Finance
3,Jerry,Male,2005-03-04,2020-12-18 13:00:00,138705,9.34,True,Finance
4,Larry,Male,1998-01-24,2020-12-18 16:47:00,101004,1.389,True,Client Services


In [25]:
mask = df["Salary"] > 110000
df[mask].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
2,Maria,Female,1993-04-23,2020-12-18 11:17:00,130590,11.858,False,Finance
3,Jerry,Male,2005-03-04,2020-12-18 13:00:00,138705,9.34,True,Finance
5,Dennis,Male,1987-04-18,2020-12-18 01:35:00,115163,10.125,False,Legal


In [27]:
mask = df["Bonus %"] < 1.5
df[mask].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
4,Larry,Male,1998-01-24,2020-12-18 16:47:00,101004,1.389,True,Client Services
15,Lillian,Female,2016-06-05,2020-12-18 06:09:00,59414,1.256,False,Product
58,Theresa,Female,2010-04-11,2020-12-18 07:18:00,72670,1.481,True,Engineering


In [30]:
mask = df["Start Date"] <= "1985-01-01"
df[mask].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
10,Louise,Female,1980-08-12,2020-12-18 09:01:00,63241,15.132,True,
12,Brandon,Male,1980-12-01,2020-12-18 01:08:00,112807,17.492,True,Human Resources
18,Diana,Female,1981-10-23,2020-12-18 10:27:00,132940,19.082,False,Client Services


### More than one condition

In [39]:
df = pd.read_csv("employees.csv", parse_dates = ["Start Date","Last Login Time"])
df["Senior Management"] = df["Senior Management"].astype("bool")
df["Gender"] = df["Gender"].astype("category")
df["Team"] = df["Team"].astype("category")
df.head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-18 12:42:00,97308,6.945,True,Marketing
1,Thomas,Male,1996-03-31,2020-12-18 06:53:00,61933,4.17,True,
2,Maria,Female,1993-04-23,2020-12-18 11:17:00,130590,11.858,False,Finance


In [40]:
# AND condition (&)
mask1 = df["Gender"] == "Male"
mask2 = df["Team"] == "Marketing"

df[mask1 & mask2].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-18 12:42:00,97308,6.945,True,Marketing
21,Matthew,Male,1995-09-05,2020-12-18 02:12:00,100612,13.645,False,Marketing
26,Craig,Male,2000-02-27,2020-12-18 07:45:00,37598,7.757,True,Marketing


In [50]:
# OR condition (|)
mask1 = df["Senior Management"]
mask2 = df["Start Date"] < "1990-01-01" 
df[mask1 | mask2].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-18 12:42:00,97308,6.945,True,Marketing
1,Thomas,Male,1996-03-31,2020-12-18 06:53:00,61933,4.17,True,
3,Jerry,Male,2005-03-04,2020-12-18 13:00:00,138705,9.34,True,Finance


In [49]:
mask1 = df["First Name"] == "Robert"
mask2 = df["Team"] == "Client Services"
mask3 = df["Start Date"] > "2016-06-01"

df[(mask1 & mask2) | mask3]

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
15,Lillian,Female,2016-06-05,2020-12-18 06:09:00,59414,1.256,False,Product
98,Tina,Female,2016-06-16,2020-12-18 19:47:00,100705,16.961,True,Marketing
387,Robert,Male,1994-10-29,2020-12-18 04:26:00,123294,19.894,False,Client Services
451,Terry,,2016-07-15,2020-12-18 00:29:00,140002,19.49,True,Marketing


## The `.isin()` Method

In [51]:
df = pd.read_csv("employees.csv", parse_dates = ["Start Date","Last Login Time"])
df["Senior Management"] = df["Senior Management"].astype("bool")
df["Gender"] = df["Gender"].astype("category")
df["Team"] = df["Team"].astype("category")
df.head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-18 12:42:00,97308,6.945,True,Marketing
1,Thomas,Male,1996-03-31,2020-12-18 06:53:00,61933,4.17,True,
2,Maria,Female,1993-04-23,2020-12-18 11:17:00,130590,11.858,False,Finance


In [60]:
# option 1: too long
mask1 = df["Team"] == "Legal"
mask2 = df["Team"] == "Sales"
mask3 = df["Team"] == "Product"

df[mask1 | mask2 | mask3].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
5,Dennis,Male,1987-04-18,2020-12-18 01:35:00,115163,10.125,False,Legal
6,Ruby,Female,1987-08-17,2020-12-18 16:20:00,65476,10.012,True,Product
11,Julie,Female,1997-10-26,2020-12-18 15:19:00,102508,12.637,True,Legal


In [64]:
# option 2: more adaptable to change
mask = df["Team"].isin(["Legal","Product","Marketing"])
df[mask].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-18 12:42:00,97308,6.945,True,Marketing
5,Dennis,Male,1987-04-18,2020-12-18 01:35:00,115163,10.125,False,Legal
6,Ruby,Female,1987-08-17,2020-12-18 16:20:00,65476,10.012,True,Product


## The `.isnull()` and `.notnull()` Methods

In [65]:
df = pd.read_csv("employees.csv", parse_dates = ["Start Date","Last Login Time"])
df["Senior Management"] = df["Senior Management"].astype("bool")
df["Gender"] = df["Gender"].astype("category")
df["Team"] = df["Team"].astype("category")
df.head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-18 12:42:00,97308,6.945,True,Marketing
1,Thomas,Male,1996-03-31,2020-12-18 06:53:00,61933,4.17,True,
2,Maria,Female,1993-04-23,2020-12-18 11:17:00,130590,11.858,False,Finance


In [69]:
mask = df["Team"].isnull()

df[mask].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
1,Thomas,Male,1996-03-31,2020-12-18 06:53:00,61933,4.17,True,
10,Louise,Female,1980-08-12,2020-12-18 09:01:00,63241,15.132,True,
23,,Male,2012-06-14,2020-12-18 16:19:00,125792,5.042,True,


In [74]:
condition = df["Gender"].notnull()
df[condition].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-18 12:42:00,97308,6.945,True,Marketing
1,Thomas,Male,1996-03-31,2020-12-18 06:53:00,61933,4.17,True,
2,Maria,Female,1993-04-23,2020-12-18 11:17:00,130590,11.858,False,Finance


## The `.between()` Method

In [75]:
df = pd.read_csv("employees.csv", parse_dates = ["Start Date","Last Login Time"])
df["Senior Management"] = df["Senior Management"].astype("bool")
df["Gender"] = df["Gender"].astype("category")
df["Team"] = df["Team"].astype("category")
df.head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-18 12:42:00,97308,6.945,True,Marketing
1,Thomas,Male,1996-03-31,2020-12-18 06:53:00,61933,4.17,True,
2,Maria,Female,1993-04-23,2020-12-18 11:17:00,130590,11.858,False,Finance


In [103]:
df[df["Salary"].between(60000, 70000)].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
101,Aaron,Male,2012-02-17,2020-12-18 10:20:00,61602,11.849,True,Marketing
937,Aaron,,1986-01-22,2020-12-18 19:39:00,63126,18.424,False,Client Services
372,Albert,Male,1997-02-01,2020-12-18 16:20:00,67827,19.717,True,Engineering


In [102]:
df[df["Bonus %"].between(2.0,5.0)].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
538,Adam,Male,2010-10-08,2020-12-18 21:53:00,45181,3.491,False,Human Resources
300,Alan,Male,1988-06-26,2020-12-18 03:54:00,111786,3.592,True,Engineering
425,Alice,Female,1986-05-02,2020-12-18 01:50:00,51395,2.378,True,Finance


In [100]:
df[df["Start Date"].between("1990-01-01", "1992-01-01")].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
440,Aaron,Male,1990-07-22,2020-12-18 14:53:00,52119,11.343,True,Client Services
141,Adam,Male,1990-12-24,2020-12-18 20:57:00,110194,14.727,True,Product
639,Amanda,,1991-08-11,2020-12-18 14:15:00,46665,19.391,True,Client Services


In [101]:
df[df["Last Login Time"].between("8:30AM","12:00PM")].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
101,Aaron,Male,2012-02-17,2020-12-18 10:20:00,61602,11.849,True,Marketing
302,Adam,Male,2007-07-05,2020-12-18 11:59:00,71276,5.027,True,Human Resources
988,Alice,Female,2004-10-05,2020-12-18 09:34:00,47638,11.209,False,Human Resources


## The `.duplicated()` Method

In [90]:
df = pd.read_csv("employees.csv", parse_dates = ["Start Date","Last Login Time"])
df["Senior Management"] = df["Senior Management"].astype("bool")
df["Gender"] = df["Gender"].astype("category")
df["Team"] = df["Team"].astype("category")
df.sort_values("First Name", inplace = True)
df.head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
101,Aaron,Male,2012-02-17,2020-12-18 10:20:00,61602,11.849,True,Marketing
327,Aaron,Male,1994-01-29,2020-12-18 18:48:00,58755,5.097,True,Marketing
440,Aaron,Male,1990-07-22,2020-12-18 14:53:00,52119,11.343,True,Client Services


In [104]:
df[df["First Name"].duplicated(keep = "last")]
df[df["First Name"].duplicated(keep = False)].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
101,Aaron,Male,2012-02-17,2020-12-18 10:20:00,61602,11.849,True,Marketing
327,Aaron,Male,1994-01-29,2020-12-18 18:48:00,58755,5.097,True,Marketing
440,Aaron,Male,1990-07-22,2020-12-18 14:53:00,52119,11.343,True,Client Services


In [105]:
mask = ~df["First Name"].duplicated(keep = False) # ~ turns all false to positive and vice versa
df[mask].head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
8,Angela,Female,2005-11-22,2020-12-18 06:29:00,95570,18.523,True,Engineering
688,Brian,Male,2007-04-07,2020-12-18 22:47:00,93901,17.821,True,Legal
190,Carol,Female,1996-03-19,2020-12-18 03:39:00,57783,9.129,False,Finance


## The `.drop_duplicates()` Method

In [120]:
df = pd.read_csv("employees.csv", parse_dates = ["Start Date","Last Login Time"])
df["Senior Management"] = df["Senior Management"].astype("bool")
df["Gender"] = df["Gender"].astype("category")
df["Team"] = df["Team"].astype("category")
df.sort_values("First Name", inplace = True)
df.head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
101,Aaron,Male,2012-02-17,2020-12-18 10:20:00,61602,11.849,True,Marketing
327,Aaron,Male,1994-01-29,2020-12-18 18:48:00,58755,5.097,True,Marketing
440,Aaron,Male,1990-07-22,2020-12-18 14:53:00,52119,11.343,True,Client Services


In [121]:
len(df) # number of rows

1000

In [122]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1000 entries, 101 to 951
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   First Name         933 non-null    object        
 1   Gender             855 non-null    category      
 2   Start Date         1000 non-null   datetime64[ns]
 3   Last Login Time    1000 non-null   datetime64[ns]
 4   Salary             1000 non-null   int64         
 5   Bonus %            1000 non-null   float64       
 6   Senior Management  1000 non-null   bool          
 7   Team               957 non-null    category      
dtypes: bool(1), category(2), datetime64[ns](2), float64(1), int64(1), object(1)
memory usage: 50.3+ KB


In [123]:
len(df.drop_duplicates()) 
# only drops rows with identical values in more than one row

1000

In [133]:
df.drop_duplicates(subset = ["First Name"], keep = "first").head(3)
# will keep first occurrence 

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-18 12:42:00,97308,6.945,True,Marketing
1,Thomas,Male,1996-03-31,2020-12-18 06:53:00,61933,4.17,True,
2,Maria,Female,1993-04-23,2020-12-18 11:17:00,130590,11.858,False,Finance


In [126]:
df.drop_duplicates(subset = "Team", keep = False)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team


In [130]:
df.drop_duplicates(subset = ["First Name","Team"], inplace = True)
df.head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
101,Aaron,Male,2012-02-17,2020-12-18 10:20:00,61602,11.849,True,Marketing
440,Aaron,Male,1990-07-22,2020-12-18 14:53:00,52119,11.343,True,Client Services
137,Adam,Male,2011-05-21,2020-12-18 01:45:00,95327,15.12,False,Distribution


In [131]:
len(df)

769

## The `.unique()` and `.nunique()` Methods

In [132]:
df = pd.read_csv("employees.csv", parse_dates = ["Start Date","Last Login Time"])
df["Senior Management"] = df["Senior Management"].astype("bool")
df["Gender"] = df["Gender"].astype("category")
df["Team"] = df["Team"].astype("category")
df.head(3)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,1993-08-06,2020-12-18 12:42:00,97308,6.945,True,Marketing
1,Thomas,Male,1996-03-31,2020-12-18 06:53:00,61933,4.17,True,
2,Maria,Female,1993-04-23,2020-12-18 11:17:00,130590,11.858,False,Finance


In [137]:
df["Gender"].unique()
df["Team"].unique()

['Marketing', NaN, 'Finance', 'Client Services', 'Legal', ..., 'Engineering', 'Business Development', 'Human Resources', 'Sales', 'Distribution']
Length: 11
Categories (10, object): ['Marketing', 'Finance', 'Client Services', 'Legal', ..., 'Business Development', 'Human Resources', 'Sales', 'Distribution']

In [143]:
len(df["Team"].unique()) 
# counts number of unique values as a whole

11

In [144]:
df["Team"].nunique()
# counts number of unique values, but ignores null values
# set parameter dropna = False to include null values
# df["Team"].nunique(dropna = False)

10