# Categorizing Records in Pandas DataFrames

## 1. Import Libraries and Dependencies

In [20]:
# import dependencies
import pandas as pd
from pathlib import Path
%matplotlib inline

## 2. Set the File Path to the CSV using Pathlib

In [21]:
# Use the pathlib library to set the path to the CSV.
# The path is relative, so it is resolved against the notebook's working directory.
csvpath = Path("../Resources/cleaned_people_data.csv")


## 3. Import the CSV into Pandas DataFrame

In [22]:
# Load the CSV at csvpath into a DataFrame, using the "Person_ID" column as the
# row index, then preview the first five rows.
people_df = pd.read_csv(filepath_or_buffer=csvpath, index_col="Person_ID")
people_df.head(5)

Unnamed: 0_level_0,Last_Name,First_Name,Gender,University,Occupation,Salary,Email,Age
Person_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Lenormand,Keriann,Female,Aurora University,Nurse Practicioner,58135.0,klenormand0@businessinsider.com,27
2,Rupke,Huntley,Male,Osaka University of Economics,Project Manager,96053.0,hrupke1@reuters.com,22
3,Dalgarnowch,Gorden,Male,Ludong University,Environmental Tech,59196.0,gdalgarnowch2@microsoft.com,40
4,Unnamed,Cullie,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,cputten3@nymag.com,62
5,Strangman,Ariel,Female,Boise State University,Project Manager,89073.0,astrangman4@bravesites.com,47


## 4. Create Bins for Salary Levels

In [23]:
# Bin edges, in ascending order, that define the bounds used when the DataFrame is cut.
# Five edges delimit four intervals, one per name in bin_names.
bins = [0, 30_000, 70_000, 100_000, 200_000]

# Create names for the bins (one label per interval, lowest interval first).
bin_names = ["Low", "Moderate", "Above Average", "High"]


## 5. Label Records According to the Corresponding Bin

In [24]:
# Add a "Salary Level" column to people_df: pd.cut assigns each Salary to the
# bin it falls into and stores that bin's label from bin_names.
# NOTE: a Salary outside the outermost bin edges receives NaN instead of a label.
people_df["Salary Level"] = pd.cut(
    x=people_df["Salary"],
    bins=bins,
    labels=bin_names,
)
# Show the last 30 rows to inspect the assigned levels.
people_df.tail(30)



Unnamed: 0_level_0,Last_Name,First_Name,Gender,University,Occupation,Salary,Email,Age,Salary Level
Person_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
971,Perchard,Ennis,Male,University of Exeter,Accounting Assistant I,109716.0,eperchardqy@storify.com,64,High
972,Attfield,Sigrid,Female,Universidad de La Salle,Senior Quality Engineer,93769.0,sattfieldqz@nationalgeographic.com,42,Above Average
973,Spurdle,Kathi,Female,United States Sports Academy,VP Marketing,56669.0,kspurdler0@ucla.edu,48,Moderate
974,Haspineall,Antonio,Male,Lansbridge University,Legal Assistant,71608.0,ahaspineallr1@simplemachines.org,33,Above Average
975,Shoveller,Bernadina,Female,Midwestern State University,Assistant Manager,63355.0,bshovellerr2@ca.gov,33,Moderate
976,Pickett,Nanete,Female,Ecole Nationale Supérieure de Physique de Gren...,Human Resources Manager,108835.0,npickettr3@businesswire.com,61,High
977,Lethbrig,Hastie,Male,Ecole Polytechnique,Electrical Engineer,73842.0,hlethbrigr4@europa.eu,32,Above Average
978,McPhillimey,Danice,Female,Université d'Artois,Automation Specialist II,71578.0,dmcphillimeyr5@flickr.com,60,Above Average
979,Yurchenko,Kale,Male,Southeast Missouri State University,Biostatistician IV,54576.0,kyurchenkor6@netscape.com,54,Moderate
980,Eykel,Dillon,Male,Shorter College,Pharmacist,110623.0,deykelr7@independent.co.uk,58,High


In [25]:

# List the dtype of every column in people_df, including the added "Salary Level" column.
people_df.dtypes

Last_Name         object
First_Name        object
Gender            object
University        object
Occupation        object
Salary           float64
Email             object
Age                int64
Salary Level    category
dtype: object