#### Import Statements

In [1]:
import numpy as np
import pandas as pd

#### Loading the Data Files

---

In [2]:
chicago_employees = pd.read_csv("PandasSampleData/chicago.csv").dropna(how="all")

---

## Working with Strings 

- Filtering with string methods

In [3]:
# .str.lower() is used to make sure that all the strings in the series is in the same format.
mask = chicago_employees["Position Title"].str.lower().str.contains("water")
chicago_employees[mask]

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
554,"ALUISE, VINCENT G",FOREMAN OF WATER PIPE CONSTRUCTION,WATER MGMNT,$102440.00
671,"ANDER, PERRY A",WATER CHEMIST II,WATER MGMNT,$82044.00
685,"ANDERSON, ANDREW J",DISTRICT SUPERINTENDENT OF WATER DISTRIBUTION,WATER MGMNT,$109272.00
702,"ANDERSON, DONALD",FOREMAN OF WATER PIPE CONSTRUCTION,WATER MGMNT,$102440.00
...,...,...,...,...
29669,"VERMA, ANUPAM",MANAGING ENGINEER - WATER MANAGEMENT,WATER MGMNT,$111192.00
30239,"WASHINGTON, JOSEPH",WATER CHEMIST III,WATER MGMNT,$89676.00
30544,"WEST, THOMAS R",GEN SUPT OF WATER MANAGEMENT,WATER MGMNT,$115704.00
30991,"WILLIAMS, MATTHEW",FOREMAN OF WATER PIPE CONSTRUCTION,WATER MGMNT,$102440.00


In [4]:
chicago_employees.rename(columns={"Name": "Full Name"}, inplace=True)

- Applying string methods to remove extra white spaces in start or end of a string

In [5]:
chicago_employees["Full Name"].str.strip();  # remove extra white spaces both from right and left of a string
chicago_employees["Full Name"].str.rstrip();  # remove extra white spaces from right of a string
chicago_employees["Full Name"].str.lstrip();  # remove extra white spaces from left of a string

#### Preparing the chicago_employees DataFrame for Data Analysis using different string methods

In [6]:
# chicago_employees.info()

In [7]:
chicago_employees["Department"] = chicago_employees["Department"].str.replace("MGMNT", "Management")

for col in chicago_employees.columns:
    chicago_employees[col] = chicago_employees[col].str.title()
    
chicago_employees.rename(columns={"Employee Annual Salary": "Annual Salary", "Name": "Full Name"}, inplace=True)

> Note : .split() also takes expand and n as params. expand=True returns a DF and n is the maximum allowed numbers of splits. 

In [8]:
chicago_employees[["First Name", "Last Name"]] = chicago_employees["Full Name"].str.strip().str.split(",", expand=True)

In [9]:
chicago_employees["Annual Salary"] = chicago_employees["Annual Salary"].str.replace("$", "").astype(float)

# chicago_employees["Department"].nunique()
chicago_employees["Department"] = chicago_employees["Department"].astype("category")

In [11]:
chicago_employees.head(3)

Unnamed: 0,Full Name,Position Title,Department,Annual Salary,First Name,Last Name
0,"Aaron, Elvia J",Water Rate Taker,Water Management,90744.0,Aaron,Elvia J
1,"Aaron, Jeffery M",Police Officer,Police,84450.0,Aaron,Jeffery M
2,"Aaron, Karina",Police Officer,Police,84450.0,Aaron,Karina
