# String operations in pandas

    We can use the `apply` method to apply a string function to every value of a dataframe column, but that is not
    efficient because it does not use the full power of pandas. Pandas has built-in vectorized string operations
    (available through the `.str` accessor) that are designed specifically for text columns.

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the employee data and discard rows in which every field is missing.
chicago = pd.read_csv('datasets/chicago.csv').dropna(how='all')
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00


    Let us count the null values in every column of `chicago`.

In [3]:
# Count the missing values per column with the vectorized isna().sum()
# instead of looking up a NaN key in value_counts() one column at a time.
# to_dict() keeps the result a {column name: count} dictionary.
nulls = chicago.isna().sum().to_dict()

nulls # we have 0 null values in each and every column of the dataframe.

{'Name': 0, 'Position Title': 0, 'Department': 0, 'Employee Annual Salary': 0}

In [4]:
# Pull the job-title column out as a Series so the .str methods can be tried on it.
positions = chicago['Position Title']
positions.head()

0            WATER RATE TAKER
1              POLICE OFFICER
2              POLICE OFFICER
3    CHIEF CONTRACT EXPEDITER
4           CIVIL ENGINEER IV
Name: Position Title, dtype: object

In [5]:
# Lower-case every title through the vectorized .str accessor.
# The result is only displayed here; it is not assigned back to `positions`.
positions.str.lower()

0                      water rate taker
1                        police officer
2                        police officer
3              chief contract expediter
4                     civil engineer iv
                      ...              
32057    frm of machinists - automotive
32058                    police officer
32059                    police officer
32060                    police officer
32061           chief data base analyst
Name: Position Title, Length: 32062, dtype: object

In [6]:
# Upper-case every title. The rows displayed earlier are already upper case,
# so for them the output matches the original values.
positions.str.upper()

0                      WATER RATE TAKER
1                        POLICE OFFICER
2                        POLICE OFFICER
3              CHIEF CONTRACT EXPEDITER
4                     CIVIL ENGINEER IV
                      ...              
32057    FRM OF MACHINISTS - AUTOMOTIVE
32058                    POLICE OFFICER
32059                    POLICE OFFICER
32060                    POLICE OFFICER
32061           CHIEF DATA BASE ANALYST
Name: Position Title, Length: 32062, dtype: object

In [7]:
# Capitalize the first letter of each word and lower-case the rest.
# Note that this also turns the roman numeral "IV" into "Iv".
positions.str.title()

0                      Water Rate Taker
1                        Police Officer
2                        Police Officer
3              Chief Contract Expediter
4                     Civil Engineer Iv
                      ...              
32057    Frm Of Machinists - Automotive
32058                    Police Officer
32059                    Police Officer
32060                    Police Officer
32061           Chief Data Base Analyst
Name: Position Title, Length: 32062, dtype: object

In [8]:
# Number of characters in each title, spaces included ("WATER RATE TAKER" -> 16).
positions.str.len()

0        16
1        14
2        14
3        24
4        17
         ..
32057    30
32058    14
32059    14
32060    14
32061    23
Name: Position Title, Length: 32062, dtype: int64

# strip, lstrip, rstrip methods

In [9]:
# Department column as a Series, used by the strip/lstrip/rstrip and replace examples.
departments = chicago['Department']
departments

0             WATER MGMNT
1                  POLICE
2                  POLICE
3        GENERAL SERVICES
4             WATER MGMNT
               ...       
32057    GENERAL SERVICES
32058              POLICE
32059              POLICE
32060              POLICE
32061                DoIT
Name: Department, Length: 32062, dtype: object

In [10]:
# Remove whitespace from both ends of each value.
# The rows displayed show no visible padding, so the output looks the same as the input.
departments.str.strip()

0             WATER MGMNT
1                  POLICE
2                  POLICE
3        GENERAL SERVICES
4             WATER MGMNT
               ...       
32057    GENERAL SERVICES
32058              POLICE
32059              POLICE
32060              POLICE
32061                DoIT
Name: Department, Length: 32062, dtype: object

In [11]:
# Remove leading (left-side) whitespace only.
departments.str.lstrip()

0             WATER MGMNT
1                  POLICE
2                  POLICE
3        GENERAL SERVICES
4             WATER MGMNT
               ...       
32057    GENERAL SERVICES
32058              POLICE
32059              POLICE
32060              POLICE
32061                DoIT
Name: Department, Length: 32062, dtype: object

In [12]:
# Remove trailing (right-side) whitespace only.
departments.str.rstrip()

0             WATER MGMNT
1                  POLICE
2                  POLICE
3        GENERAL SERVICES
4             WATER MGMNT
               ...       
32057    GENERAL SERVICES
32058              POLICE
32059              POLICE
32060              POLICE
32061                DoIT
Name: Department, Length: 32062, dtype: object

In [13]:
# Spell out the abbreviated department name.
# regex=False makes this a plain substring replacement regardless of the pandas
# version (the default value of `regex` changed in pandas 2.0).
departments.str.replace('MGMNT', 'MANAGEMENT', regex=False)

0        WATER MANAGEMENT
1                  POLICE
2                  POLICE
3        GENERAL SERVICES
4        WATER MANAGEMENT
               ...       
32057    GENERAL SERVICES
32058              POLICE
32059              POLICE
32060              POLICE
32061                DoIT
Name: Department, Length: 32062, dtype: object