In [1]:
import pandas as pd
import numpy as np

In [2]:
# Columns of the job-postings file that the rest of the notebook works with
columns_to_keep = [
    'Job ID',
    'Civil Service Title',
    'Agency',
    'Posting Type',
    'Job Category',
    'Salary Range From',
    'Salary Range To',
]

# Read the CSV and keep only the selected columns, in the order listed above
data = pd.read_csv("NYC_Jobs.csv")[columns_to_keep]
data.head()

Unnamed: 0,Job ID,Civil Service Title,Agency,Posting Type,Job Category,Salary Range From,Salary Range To
0,424339,PUBLIC HEALTH NURSE,DEPT OF HEALTH/MENTAL HYGIENE,External,Health,84252.0,84252.0
1,379094,CERT IT DEVELOPER (APP),NYC EMPLOYEES RETIREMENT SYS,External,"Technology, Data & Innovation",82884.0,116391.0
2,520417,EXECUTIVE AGENCY COUNSEL,NYC HOUSING AUTHORITY,External,Legal Affairs,105000.0,125000.0
3,233549,CERTIFIED IT ADMINISTRATOR (LA,NYC EMPLOYEES RETIREMENT SYS,External,Information Technology & Telecommunications,87203.0,131623.0
4,510256,ASSOCIATE HUMAN RIGHTS SPECIAL,HUMAN RIGHTS COMMISSION,External,Constituent Services & Community Programs,58449.0,67216.0


In [3]:
# Talked about apply in more detail in another video
# see it here: https://youtu.be/DsjvCKxOdgI
# apply is good for applying a function to either axis or the whole dataframe at once
def capitalize(text):
    """Return `text` lower-cased, with its first character then capitalized."""
    lowered = text.lower()
    return lowered.capitalize()

# Pull the title column out first, then run `capitalize` on each of its values;
# the result is only displayed, `data` itself is left unchanged
civil_service_titles = data['Civil Service Title']
civil_service_titles.apply(capitalize)

0                  Public health nurse
1              Cert it developer (app)
2             Executive agency counsel
3       Certified it administrator (la
4       Associate human rights special
                     ...              
3768    Certified it administrator (la
3769     Associate housing development
3770               Senior it architect
3771                         Economist
3772           Agency attorney interne
Name: Civil Service Title, Length: 3773, dtype: object

In [27]:
# applymap applies a function to each element of the dataframe
# the difference between apply and applymap is that apply passes a whole
# row or column (one axis at a time) to the function,
# while applymap passes one element at a time

def add_year(text, year=2022):
    """Return `text` converted to a string with ``_<year>`` appended.

    Parameters
    ----------
    text : any
        Value to tag. It is converted with ``str()`` first, so numbers work too.
    year : any, optional
        Suffix placed after the underscore. Defaults to 2022, which reproduces
        the previously hard-coded behaviour.

    Returns
    -------
    str
        ``str(text) + '_' + str(year)``.
    """
    # !s forces str() on both parts, exactly like the explicit str(text) did
    return f"{text!s}_{year!s}"

# NOTE: pandas 2.1 renamed DataFrame.applymap to DataFrame.map and deprecated
# the old name, so pick whichever element-wise method this pandas version has.
elementwise_map = data.map if hasattr(pd.DataFrame, "map") else data.applymap
elementwise_map(add_year)

Unnamed: 0,Job ID,Civil Service Title,Agency,Posting Type,Job Category,Salary Range From,Salary Range To
0,424339_2022,PUBLIC HEALTH NURSE_2022,DEPT OF HEALTH/MENTAL HYGIENE_2022,External_2022,Health_2022,84252.0_2022,84252.0_2022
1,379094_2022,CERT IT DEVELOPER (APP)_2022,NYC EMPLOYEES RETIREMENT SYS_2022,External_2022,"Technology, Data & Innovation_2022",82884.0_2022,116391.0_2022
2,520417_2022,EXECUTIVE AGENCY COUNSEL_2022,NYC HOUSING AUTHORITY_2022,External_2022,Legal Affairs_2022,105000.0_2022,125000.0_2022
3,233549_2022,CERTIFIED IT ADMINISTRATOR (LA_2022,NYC EMPLOYEES RETIREMENT SYS_2022,External_2022,Information Technology & Telecommunications_2022,87203.0_2022,131623.0_2022
4,510256_2022,ASSOCIATE HUMAN RIGHTS SPECIAL_2022,HUMAN RIGHTS COMMISSION_2022,External_2022,Constituent Services & Community Programs_2022,58449.0_2022,67216.0_2022
...,...,...,...,...,...,...,...
3768,457839_2022,CERTIFIED IT ADMINISTRATOR (LA_2022,NYC EMPLOYEES RETIREMENT SYS_2022,Internal_2022,"Technology, Data & Innovation_2022",85371.0_2022,119883.0_2022
3769,527141_2022,ASSOCIATE HOUSING DEVELOPMENT_2022,HOUSING PRESERVATION & DVLPMNT_2022,Internal_2022,"Engineering, Architecture, & Planning_2022",74650.0_2022,85847.0_2022
3770,509443_2022,SENIOR IT ARCHITECT_2022,FINANCIAL INFO SVCS AGENCY_2022,Internal_2022,"Technology, Data & Innovation_2022",100000.0_2022,115000.0_2022
3771,352367_2022,ECONOMIST_2022,DEPT OF HEALTH/MENTAL HYGIENE_2022,Internal_2022,"Finance, Accounting, & Procurement_2022",42288.0_2022,59400.0_2022


In [19]:
# map works only on series and its main strength is to replace values
# so given a series like that
# (np.nan is a genuine missing value; the string 'NaN' would just be ordinary
# text and would never be treated as NA by map / na_action)
s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
s

0       cat
1       dog
2       NaN
3    rabbit
dtype: object

In [20]:
# a dict can serve as the lookup table: keys are the old values, values the new ones
# (entries of the series without a matching key come back as NaN)
replacements = {'cat': 'kitten', 'dog': 'puppy'}
s.map(replacements)

0    kitten
1     puppy
2       NaN
3       NaN
dtype: object

In [28]:
# but you can also still pass a function to it
# this, however, can be done using apply too, so nothing special
# thus, map is mainly useful for when you want to completely map elements from
# one value to another
def change_word(title):
    """Return `title` with every occurrence of 'DEPT' swapped for 'kitten'."""
    return title.replace('DEPT', 'kitten')
    
# Only the last expression of a cell is rendered automatically, so this first
# result has to be shown explicitly or it is silently thrown away.
# (`display` is provided by IPython/Jupyter in every notebook session.)
display(data['Agency'].map(change_word))

# it is also useful if you'd like to update the value in the same way for each element
data['Agency'].map('The position is created by {}'.format)

0       The position is created by DEPT OF HEALTH/MENT...
1       The position is created by NYC EMPLOYEES RETIR...
2        The position is created by NYC HOUSING AUTHORITY
3       The position is created by NYC EMPLOYEES RETIR...
4       The position is created by HUMAN RIGHTS COMMIS...
                              ...                        
3768    The position is created by NYC EMPLOYEES RETIR...
3769    The position is created by HOUSING PRESERVATIO...
3770    The position is created by FINANCIAL INFO SVCS...
3771    The position is created by DEPT OF HEALTH/MENT...
3772    The position is created by HUMAN RIGHTS COMMIS...
Name: Agency, Length: 3773, dtype: object

In [29]:
# for both map and applymap, you can set na_action='ignore',
# so that NA values will not even be passed to the function
# the result does not change
# (the only accepted values are None and 'ignore'; recent pandas versions raise
# a ValueError for anything else, such as an empty string)

s.map({'cat': 'kitten', 'dog': 'puppy'}, na_action='ignore')


0    kitten
1     puppy
2       NaN
3       NaN
dtype: object