# NZ Admin jobs data exploration

## 1. Load data and overview

In [1]:
import pandas as pd
import re

In [2]:
# load file
df_admin = pd.read_excel('NZ_Admin_JOBS.xlsx')

In [3]:
# get info
df_admin.info() 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2708 entries, 0 to 2707
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   字段1       2708 non-null   object
 1   字段1_link  2708 non-null   object
 2   字段2       2686 non-null   object
 3   字段3       2708 non-null   object
 4   字段4       2708 non-null   object
 5   字段5       2708 non-null   object
dtypes: object(6)
memory usage: 127.1+ KB


In [4]:
# check missing
df_admin.isnull().sum().sort_values(ascending=False)

字段2         22
字段1          0
字段1_link     0
字段3          0
字段4          0
字段5          0
dtype: int64

In [5]:
pd.set_option('display.max_colwidth',None)

#### Total 2,708 observations, 6 columns
#### No column name, all data are object

In [6]:
df_admin.describe()

Unnamed: 0,字段1,字段1_link,字段2,字段3,字段4,字段5
count,2708,2708,2686,2708,2708,2708
unique,548,2708,475,147,54,93
top,Executive Assistant,https://www.seek.co.nz/job/50582301?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Beyond Recruitment - Winner – Best Innovation –2018 Global Recruiter Awards,location: AucklandAucklandarea: Auckland CentralAuckland Central,"27d ago,at",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Administrative AssistantsAdministrative Assistants
freq,301,1,285,654,572,715


#### '字段1_link' could be index
#### 22 Missing value in '字段2'
#### Column '字段1' contains position
#### Column '字段1_link'contains link
#### Column '字段2'  contains company name
#### Column '字段3' contains location
#### Column '字段4' shows release time
#### Column '字段5' contains position classification

In [7]:
df_admin.head(5)

Unnamed: 0,字段1,字段1_link,字段2,字段3,字段4,字段5
0,Administrator,https://www.seek.co.nz/job/50582301?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,,location: Bay of PlentyBay of Plentyarea: TaurangaTauranga,"Featured,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Office ManagementOffice Management
1,Receptionist,https://www.seek.co.nz/job/50620889?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Avenues Orthodontics,location: Bay of PlentyBay of Plentyarea: TaurangaTauranga,"Featured,at",classification: Administration & Office SupportAdministration & Office SupportsubClassification: ReceptionistsReceptionists
2,Prosecutions Support Officer,https://www.seek.co.nz/job/50622169?type=standard#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,New Zealand Police,location: AucklandAuckland,"4d ago,at",classification: Administration & Office SupportAdministration & Office SupportsubClassification: OtherOther
3,Early Childhood Centre Administrator,https://www.seek.co.nz/job/50639620?type=standard#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Kew Pacific Island Early Learning Centre,location: SouthlandSouthlandarea: InvercargillInvercargill,"1h ago,at",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Administrative AssistantsAdministrative Assistants
4,Business Support Administrator,https://www.seek.co.nz/job/50622432?type=standout#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,,location: CanterburyCanterburyarea: ChristchurchChristchurch,"4d ago,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Client & Sales AdministrationClient & Sales Administration


### Poor data quality
#### 1. No column name;
#### 2. No index;
#### 3. Null value;
#### 4. Duplicate value;
#### 5. Upper case and lower case mixed;
#### 6. Abbreviation and full name mixed;
#### 7. Time data is not clear
#### 8. Missing data
#### 9. Data mixed in one column
#### 10. Salary format is different;
#### 11. Classification is too long. Parent classification are the same.

## 2. Data preprocess

#### 2.1 Add column name

In [8]:
# add column name first
column_name = ['Position','Link','Company','Location','Release_time','Classification']
df_admin.columns = column_name
df_admin.head(1)

Unnamed: 0,Position,Link,Company,Location,Release_time,Classification
0,Administrator,https://www.seek.co.nz/job/50582301?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,,location: Bay of PlentyBay of Plentyarea: TaurangaTauranga,"Featured,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Office ManagementOffice Management


#### 2.2 Clean column 'Position'

#### 'office administrator' and 'Office Administrator' should be the same position

#### 2.2.1 Capitalize each word in Column 'Position'

In [9]:
# Capitalize the value
df_admin['Position'] = df_admin['Position'].str.title()
df_admin.head(5)

Unnamed: 0,Position,Link,Company,Location,Release_time,Classification
0,Administrator,https://www.seek.co.nz/job/50582301?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,,location: Bay of PlentyBay of Plentyarea: TaurangaTauranga,"Featured,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Office ManagementOffice Management
1,Receptionist,https://www.seek.co.nz/job/50620889?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Avenues Orthodontics,location: Bay of PlentyBay of Plentyarea: TaurangaTauranga,"Featured,at",classification: Administration & Office SupportAdministration & Office SupportsubClassification: ReceptionistsReceptionists
2,Prosecutions Support Officer,https://www.seek.co.nz/job/50622169?type=standard#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,New Zealand Police,location: AucklandAuckland,"4d ago,at",classification: Administration & Office SupportAdministration & Office SupportsubClassification: OtherOther
3,Early Childhood Centre Administrator,https://www.seek.co.nz/job/50639620?type=standard#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Kew Pacific Island Early Learning Centre,location: SouthlandSouthlandarea: InvercargillInvercargill,"1h ago,at",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Administrative AssistantsAdministrative Assistants
4,Business Support Administrator,https://www.seek.co.nz/job/50622432?type=standout#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,,location: CanterburyCanterburyarea: ChristchurchChristchurch,"4d ago,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Client & Sales AdministrationClient & Sales Administration


#### Column 'Position' inlcude work type description, like 'full time','part time','10hours per week','temp'...
#### 2.2.2 Create and fill in Column 'Work type'

In [10]:
# select work type
print(df_admin['Position'].str.contains("[Ff]ull").sum()) #5
print(df_admin['Position'].str.contains("[Pp]art").sum()) #41
print(df_admin['Position'].str.contains("[Ff]ix").sum()) #20
print(df_admin['Position'].str.contains("[Pp]er").sum()) #308
print(df_admin['Position'].str.contains("[Cc]ontract").sum()) #96
df_admin['Worktype'] = df_admin.loc[df_admin['Position'].str.contains("[Ff]ull|[Pp]art|[Ff]ix|[Pp]er|[Cc]ontract"),'Position']


5
41
20
308
96


#### 2.2.3 Clean column 'Link'
#### job/50582301? number between'/' and '?' seems company ID

In [11]:
# filter companyID from Link

pattern = r"\d{8}"
df_admin['CompanyID'] = df_admin['Link'].apply(lambda x: re.findall(pattern,x)[0])
df_admin.head(1)

Unnamed: 0,Position,Link,Company,Location,Release_time,Classification,Worktype,CompanyID
0,Administrator,https://www.seek.co.nz/job/50582301?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,,location: Bay of PlentyBay of Plentyarea: TaurangaTauranga,"Featured,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Office ManagementOffice Management,,50582301


#### 2.2.4 Clean column 'Company' 

In [12]:
# To find 22 missing value records in Column 'Company'
df_admin[df_admin['Company'].isnull()].head(5)

Unnamed: 0,Position,Link,Company,Location,Release_time,Classification,Worktype,CompanyID
0,Administrator,https://www.seek.co.nz/job/50582301?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,,location: Bay of PlentyBay of Plentyarea: TaurangaTauranga,"Featured,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Office ManagementOffice Management,,50582301
4,Business Support Administrator,https://www.seek.co.nz/job/50622432?type=standout#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,,location: CanterburyCanterburyarea: ChristchurchChristchurch,"4d ago,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Client & Sales AdministrationClient & Sales Administration,,50622432
19,Administrator,https://www.seek.co.nz/job/50604829?type=standard#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,,location: ManawatuManawatuarea: Rest of ManawatuRest of Manawatu,"7d ago,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: OtherOther,,50604829
60,Administrator,https://www.seek.co.nz/job/50582301?type=standout#searchRequestToken=3aa68777-3165-4e73-98ef-bae1b5e67bc7,,location: Bay of PlentyBay of Plentyarea: TaurangaTauranga,"11d ago,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Office ManagementOffice Management,,50582301
64,Administration Specialist - Construction,https://www.seek.co.nz/job/50638765?type=standout#searchRequestToken=3aa68777-3165-4e73-98ef-bae1b5e67bc7,,location: AucklandAucklandarea: Rodney & North ShoreRodney & North Shore,"5h ago,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Administrative AssistantsAdministrative Assistants,,50638765


#### I found that 22 null in company because they are private advertisers
#### Next, I will fill "Private Advertiser" in them

In [13]:
# fill null value by 'Private Advertiser'
df_admin['Company'] = df_admin['Company'].fillna(value = 'Private Advertiser')
df_admin.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2708 entries, 0 to 2707
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Position        2708 non-null   object
 1   Link            2708 non-null   object
 2   Company         2708 non-null   object
 3   Location        2708 non-null   object
 4   Release_time    2708 non-null   object
 5   Classification  2708 non-null   object
 6   Worktype        454 non-null    object
 7   CompanyID       2708 non-null   object
dtypes: object(8)
memory usage: 169.4+ KB


#### 2.2.5 Deal with column 'Location'
#### inlcude wage data, extra word like"location:"

In [14]:
# check number of wage data inlcuded in Column'Location'
print(df_admin['Location'].str.contains(',').sum()) # 603

603


#### 603 Location data include wage

In [15]:
# remove salary inforamtion from Column location
df_admin['Salary'] = df_admin['Location'].apply(lambda x: x.split(',')[1] if "," in x else "")

In [16]:
df_admin.head(1)

Unnamed: 0,Position,Link,Company,Location,Release_time,Classification,Worktype,CompanyID,Salary
0,Administrator,https://www.seek.co.nz/job/50582301?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Private Advertiser,location: Bay of PlentyBay of Plentyarea: TaurangaTauranga,"Featured,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Office ManagementOffice Management,,50582301,


In [17]:
# delete wage data in Column 'Locaiton'
df_admin['Location'] = df_admin['Location'].apply(lambda x: x.split(',')[0] if "," in x else x)

In [18]:
df_admin.head(1)

Unnamed: 0,Position,Link,Company,Location,Release_time,Classification,Worktype,CompanyID,Salary
0,Administrator,https://www.seek.co.nz/job/50582301?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Private Advertiser,location: Bay of PlentyBay of Plentyarea: TaurangaTauranga,"Featured,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Office ManagementOffice Management,,50582301,


In [19]:
 # remove extra word "location:"
df_admin['Location'] = df_admin['Location'].apply(lambda x: x[9:])

In [20]:
df_admin.head(1)

Unnamed: 0,Position,Link,Company,Location,Release_time,Classification,Worktype,CompanyID,Salary
0,Administrator,https://www.seek.co.nz/job/50582301?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Private Advertiser,Bay of PlentyBay of Plentyarea: TaurangaTauranga,"Featured,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Office ManagementOffice Management,,50582301,


In [21]:
# split location by word"area:" and put province into Column Area
df_admin['Area'] = df_admin['Location'].apply(lambda x: x.split("area:")[0] if ":" in x else x)

In [22]:
df_admin.head(1)

Unnamed: 0,Position,Link,Company,Location,Release_time,Classification,Worktype,CompanyID,Salary,Area
0,Administrator,https://www.seek.co.nz/job/50582301?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Private Advertiser,Bay of PlentyBay of Plentyarea: TaurangaTauranga,"Featured,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Office ManagementOffice Management,,50582301,,Bay of PlentyBay of Plenty


In [23]:
# split location by word"area:" and put city/region into Column City
df_admin['City'] = df_admin['Location'].apply(lambda x: x.split("area:")[1] if ":" in x else x)

In [24]:
df_admin.head(1)

Unnamed: 0,Position,Link,Company,Location,Release_time,Classification,Worktype,CompanyID,Salary,Area,City
0,Administrator,https://www.seek.co.nz/job/50582301?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Private Advertiser,Bay of PlentyBay of Plentyarea: TaurangaTauranga,"Featured,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Office ManagementOffice Management,,50582301,,Bay of PlentyBay of Plenty,TaurangaTauranga


In [25]:
# drop repeat word
def drop_repeat(s):
    trim = s.strip()
    index = (trim + trim).find(trim,1)
    if index == -1:
        return trim
    else:
        return trim[:index]
  

In [26]:
# delete repeat words
df_admin['Area'] = df_admin['Area'].apply(lambda x: drop_repeat(x))
df_admin.head(1)

Unnamed: 0,Position,Link,Company,Location,Release_time,Classification,Worktype,CompanyID,Salary,Area,City
0,Administrator,https://www.seek.co.nz/job/50582301?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Private Advertiser,Bay of PlentyBay of Plentyarea: TaurangaTauranga,"Featured,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Office ManagementOffice Management,,50582301,,Bay of Plenty,TaurangaTauranga


In [27]:
# delete duplicat words
df_admin['City'] = df_admin['City'].apply(lambda x: drop_repeat(x))
df_admin.head(1)

Unnamed: 0,Position,Link,Company,Location,Release_time,Classification,Worktype,CompanyID,Salary,Area,City
0,Administrator,https://www.seek.co.nz/job/50582301?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Private Advertiser,Bay of PlentyBay of Plentyarea: TaurangaTauranga,"Featured,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Office ManagementOffice Management,,50582301,,Bay of Plenty,Tauranga


In [28]:
# delete location
df_admin.drop(['Location'],axis = 1)

Unnamed: 0,Position,Link,Company,Release_time,Classification,Worktype,CompanyID,Salary,Area,City
0,Administrator,https://www.seek.co.nz/job/50582301?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Private Advertiser,"Featured,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Office ManagementOffice Management,,50582301,,Bay of Plenty,Tauranga
1,Receptionist,https://www.seek.co.nz/job/50620889?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Avenues Orthodontics,"Featured,at",classification: Administration & Office SupportAdministration & Office SupportsubClassification: ReceptionistsReceptionists,,50620889,,Bay of Plenty,Tauranga
2,Prosecutions Support Officer,https://www.seek.co.nz/job/50622169?type=standard#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,New Zealand Police,"4d ago,at",classification: Administration & Office SupportAdministration & Office SupportsubClassification: OtherOther,,50622169,,Auckland,Auckland
3,Early Childhood Centre Administrator,https://www.seek.co.nz/job/50639620?type=standard#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Kew Pacific Island Early Learning Centre,"1h ago,at",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Administrative AssistantsAdministrative Assistants,,50639620,,Southland,Invercargill
4,Business Support Administrator,https://www.seek.co.nz/job/50622432?type=standout#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Private Advertiser,"4d ago,at,Private Advertiser",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Client & Sales AdministrationClient & Sales Administration,,50622432,,Canterbury,Christchurch
...,...,...,...,...,...,...,...,...,...,...
2703,Key Account Manager,https://www.seek.co.nz/job/50490062?type=standout#searchRequestToken=c4e3452e-0c7b-47ee-8dcd-97b653d46ce8,Hays Talent Solutions,"27d ago,at",classification: Administration & Office SupportAdministration & Office SupportsubClassification: Client & Sales AdministrationClient & Sales Administration,,50490062,,Auckland,Auckland Central
2704,Executive Assistant,https://www.seek.co.nz/job/50488000?type=standout#searchRequestToken=c4e3452e-0c7b-47ee-8dcd-97b653d46ce8,one eighty recruitment,"27d ago,at",Competitive hourly rate,,50488000,Competitive hourly rate,Wellington,Wellington Central
2705,Temporary Office Roles,https://www.seek.co.nz/job/50524865?type=standout#searchRequestToken=c4e3452e-0c7b-47ee-8dcd-97b653d46ce8,Asset Recruitment Ltd,"20d ago,at",Competitive hourly rates $$,,50524865,Competitive hourly rates $$,Waikato,Hamilton
2706,Temporary Office Roles,https://www.seek.co.nz/job/50477118?type=standout#searchRequestToken=c4e3452e-0c7b-47ee-8dcd-97b653d46ce8,Asset Recruitment Ltd,"28d ago,at",Competitive hourly rates $$,,50477118,Competitive hourly rates $$,Waikato,Hamilton


#### 2.2.6 Deal with column 'Release_time'
#### non useful word like 'at', 'Private Advertiser' need be deleted

In [29]:
# remove "Private advertiser or at" in Column Release_time
df_admin['Release_time'] = df_admin['Release_time'].apply(lambda x: x.split(',')[0])

In [30]:
df_admin.head(5)

Unnamed: 0,Position,Link,Company,Location,Release_time,Classification,Worktype,CompanyID,Salary,Area,City
0,Administrator,https://www.seek.co.nz/job/50582301?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Private Advertiser,Bay of PlentyBay of Plentyarea: TaurangaTauranga,Featured,classification: Administration & Office SupportAdministration & Office SupportsubClassification: Office ManagementOffice Management,,50582301,,Bay of Plenty,Tauranga
1,Receptionist,https://www.seek.co.nz/job/50620889?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Avenues Orthodontics,Bay of PlentyBay of Plentyarea: TaurangaTauranga,Featured,classification: Administration & Office SupportAdministration & Office SupportsubClassification: ReceptionistsReceptionists,,50620889,,Bay of Plenty,Tauranga
2,Prosecutions Support Officer,https://www.seek.co.nz/job/50622169?type=standard#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,New Zealand Police,AucklandAuckland,4d ago,classification: Administration & Office SupportAdministration & Office SupportsubClassification: OtherOther,,50622169,,Auckland,Auckland
3,Early Childhood Centre Administrator,https://www.seek.co.nz/job/50639620?type=standard#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Kew Pacific Island Early Learning Centre,SouthlandSouthlandarea: InvercargillInvercargill,1h ago,classification: Administration & Office SupportAdministration & Office SupportsubClassification: Administrative AssistantsAdministrative Assistants,,50639620,,Southland,Invercargill
4,Business Support Administrator,https://www.seek.co.nz/job/50622432?type=standout#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Private Advertiser,CanterburyCanterburyarea: ChristchurchChristchurch,4d ago,classification: Administration & Office SupportAdministration & Office SupportsubClassification: Client & Sales AdministrationClient & Sales Administration,,50622432,,Canterbury,Christchurch


In [31]:
# remove "ago"
df_admin['Release_time'] = df_admin['Release_time'].apply(lambda x: x if x.strip() == 'Featured' else x.split(" ")[0])

In [32]:
df_admin.head(5)

Unnamed: 0,Position,Link,Company,Location,Release_time,Classification,Worktype,CompanyID,Salary,Area,City
0,Administrator,https://www.seek.co.nz/job/50582301?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Private Advertiser,Bay of PlentyBay of Plentyarea: TaurangaTauranga,Featured,classification: Administration & Office SupportAdministration & Office SupportsubClassification: Office ManagementOffice Management,,50582301,,Bay of Plenty,Tauranga
1,Receptionist,https://www.seek.co.nz/job/50620889?type=promoted#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Avenues Orthodontics,Bay of PlentyBay of Plentyarea: TaurangaTauranga,Featured,classification: Administration & Office SupportAdministration & Office SupportsubClassification: ReceptionistsReceptionists,,50620889,,Bay of Plenty,Tauranga
2,Prosecutions Support Officer,https://www.seek.co.nz/job/50622169?type=standard#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,New Zealand Police,AucklandAuckland,4d,classification: Administration & Office SupportAdministration & Office SupportsubClassification: OtherOther,,50622169,,Auckland,Auckland
3,Early Childhood Centre Administrator,https://www.seek.co.nz/job/50639620?type=standard#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Kew Pacific Island Early Learning Centre,SouthlandSouthlandarea: InvercargillInvercargill,1h,classification: Administration & Office SupportAdministration & Office SupportsubClassification: Administrative AssistantsAdministrative Assistants,,50639620,,Southland,Invercargill
4,Business Support Administrator,https://www.seek.co.nz/job/50622432?type=standout#searchRequestToken=feee129e-c80f-4f79-ac5f-98ddb6d6c22b,Private Advertiser,CanterburyCanterburyarea: ChristchurchChristchurch,4d,classification: Administration & Office SupportAdministration & Office SupportsubClassification: Client & Sales AdministrationClient & Sales Administration,,50622432,,Canterbury,Christchurch


In [33]:
# reocord less than 1 day as 1 day.
df_admin['Release_time'] = df_admin['Release_time'].apply(lambda x: "1d" if 'h' in x else x)

#### 2.2.7 Deal with Column Classification

In [34]:
print(df_admin['Classification'].str.contains("\d+").sum()) # 96 rows were filled by salary


96


In [36]:
df_admin['Classification'] =df_admin['Classification'].apply(lambda x: x.split(':', n=2)[1] if ":" in x else "")

TypeError: 'n' is an invalid keyword argument for split()

In [None]:
df_admin.head(5)

2.2.8 Deal with Column salary

In [None]:
df_admin['Salary'] = df_admin.loc[df_admin['Salary'].str.contains("\d"),"Salary"]

In [None]:
df_admin.to_excel("NZ_Admin_JOBS_1.xlsx")