In [1]:
import pandas as pd
#Loading the dataset
df_softskill = pd.read_excel('Australian Skills Classification - November 2022_1.xlsx', sheet_name='Core competencies')
df_Occupations = pd.read_excel('004-victorianvetcoursesdata.xlsx', sheet_name='Occupations')

In [2]:
#Filtering the ANZSCO Code
df_softskill = df_softskill[df_softskill['ANZSCO Code'].isin(df_Occupations['ANZSCO'].values)]
#Sort soft skill by score
df_softskill = df_softskill.sort_values(by=['Score'], ascending=[False])
#Show only needed column
df_softskill = df_softskill.loc[:, ['ANZSCO Code', 'Core Competency']]

In [3]:
# Add a new column for the competency number within each ANZSCO Title
df_softskill['index'] = df_softskill.groupby('ANZSCO Code').cumcount() + 1

# Pivot the table to create a new column for each competency
df_softskill = df_softskill.pivot(index='ANZSCO Code', columns='index', values='Core Competency')

# Rename the columns to match the original data
df_softskill = df_softskill.rename(columns={
    1: 'Softskill1',
    2: 'Softskill2',
    3: 'Softskill3',
    4: 'Softskill4',
    5: 'Softskill5'})

# Reset the index to make ANZSCO Title a column again
df_softskill = df_softskill.reset_index()
df_softskill = df_softskill.iloc[:, 0:6]

In [4]:
#Loading the dataset
df_Occupations_desc = pd.read_excel('Australian Skills Classification - November 2022_1.xlsx', sheet_name='Occupation descriptions')
#Select needed column
df_Occupations_desc = df_Occupations_desc[['ANZSCO Code', 'ANZSCO Title', 'ANZSCO Description']]
#Filtering the ANZSCO Code
df_Occupations_desc = df_Occupations_desc[df_Occupations_desc['ANZSCO Code'].isin(df_softskill['ANZSCO Code'].values)]
#Remove duplicate data
df_Occupations_desc = df_Occupations_desc.drop_duplicates(subset=['ANZSCO Code'])

In [5]:
#Merge dataset to for column ANZSCO Title ,ANZSCO Description with Top5 softskill
merged_df = df_Occupations_desc.merge(df_softskill, how='left', on='ANZSCO Code')
merged_df = merged_df.iloc[:, 0:9]
merged_df

Unnamed: 0,ANZSCO Code,ANZSCO Title,ANZSCO Description,Softskill1,Softskill2,Softskill3,Softskill4,Softskill5
0,131112,Sales and Marketing Manager,"Plans, organises, directs, controls and coordi...",Planning and organising,Initiative and innovation,Reading,Writing,Teamwork
1,132111,Corporate Services Manager,"Plans, organises, directs, controls and coordi...",Initiative and innovation,Planning and organising,Writing,Oral communication,Reading
2,132311,Human Resource Manager,"Plans, organises, directs, controls and coordi...",Planning and organising,Teamwork,Initiative and innovation,Reading,Writing
3,133512,Production Manager (Manufacturing),"Plans, organises, directs, controls and coordi...",Teamwork,Planning and organising,Initiative and innovation,Writing,Oral communication
4,133513,Production Manager (Mining),"Plans, organises, directs, controls and coordi...",Planning and organising,Initiative and innovation,Teamwork,Learning,Reading
...,...,...,...,...,...,...,...,...
264,899415,Tyre Fitter,"Fits, repairs and replaces tyres on motor vehi...",Initiative and innovation,Planning and organising,Teamwork,Learning,Problem solving
265,899511,Printer's Assistant,Performs routine tasks in the production of pr...,Initiative and innovation,Reading,Planning and organising,Oral communication,Learning
266,899611,Recycling or Rubbish Collector,"Collects household, commercial and industrial ...",Initiative and innovation,Planning and organising,Teamwork,Writing,Reading
267,899911,Bicycle Mechanic,"Repairs and adjusts bicycles, and assembles bi...",Teamwork,Planning and organising,Initiative and innovation,Oral communication,Problem solving


In [6]:
#Loading the dataset
df_techskill = pd.read_excel('Australian Skills Classification - November 2022_1.xlsx', sheet_name='Technology tools')
#Select needed column
df_techskill = df_techskill[['ANZSCO Code', 'Technology Tool', 'Technology Tool ID']]
#Filtering the ANZSCO Code
df_techskill = df_techskill[df_techskill['ANZSCO Code'].isin(merged_df['ANZSCO Code'].values)]


In [7]:
# Add a new column for the competency number within each ANZSCO Title
df_techskill['index'] = df_techskill.groupby('ANZSCO Code').cumcount() + 1

# Pivot the table to create a new column for each Techskill
df_techskill_final = df_techskill.pivot(index='ANZSCO Code', columns='index', values='Technology Tool')

# Rename the columns to match the original data
df_techskill_final = df_techskill_final.rename(columns={
    1: 'Techskill1',
    2: 'Techskill2',
    3: 'Techskill3',
})

# Reset the index to make ANZSCO Title a column again
df_techskill_final = df_techskill_final.reset_index()

In [8]:
#Select needed column
df_techskill_final = df_techskill_final.iloc[:, 0:4]
#Merge dataset to for Top3 tech skill
merged_df = pd.merge(merged_df, df_techskill_final, how='left', on='ANZSCO Code')
merged_df

Unnamed: 0,ANZSCO Code,ANZSCO Title,ANZSCO Description,Softskill1,Softskill2,Softskill3,Softskill4,Softskill5,Techskill1,Techskill2,Techskill3
0,131112,Sales and Marketing Manager,"Plans, organises, directs, controls and coordi...",Planning and organising,Initiative and innovation,Reading,Writing,Teamwork,Social media platforms,Customer relationship management CRM software,Enterprise resource planning ERP software
1,132111,Corporate Services Manager,"Plans, organises, directs, controls and coordi...",Initiative and innovation,Planning and organising,Writing,Oral communication,Reading,Enterprise resource planning ERP software,Business intelligence and decision support sof...,Project management software
2,132311,Human Resource Manager,"Plans, organises, directs, controls and coordi...",Planning and organising,Teamwork,Initiative and innovation,Reading,Writing,Document management and code versioning software,Enterprise resource planning ERP software,Social media platforms
3,133512,Production Manager (Manufacturing),"Plans, organises, directs, controls and coordi...",Teamwork,Planning and organising,Initiative and innovation,Writing,Oral communication,Enterprise resource planning ERP software,Project management software,Video creation and editing software
4,133513,Production Manager (Mining),"Plans, organises, directs, controls and coordi...",Planning and organising,Initiative and innovation,Teamwork,Learning,Reading,Enterprise resource planning ERP software,Project management software,Business intelligence and decision support sof...
...,...,...,...,...,...,...,...,...,...,...,...
264,899415,Tyre Fitter,"Fits, repairs and replaces tyres on motor vehi...",Initiative and innovation,Planning and organising,Teamwork,Learning,Problem solving,,,
265,899511,Printer's Assistant,Performs routine tasks in the production of pr...,Initiative and innovation,Reading,Planning and organising,Oral communication,Learning,Document management and code versioning software,File storage technologies,Graphics or photo imaging software
266,899611,Recycling or Rubbish Collector,"Collects household, commercial and industrial ...",Initiative and innovation,Planning and organising,Teamwork,Writing,Reading,Facilities management software,GPS and navigation technologies,
267,899911,Bicycle Mechanic,"Repairs and adjusts bicycles, and assembles bi...",Teamwork,Planning and organising,Initiative and innovation,Oral communication,Problem solving,Social media platforms,,


In [9]:
#Loading the dataset
df_tech_example = pd.read_excel('Australian Skills Classification - November 2022_1.xlsx', sheet_name='Appendix - tech tool examples')
#Filter for the most important example
df_tech_example = df_tech_example[df_tech_example['Ranking'] == 1]
#Select needed column
df_tech_example = df_tech_example[['Technology tool ID', 'Technology Tool Example']]

#Merge dataset to for Top3 tech skill example
df_tech3 = df_techskill[['ANZSCO Code','Technology Tool ID']]
df_tech3 = pd.merge(df_tech3, df_tech_example, how='left', left_on='Technology Tool ID', right_on='Technology tool ID')
df_tech3

Unnamed: 0,ANZSCO Code,Technology Tool ID,Technology tool ID,Technology Tool Example
0,131112,31063,31063.0,Facebook
1,131112,31015,31015.0,Salesforce software
2,131112,31025,31025.0,SAP Business Objects
3,131112,31033,31033.0,Adobe Photoshop
4,131112,31066,31066.0,SAS
...,...,...,...,...
1110,741111,31039,31039.0,Inventory tracking software
1111,832211,31036,31036.0,Computerised Numerical Control (CNC) machining...
1112,832211,31036,31036.0,Computerised Numerical Control (CNC) machining...
1113,839912,31036,31036.0,Computerised Numerical Control (CNC) machining...


In [10]:
# Add a new column for the competency number within each ANZSCO Title
df_tech3['Job_index'] = df_tech3.groupby('ANZSCO Code').cumcount() + 1

# Pivot the table to create a new column for each tech skill example
df_tech3 = df_tech3.pivot(index='ANZSCO Code', columns='Job_index', values='Technology Tool Example')

# Rename the columns to match the original data
df_tech3 = df_tech3.rename(columns={
    1: 'Technology Tool Example1',
    2: 'Technology Tool Example2',
    3: 'Technology Tool Example3',
})

# Reset the index to make ANZSCO Title a column again
df_tech3 = df_tech3.reset_index()
df_tech3 = df_tech3.iloc[:, 0:4]
#Merge dataset to for Top3 tech skill example
merged_df = pd.merge(merged_df, df_tech3, how='left', on='ANZSCO Code')
merged_df

Unnamed: 0,ANZSCO Code,ANZSCO Title,ANZSCO Description,Softskill1,Softskill2,Softskill3,Softskill4,Softskill5,Techskill1,Techskill2,Techskill3,Technology Tool Example1,Technology Tool Example2,Technology Tool Example3
0,131112,Sales and Marketing Manager,"Plans, organises, directs, controls and coordi...",Planning and organising,Initiative and innovation,Reading,Writing,Teamwork,Social media platforms,Customer relationship management CRM software,Enterprise resource planning ERP software,Facebook,Salesforce software,SAP Business Objects
1,132111,Corporate Services Manager,"Plans, organises, directs, controls and coordi...",Initiative and innovation,Planning and organising,Writing,Oral communication,Reading,Enterprise resource planning ERP software,Business intelligence and decision support sof...,Project management software,SAP Business Objects,SAP Business Objects,Microsoft SharePoint
2,132311,Human Resource Manager,"Plans, organises, directs, controls and coordi...",Planning and organising,Teamwork,Initiative and innovation,Reading,Writing,Document management and code versioning software,Enterprise resource planning ERP software,Social media platforms,Git,SAP Business Objects,Facebook
3,133512,Production Manager (Manufacturing),"Plans, organises, directs, controls and coordi...",Teamwork,Planning and organising,Initiative and innovation,Writing,Oral communication,Enterprise resource planning ERP software,Project management software,Video creation and editing software,SAP Business Objects,Microsoft SharePoint,Adobe After Effects
4,133513,Production Manager (Mining),"Plans, organises, directs, controls and coordi...",Planning and organising,Initiative and innovation,Teamwork,Learning,Reading,Enterprise resource planning ERP software,Project management software,Business intelligence and decision support sof...,SAP Business Objects,Microsoft SharePoint,SAP Business Objects
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264,899415,Tyre Fitter,"Fits, repairs and replaces tyres on motor vehi...",Initiative and innovation,Planning and organising,Teamwork,Learning,Problem solving,,,,,,
265,899511,Printer's Assistant,Performs routine tasks in the production of pr...,Initiative and innovation,Reading,Planning and organising,Oral communication,Learning,Document management and code versioning software,File storage technologies,Graphics or photo imaging software,Git,Cloud storage solutions,Adobe Photoshop
266,899611,Recycling or Rubbish Collector,"Collects household, commercial and industrial ...",Initiative and innovation,Planning and organising,Teamwork,Writing,Reading,Facilities management software,GPS and navigation technologies,,Computerised Maintenance Management System sof...,Global Positioning System GPS devices,
267,899911,Bicycle Mechanic,"Repairs and adjusts bicycles, and assembles bi...",Teamwork,Planning and organising,Initiative and innovation,Oral communication,Problem solving,Social media platforms,,,Facebook,,


In [11]:
#Loading the dataset
df_Course_occ = pd.read_excel('004-victorianvetcoursesdata.xlsx', sheet_name='Course_Occupation')
df_Course_name = pd.read_excel('004-victorianvetcoursesdata.xlsx', sheet_name='Courses')

In [12]:
#Filtering the ANZSCO Code
df_Course_occ = df_Course_occ[df_Course_occ['anzsco'].isin(merged_df['ANZSCO Code'].values)]
df_Course_name = df_Course_name.rename(columns={'ID': 'Course ID'})
df_Course_merged = pd.merge(df_Course_occ, df_Course_name, how='left', on='Course ID')
#Merge course name and entry requirement
df_Course_merged = df_Course_merged[['anzsco', 'Course ID', 'Course Title','entry_requirements']]
df_Course_merged

Unnamed: 0,anzsco,Course ID,Course Title,entry_requirements
0,362411,31,Certificate II in Retail Nursery,No minimum education
1,362411,52,Certificate III in Production Nursery,No minimum education
2,362411,53,Certificate III in Retail Nursery,No minimum education
3,234314,55,Certificate III in Conservation and Land Manag...,No minimum education
4,721111,67,Certificate III in Rural Machinery Operations,No minimum education
...,...,...,...,...
750,721211,11382,Certificate III in Civil Construction,No minimum education
751,321214,11383,Certificate III in Outdoor Power Equipment Tec...,No minimum education
752,234711,11416,Graduate Diploma of Animal Biomechanical Medicine,"Bachelor of Veterinary Science, Chiropractic S..."
753,241213,11516,Advanced Diploma of Rudolf Steiner Education,Relevant Diploma


In [13]:
# Add a new column for the competency number within each ANZSCO Title
df_Course_merged['index'] = df_Course_merged.groupby('anzsco').cumcount() + 1

# Pivot the table to create a new column for each Course
df_Course_name_top5 = df_Course_merged.pivot(index='anzsco', columns='index', values='Course Title')

# Rename the columns to match the original data
df_Course_name_top5 = df_Course_name_top5.rename(columns={
    1: 'Course1',
    2: 'Course2',
    3: 'Course3',
    4: 'Course4',
    5: 'Course5'
})

# Reset the index to make ANZSCO Title a column again
df_Course_name_top5 = df_Course_name_top5.reset_index()

In [14]:
#Merge dataset for top5 course
df_Course_name_top5 = df_Course_name_top5.iloc[:, 0:6]
df_Course_name_top5 = df_Course_name_top5.rename(columns={'anzsco': 'ANZSCO Code'})
merged_df = pd.merge(merged_df, df_Course_name_top5, how='left', on='ANZSCO Code')
merged_df

Unnamed: 0,ANZSCO Code,ANZSCO Title,ANZSCO Description,Softskill1,Softskill2,Softskill3,Softskill4,Softskill5,Techskill1,Techskill2,Techskill3,Technology Tool Example1,Technology Tool Example2,Technology Tool Example3,Course1,Course2,Course3,Course4,Course5
0,131112,Sales and Marketing Manager,"Plans, organises, directs, controls and coordi...",Planning and organising,Initiative and innovation,Reading,Writing,Teamwork,Social media platforms,Customer relationship management CRM software,Enterprise resource planning ERP software,Facebook,Salesforce software,SAP Business Objects,Diploma of Fashion and Textiles Merchandising,Advanced Diploma of Fashion and Textiles Merch...,,,
1,132111,Corporate Services Manager,"Plans, organises, directs, controls and coordi...",Initiative and innovation,Planning and organising,Writing,Oral communication,Reading,Enterprise resource planning ERP software,Business intelligence and decision support sof...,Project management software,SAP Business Objects,SAP Business Objects,Microsoft SharePoint,Advanced Diploma of Business,Advanced Diploma of Management,,,
2,132311,Human Resource Manager,"Plans, organises, directs, controls and coordi...",Planning and organising,Teamwork,Initiative and innovation,Reading,Writing,Document management and code versioning software,Enterprise resource planning ERP software,Social media platforms,Git,SAP Business Objects,Facebook,Advanced Diploma of Management (Human Resources),,,,
3,133512,Production Manager (Manufacturing),"Plans, organises, directs, controls and coordi...",Teamwork,Planning and organising,Initiative and innovation,Writing,Oral communication,Enterprise resource planning ERP software,Project management software,Video creation and editing software,SAP Business Objects,Microsoft SharePoint,Adobe After Effects,Certificate IV in Food Processing,Diploma of Food Processing,,,
4,133513,Production Manager (Mining),"Plans, organises, directs, controls and coordi...",Planning and organising,Initiative and innovation,Teamwork,Learning,Reading,Enterprise resource planning ERP software,Project management software,Business intelligence and decision support sof...,SAP Business Objects,Microsoft SharePoint,SAP Business Objects,Diploma of Surface Operations Management,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264,899415,Tyre Fitter,"Fits, repairs and replaces tyres on motor vehi...",Initiative and innovation,Planning and organising,Teamwork,Learning,Problem solving,,,,,,,Certificate II in Automotive Tyre Servicing Te...,,,,
265,899511,Printer's Assistant,Performs routine tasks in the production of pr...,Initiative and innovation,Reading,Planning and organising,Oral communication,Learning,Document management and code versioning software,File storage technologies,Graphics or photo imaging software,Git,Cloud storage solutions,Adobe Photoshop,Certificate II in Printing and Graphic Arts (G...,Certificate II in Printing and Graphic Arts (S...,,,
266,899611,Recycling or Rubbish Collector,"Collects household, commercial and industrial ...",Initiative and innovation,Planning and organising,Teamwork,Writing,Reading,Facilities management software,GPS and navigation technologies,,Computerised Maintenance Management System sof...,Global Positioning System GPS devices,,Certificate III in Waste Management,Certificate IV in Waste Management,,,
267,899911,Bicycle Mechanic,"Repairs and adjusts bicycles, and assembles bi...",Teamwork,Planning and organising,Initiative and innovation,Oral communication,Problem solving,Social media platforms,,,Facebook,,,Certificate III in Bicycle Workshop Operations,,,,


In [15]:
# Pivot the table to create a new column for each entry_requirements
df_Course_required = df_Course_merged.pivot(index='anzsco', columns='index', values='entry_requirements')

# Rename the columns to match the original data
df_Course_required = df_Course_required.rename(columns={
    1: 'entry_requirements1',
    2: 'entry_requirements2',
    3: 'entry_requirements3',
    4: 'entry_requirements4',
    5: 'entry_requirements5'
})

# Reset the index to make ANZSCO Title a column again
df_Course_required = df_Course_required.reset_index()

In [16]:
#Merge dataset for top5 course entry requirements
df_Course_required = df_Course_required.iloc[:, 0:6]
df_Course_required = df_Course_required.rename(columns={'anzsco': 'ANZSCO Code'})
merged_df = pd.merge(merged_df, df_Course_required, how='left', on='ANZSCO Code')
merged_df

Unnamed: 0,ANZSCO Code,ANZSCO Title,ANZSCO Description,Softskill1,Softskill2,Softskill3,Softskill4,Softskill5,Techskill1,Techskill2,...,Course1,Course2,Course3,Course4,Course5,entry_requirements1,entry_requirements2,entry_requirements3,entry_requirements4,entry_requirements5
0,131112,Sales and Marketing Manager,"Plans, organises, directs, controls and coordi...",Planning and organising,Initiative and innovation,Reading,Writing,Teamwork,Social media platforms,Customer relationship management CRM software,...,Diploma of Fashion and Textiles Merchandising,Advanced Diploma of Fashion and Textiles Merch...,,,,No minimum education,Diploma of Fashion and Textiles Merchandising,,,
1,132111,Corporate Services Manager,"Plans, organises, directs, controls and coordi...",Initiative and innovation,Planning and organising,Writing,Oral communication,Reading,Enterprise resource planning ERP software,Business intelligence and decision support sof...,...,Advanced Diploma of Business,Advanced Diploma of Management,,,,Diploma of Business or other relevant qualific...,Diploma of Management; or relevant vocational ...,,,
2,132311,Human Resource Manager,"Plans, organises, directs, controls and coordi...",Planning and organising,Teamwork,Initiative and innovation,Reading,Writing,Document management and code versioning software,Enterprise resource planning ERP software,...,Advanced Diploma of Management (Human Resources),,,,,Relevant Diploma; or relevant vocational exper...,,,,
3,133512,Production Manager (Manufacturing),"Plans, organises, directs, controls and coordi...",Teamwork,Planning and organising,Initiative and innovation,Writing,Oral communication,Enterprise resource planning ERP software,Project management software,...,Certificate IV in Food Processing,Diploma of Food Processing,,,,Certificate III in Food Processing; or relevan...,Certificate IV in Food Processing; or relevant...,,,
4,133513,Production Manager (Mining),"Plans, organises, directs, controls and coordi...",Planning and organising,Initiative and innovation,Teamwork,Learning,Reading,Enterprise resource planning ERP software,Project management software,...,Diploma of Surface Operations Management,,,,,No minimum education,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264,899415,Tyre Fitter,"Fits, repairs and replaces tyres on motor vehi...",Initiative and innovation,Planning and organising,Teamwork,Learning,Problem solving,,,...,Certificate II in Automotive Tyre Servicing Te...,,,,,No minimum education,,,,
265,899511,Printer's Assistant,Performs routine tasks in the production of pr...,Initiative and innovation,Reading,Planning and organising,Oral communication,Learning,Document management and code versioning software,File storage technologies,...,Certificate II in Printing and Graphic Arts (G...,Certificate II in Printing and Graphic Arts (S...,,,,No minimum education,No minimum education,,,
266,899611,Recycling or Rubbish Collector,"Collects household, commercial and industrial ...",Initiative and innovation,Planning and organising,Teamwork,Writing,Reading,Facilities management software,GPS and navigation technologies,...,Certificate III in Waste Management,Certificate IV in Waste Management,,,,No minimum education,Certificate III in Waste Management or equivalent,,,
267,899911,Bicycle Mechanic,"Repairs and adjusts bicycles, and assembles bi...",Teamwork,Planning and organising,Initiative and innovation,Oral communication,Problem solving,Social media platforms,,...,Certificate III in Bicycle Workshop Operations,,,,,No minimum education,,,,


In [17]:
#Saving file as CSV
merged_df.to_csv('job_skill_course.csv', index=False)