**DATA PREPROCESSING**

In [1]:
# Importing the necessary libraries
import pandas as pd
import numpy as np

In [8]:
# Importing the dataset
# The ISCO code column is read as text instead of being parsed as a number:
# the classification contains codes that start with "0" (major group 0), and
# numeric parsing would turn e.g. "0110" into 110, which the 4-digit filter
# further down would then silently discard.
# NOTE(review): assumes the raw header is "ISCO 08 Code" (its name before the
# underscore renaming below) - confirm against the CSV file.
isco_08_df = pd.read_csv("ISCO-08-Data.csv", dtype={"ISCO 08 Code": str})

# Number of rows and columns
isco_08_df.shape

(619, 8)

In [9]:
# Normalise the headers so that no column name contains a space
# (each space becomes an underscore), then show the resulting names
isco_08_df.columns = [header.replace(" ", "_") for header in isco_08_df.columns]
isco_08_df.columns

Index(['Level', 'ISCO_08_Code', 'Title_EN', 'Definition', 'Tasks_include',
       'Included_occupations', 'Excluded_occupations', 'Notes'],
      dtype='object')

In [10]:
# Keep only the rows whose "ISCO_08_Code" consists of exactly 4 digits.
# The codes are cast to str first so the pattern also works when the column
# was parsed as a number.
# .copy() makes the filtered result an independent frame, so the column that is
# added to it in the next cell is not reported by pandas as a write to a slice
# of the original frame (SettingWithCopyWarning).
isco_08_df = isco_08_df[
    isco_08_df['ISCO_08_Code'].astype(str).str.match(r'^\d{4}$')
].copy()
isco_08_df.head()

Unnamed: 0,Level,ISCO_08_Code,Title_EN,Definition,Tasks_include,Included_occupations,Excluded_occupations,Notes
3,4,1111,Legislators,"Legislators determine, formulate, and direct p...",Tasks include -\n(a) presiding over or partic...,Examples of the occupations classified here:\n...,,
4,4,1112,Senior Government Officials,Senior government officials advise governments...,"Tasks include -\n(a) advising national, state...",Examples of the occupations classified here:\n...,,Chief executives of Government-owned enterpris...
5,4,1113,Traditional Chiefs and Heads of Villages,Traditional chiefs and heads of villages perfo...,Tasks include -\n(a) allocating the use of co...,Examples of the occupations classified here:\n...,,
6,4,1114,Senior Officials of Special-interest Organizat...,Senior officials of special-interest organizat...,Tasks include -\n(a) determining and formulat...,Examples of the occupations classified here:\n...,,
8,4,1120,Managing Directors and Chief Executives,Managing directors and chief executives formul...,"Tasks include -\n(a) planning, directing and ...",Examples of the occupations classified here:\n...,,Regional managers and other senior managers wh...


In [11]:
# Build "detailed_explanation": one text per row that lists every source
# column as a "<column name>:" line followed by that row's value.
# The target column itself is left out, so re-running the cell does not nest
# an earlier result inside the new one.
# NOTE(review): cells that are missing (NaN) are formatted by the f-string as
# the literal text "nan" - confirm that this is wanted downstream.
source_columns = [
    name for name in isco_08_df.columns if name != "detailed_explanation"
]


def render_row(record):
    """Format one row as newline-separated "column:" / value entries."""
    entries = []
    for name in source_columns:
        entries.append(f"{name}:\n{record[name]}")
    return "\n".join(entries)


isco_08_df["detailed_explanation"] = isco_08_df.apply(render_row, axis=1)

isco_08_df.head()

Unnamed: 0,Level,ISCO_08_Code,Title_EN,Definition,Tasks_include,Included_occupations,Excluded_occupations,Notes,detailed_explanation
3,4,1111,Legislators,"Legislators determine, formulate, and direct p...",Tasks include -\n(a) presiding over or partic...,Examples of the occupations classified here:\n...,,,Level:\n4\nISCO_08_Code:\n1111\nTitle_EN:\nLeg...
4,4,1112,Senior Government Officials,Senior government officials advise governments...,"Tasks include -\n(a) advising national, state...",Examples of the occupations classified here:\n...,,Chief executives of Government-owned enterpris...,Level:\n4\nISCO_08_Code:\n1112\nTitle_EN:\nSen...
5,4,1113,Traditional Chiefs and Heads of Villages,Traditional chiefs and heads of villages perfo...,Tasks include -\n(a) allocating the use of co...,Examples of the occupations classified here:\n...,,,Level:\n4\nISCO_08_Code:\n1113\nTitle_EN:\nTra...
6,4,1114,Senior Officials of Special-interest Organizat...,Senior officials of special-interest organizat...,Tasks include -\n(a) determining and formulat...,Examples of the occupations classified here:\n...,,,Level:\n4\nISCO_08_Code:\n1114\nTitle_EN:\nSen...
8,4,1120,Managing Directors and Chief Executives,Managing directors and chief executives formul...,"Tasks include -\n(a) planning, directing and ...",Examples of the occupations classified here:\n...,,Regional managers and other senior managers wh...,Level:\n4\nISCO_08_Code:\n1120\nTitle_EN:\nMan...


In [12]:
# Write the filtered and enriched table to disk; index=False keeps the
# row labels out of the file
isco_08_df.to_csv(path_or_buf="preprocessed_isco_08_data.csv", index=False)