In [5]:
import pandas as pd
from pathlib import Path

# Structured NCO table; the path is relative to the notebook's working directory.
INPUT_CSV = Path("../data/processed/nco_structured.csv")

# dtype=str loads every column as text, so codes such as '1111.0100' keep their
# trailing zeros instead of being parsed as floats.
df = pd.read_csv(filepath_or_buffer=INPUT_CSV, dtype=str)
print("Loaded:", df.shape)

df.head(n=5)


Loaded: (3598, 3)


Unnamed: 0,nco_2015_code,title,nco_2004_code
0,1111.01,"Elected Official, Union Government",1111.1
1,1111.02,"Elected Official, State Government",1112.1
2,1111.03,"Elected Official, Local Bodies",1113.1
3,1111.99,"Legislators, Other",1119.9
4,1112.01,"Administrative Official, Union Government",1121.1


In [6]:
# Show the first ten raw values of each code column as plain Python lists,
# which exposes the exact stored strings.
for code_column in ("nco_2015_code", "nco_2004_code"):
    print(df[code_column].head(10).tolist())


['1111.0100', '1111.0200', '1111.0300', '1111.9900', '1112.0100', '1112.0200', '1112.0300', '1112.0400', '1112.0500', '1112.0600']
['1111.10', '1112.10', '1113.10', '1119.90', '1121.10', '1121.20', '1121.30', '1122.10', '1122.20', '1123.10']


In [7]:
def generate_description(row):
    """Build a one-paragraph text description for a single occupation row.

    Parameters
    ----------
    row : mapping-like (e.g. a ``pandas.Series`` or ``dict``)
        Must provide the keys ``"title"``, ``"nco_2015_code"`` and
        ``"nco_2004_code"``.

    Returns
    -------
    str
        One sentence per available field, followed by a fixed closing sentence
        naming the NCO 2015 dataset. A field whose value is missing
        (``None``, NaN, ``pd.NA``) or blank is left out, so the text never
        contains a literal ``"nan"``/``"None"`` placeholder. For rows with all
        three fields present the output is unchanged from the previous
        implementation.

    Raises
    ------
    KeyError
        If one of the three expected keys is absent from ``row``.
    """

    def _has_value(value):
        # pd.isna covers None, float NaN and pd.NA; a whitespace-only string
        # carries no information either.
        return not pd.isna(value) and str(value).strip() != ""

    labelled_fields = (
        ("Occupation Title", row["title"]),
        ("NCO 2015 Code", row["nco_2015_code"]),
        ("Equivalent NCO 2004 Code", row["nco_2004_code"]),
    )

    # Present values are inserted verbatim (no stripping or reformatting).
    sentences = [
        f"{label}: {value}."
        for label, value in labelled_fields
        if _has_value(value)
    ]
    sentences.append(
        "This occupation belongs to the National Classification of Occupations (NCO) 2015 dataset."
    )
    return " ".join(sentences)

# Call generate_description once per row (axis="columns" is the same as axis=1)
# and store the resulting text in a "description" column.
df["description"] = df.apply(
    func=generate_description,
    axis="columns",
)

df.head(n=5)


Unnamed: 0,nco_2015_code,title,nco_2004_code,description
0,1111.01,"Elected Official, Union Government",1111.1,"Occupation Title: Elected Official, Union Gove..."
1,1111.02,"Elected Official, State Government",1112.1,"Occupation Title: Elected Official, State Gove..."
2,1111.03,"Elected Official, Local Bodies",1113.1,"Occupation Title: Elected Official, Local Bodi..."
3,1111.99,"Legislators, Other",1119.9,"Occupation Title: Legislators, Other. NCO 2015..."
4,1112.01,"Administrative Official, Union Government",1121.1,"Occupation Title: Administrative Official, Uni..."


In [8]:
# Print the first five descriptions in full, each preceded by a separator line.
# .head(5) selects rows by position, so this works for any index and for
# frames with fewer than five rows; a label lookup such as df.loc[i, ...]
# raises KeyError in those cases.
for description in df["description"].head(5):
    print("-----")
    print(description)


-----
Occupation Title: Elected Official, Union Government. NCO 2015 Code: 1111.0100. Equivalent NCO 2004 Code: 1111.10. This occupation belongs to the National Classification of Occupations (NCO) 2015 dataset.
-----
Occupation Title: Elected Official, State Government. NCO 2015 Code: 1111.0200. Equivalent NCO 2004 Code: 1112.10. This occupation belongs to the National Classification of Occupations (NCO) 2015 dataset.
-----
Occupation Title: Elected Official, Local Bodies. NCO 2015 Code: 1111.0300. Equivalent NCO 2004 Code: 1113.10. This occupation belongs to the National Classification of Occupations (NCO) 2015 dataset.
-----
Occupation Title: Legislators, Other. NCO 2015 Code: 1111.9900. Equivalent NCO 2004 Code: 1119.90. This occupation belongs to the National Classification of Occupations (NCO) 2015 dataset.
-----
Occupation Title: Administrative Official, Union Government. NCO 2015 Code: 1112.0100. Equivalent NCO 2004 Code: 1121.10. This occupation belongs to the National Classifi

In [9]:
# Destination file for the table including the "description" column.
OUTPUT_CSV = Path("../data/processed/nco_with_descriptions.csv")

# index=False leaves the row index out of the written file.
df.to_csv(path_or_buf=OUTPUT_CSV, index=False)

print("Saved:", OUTPUT_CSV)
print("Final shape:", df.shape)


Saved: ..\data\processed\nco_with_descriptions.csv
Final shape: (3598, 4)
