In [8]:
import pandas as pd

# Raw column -> short analysis name. The key order is also the column order
# kept after loading.
renamed_columns = {
    "Reference area": "Country",
    "TIME_PERIOD": "Year",
    "Sex": "Gender",
    "Educational attainment level": "Education_Level",
    "OBS_VALUE": "Employment_Rate",
}

# Attainment levels, lowest to highest; used as the ordered category list.
# Any Education_Level value not listed here becomes NaN in the categorical column.
edu_order = [
    "Below upper secondary education",
    "Upper secondary or post-secondary non-tertiary education",
    "Tertiary education",
]

# Reference areas to leave out (aggregates rather than single countries)
exclude = ["OECD", "G20", "European Union (25 countries)"]

# Build the cleaned employment frame in a single pass:
#   1. read the raw CSV
#   2. keep only the needed columns and give them their short names
#   3. make Education_Level an ordered categorical and Employment_Rate numeric
#      (values that cannot be parsed become NaN)
#   4. keep Male/Female rows only and drop the aggregate reference areas
df_emp = (
    pd.read_csv(
        r"C:\Users\jagod\project_group_2_fds_202526\data\04_employment\raw\employment.csv",
        low_memory=False,
    )
    .loc[:, list(renamed_columns)]
    .rename(columns=renamed_columns)
    .assign(
        Education_Level=lambda frame: pd.Categorical(
            frame["Education_Level"],
            categories=edu_order,
            ordered=True,
        ),
        Employment_Rate=lambda frame: pd.to_numeric(
            frame["Employment_Rate"], errors="coerce"
        ),
    )
    .loc[
        lambda frame: frame["Gender"].isin(["Male", "Female"])
        & ~frame["Country"].isin(exclude)
    ]
)

# Reshape to one row per (Country, Year, Education_Level) with the Female and
# Male employment rates side by side.
# - observed=True: Education_Level is a Categorical grouper, and leaving
#   `observed` unset triggers pandas' FutureWarning about its changing default.
#   Category combinations that never occur carry no data and are dropped by
#   pivot_table's default dropna=True anyway, so the resulting rows are the same.
# - aggfunc="mean" is pivot_table's default, spelled out so it is visible that
#   duplicate (Country, Year, Education_Level, Gender) rows are averaged.
df_pivot = df_emp.pivot_table(
    index=["Country", "Year", "Education_Level"],
    columns="Gender",
    values="Employment_Rate",
    aggfunc="mean",
    observed=True,
).reset_index()

# Drop the leftover "Gender" label from the columns axis
df_pivot.columns.name = None

# Derived gender-gap measures, both computed from the two pivoted rate columns
df_pivot["F_to_M_ratio"] = df_pivot["Female"] / df_pivot["Male"]
# NOTE: this is female rate / (female rate + male rate), i.e. a ratio of
# employment rates, not a head-count share of employed people.
df_pivot["Female_Share"] = df_pivot["Female"] / (df_pivot["Female"] + df_pivot["Male"])

# Order rows by Country, then Year, then Education_Level, and renumber the index
df_pivot = df_pivot.sort_values(by=["Country", "Year", "Education_Level"]).reset_index(drop=True)

# Render the pivoted table with the notebook's rich display, floats at two decimals.
# The float format is set globally, so it also applies to frames shown afterwards.
from IPython.display import display

pd.options.display.float_format = "{:.2f}".format
display(df_pivot)


  df_pivot = df_emp.pivot_table(


Unnamed: 0,Country,Year,Education_Level,Female,Male,F_to_M_ratio,Female_Share
0,Argentina,2023,Below upper secondary education,53.46,84.42,0.63,0.39
1,Argentina,2023,Upper secondary or post-secondary non-tertiary...,66.21,88.49,0.75,0.43
2,Argentina,2023,Tertiary education,84.28,93.46,0.90,0.47
3,Australia,2024,Below upper secondary education,52.98,70.71,0.75,0.43
4,Australia,2024,Upper secondary or post-secondary non-tertiary...,74.34,86.40,0.86,0.46
...,...,...,...,...,...,...,...
133,United Kingdom,2024,Upper secondary or post-secondary non-tertiary...,72.82,83.07,0.88,0.47
134,United Kingdom,2024,Tertiary education,84.51,90.20,0.94,0.48
135,United States,2023,Below upper secondary education,43.47,69.18,0.63,0.39
136,United States,2023,Upper secondary or post-secondary non-tertiary...,63.89,76.14,0.84,0.46
