In [1]:
import pandas as pd

# 1. Load the Adult dataset (comma-separated, drop the “?” placeholders)
cols = [
    "age","workclass","fnlwgt","education","education-num",
    "marital-status","occupation","relationship","race",
    "sex","capital-gain","capital-loss","hours-per-week",
    "native-country","income"
]
df = pd.read_csv(r"E:\DSBDAL\DSBDALExam DataSets\DSBDALExam DataSets\Adult\adult_dataset.csv",
    names=cols,
    na_values=" ?",
    skipinitialspace=True
).dropna()

In [2]:
# s. Create subsets by country, sex, and race
df_us     = df[df["native-country"] == "United-States"]
df_female = df[df["sex"] == "Female"]
df_white  = df[df["race"] == "White"]

In [3]:
print("US subset:",       df_us.shape)
print("Female subset:",   df_female.shape)
print("White subset:",    df_white.shape)

US subset: (43832, 15)
Female subset: (16192, 15)
White subset: (41762, 15)


In [4]:
df_us_female = pd.concat([df_us, df_female], ignore_index=True).drop_duplicates()

In [5]:
print("Merged US+Female:",df_us_female.shape)

Merged US+Female: (45372, 15)


In [6]:
df_sorted = df.sort_values("hours-per-week", ascending=False).reset_index(drop=True)

In [7]:
print("\nTop 3 busiest (hrs/week):\n", df_sorted[["sex","hours-per-week"]].head(3))


Top 3 busiest (hrs/week):
       sex  hours-per-week
0  gender  hours-per-week
1  Female              99
2  Female              99


In [8]:
df_transposed = df.head(5).T

In [9]:
print("\nTransposed (first 5 rows):\n", df_transposed)


Transposed (first 5 rows):
                               0                  1                   2  \
age                         age                 25                  38   
workclass             workclass            Private             Private   
fnlwgt                   fnlwgt             226802               89814   
education             education               11th             HS-grad   
education-num   educational-num                  7                   9   
marital-status   marital-status      Never-married  Married-civ-spouse   
occupation           occupation  Machine-op-inspct     Farming-fishing   
relationship       relationship          Own-child             Husband   
race                       race              Black               White   
sex                      gender               Male                Male   
capital-gain       capital-gain                  0                   0   
capital-loss       capital-loss                  0                   0   
hours-per

In [10]:
df_long = df.reset_index().rename(columns={"index":"RowID"})
df_melted = pd.melt(
    df_long,
    id_vars=["RowID","sex","race","native-country"],
    var_name="feature",
    value_name="value"
)

In [11]:
print("\nMelted sample:\n", df_melted.head(6))


Melted sample:
    RowID     sex   race  native-country feature value
0      0  gender   race  native-country     age   age
1      1    Male  Black   United-States     age    25
2      2    Male  White   United-States     age    38
3      3    Male  White   United-States     age    28
4      4    Male  Black   United-States     age    44
5      5  Female  White   United-States     age    18


In [12]:
df_wide = (
    df_melted
      .pivot(index="RowID", columns="feature", values="value")
      .reset_index(drop=True)
)

In [13]:
print("\nRe-pivoted wide (first 5 rows):\n", df_wide.head())


Re-pivoted wide (first 5 rows):
 feature  age  capital-gain  capital-loss     education    education-num  \
0        age  capital-gain  capital-loss     education  educational-num   
1         25             0             0          11th                7   
2         38             0             0       HS-grad                9   
3         28             0             0    Assoc-acdm               12   
4         44          7688             0  Some-college               10   

feature  fnlwgt  hours-per-week  income      marital-status  \
0        fnlwgt  hours-per-week  income      marital-status   
1        226802              40   <=50K       Never-married   
2         89814              50   <=50K  Married-civ-spouse   
3        336951              40    >50K  Married-civ-spouse   
4        160323              40    >50K  Married-civ-spouse   

feature         occupation  relationship  workclass  
0               occupation  relationship  workclass  
1        Machine-op-inspct  