In [1]:
import pandas as pd

# 1. Load the Adult dataset (comma-separated, drop the “?” placeholders)
cols = [
    "age","workclass","fnlwgt","education","education-num",
    "marital-status","occupation","relationship","race",
    "sex","capital-gain","capital-loss","hours-per-week",
    "native-country","income"
]
df = pd.read_csv(r"E:\DSBDAL\DSBDALExam DataSets\DSBDALExam DataSets\Adult\adult_dataset.csv",
    names=cols,
    na_values=" ?",
    skipinitialspace=True
).dropna()

In [2]:
# s. Create subsets by country, sex, and race
df_us     = df[df["native-country"] == "United-States"]
df_female = df[df["sex"] == "Female"]
df_white  = df[df["race"] == "White"]

In [3]:
print("US subset:",       df_us.shape)
print("Female subset:",   df_female.shape)
print("White subset:",    df_white.shape)

US subset: (29170, 15)
Female subset: (10771, 15)
White subset: (27816, 15)


In [4]:
df_us_female = pd.concat([df_us, df_female], ignore_index=True).drop_duplicates()

In [5]:
print("Merged US+Female:",df_us_female.shape)

Merged US+Female: (30239, 15)


In [6]:
df_sorted = df.sort_values("hours-per-week", ascending=False).reset_index(drop=True)

In [7]:
print("\nTop 3 busiest (hrs/week):\n", df_sorted[["sex","hours-per-week"]].head(3))


Top 3 busiest (hrs/week):
     sex  hours-per-week
0  Male              99
1  Male              99
2  Male              99


In [8]:
df_transposed = df.head(5).T

In [10]:
print("\nTransposed (first 5 rows):\n", df_transposed)


Transposed (first 5 rows):
                             0                   1                  2  \
age                        39                  50                 38   
workclass           State-gov    Self-emp-not-inc            Private   
fnlwgt                  77516               83311             215646   
education           Bachelors           Bachelors            HS-grad   
education-num              13                  13                  9   
marital-status  Never-married  Married-civ-spouse           Divorced   
occupation       Adm-clerical     Exec-managerial  Handlers-cleaners   
relationship    Not-in-family             Husband      Not-in-family   
race                    White               White              White   
sex                      Male                Male               Male   
capital-gain             2174                   0                  0   
capital-loss                0                   0                  0   
hours-per-week             40      

In [11]:
df_long = df.reset_index().rename(columns={"index":"RowID"})
df_melted = pd.melt(
    df_long,
    id_vars=["RowID","sex","race","native-country"],
    var_name="feature",
    value_name="value"
)

In [12]:
print("\nMelted sample:\n", df_melted.head(6))


Melted sample:
    RowID     sex   race native-country feature value
0      0    Male  White  United-States     age    39
1      1    Male  White  United-States     age    50
2      2    Male  White  United-States     age    38
3      3    Male  Black  United-States     age    53
4      4  Female  Black           Cuba     age    28
5      5  Female  White  United-States     age    37


In [13]:
df_wide = (
    df_melted
      .pivot(index="RowID", columns="feature", values="value")
      .reset_index(drop=True)
)

In [14]:
print("\nRe-pivoted wide (first 5 rows):\n", df_wide.head())


Re-pivoted wide (first 5 rows):
 feature age capital-gain capital-loss  education education-num  fnlwgt  \
0        39         2174            0  Bachelors            13   77516   
1        50            0            0  Bachelors            13   83311   
2        38            0            0    HS-grad             9  215646   
3        53            0            0       11th             7  234721   
4        28            0            0  Bachelors            13  338409   

feature hours-per-week income      marital-status         occupation  \
0                   40  <=50K       Never-married       Adm-clerical   
1                   13  <=50K  Married-civ-spouse    Exec-managerial   
2                   40  <=50K            Divorced  Handlers-cleaners   
3                   40  <=50K  Married-civ-spouse  Handlers-cleaners   
4                   40  <=50K  Married-civ-spouse     Prof-specialty   

feature   relationship         workclass  
0        Not-in-family         State-gov  
1 