In [1]:
# Import libraries with specific purposes in mind
import pandas as pd              # For data manipulation
import numpy as np               # For numerical operations
import matplotlib.pyplot as plt  # For visualizations
import seaborn as sns           # For attractive statistical plots
from ucimlrepo import fetch_ucirepo

# Plot appearance: apply the dark-grid seaborn style first, then layer the
# figure-size and font-size overrides on top of it in a single update
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams.update({
    'figure.figsize': (10, 6),
    'font.size': 12,
})

# DataFrame display: remove the column-count cap and the fixed width so
# wide frames are not truncated when displayed
pd.options.display.max_columns = None
pd.options.display.width = None

In [2]:
# Fetch the dataset (id=2 selects the dataset this notebook calls "UCI Adult")
print("Loading UCI Adult dataset...")
adult = fetch_ucirepo(id=2)

# Extract features and target (left untouched so later cells see the raw data)
X = adult.data.features
y = adult.data.targets

# Let's understand what we're working with
print(f"\nDataset shape: {X.shape}")
print(f"This means we have {X.shape[0]:,} people and {X.shape[1]} features about each person")

# Examine the first few rows
print("\nFirst 5 rows of our data:")
print(X.head())

# What are we trying to predict?
print("\nTarget variable (income) distribution:")
print(y.value_counts())

# NOTE(review): the combined Adult data is reported to contain labels written
# with a trailing period (e.g. '>50K.') alongside the plain form ('>50K') --
# compare with the counts printed above. An exact match against '>50K' on the
# raw labels would silently leave those rows out of the numerator, so the
# labels are normalized (whitespace and trailing '.' removed) before comparing.
income_labels = y.iloc[:, 0].astype(str).str.strip().str.rstrip('.')
pct_high_income = (income_labels == '>50K').mean() * 100
print(f"\nPercentage earning >50K: {pct_high_income:.2f}%")

Loading UCI Adult dataset...

Dataset shape: (48842, 14)
This means we have 48,842 people and 14 features about each person

First 5 rows of our data:
   age         workclass  fnlwgt  education  education-num  \
0   39         State-gov   77516  Bachelors             13   
1   50  Self-emp-not-inc   83311  Bachelors             13   
2   38           Private  215646    HS-grad              9   
3   53           Private  234721       11th              7   
4   28           Private  338409  Bachelors             13   

       marital-status         occupation   relationship   race     sex  \
0       Never-married       Adm-clerical  Not-in-family  White    Male   
1  Married-civ-spouse    Exec-managerial        Husband  White    Male   
2            Divorced  Handlers-cleaners  Not-in-family  White    Male   
3  Married-civ-spouse  Handlers-cleaners        Husband  Black    Male   
4  Married-civ-spouse     Prof-specialty           Wife  Black  Female   

   capital-gain  capital-loss  