In [72]:
%pip install ucimlrepo

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.3 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [73]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [74]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
adult = fetch_ucirepo(id=2) 
  
# data (as pandas dataframes) 
X = adult.data.features 
y = adult.data.targets 

df = pd.concat([X, y], axis=1)

print(df.head())
print(df.shape)



   age         workclass  fnlwgt  education  education-num  \
0   39         State-gov   77516  Bachelors             13   
1   50  Self-emp-not-inc   83311  Bachelors             13   
2   38           Private  215646    HS-grad              9   
3   53           Private  234721       11th              7   
4   28           Private  338409  Bachelors             13   

       marital-status         occupation   relationship   race     sex  \
0       Never-married       Adm-clerical  Not-in-family  White    Male   
1  Married-civ-spouse    Exec-managerial        Husband  White    Male   
2            Divorced  Handlers-cleaners  Not-in-family  White    Male   
3  Married-civ-spouse  Handlers-cleaners        Husband  Black    Male   
4  Married-civ-spouse     Prof-specialty           Wife  Black  Female   

   capital-gain  capital-loss  hours-per-week native-country income  
0          2174             0              40  United-States  <=50K  
1             0             0             

In [75]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48842 entries, 0 to 48841
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   age             48842 non-null  int64 
 1   workclass       47879 non-null  object
 2   fnlwgt          48842 non-null  int64 
 3   education       48842 non-null  object
 4   education-num   48842 non-null  int64 
 5   marital-status  48842 non-null  object
 6   occupation      47876 non-null  object
 7   relationship    48842 non-null  object
 8   race            48842 non-null  object
 9   sex             48842 non-null  object
 10  capital-gain    48842 non-null  int64 
 11  capital-loss    48842 non-null  int64 
 12  hours-per-week  48842 non-null  int64 
 13  native-country  48568 non-null  object
 14  income          48842 non-null  object
dtypes: int64(6), object(9)
memory usage: 5.6+ MB
None


In [76]:
# Fill missing categorical values with mode
for col in ['workclass', 'occupation', 'native-country']:
    df[col] = df[col].fillna(df[col].mode()[0])


In [77]:
# Target encoding
y = df['income'].apply(lambda x: 1 if '>50K' in x else 0)

# Features
X = df.drop(columns=['income'])


In [78]:
categorical_cols = X.select_dtypes(include=['object']).columns
X = pd.get_dummies(X, columns=categorical_cols, drop_first=True)


In [79]:
X_train_df, X_test_df, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    shuffle=True
)


In [80]:
# Convert booleans to float
X_train_df = X_train_df.astype(float)
X_test_df  = X_test_df.astype(float)

# Train statistics
min_vals = X_train_df.min()
max_vals = X_train_df.max()

# Scale
X_train_scaled = (X_train_df - min_vals) / (max_vals - min_vals)
X_test_scaled  = (X_test_df  - min_vals) / (max_vals - min_vals)


In [81]:
x_train = X_train_scaled.values
x_test  = X_test_scaled.values

# y_train = y_train.values.reshape(-1, 1)
# y_test  = y_test.values.reshape(-1, 1)


In [82]:
# # Create a scaled copy
# X_new = X.copy()

# # Convert booleans to float (only in X_new)
# X_new = X_new.astype(float)

# # Minâ€“Max Scaling
# X_new = (X_new - X_new.min()) / (X_new.max() - X_new.min())


In [83]:
# X_np = X_new.values
# y_np = y.values.reshape(-1, 1)


In [84]:
def relu(z):
    return np.maximum(0,z)

def relu_derivative(z):
    return (z>0).astype(float) 

def sigmoid(z):
    return 1/(1+np.exp(-z))

In [85]:
def BCE_loss(y,y_hat):
    return -np.mean(y*np.log(y_hat+1e-8)+(1-y)*np.log(1-y_hat+1e-8))

In [86]:
def he_initialisation(in_dim,out_dim):
    w=np.random.randn(in_dim,out_dim)* np.sqrt(2.0/in_dim)
    b=np.zeros((1,out_dim))
    return w,b


In [87]:
def model(input_dim,h1,h2):
    params={}

    params['w1'],params['b1']=he_initialisation(input_dim,h1)
    params['w2'],params['b2']=he_initialisation(h1,h2)
    params['w3'],params['b3']=he_initialisation(h2,1)

    return params

In [88]:
def forward(X,params):
    Z1=X@params['w1']+params['b1']
    A1=relu(Z1)

    Z2=A1@params['w2']+params['b2']
    A2=relu(Z2)

    Z3=A2@params['w3']+params['b3']
    A3=sigmoid(Z3)

    cache={'X':X,'Z1':Z1,'Z2':Z2,'Z3':Z3,'A1':A1,'A2':A2,'A3':A3}

    return A3,cache

In [89]:
def bwd(y,params,cache):
    m=y.shape[0]
    grads={}

    #output layer
    dZ3=cache['A3']-y
    grads['dW3']=cache['A2'].T @ dZ3 /m
    grads['db3']=np.mean(dZ3,axis=0,keepdims=True)

    # Hidden layer 2
    dA2 = dZ3 @ params['w3'].T
    dZ2 = dA2 * relu_derivative(cache['Z2'])
    grads['dW2'] = cache['A1'].T @ dZ2 / m
    grads['db2'] = np.mean(dZ2,axis=0,keepdims=True)

    # Hidden layer 1
    dA1 = dZ2 @ params['w2'].T
    dZ1 = dA1 * relu_derivative(cache['Z1'])
    grads['dW1'] = cache['X'].T @ dZ1 / m
    grads['db1'] = np.mean(dZ1,axis=0,keepdims=True)

    return grads

In [90]:
def sgd_optimizer(params,grads,lr):
    params['w1']-= lr*grads['dW1']
    params['b1']-= lr*grads['db1']

    params['w2']-= lr*grads['dW2']
    params['b2']-= lr*grads['db2']

    params['w3']-= lr*grads['dW3']
    params['b3']-= lr*grads['db3']

In [91]:
def accuracy(y,y_hat):
    preds=(y_hat>0.5).astype(int)
    return np.mean(preds==y)

In [92]:
def train(x_train, y_train, x_test, y_test,
          hidden1=64, hidden2=32,
          lr=0.01, epochs=2000):

    params = model(x_train.shape[1], hidden1, hidden2)

    for epoch in range(epochs):
        y_hat, cache = forward(x_train, params)
        loss = BCE_loss(y_train, y_hat)

        grads = bwd(y_train, params, cache)
        sgd_optimizer(params, grads, lr)

        if epoch % 100 == 0:
            train_acc = accuracy(y_train, y_hat)
            print(f"Epoch {epoch} | Loss: {loss:.4f} | Acc: {train_acc:.4f}")

    # Test evaluation
    y_test_hat, _ = forward(x_test, params)
    test_acc = accuracy(y_test, y_test_hat)

    print("\nFinal Test Accuracy:", test_acc)
    return params



In [94]:
x_train = np.asarray(x_train, dtype=np.float64)
x_test  = np.asarray(x_test, dtype=np.float64)

y_train = np.asarray(y_train, dtype=np.float64).reshape(-1, 1)
y_test  = np.asarray(y_test, dtype=np.float64).reshape(-1, 1)


In [95]:
params=train(x_train, y_train, x_test, y_test)

Epoch 0 | Loss: nan | Acc: 0.7612
Epoch 100 | Loss: nan | Acc: 0.7612
Epoch 200 | Loss: nan | Acc: 0.7612
Epoch 300 | Loss: nan | Acc: 0.7612
Epoch 400 | Loss: nan | Acc: 0.7612
Epoch 500 | Loss: nan | Acc: 0.7612
Epoch 600 | Loss: nan | Acc: 0.7612
Epoch 700 | Loss: nan | Acc: 0.7612
Epoch 800 | Loss: nan | Acc: 0.7612
Epoch 900 | Loss: nan | Acc: 0.7612
Epoch 1000 | Loss: nan | Acc: 0.7612
Epoch 1100 | Loss: nan | Acc: 0.7612
Epoch 1200 | Loss: nan | Acc: 0.7612
Epoch 1300 | Loss: nan | Acc: 0.7612
Epoch 1400 | Loss: nan | Acc: 0.7612
Epoch 1500 | Loss: nan | Acc: 0.7612
Epoch 1600 | Loss: nan | Acc: 0.7612
Epoch 1700 | Loss: nan | Acc: 0.7612
Epoch 1800 | Loss: nan | Acc: 0.7612
Epoch 1900 | Loss: nan | Acc: 0.7612

Final Test Accuracy: 0.7589313133381104
