In [11]:
%pip install ucimlrepo

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.3 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [48]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [None]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
adult = fetch_ucirepo(id=2) 
  
# data (as pandas dataframes) 
X = adult.data.features 
y = adult.data.targets 

df = pd.concat([X, y], axis=1)

print(df.head())
print(df.shape)



   age         workclass  fnlwgt  education  education-num  \
0   39         State-gov   77516  Bachelors             13   
1   50  Self-emp-not-inc   83311  Bachelors             13   
2   38           Private  215646    HS-grad              9   
3   53           Private  234721       11th              7   
4   28           Private  338409  Bachelors             13   

       marital-status         occupation   relationship   race     sex  \
0       Never-married       Adm-clerical  Not-in-family  White    Male   
1  Married-civ-spouse    Exec-managerial        Husband  White    Male   
2            Divorced  Handlers-cleaners  Not-in-family  White    Male   
3  Married-civ-spouse  Handlers-cleaners        Husband  Black    Male   
4  Married-civ-spouse     Prof-specialty           Wife  Black  Female   

   capital-gain  capital-loss  hours-per-week native-country income  
0          2174             0              40  United-States  <=50K  
1             0             0             

In [50]:
# print(df.head())
print(df.info())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48842 entries, 0 to 48841
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   age             48842 non-null  int64 
 1   workclass       47879 non-null  object
 2   fnlwgt          48842 non-null  int64 
 3   education       48842 non-null  object
 4   education-num   48842 non-null  int64 
 5   marital-status  48842 non-null  object
 6   occupation      47876 non-null  object
 7   relationship    48842 non-null  object
 8   race            48842 non-null  object
 9   sex             48842 non-null  object
 10  capital-gain    48842 non-null  int64 
 11  capital-loss    48842 non-null  int64 
 12  hours-per-week  48842 non-null  int64 
 13  native-country  48568 non-null  object
 14  income          48842 non-null  object
dtypes: int64(6), object(9)
memory usage: 5.6+ MB
None


In [51]:
df=df.fillna("unknown")

In [52]:
df['income'] = df['income'].str.replace('.', '', regex=False)
df['income'] = (df['income'] == '>50K').astype(int)


In [55]:
categorical_cols = df.select_dtypes(include=['object']).columns

df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)


In [56]:
X = df.drop('income', axis=1)
y = df['income']


In [57]:
X=X.values.astype(np.float32)
y=y.values.reshape(-1,1).astype(np.float32)

In [75]:
x_train, x_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    shuffle=True
)

Activation functions


In [74]:
def relu(z):
    return np.maximum(0,z)

def relu_derivative(z):
    return (z>0).astype(float) 

def sigmoid(z):
    return 1/(1+np.exp(-z))

BCE loss function

In [72]:
def BCE_loss(y,y_hat):
    return -np.mean(y*np.log(y_hat+1e-8)+(1-y)*np.log(1-y_hat+1e-8))

HE initialization


In [73]:
def he_initialisation(in_dim,out_dim):
    w=np.random.randn(in_dim,out_dim)* np.sqrt(2.0/in_dim)
    b=np.zeros((1,out_dim))
    return w,b


Model initialization

In [71]:
def model(input_dim,h1,h2):
    params={}

    params['w1'],params['b1']=he_initialisation(input_dim,h1)
    params['w2'],params['b2']=he_initialisation(h1,h2)
    params['w3'],params['b3']=he_initialisation(h2,1)

    return params

Forward pass

In [70]:
def forward(X,params):
    Z1=X@params['w1']+params['b1']
    A1=relu(Z1)

    Z2=A1@params['w2']+params['b2']
    A2=relu(Z2)

    Z3=A2@params['w3']+params['b3']
    A3=sigmoid(Z3)

    cache={'X':X,'Z1':Z1,'Z2':Z2,'Z3':Z3,'A1':A1,'A2':A2,'A3':A3}

    return A3,cache


Backpropagation

In [None]:
def bwd(y,params,cache):
    m=y.shape[0]
    grads={}

    #output layer
    dZ3=cache['A3']-y
    grads['dW3']=cache['A2'].T @ dZ3 /m
    grads['db3']=np.mean(dZ3,axis=0,keepdims=True)

    # Hidden layer 2
    dA2 = dZ3 @ params['w3'].T
    dZ2 = dA2 * relu_derivative(cache['Z2'])
    grads['dW2'] = cache['A1'].T @ dZ2 / m
    grads['db2'] = np.mean(dZ2,axis=0,keepdims=True)

    # Hidden layer 1
    dA1 = dZ2 @ params['w2'].T
    dZ1 = dA1 * relu_derivative(cache['Z1'])
    grads['dW1'] = cache['X'].T @ dZ1 / m
    grads['db1'] = np.mean(dZ1,axis=0,keepdims=True)

    return grads


SGD optimizer

In [77]:
def sgd_optimizer(params,grads,lr):
    params['w1']-= lr*grads['dW1']
    params['b1']-= lr*grads['db1']

    params['w2']-= lr*grads['dW2']
    params['b2']-= lr*grads['db2']

    params['w3']-= lr*grads['dW3']
    params['b3']-= lr*grads['db3']


Accuracy functions

In [78]:
def accuracy(y,y_hat):
    preds=(y_hat>0.5).astype(int)
    return np.mean(preds==y)

Training loop

In [79]:
def train(x_train, y_train, x_test, y_test,
          hidden1=64, hidden2=32,
          lr=0.01, epochs=2000):

    params = model(x_train.shape[1], hidden1, hidden2)

    for epoch in range(epochs):
        y_hat, cache = forward(x_train, params)
        loss = BCE_loss(y_train, y_hat)

        grads = bwd(y_train, params, cache)
        sgd_optimizer(params, grads, lr)

        if epoch % 100 == 0:
            train_acc = accuracy(y_train, y_hat)
            print(f"Epoch {epoch} | Loss: {loss:.4f} | Acc: {train_acc:.4f}")

    # Test evaluation
    y_test_hat, _ = forward(x_test, params)
    test_acc = accuracy(y_test, y_test_hat)

    print("\nFinal Test Accuracy:", test_acc)
    return params



In [83]:
params=train(x_train, y_train, x_test, y_test)

  return 1/(1+np.exp(-z))


Epoch 0 | Loss: 14.0339 | Acc: 0.2381
Epoch 100 | Loss: 0.6386 | Acc: 0.7612
Epoch 200 | Loss: 0.6061 | Acc: 0.7612
Epoch 300 | Loss: 0.5860 | Acc: 0.7612
Epoch 400 | Loss: 0.5734 | Acc: 0.7612
Epoch 500 | Loss: 0.5653 | Acc: 0.7612
Epoch 600 | Loss: 0.5601 | Acc: 0.7612
Epoch 700 | Loss: 0.5567 | Acc: 0.7612
Epoch 800 | Loss: 0.5544 | Acc: 0.7612
Epoch 900 | Loss: 0.5529 | Acc: 0.7612
Epoch 1000 | Loss: 0.5519 | Acc: 0.7612
Epoch 1100 | Loss: 0.5512 | Acc: 0.7612
Epoch 1200 | Loss: 0.5507 | Acc: 0.7612
Epoch 1300 | Loss: 0.5504 | Acc: 0.7612
Epoch 1400 | Loss: 0.5502 | Acc: 0.7612
Epoch 1500 | Loss: 0.5501 | Acc: 0.7612
Epoch 1600 | Loss: 0.5500 | Acc: 0.7612
Epoch 1700 | Loss: 0.5499 | Acc: 0.7612
Epoch 1800 | Loss: 0.5498 | Acc: 0.7612
Epoch 1900 | Loss: 0.5498 | Acc: 0.7612

Final Test Accuracy: 0.7589313133381104


difference between the two results and conclusion:-

when the same fully connected neural network was trained on raw features and on Min–Max scaled features using identical hyperparameters, the final test accuracy remained almost the same in both cases (approximately 75.9%). However, the training dynamics differed substantially. With raw, unscaled data, the initial loss was very high (around 14), the model required roughly 300 epochs to reach 75% accuracy, and training was unstable in the early stages, often accompanied by numerical issues such as sigmoid overflow. In contrast, after applying min–max scaling, the initial loss dropped sharply to about 0.69, the model reached 75% accuracy much faster (within roughly 80–100 epochs), and training remained stable throughout, with little to no numerical warnings. This comparison shows that while feature scaling may not significantly change the final accuracy, it greatly improves convergence speed, numerical stability, and overall training behavior.
