# Assignment 6


In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras

In [2]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
column_names = [
    "age", "workclass", "fnlwgt", "education", "education-num", "marital-status",
    "occupation", "relationship", "race", "sex", "capital-gain", "capital-loss",
    "hours-per-week", "native-country", "income"
]
data = pd.read_csv(url, names=column_names, sep=',\s*', engine='python')

In [3]:
data.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


In [4]:
le = LabelEncoder()
data['income'] = le.fit_transform(data['income'])
categorical_columns = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country']
numerical_columns = ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']


In [5]:
data = pd.get_dummies(data, columns=categorical_columns, drop_first=True)


In [6]:
data.head()

Unnamed: 0,age,fnlwgt,education-num,capital-gain,capital-loss,hours-per-week,income,workclass_Federal-gov,workclass_Local-gov,workclass_Never-worked,...,native-country_Portugal,native-country_Puerto-Rico,native-country_Scotland,native-country_South,native-country_Taiwan,native-country_Thailand,native-country_Trinadad&Tobago,native-country_United-States,native-country_Vietnam,native-country_Yugoslavia
0,39,77516,13,2174,0,40,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,50,83311,13,0,0,13,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,38,215646,9,0,0,40,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,53,234721,7,0,0,40,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,28,338409,13,0,0,40,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
X = data.drop('income', axis=1)
y = data['income']


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [9]:
scaler = StandardScaler()
X_train[numerical_columns] = scaler.fit_transform(X_train[numerical_columns])
X_test[numerical_columns] = scaler.transform(X_test[numerical_columns])


In [10]:
# Define the wide and deep model
input_layer = tf.keras.layers.Input(shape=(X_train.shape[1],))

# Wide Component
wide = tf.keras.layers.Dense(128, activation='relu')(input_layer)

# Deep Component
deep = tf.keras.layers.Dense(64, activation='relu')(input_layer)
deep = tf.keras.layers.Dense(32, activation='relu')(deep)

In [11]:
# Combine Wide and Deep Components
wide_and_deep = tf.keras.layers.concatenate([wide, deep])

In [12]:
# Output Layer
output = tf.keras.layers.Dense(1, activation='sigmoid')(wide_and_deep)

model = tf.keras.models.Model(inputs=input_layer, outputs=output)

In [13]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [14]:
# Define callbacks
# 1. EarlyStopping: Stop training if the validation loss doesn't improve for a certain number of epochs.
early_stopping_callback = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True
)

In [15]:
# 2. ModelCheckpoint: Save the model's weights during training based on a monitored metric.
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath='best_model.h5', monitor='val_accuracy', save_best_only=True
)

In [16]:
# 3. TensorBoard: Log training metrics for visualization using TensorBoard.
tensorboard_callback = keras.callbacks.TensorBoard(log_dir='./logs')

In [17]:
# Define a learning rate schedule function
def lr_schedule(epoch):
    if epoch < 10:
        return 0.001
    elif epoch < 20:
        return 0.0001
    else:
        return 0.00001

# Learning rate scheduler callback
lr_scheduler_callback = keras.callbacks.LearningRateScheduler(lr_schedule)


In [18]:
# Train the model with callbacks
model.fit(
    X_train, y_train,
    epochs=100, batch_size=64,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping_callback, model_checkpoint_callback, tensorboard_callback,lr_scheduler_callback]
)


Epoch 1/100
Epoch 2/100
 44/407 [==>...........................] - ETA: 1s - loss: 0.3118 - accuracy: 0.8633

  saving_api.save_model(


Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100


<keras.src.callbacks.History at 0x25661b48450>

In [19]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy*100:.2f}%")

Test Loss: 0.3018
Test Accuracy: 86.09%
