In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Step 1: Load the dataset
df = pd.read_csv("1.simple_loan_data.csv")

# Step 2: Drop Loan_ID (not useful for prediction)
df.drop(columns=["Loan_ID"], inplace=True)

# Step 3: Handle missing values
# Fill categorical with mode
for col in ['Gender', 'Married', 'Dependents', 'Self_Employed']:
    df[col].fillna(df[col].mode()[0], inplace=True)

# Fill numeric with median
for col in ['LoanAmount', 'Loan_Amount_Term', 'Credit_History']:
    df[col].fillna(df[col].median(), inplace=True)

# Step 4: Encode categorical columns
le = LabelEncoder()
for col in df.select_dtypes(include='object').columns:
    df[col] = le.fit_transform(df[col])

# Step 5: Split features and target
X = df.drop("Loan_Status", axis=1)
y = df["Loan_Status"]

# Step 6: Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 7: Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# Step 8: Build the ANN model
model = Sequential()
model.add(Dense(32, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Step 9: Train the model
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(X_train, y_train, validation_split=0.2, epochs=100, batch_size=16, callbacks=[early_stop], verbose=1)

# Step 10: Evaluate the model
y_pred = model.predict(X_test)
y_pred_labels = (y_pred > 0.5).astype(int)

accuracy = accuracy_score(y_test, y_pred_labels)
print(f"✅ Test Accuracy: {accuracy * 100:.2f}%")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting value

Epoch 1/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 57ms/step - accuracy: 0.4793 - loss: 0.7450 - val_accuracy: 0.6869 - val_loss: 0.6518
Epoch 2/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.6803 - loss: 0.6494 - val_accuracy: 0.7172 - val_loss: 0.6138
Epoch 3/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.7560 - loss: 0.5911 - val_accuracy: 0.7273 - val_loss: 0.5881
Epoch 4/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.7486 - loss: 0.5840 - val_accuracy: 0.7576 - val_loss: 0.5642
Epoch 5/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.7850 - loss: 0.5378 - val_accuracy: 0.7879 - val_loss: 0.5426
Epoch 6/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.8167 - loss: 0.4945 - val_accuracy: 0.7980 - val_loss: 0.5273
Epoch 7/100
[1m25/25[0m [

logistic regression

In [5]:
model2 = Sequential()
model2.add(Dense(1, input_dim=X_train.shape[1], activation='sigmoid'))

model2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

history2 = model2.fit(X_train, y_train, validation_split=0.2, epochs=100, batch_size=16, callbacks=[early_stop], verbose=1)

y_pred2 = (model2.predict(X_test) > 0.5).astype(int)
print("🔸 Logistic_Model Accuracy:", accuracy_score(y_test, y_pred2))


Epoch 1/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - accuracy: 0.5359 - loss: 1.0035 - val_accuracy: 0.5051 - val_loss: 0.8931
Epoch 2/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5515 - loss: 0.8868 - val_accuracy: 0.5051 - val_loss: 0.8749
Epoch 3/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.5592 - loss: 0.8876 - val_accuracy: 0.5051 - val_loss: 0.8573
Epoch 4/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.5741 - loss: 0.8430 - val_accuracy: 0.5051 - val_loss: 0.8409
Epoch 5/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.5275 - loss: 0.9056 - val_accuracy: 0.4949 - val_loss: 0.8240
Epoch 6/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.5554 - loss: 0.8443 - val_accuracy: 0.4949 - val_loss: 0.8071
Epoch 7/100
[1m25/25[0m [

tiny overfit model

In [6]:
model3 = Sequential()
model3.add(Dense(8, input_dim=X_train.shape[1], activation='relu'))
model3.add(Dense(4, activation='relu'))
model3.add(Dense(1, activation='sigmoid'))

model3.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

history3 = model3.fit(X_train, y_train, validation_split=0.2, epochs=100, batch_size=8, callbacks=[early_stop], verbose=1)

y_pred3 = (model3.predict(X_test) > 0.5).astype(int)
print("🔸 Tiny_Overfit_Model Accuracy:", accuracy_score(y_test, y_pred3))


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 28ms/step - accuracy: 0.2977 - loss: 0.8454 - val_accuracy: 0.4949 - val_loss: 0.7440
Epoch 2/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.4785 - loss: 0.7535 - val_accuracy: 0.5758 - val_loss: 0.7038
Epoch 3/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5776 - loss: 0.7189 - val_accuracy: 0.6465 - val_loss: 0.6791
Epoch 4/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6796 - loss: 0.6685 - val_accuracy: 0.6465 - val_loss: 0.6636
Epoch 5/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.6641 - loss: 0.6710 - val_accuracy: 0.6768 - val_loss: 0.6503
Epoch 6/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.7335 - loss: 0.6347 - val_accuracy: 0.6869 - val_loss: 0.6375
Epoch 7/100
[1m49/49[0m [32m━━━━━━━━━━━━━

deeper network

In [7]:
model2 = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dropout(0.4),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

model2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

history2 = model2.fit(X_train, y_train, validation_split=0.2, epochs=100, batch_size=16, callbacks=[early_stop], verbose=0)

y_pred2 = (model2.predict(X_test) > 0.5).astype(int)
print("Model 2 Accuracy:", accuracy_score(y_test, y_pred2))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
Model 2 Accuracy: 0.7886178861788617


cnn model

In [8]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, Reshape

# Reshape X data for Conv1D: (samples, timesteps, features) => treat each feature as a timestep
X_train_cnn = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test_cnn = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Build CNN model
cnn_model = Sequential()
cnn_model.add(Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(X_train.shape[1], 1)))
cnn_model.add(MaxPooling1D(pool_size=2))
cnn_model.add(Flatten())
cnn_model.add(Dense(16, activation='relu'))
cnn_model.add(Dropout(0.3))
cnn_model.add(Dense(1, activation='sigmoid'))

cnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train CNN model
history_cnn = cnn_model.fit(X_train_cnn, y_train, validation_split=0.2, epochs=100, batch_size=16, callbacks=[early_stop], verbose=1)

# Predict and evaluate
y_pred_cnn = (cnn_model.predict(X_test_cnn) > 0.5).astype(int)
print("🔹 CNN Model Accuracy:", accuracy_score(y_test, y_pred_cnn))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 61ms/step - accuracy: 0.5346 - loss: 0.7136 - val_accuracy: 0.7475 - val_loss: 0.6568
Epoch 2/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 0.7680 - loss: 0.6253 - val_accuracy: 0.7475 - val_loss: 0.6173
Epoch 3/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - accuracy: 0.7552 - loss: 0.5877 - val_accuracy: 0.7778 - val_loss: 0.5840
Epoch 4/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - accuracy: 0.7623 - loss: 0.5555 - val_accuracy: 0.7677 - val_loss: 0.5635
Epoch 5/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.7962 - loss: 0.5268 - val_accuracy: 0.7677 - val_loss: 0.5446
Epoch 6/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.8421 - loss: 0.5029 - val_accuracy: 0.7677 - val_loss: 0.5354
Epoch 7/100
[1m25/25[0m [