In [None]:
# Task 1: Credit Scoring Model (Controlled Accuracy)

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Seed NumPy's global RNG so the synthetic sample is identical on every run
np.random.seed(42)

# Number of synthetic applicants to generate
n = 120

# Synthetic applicant features. The draws are evaluated in the order written,
# which fixes how the seeded random stream is consumed -- do not reorder them.
data = dict(
    income=np.random.normal(60000, 15000, n).astype(int),
    age=np.random.randint(21, 60, n),
    loan_amount=np.random.normal(200000, 60000, n).astype(int),
    credit_history=np.random.choice([0, 1], size=n, p=[0.4, 0.6]),
)

df = pd.DataFrame(data)

# Baseline approval rule: income strictly above 50000 AND a credit_history
# flag of 1; stored as 0/1 integers
df["approved"] = (
    df["income"].gt(50000) & df["credit_history"].eq(1)
).astype(int)

# Flip the label of 15 distinct, randomly chosen rows so the target is not a
# perfect function of the features
noise_idx = np.random.choice(df.index, size=15, replace=False)
df.loc[noise_idx, "approved"] = df.loc[noise_idx, "approved"].rsub(1)

# Feature matrix (every column except the label) and the label vector
X = df.drop(columns=["approved"])
y = df["approved"]

# Train-test split
# Stratify on the label so the train and test partitions keep the same
# approval rate; on a dataset this small an unstratified 30% hold-out can end
# up with a noticeably different class balance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Train model
# The features live on very different scales (income and loan_amount are in
# the tens/hundreds of thousands, credit_history is 0/1). LogisticRegression
# applies an L2 penalty by default, which is not scale-invariant, and the
# solver converges poorly on such badly conditioned inputs. Standardising
# inside a pipeline fixes both and guarantees the scaler statistics are learnt
# from the training rows only (no leakage from the test set).
# NOTE: `model` is now a Pipeline; the fitted classifier is `model[-1]`.
model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=500))
model.fit(X_train, y_train)

# Predictions
# Hard class labels for the threshold metrics, and the positive-class
# probability (column 1) for ROC-AUC
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]

# Metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
print("ROC-AUC:", roc_auc_score(y_test, y_prob))


Accuracy: 0.9722222222222222
Precision: 0.95
Recall: 1.0
F1 Score: 0.9743589743589743
ROC-AUC: 0.9628482972136223
