# Credit Scoring Model using Random Forest
This notebook is a submission for the CodeAlpha internship Task 1.

In [None]:

# Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve


In [None]:

# Step 2: Load Dataset
# Read the CSV directly from its raw GitHub URL into a DataFrame.
url = "https://raw.githubusercontent.com/Statology/Python-Guides/main/default.csv"
data = pd.read_csv(url)

# Emit the heading and the leading rows in a single call; sep="\n" puts the
# preview on the line after the heading.
print("Sample Data:", data.head(), sep="\n")


In [None]:

# Step 3: Preprocessing
# Replace the 'student' feature and the 'default' target with integer codes.
# Each column is fitted with its own fresh LabelEncoder, so the two encodings
# are independent of each other.
# NOTE(review): the columns are described as Yes/No values mapped to 1/0 —
# confirm against the loaded data.
for column in ('student', 'default'):
    data[column] = LabelEncoder().fit_transform(data[column])


In [None]:

# Step 4: Define Features and Target
# Predictors: the three input columns, selected in this order.
X = data.loc[:, ['student', 'balance', 'income']]
# Response: the 'default' column the model is trained to predict.
y = data.loc[:, 'default']


In [None]:

# Step 5: Train-Test Split
# Hold out 20% of the rows for evaluation; random_state fixes the shuffle so
# the split is reproducible.
# stratify=y keeps the class proportions of the target the same in the train
# and test partitions, so a rare class cannot end up under-represented in the
# held-out data purely by chance.
# NOTE(review): stratification requires every class in y to have at least
# two rows — confirm for the loaded dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [None]:

# Step 6: Train Random Forest Classifier
# Forest of 100 estimators, seeded so that repeated runs build the same model.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
# Fit on the training partition only; the test partition stays unseen.
model.fit(X_train, y_train)


In [None]:

# Step 7: Make Predictions
# Per-row class probabilities from the fitted model; only column index 1 is kept.
# NOTE(review): presumably that column is the positive ("default") class —
# confirm via model.classes_.
y_proba = model.predict_proba(X_test)[:, 1]
# Hard class labels for the same held-out rows.
y_pred = model.predict(X_test)


In [None]:

# Step 8: Evaluate Model
# Heading first (preceded by a blank line), then the text report comparing
# the true test labels with the predicted labels.
print("\nClassification Report:")
report_text = classification_report(y_test, y_pred)
print(report_text)


In [None]:

# Confusion Matrix
# Tabulate true test labels against predicted labels.
cm = confusion_matrix(y_test, y_pred)

# Start from a fresh figure so nothing left on the current pyplot figure
# (for example by an earlier plot on a non-interactive backend, where
# plt.show() does not discard it) bleeds into this chart.
plt.figure()
# annot=True with fmt='d' writes each cell's integer count onto the heatmap.
sns.heatmap(cm, annot=True, fmt='d', cmap='Greens')
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.tight_layout()
# Save before show(): some backends tear the figure down once it is shown.
plt.savefig("confusion_matrix.png")
plt.show()
# Release the figure so the next plot cannot accidentally draw on top of it.
plt.close()


In [None]:

# ROC Curve
# False/true positive rates across thresholds (the thresholds themselves are
# discarded), plus the area under the curve, both from the predicted
# probabilities.
fpr, tpr, _ = roc_curve(y_test, y_proba)
roc_auc = roc_auc_score(y_test, y_proba)

# Start from a fresh figure so this curve is never drawn over a previous
# plot that is still the current pyplot figure (plt.show() does not discard
# the figure on non-interactive backends).
plt.figure()
plt.plot(fpr, tpr, label=f"ROC AUC = {roc_auc:.2f}")
# Dashed diagonal as a visual baseline from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend(loc="lower right")
# Explicit True: a bare grid() call toggles the current state, which would
# switch the grid off if it were already enabled through rcParams.
plt.grid(True)
plt.tight_layout()
# Save before show(): some backends tear the figure down once it is shown.
plt.savefig("roc_curve.png")
plt.show()
# Release the figure so the next plot cannot accidentally draw on top of it.
plt.close()


In [None]:

# Step 9: Feature Importance
# Importance score per feature from the fitted forest, paired with the
# feature column names.
importances = model.feature_importances_
features = X.columns
# argsort is ascending, so the most important feature gets the highest bar
# position and ends up at the top of the horizontal bar chart.
indices = np.argsort(importances)
# One bar slot per feature; shared by the bars and their tick labels.
positions = range(len(indices))

# Start from a fresh figure so the bars are never drawn over a previous plot
# that is still the current pyplot figure (plt.show() does not discard the
# figure on non-interactive backends).
plt.figure()
plt.barh(positions, importances[indices], align='center')
plt.yticks(positions, [features[i] for i in indices])
plt.title("Feature Importance")
plt.xlabel("Importance")
plt.grid(True)
plt.tight_layout()
# Save before show(): some backends tear the figure down once it is shown.
plt.savefig("feature_importance.png")
plt.show()
# Release the figure so any later plot cannot accidentally draw on top of it.
plt.close()
