<a href="https://colab.research.google.com/github/piyushcoder12/codealpha_tasks-/blob/main/Credit_Scoring_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score

# Load the raw dataset. The path points at the Colab session's local storage;
# change DATA_PATH if the CSV lives somewhere else or has a different name.
DATA_PATH = '/content/data.csv'
df = pd.read_csv(DATA_PATH)

# Columns that are treated as categorical and converted to integer codes.
categorical_cols = [
    'Gender',
    'Education',
    'Payment_History',
    'Employment_Status',
    'Residence_Type',
    'Marital_Status',
]

# Fit one LabelEncoder per column and keep it, keyed by column name, so the
# integer codes can be mapped back to the original labels if needed.
label_encoders = {col: LabelEncoder().fit(df[col]) for col in categorical_cols}

# Overwrite each categorical column in df with its integer codes.
for col, le in label_encoders.items():
    df[col] = le.transform(df[col])

# Separate the target column from the predictor columns.
TARGET_COL = 'Creditworthiness'
y = df[TARGET_COL]
X = df.drop(columns=[TARGET_COL])

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions equal in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to both partitions so test data never influences the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# -------------------------------
# Shared evaluation helper
# -------------------------------
def _evaluate_classifier(header, model, X_eval, y_true):
    """Print a classification report and ROC-AUC score for a fitted classifier.

    Parameters
    ----------
    header : str
        Banner line printed before the metrics.
    model : fitted estimator
        Must provide ``predict`` and ``predict_proba``.
    X_eval : array-like
        Held-out features, in the same representation the model was trained on.
    y_true : array-like
        True labels for the rows of ``X_eval``.

    Returns
    -------
    tuple
        ``(preds, proba)``: the hard predictions and the second column of
        ``predict_proba``, which is the score passed to ``roc_auc_score``.
    """
    preds = model.predict(X_eval)
    proba = model.predict_proba(X_eval)[:, 1]

    print(header)
    # zero_division=0 reports the same 0.00 values as the default setting but
    # without emitting an UndefinedMetricWarning when a class is never predicted.
    print("Classification Report:\n",
          classification_report(y_true, preds, zero_division=0))
    print("ROC-AUC Score:", roc_auc_score(y_true, proba))
    return preds, proba


# NOTE(review): the output saved with this notebook shows the logistic model
# never predicting class 0 (recall 0.00) and both ROC-AUC scores at about 0.50,
# i.e. no better than chance. Check the features and the class balance before
# relying on either model.

# -------------------------------
# Logistic Regression Model
# -------------------------------
# Trained and evaluated on the standardized features.
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train_scaled, y_train)

lr_preds, lr_proba = _evaluate_classifier(
    "=== Logistic Regression ===", lr_model, X_test_scaled, y_test
)

# -------------------------------
# Random Forest Model
# -------------------------------
# Trained and evaluated on the unscaled features.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

rf_preds, rf_proba = _evaluate_classifier(
    "\n=== Random Forest ===", rf_model, X_test, y_test
)


=== Logistic Regression ===
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       714
           1       0.70      1.00      0.83      1686

    accuracy                           0.70      2400
   macro avg       0.35      0.50      0.41      2400
weighted avg       0.49      0.70      0.58      2400

ROC-AUC Score: 0.503276280856352


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



=== Random Forest ===
Classification Report:
               precision    recall  f1-score   support

           0       0.41      0.02      0.03       714
           1       0.70      0.99      0.82      1686

    accuracy                           0.70      2400
   macro avg       0.56      0.50      0.43      2400
weighted avg       0.62      0.70      0.59      2400

ROC-AUC Score: 0.5051175274380214


In [None]:
# Mount Google Drive into the Colab session's filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)