In [20]:
# CREDIT RISK ASSESSMENT USING RANDOM FOREST

# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Map CSV to a DataFrame
df = pd.read_csv("credit_risk_dataset.csv")

# Converting Y and N to 1 and 0
df["cb_person_default_on_file"] = df["cb_person_default_on_file"].map({'Y': 1, 'N': 0})

# Declare labels
X = df.drop(["loan_status"], axis = 1)
y = df["loan_status"]

# Hot-Encoding non-binary values
X = pd.get_dummies(X, columns = ["person_home_ownership", "loan_intent"])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

# Create and train the model
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the results
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.9191345711216817

Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.99      0.95      5055
           1       0.94      0.68      0.79      1462

    accuracy                           0.92      6517
   macro avg       0.93      0.83      0.87      6517
weighted avg       0.92      0.92      0.91      6517



In [15]:
# CREDIT RISK ASSESSMENT USING LOGISTIC REGRESSION

# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler

# Map CSV to a DataFrame
df = pd.read_csv("credit_risk_dataset.csv")

# Converting Y and N to 1 and 0
df["cb_person_default_on_file"] = df["cb_person_default_on_file"].map({'Y': 1, 'N': 0})

# Declare labels
X = df.drop(["loan_status"], axis = 1)
y = df["loan_status"]

# Dropping rows with NaNs
X = X.dropna()
y = y[X.index]

# Hot-Encoding non-binary values
X = pd.get_dummies(X, columns = ["person_home_ownership", "loan_intent"])

# Scaling X with StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size = 0.2)

# Create and train the model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the results
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.8517807262569832

Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.95      0.91      4516
           1       0.72      0.49      0.58      1212

    accuracy                           0.85      5728
   macro avg       0.80      0.72      0.75      5728
weighted avg       0.84      0.85      0.84      5728



In [22]:
# CREDIT RISK ASSESSMENT USING KNNs (K-Nearest Neighbors)

# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

# Map CSV to a DataFrame
df = pd.read_csv("credit_risk_dataset.csv")

# Converting Y and N to 1 and 0
df["cb_person_default_on_file"] = df["cb_person_default_on_file"].map({'Y': 1, 'N': 0})

# Declare labels
X = df.drop(["loan_status"], axis = 1)
y = df["loan_status"]

# Dropping rows with NaNs
X = X.dropna()
y = y[X.index]

# Hot-Encoding non-binary values
X = pd.get_dummies(X, columns = ["person_home_ownership", "loan_intent"])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

# Create and train the model
model = KNeighborsClassifier()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the results
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.8332751396648045

Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.93      0.90      4501
           1       0.64      0.49      0.56      1227

    accuracy                           0.83      5728
   macro avg       0.76      0.71      0.73      5728
weighted avg       0.82      0.83      0.82      5728



In [23]:
# CREDIT RISK ASSESSMENT USING SVMs (Support Vector Machines)

# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Map CSV to a DataFrame
df = pd.read_csv("credit_risk_dataset.csv")

# Converting Y and N to 1 and 0
df["cb_person_default_on_file"] = df["cb_person_default_on_file"].map({'Y': 1, 'N': 0})

# Declare labels
X = df.drop(["loan_status"], axis = 1)
y = df["loan_status"]

# Dropping rows with NaNs
X = X.dropna()
y = y[X.index]

# Hot-Encoding non-binary values
X = pd.get_dummies(X, columns = ["person_home_ownership", "loan_intent"])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

# Create and train the model
model = SVC()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the results
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.803945530726257

Classification Report:
               precision    recall  f1-score   support

           0       0.80      1.00      0.89      4511
           1       0.89      0.09      0.16      1217

    accuracy                           0.80      5728
   macro avg       0.84      0.54      0.53      5728
weighted avg       0.82      0.80      0.73      5728



In [24]:
# CREDIT RISK ASSESSMENT USING XGBoost

# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score

# Map CSV to a DataFrame
df = pd.read_csv("credit_risk_dataset.csv")

# Converting Y and N to 1 and 0
df["cb_person_default_on_file"] = df["cb_person_default_on_file"].map({'Y': 1, 'N': 0})

# Declare labels
X = df.drop(["loan_status"], axis = 1)
y = df["loan_status"]

# Dropping rows with NaNs
X = X.dropna()
y = y[X.index]

# Hot-Encoding non-binary values
X = pd.get_dummies(X, columns = ["person_home_ownership", "loan_intent"])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

# Create and train the model
model = XGBClassifier()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the results
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.932786312849162

Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.99      0.96      4477
           1       0.95      0.73      0.83      1251

    accuracy                           0.93      5728
   macro avg       0.94      0.86      0.89      5728
weighted avg       0.93      0.93      0.93      5728



In [27]:
# CREDIT RISK ASSESSMENT USING NAIVE BAYES (GaussianNB)

# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, accuracy_score

# Map CSV to a DataFrame
df = pd.read_csv("credit_risk_dataset.csv")

# Converting Y and N to 1 and 0
df["cb_person_default_on_file"] = df["cb_person_default_on_file"].map({'Y': 1, 'N': 0})

# Declare labels
X = df.drop(["loan_status"], axis = 1)
y = df["loan_status"]

# Dropping rows with NaNs
X = X.dropna()
y = y[X.index]

# Hot-Encoding non-binary values
X = pd.get_dummies(X, columns = ["person_home_ownership", "loan_intent"])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

# Create and train the model
model = GaussianNB()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the results
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.8098812849162011

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.95      0.89      4502
           1       0.61      0.31      0.41      1226

    accuracy                           0.81      5728
   macro avg       0.72      0.63      0.65      5728
weighted avg       0.79      0.81      0.78      5728



In [28]:
# CREDIT RISK ASSESSMENT USING DECISION TREES

# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

# Map CSV to a DataFrame
df = pd.read_csv("credit_risk_dataset.csv")

# Converting Y and N to 1 and 0
df["cb_person_default_on_file"] = df["cb_person_default_on_file"].map({'Y': 1, 'N': 0})

# Declare labels
X = df.drop(["loan_status"], axis = 1)
y = df["loan_status"]

# Dropping rows with NaNs
X = X.dropna()
y = y[X.index]

# Hot-Encoding non-binary values
X = pd.get_dummies(X, columns = ["person_home_ownership", "loan_intent"])

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

# Create and train the model
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the results
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.8791899441340782

Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.91      0.92      4509
           1       0.69      0.78      0.73      1219

    accuracy                           0.88      5728
   macro avg       0.82      0.84      0.83      5728
weighted avg       0.89      0.88      0.88      5728

