In [1]:
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

# Read the source dataset from the working directory
data = pd.read_csv("Loan_default_numeric.csv")

# Columns to keep: the model features followed by the 'Default' target.
# NOTE(review): the variable name implies these are all numeric in the CSV
# (categoricals presumably already encoded) — confirm against the file.
numeric_columns = [
    'Age',
    'Income',
    'LoanAmount',
    'CreditScore',
    'MonthsEmployed',
    'NumCreditLines',
    'InterestRate',
    'LoanTerm',
    'DTIRatio',
    'Education',
    'EmploymentType',
    'MaritalStatus',
    'HasMortgage',
    'HasDependents',
    'LoanPurpose',
    'HasCoSigner',
    'Default',
]

# Keep every row, but only the columns listed above (in that order)
data_numeric = data.loc[:, numeric_columns]

In [2]:
# Separate the predictors (X) from the 'Default' target (y)
X = data_numeric.drop(columns='Default')
y = data_numeric['Default']

# Hold out 20% of the rows for testing. stratify=y keeps the proportion of
# each 'Default' class the same in the train and test splits, so the held-out
# set is not skewed by chance when one class is rare.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Oversample with SMOTE on the training split only; the test split is left
# untouched so it contains no synthetic rows.
# sampling_strategy='auto' resamples every class except the majority one.
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

In [3]:
# Destination file for the oversampled data
output_csv_path = "SMOTEofNumeric.csv"

# Put the target column back next to the features, side by side.
# X_resampled / y_resampled come from SMOTE applied to the training split,
# so the held-out test rows are not part of this frame.
resampled_data = pd.concat(
    [X_resampled, y_resampled],
    axis="columns",
)

# Write the frame to disk without the row index
resampled_data.to_csv(output_csv_path, index=False)