## Credit Repayment Prediction




This notebook predicts whether a credit card user will repay their credit next month using a machine learning model. It includes data cleaning, model training, and a graphical user interface (GUI) for user input.

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE, RandomOverSampler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import tkinter as tk
from tkinter import messagebox


## Load, Clean, and Balance the Dataset

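This function loads the dataset, drops rows that contain negative bill or payment amounts, one-hot encodes the categorical columns, splits the data into training and test sets, and balances the training set using SMOTE followed by RandomOverSampler.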


In [3]:
def load_clean_and_balance_data(file_path):
    """Load the credit-card CSV and return a balanced training set plus a test set.

    The pipeline is: read the CSV, drop the row-identifier column if there is
    one, discard rows with a negative bill or payment amount, one-hot encode
    SEX / EDUCATION / MARRIAGE, split 80/20, and oversample the training
    split only (the test split is left untouched).

    Args:
        file_path: Path to a CSV that contains the BILL_AMT1..6 and
            PAY_AMT1..6 columns, the SEX, EDUCATION and MARRIAGE columns and
            the ``default.payment.next.month`` target column.

    Returns:
        A tuple ``(X_train_balanced, X_test, y_train_balanced, y_test,
        columns)`` where ``columns`` is the feature column order used for
        training, so callers can build new rows in the same order.
    """
    df = pd.read_csv(file_path)

    # NOTE(review): assumes a column named 'ID', when present, is only a row
    # identifier. Training on it adds noise and a spurious feature that the
    # GUI can never fill in meaningfully. errors='ignore' keeps files without
    # such a column working.
    df = df.drop(columns=['ID'], errors='ignore')

    # Keep only rows where every bill/payment amount is non-negative. One
    # combined mask replaces twelve successive filtered copies of the frame.
    amount_columns = (
        [f'BILL_AMT{i}' for i in range(1, 7)]
        + [f'PAY_AMT{i}' for i in range(1, 7)]
    )
    df = df[(df[amount_columns] >= 0).all(axis=1)]

    # Convert categorical variables to numerical (first level of each dropped)
    df = pd.get_dummies(df, columns=['SEX', 'EDUCATION', 'MARRIAGE'], drop_first=True)

    # Separate target variable and features
    X = df.drop('default.payment.next.month', axis=1)
    y = df['default.payment.next.month']

    # Split before resampling so no synthetic sample ends up in the test set
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Balance the training data using SMOTE, then RandomOverSampler.
    # NOTE(review): with default settings SMOTE should already equalize the
    # class counts, so the second pass likely adds nothing - confirm before
    # removing it.
    smote = SMOTE(random_state=42)
    ros = RandomOverSampler(random_state=42)
    X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)
    X_train_balanced, y_train_balanced = ros.fit_resample(X_train_balanced, y_train_balanced)

    return X_train_balanced, X_test, y_train_balanced, y_test, X.columns  # Return column order


## Train and Evaluate the Model

This function scales the data, trains an XGBoost model, and evaluates its performance.


In [4]:
def train_and_evaluate_model(X_train, y_train, X_test, y_test):
    """Scale the features, fit an XGBoost classifier and report test metrics.

    Args:
        X_train: Training features; the scaler is fitted on these.
        y_train: Binary training labels (0/1).
        X_test: Held-out features, transformed with the fitted scaler.
        y_test: Held-out labels used for the printed evaluation.

    Returns:
        A tuple ``(model, scaler)``: the fitted ``XGBClassifier`` and the
        fitted ``StandardScaler`` that must be applied to any new input
        before calling ``model.predict``.
    """
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # scale_pos_weight is meant to be negatives / positives. The previous
    # total / positives evaluated to about 2 on an already balanced training
    # set, which weighted the positive class twice over. Fall back to a
    # neutral 1.0 when either class is absent so the ratio never divides by
    # zero or becomes 0.
    positives = int(sum(y_train))
    negatives = len(y_train) - positives
    if positives > 0 and negatives > 0:
        pos_weight = negatives / positives
    else:
        pos_weight = 1.0

    # XGBoost Model
    model = XGBClassifier(random_state=42, scale_pos_weight=pos_weight)
    model.fit(X_train_scaled, y_train)

    # Evaluate on the untouched test split (previously computed but unused)
    y_pred = model.predict(X_test_scaled)
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print("Classification report:")
    print(classification_report(y_test, y_pred))
    print("Confusion matrix:")
    print(confusion_matrix(y_test, y_pred))

    return model, scaler


## Create the GUI

This function sets up a graphical user interface (GUI) where users can input their data and get a prediction.


In [5]:
def create_gui(model, scaler, X_train_columns):
    """Build and run a Tk window that collects user input and shows a prediction.

    The window asks for credit limit, age, gender, education, marital status
    and the most recent repayment status. Every other training feature is
    sent to the model as 0. The call blocks in ``root.mainloop()`` until the
    window is closed.

    Args:
        model: Fitted classifier exposing ``predict``; class 0 is reported as
            "Will repay", anything else as "Will not repay".
        scaler: Fitted scaler exposing ``transform``, applied to the input row
            before prediction.
        X_train_columns: Feature column names in training order; the input
            row is built with exactly these columns.
    """
    import math  # local import: only needed for the input validation below

    root = tk.Tk()
    root.title("Credit Repayment Prediction")

    # Dropdown label -> category code used in the one-hot column names
    education_mapping = {
        'Graduate School': '1',
        'University': '2',
        'High School': '3',
        'Others': '4'
    }
    marriage_mapping = {
        'Married': '1',
        'Single': '2',
        'Others': '3'
    }

    def predict():
        """Read the widgets, build one feature row and show the prediction."""
        try:
            # Get inputs and validate them
            credit_limit = float(credit_limit_entry.get().strip())
            age = float(age_entry.get().strip())
            # float() happily parses 'nan' and 'inf', and negative numbers;
            # reject them here so they surface as an input error instead of
            # being fed to the model.
            if not (math.isfinite(credit_limit) and math.isfinite(age)):
                raise ValueError("credit limit and age must be finite numbers")
            if credit_limit < 0 or age <= 0:
                raise ValueError("credit limit must be non-negative and age must be positive")

            pay_status_num = pay_status_choices[pay_status_var.get()]
            education_value = education_mapping[education_var.get()]
            marriage_value = marriage_mapping[marriage_var.get()]

            # Start with every training feature at zero, then fill in what
            # the user supplied. Keys that are not training columns are
            # dropped when the DataFrame is built with columns= below.
            input_data = dict.fromkeys(X_train_columns, 0)

            # Male corresponds to SEX_2 = 0, Female to SEX_2 = 1
            input_data['SEX_2'] = 0 if gender_var.get() == "Male" else 1
            input_data[f'EDUCATION_{education_value}'] = 1
            input_data[f'MARRIAGE_{marriage_value}'] = 1

            # Other inputs
            input_data['LIMIT_BAL'] = credit_limit
            input_data['AGE'] = age
            input_data['PAY_0'] = pay_status_num

            # Create DataFrame while maintaining column order
            input_df = pd.DataFrame([input_data], columns=X_train_columns)
            print(f"Input Data Prepared: {input_df}")  # Debugging

            input_df_scaled = scaler.transform(input_df)
            # predict() returns one label per row; take the single row's
            # label instead of testing the whole array for truthiness.
            prediction = model.predict(input_df_scaled)[0]
            message = "Prediction: Will repay" if prediction == 0 else "Prediction: Will not repay"
            messagebox.showinfo("Result", message)
        except ValueError as e:
            print(f"ValueError: {e}")  # Debugging
            messagebox.showerror("Error", "Please enter valid numbers for all fields.")
        except KeyError as e:
            print(f"KeyError: {e}")  # Debugging
            messagebox.showerror("Error", f"Key Error: {e}")
        except tk.TclError as e:
            print(f"TclError: {e}")  # Debugging
            messagebox.showerror("Error", "Please select an option for all list boxes.")
        except Exception as e:
            # Last-resort boundary: a callback must not take the GUI down
            print(f"Exception: {e}")  # Debugging
            messagebox.showerror("Error", str(e))

    # Input fields
    tk.Label(root, text="Credit Limit").grid(row=0, column=0)
    credit_limit_entry = tk.Entry(root)
    credit_limit_entry.grid(row=0, column=1)

    tk.Label(root, text="Age").grid(row=1, column=0)
    age_entry = tk.Entry(root)
    age_entry.grid(row=1, column=1)

    # Dropdown for gender
    tk.Label(root, text="Gender").grid(row=2, column=0)
    gender_var = tk.StringVar(root)
    gender_choices = ['Male', 'Female']
    gender_var.set('Male')
    tk.OptionMenu(root, gender_var, *gender_choices).grid(row=2, column=1)

    # Dropdown for education
    tk.Label(root, text="Education").grid(row=3, column=0)
    education_var = tk.StringVar(root)
    education_choices = ['Graduate School', 'University', 'High School', 'Others']
    education_var.set('University')
    tk.OptionMenu(root, education_var, *education_choices).grid(row=3, column=1)

    # Dropdown for marital status
    tk.Label(root, text="Marital Status").grid(row=4, column=0)
    marriage_var = tk.StringVar(root)
    marriage_choices = ['Married', 'Single', 'Others']
    marriage_var.set('Single')
    tk.OptionMenu(root, marriage_var, *marriage_choices).grid(row=4, column=1)

    # Dropdown for repayment status (label -> value stored in PAY_0)
    pay_status_var = tk.StringVar(root)
    pay_status_choices = {
        'Pay Duly': -1,
        'No Consumption': 0,
        'Delay 1 month': 1,
        'Delay 2 months': 2,
        'Delay 3 months': 3
    }
    pay_status_var.set('Pay Duly')
    tk.Label(root, text="Repayment Status Month 1").grid(row=5, column=0)
    tk.OptionMenu(root, pay_status_var, *pay_status_choices.keys()).grid(row=5, column=1)

    tk.Button(root, text="Predict", command=predict).grid(row=6, column=0, columnspan=2)
    root.mainloop()


## Main Execution Flow

Load the dataset, train the model, and launch the GUI.


In [7]:
# Location of the dataset CSV, relative to the notebook's working directory
file_path = 'UCI_Credit_Card.csv'

# Prepare the data: balanced training split, untouched test split and the
# feature column order needed later to build GUI input rows.
(
    X_train_balanced,
    X_test,
    y_train_balanced,
    y_test,
    X_train_columns,
) = load_clean_and_balance_data(file_path)

# Fit the scaler and the classifier on the balanced training data
model, scaler = train_and_evaluate_model(
    X_train=X_train_balanced,
    y_train=y_train_balanced,
    X_test=X_test,
    y_test=y_test,
)

# Open the prediction window; this call blocks until the window is closed
create_gui(model=model, scaler=scaler, X_train_columns=X_train_columns)


## Conclusion

This notebook provided a complete workflow for predicting credit repayment using machine learning. We loaded, cleaned, and balanced the dataset, trained an XGBoost model, and created a user-friendly graphical interface for making predictions.


---
<p style="text-align: left; font-size: 18px; font-style: italic;">Suheyl Ozdemir</p>
