In [79]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


In [80]:
credit_record = pd.read_csv('credit_record.csv')
application_record = pd.read_csv('application_record.csv')

In [81]:
merged_data = pd.merge(application_record, credit_record, on='ID', how='inner')

In [82]:
def classify_credit_status(status):
    if status in ['0', 'C']:
        return 'Non-Risky'
    else:
        return 'Risky'

In [83]:
merged_data['CREDIT_STATUS'] = merged_data['STATUS'].apply(classify_credit_status)

In [84]:
merged_data.head()

Unnamed: 0,ID,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,...,DAYS_EMPLOYED,FLAG_MOBIL,FLAG_WORK_PHONE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,MONTHS_BALANCE,STATUS,CREDIT_STATUS
0,5008804,M,Y,Y,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,-4542,1,1,0,0,,2.0,0,C,Non-Risky
1,5008804,M,Y,Y,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,-4542,1,1,0,0,,2.0,-1,C,Non-Risky
2,5008804,M,Y,Y,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,-4542,1,1,0,0,,2.0,-2,C,Non-Risky
3,5008804,M,Y,Y,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,-4542,1,1,0,0,,2.0,-3,C,Non-Risky
4,5008804,M,Y,Y,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,-4542,1,1,0,0,,2.0,-4,C,Non-Risky


In [85]:
X = merged_data[['CODE_GENDER', 'FLAG_OWN_CAR', 'FLAG_OWN_REALTY', 'CNT_CHILDREN', 'AMT_INCOME_TOTAL', 'NAME_INCOME_TYPE', 'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE', 'DAYS_BIRTH', 'DAYS_EMPLOYED', 'FLAG_MOBIL', 'FLAG_WORK_PHONE', 'FLAG_PHONE', 'FLAG_EMAIL', 'OCCUPATION_TYPE', 'CNT_FAM_MEMBERS']]
y = merged_data['CREDIT_STATUS']

In [86]:
label_encoder = LabelEncoder()
X_encoded = X.apply(label_encoder.fit_transform)

In [87]:
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

In [88]:
print("Traning Samples:", len(X_train))
print("Test Samples:", len(X_test))

Traning Samples: 622172
Test Samples: 155543


In [89]:
model = RandomForestClassifier()

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [90]:
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

In [91]:
print(f'Accuracy: {accuracy:.2f}')
print(f'Confusion Matrix:\n{conf_matrix}')
print(f'Classification Report:\n{classification_rep}')

Accuracy: 0.85
Confusion Matrix:
[[118935   5294]
 [ 18105  13209]]
Classification Report:
              precision    recall  f1-score   support

   Non-Risky       0.87      0.96      0.91    124229
       Risky       0.71      0.42      0.53     31314

    accuracy                           0.85    155543
   macro avg       0.79      0.69      0.72    155543
weighted avg       0.84      0.85      0.83    155543



In [98]:
# Create sample customer data (use sample column names and values)
new_customer_data = pd.DataFrame({
    'CODE_GENDER': ['M'],
    'FLAG_OWN_CAR': ['N'],
    'FLAG_OWN_REALTY': ['N'],
    'CNT_CHILDREN': [3],
    'AMT_INCOME_TOTAL': [450000],
    'NAME_INCOME_TYPE': ['Working'],
    'NAME_EDUCATION_TYPE': ['Secondary / secondary special'],
    'NAME_FAMILY_STATUS': ['Single / not married'],
    'NAME_HOUSING_TYPE': ['House / apartment'],
    'DAYS_BIRTH': [-2500],
    'DAYS_EMPLOYED': [-500],
    'FLAG_MOBIL': [1],
    'FLAG_WORK_PHONE': [0],
    'FLAG_PHONE': [1],
    'FLAG_EMAIL': [0],
    'OCCUPATION_TYPE': ['Laborers'],
    'CNT_FAM_MEMBERS': [4]
})


In [101]:
label_encoder = LabelEncoder()
new_customer_data_encoded = new_customer_data.apply(label_encoder.fit_transform)

prediction = model.predict(new_customer_data_encoded)
print(f'Predicted Credit Status: {prediction[0]}')

Predicted Credit Status: Non-Risky
