In [2]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [3]:

# Read the raw dataset from the CSV file into a DataFrame.
csv_path = "expanded_data.csv"
df = pd.read_csv(csv_path)

In [4]:
# Preview the first rows (head() defaults to five) to sanity-check the load.
df.head()

Unnamed: 0,Age,Gender,Income,Education,Marital Status,Number of Children,Home Ownership,Credit Score
0,25,Female,50000,Bachelor's Degree,Single,0,Rented,High
1,30,Male,100000,Master's Degree,Married,2,Owned,High
2,35,Female,75000,Doctorate,Married,1,Owned,High
3,40,Male,125000,High School Diploma,Single,0,Owned,High
4,45,Female,100000,Bachelor's Degree,Married,3,Owned,High


In [5]:
# Summarize column dtypes and non-null counts to spot missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 225 entries, 0 to 224
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Age                 225 non-null    int64 
 1   Gender              225 non-null    object
 2   Income              225 non-null    int64 
 3   Education           225 non-null    object
 4   Marital Status      225 non-null    object
 5   Number of Children  225 non-null    int64 
 6   Home Ownership      225 non-null    object
 7   Credit Score        225 non-null    object
dtypes: int64(3), object(5)
memory usage: 14.2+ KB


In [6]:
# Remove exact duplicate rows so repeated records are not over-weighted.
df = df.drop_duplicates()
# A row without a label cannot supervise training, so drop it instead of
# inventing a 'Credit Score' for it through mode imputation.
df = df.dropna(subset=['Credit Score'])
# Fill any remaining gaps in the feature columns with each column's most
# frequent value. Rebinding (rather than inplace=True) avoids hidden in-place
# mutation of the frame.
df = df.fillna(df.mode().iloc[0])

In [7]:
# Count missing values per column; every entry should be zero after cleaning.
df.isna().sum()

Age                   0
Gender                0
Income                0
Education             0
Marital Status        0
Number of Children    0
Home Ownership        0
Credit Score          0
dtype: int64

In [8]:
# Separate the label column from the predictor columns.
target = 'Credit Score'
y = df[target]
X = df.drop(columns=[target])


In [9]:
# Integer-encode every object-dtype feature column, one encoder per column.
categorical_cols = X.select_dtypes(include='object').columns
encoders = {col: LabelEncoder() for col in categorical_cols}
for col, le in encoders.items():
    X[col] = le.fit_transform(X[col])

# Keep each column's class list: a value's position in the list is its code.
categorical_mappings = {col: list(le.classes_) for col, le in encoders.items()}

In [10]:
# Show which feature columns were detected as categorical (object dtype).
print(categorical_cols)

Index(['Gender', 'Education', 'Marital Status', 'Home Ownership'], dtype='object')


In [11]:
# Record the feature column order as a plain Python list.
feature_names = list(X.columns)

In [12]:
# Display the feature names in the column order of X.
feature_names

['Age',
 'Gender',
 'Income',
 'Education',
 'Marital Status',
 'Number of Children',
 'Home Ownership']

In [13]:
# Inspect the feature matrix after encoding the categorical columns.
X

Unnamed: 0,Age,Gender,Income,Education,Marital Status,Number of Children,Home Ownership
0,25,0,50000,1,1,0,1
1,30,1,100000,4,0,2,0
2,35,0,75000,2,0,1,0
3,40,1,125000,3,1,0,0
4,45,0,100000,1,0,3,0
...,...,...,...,...,...,...,...
220,28,0,31978,0,1,0,1
221,28,0,29753,3,1,0,1
222,27,0,33275,1,1,0,1
223,29,0,25761,3,1,0,1


In [14]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [15]:
# Random forest with 100 trees; 'balanced' reweights classes by inverse
# frequency, and the seed makes training repeatable.
model = RandomForestClassifier(
    n_estimators=100,
    class_weight='balanced',
    random_state=42,
)
model.fit(X_train, y_train)

In [16]:
# Predict on the held-out rows and print per-class precision / recall / F1.
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

     Average       0.94      1.00      0.97        16
        High       1.00      0.92      0.96        13
         Low       1.00      1.00      1.00        16

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.98        45
weighted avg       0.98      0.98      0.98        45



In [17]:
# Persist the fitted model, the feature names, and the categorical class
# lists as three separate pickle files.
artifacts = {
    "model.pkl": model,
    "columns.pkl": feature_names,
    "categorical_mappings.pkl": categorical_mappings,
}
for filename, payload in artifacts.items():
    with open(filename, "wb") as f:
        pickle.dump(payload, f)

In [18]:
# Report how many rows fall into each target class.
print("🔍 Target Class Distribution:")
class_counts = df['Credit Score'].value_counts()
print(class_counts)


🔍 Target Class Distribution:
Credit Score
High       75
Low        75
Average    74
Name: count, dtype: int64


In [None]:

# Browse the first 50 rows of df for a closer look at the records.
df.head(50)

Unnamed: 0,Age,Gender,Income,Education,Marital Status,Number of Children,Home Ownership,Credit Score
0,25,Female,50000,Bachelor's Degree,Single,0,Rented,High
1,30,Male,100000,Master's Degree,Married,2,Owned,High
2,35,Female,75000,Doctorate,Married,1,Owned,High
3,40,Male,125000,High School Diploma,Single,0,Owned,High
4,45,Female,100000,Bachelor's Degree,Married,3,Owned,High
5,50,Male,150000,Master's Degree,Married,0,Owned,High
6,26,Female,40000,Associate's Degree,Single,0,Rented,Average
7,31,Male,60000,Bachelor's Degree,Single,0,Rented,Average
8,36,Female,80000,Master's Degree,Married,2,Owned,High
9,41,Male,105000,Doctorate,Single,0,Owned,High
