In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder



In [2]:
# Download the credit-screening data set from the UCI repository.
# Column labels are supplied here (A1..A15 plus the target "class"), so every
# line of the file is read as data; "?" entries are parsed as missing values.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/credit-screening/crx.data"
column_names = [f"A{idx}" for idx in range(1, 16)] + ["class"]
df = pd.read_csv(url, header=None, names=column_names, na_values="?")


In [3]:
# Preview the first five rows to sanity-check the parse.
df.head(n=5)

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,class
0,b,30.83,0.0,u,g,w,v,1.25,t,t,1,f,g,202.0,0,+
1,a,58.67,4.46,u,g,q,h,3.04,t,t,6,f,g,43.0,560,+
2,a,24.5,0.5,u,g,q,h,1.5,t,f,0,f,g,280.0,824,+
3,b,27.83,1.54,u,g,w,v,3.75,t,t,5,t,g,100.0,3,+
4,b,20.17,5.625,u,g,w,v,1.71,t,f,0,f,s,120.0,0,+


In [4]:
# Summary statistics for the numeric columns, with the quartiles spelled out.
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,A2,A3,A8,A11,A14,A15
count,678.0,690.0,690.0,690.0,677.0,690.0
mean,31.568171,4.758725,2.223406,2.4,184.014771,1017.385507
std,11.957862,4.978163,3.346513,4.86294,173.806768,5210.102598
min,13.75,0.0,0.0,0.0,0.0,0.0
25%,22.6025,1.0,0.165,0.0,75.0,0.0
50%,28.46,2.75,1.0,0.0,160.0,5.0
75%,38.23,7.2075,2.625,3.0,276.0,395.5
max,80.25,28.0,28.5,67.0,2000.0,100000.0


In [5]:
# Count the missing entries in each column.
df.isna().sum(axis=0)

A1       12
A2       12
A3        0
A4        6
A5        6
A6        9
A7        9
A8        0
A9        0
A10       0
A11       0
A12       0
A13       0
A14      13
A15       0
class     0
dtype: int64

In [6]:
# Drop every row that has at least one missing value. The explicit .copy()
# makes the result an independent frame, so the column assignments below do not
# trigger pandas' SettingWithCopyWarning (seen on some pandas versions when
# writing to the result of a filtering operation).
df = df.dropna().copy()

# String-valued feature columns that are converted to integer codes.
CATEGORICAL_COLUMNS = ["A1", "A4", "A5", "A6", "A7", "A9", "A10", "A12", "A13"]

# Fit one encoder per column and keep each of them, so the integer codes can be
# mapped back to the original labels later with encoders[col].inverse_transform.
encoders = {}
for col in CATEGORICAL_COLUMNS:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

# Encode the target: "+" -> 1, "-" -> 0.
# Series.map turns any value missing from the mapping into NaN, so validate the
# result before writing it back. This also catches an accidental re-run of this
# cell, where the column already holds 0/1 instead of "+"/"-".
CLASS_CODES = {"+": 1, "-": 0}
class_codes = df["class"].map(CLASS_CODES)
if class_codes.isna().any():
    raise ValueError(
        "Column 'class' contains values other than '+' and '-'; "
        "reload the data before re-running this cell."
    )
df["class"] = class_codes

In [7]:
# Separate the target column from the predictor columns.
Y = df["class"]
X = df.drop(columns=["class"])

In [8]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=1,
)


In [9]:
# Random forest with 100 trees, seeded so repeated runs build the same forest.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)

In [10]:
# Train on the training split, then predict the held-out rows
# (fit returns the fitted estimator, so the two calls can be chained).
y_pred = model.fit(X_train, y_train).predict(X_test)

# Score the predictions against the true test labels.
classification_rep = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# One print call with a newline separator emits the same three-part output.
print(
    f"Accuracy: {accuracy:.2f}",
    "Classification Report:",
    classification_rep,
    sep="\n",
)

Accuracy: 0.90
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.90      0.91       109
           1       0.88      0.90      0.89        87

    accuracy                           0.90       196
   macro avg       0.90      0.90      0.90       196
weighted avg       0.90      0.90      0.90       196

