In [None]:
# Step 1: Load Data
import pandas as pd

# Location of the remote CSV that feeds the rest of the notebook.
url = "https://raw.githubusercontent.com/priandoyo/ml-csm/main/data.csv"

# Download and parse the file into a DataFrame named `df`.
df = pd.read_csv(filepath_or_buffer=url)

# Preview the first five rows as this cell's output.
df.head(n=5)


In [None]:
# Step 2: Preprocessing
from sklearn.preprocessing import LabelEncoder

# Build the working frame from `df` without the "Name" column (not used as a
# feature in this demo). `drop` returns a new frame, so `df` is left untouched.
data = df.drop(columns=["Name"])

# Columns holding text categories that must become integer codes.
categorical_cols = ['Job', 'Gender', 'City', 'Approval']

# Fit one encoder per categorical column and keep it by column name, so the
# same mapping can be reused later to encode new inputs and decode predictions.
label_encoders = {name: LabelEncoder().fit(data[name]) for name in categorical_cols}

# Replace each categorical column with its integer codes.
for col, le in label_encoders.items():
    data[col] = le.transform(data[col])

# Preview the encoded frame as this cell's output.
data.head()


In [None]:
# Step 3: Train a Decision Tree Model
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Target vector: the encoded "Approval" column.
y = data["Approval"]
# Feature matrix: every remaining column of the preprocessed frame.
X = data.drop(columns=[y.name])

# Shallow tree (depth 3) with a fixed seed so repeated runs build the same tree.
tree_params = dict(max_depth=3, random_state=42)
model = DecisionTreeClassifier(**tree_params)

# Fit on the full dataset; as the last expression this also displays the estimator.
model.fit(X, y)


In [None]:
# Step 4: Visualize the Decision Tree
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Render the fitted tree on a wide canvas so node text stays legible.
plt.figure(figsize=(16, 8))

# plot_tree documents `feature_names` and `class_names` as lists of str.
# Passing a pandas Index / NumPy array directly is rejected by the parameter
# validation of some scikit-learn releases, and non-string class labels break
# label rendering, so both are converted to plain lists of strings.
# `classes_` is ordered by encoded value, which matches the order of the
# model's classes because the target was produced by this same encoder.
plot_tree(
    model,
    feature_names=[str(name) for name in X.columns],
    class_names=[str(label) for label in label_encoders['Approval'].classes_],
    filled=True,  # colour each node by its majority class
)
plt.title("Credit Scoring Decision Tree")
plt.show()


In [None]:
# Step 5: Test a Single Customer Manually

# Example customer input, expressed with raw (un-encoded) values:
sample = {
    "Age": 32,
    "Job": "Engineer",
    "Gender": "M",
    "City": "Bandung",
    "Salary": 4700000
}

# Fail early, with a readable message, if the sample lacks a feature the model
# was trained on (otherwise the error only surfaces inside `predict`).
missing_features = [name for name in X.columns if name not in sample]
if missing_features:
    raise ValueError(f"sample is missing feature(s): {missing_features}")

# Convert to model input using the fitted encoders.
# The row is built by walking the training columns (`X.columns`) so that the
# feature names and their order always match what the model saw in `fit`,
# regardless of the key order in `sample` or the column order of the CSV.
# Columns that have a fitted encoder are encoded; all others pass through as-is.
# Note: a category value never seen during fitting makes `transform` raise
# ValueError.
sample_encoded = pd.DataFrame([{
    name: (
        label_encoders[name].transform([sample[name]])[0]
        if name in label_encoders
        else sample[name]
    )
    for name in X.columns
}])

# Predict the encoded class, then map it back to the original "Approval" label.
pred = model.predict(sample_encoded)[0]
pred_label = label_encoders["Approval"].inverse_transform([pred])[0]

print("Prediction result:", pred_label)
