In [17]:
import os

import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [18]:
# Load the raw customer table; the path is relative to the notebook's working directory.
DATA_PATH = '../datasets/Shopping Mall Customer Segmentation Data.csv'
df = pd.read_csv(DATA_PATH)

In [19]:
# Encode Gender as a numeric feature: 'Male' -> 0, 'Female' -> 1.
gender_map = {'Male': 0, 'Female': 1}

# Opt in to pandas' future replace() semantics, under which the result is no
# longer downcast implicitly.
pd.set_option('future.no_silent_downcasting', True)

# With implicit downcasting disabled, replace() leaves the column as object
# dtype even when every value is now an int, so the conversion is requested
# explicitly with infer_objects(). Values that are not keys of gender_map are
# left unchanged by replace(); in that case the column stays object dtype.
# Re-running this cell is harmless: already-encoded 0/1 values match no key.
df['Gender'] = df['Gender'].replace(gender_map).infer_objects()

In [20]:
# Bucket a spending score into one of three ordered tiers
def categorize_spending(score):
    """Return the spending tier for ``score``.

    Returns 1 when ``score`` is below 33, 2 when it is at most 66, and 3 in
    every other case.
    """
    if score < 33:
        return 1
    return 2 if score <= 66 else 3

In [21]:
# Derive the 'Spending Category' target from the raw score, then remove the
# identifier column and the raw score the target was computed from.
df['Spending Category'] = df['Spending Score (1-100)'].map(categorize_spending)
df.drop(columns=['CustomerID', 'Spending Score (1-100)'], inplace=True)

In [22]:
### Decision ###
# Fit a decision-tree classifier that predicts 'Spending Category' from every
# other column of df, persist the fitted model, and print hold-out metrics.
# NOTE(review): the output recorded with this cell shows ~0.33 accuracy on
# three classes, i.e. roughly chance level -- confirm the features carry signal.
data = df.copy()  # work on a copy so df itself is not modified by this cell
X = data.drop(columns=['Spending Category'])
y = data['Spending Category']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# joblib.dump does not create missing parent directories, so make sure the
# target folder exists rather than failing after the model has been trained.
model_path = '../models/decision.pkl'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(clf, model_path)

y_pred = clf.predict(X_test)

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

           1       0.29      0.30      0.30       950
           2       0.33      0.34      0.34       993
           3       0.37      0.35      0.36      1073

    accuracy                           0.33      3016
   macro avg       0.33      0.33      0.33      3016
weighted avg       0.33      0.33      0.33      3016

Confusion Matrix:
[[285 341 324]
 [340 342 311]
 [347 350 376]]
