In [122]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, multilabel_confusion_matrix, mean_squared_error, accuracy_score
import warnings
warnings.filterwarnings("ignore")

In [123]:
df = pd.read_csv('../datasets/Mall_Customers.csv')

In [124]:
# Map genders to numbers
gender_map = {'Male': 0, 'Female': 1}
pd.set_option('future.no_silent_downcasting', True)
df['Gender'] = df['Gender'].replace(gender_map)

In [125]:
# Define spending categories based on score thresholds
def categorize_spending(score):
    if score < 33:
        return 1
    elif score <= 66:
        return 2
    else:
        return 3

In [126]:
# Apply the categorization function to create a new column 'Spending Category'
df['Spending Category'] = df['Spending Score (1-100)'].apply(categorize_spending)
df.drop(columns=['CustomerID'], inplace=True)
df.drop(columns=['Spending Score (1-100)'], inplace=True)

In [127]:
X = df.drop('Spending Category', axis = 1)
y = df['Spending Category']

# normalize data
scaler = MinMaxScaler(feature_range=(0, 1))
X_rescaled = scaler.fit_transform(X)
X = pd.DataFrame(data = X_rescaled, columns = X.columns)

set_of_categories = y.value_counts().index.tolist()
set_of_categories= pd.DataFrame({'Spending Category': set_of_categories})
y = pd.get_dummies(y)

print("Pre-processed data :")
print(X)

print("Pre-processed categories :")
print(y)

Pre-processed data :
     Gender       Age  Annual Income (k$)
0       0.0  0.019231            0.000000
1       0.0  0.057692            0.000000
2       1.0  0.038462            0.008197
3       1.0  0.096154            0.008197
4       1.0  0.250000            0.016393
..      ...       ...                 ...
195     1.0  0.326923            0.860656
196     1.0  0.519231            0.909836
197     0.0  0.269231            0.909836
198     0.0  0.269231            1.000000
199     0.0  0.230769            1.000000

[200 rows x 3 columns]
Pre-processed categories :
         1      2      3
0    False   True  False
1    False  False   True
2     True  False  False
3    False  False   True
4    False   True  False
..     ...    ...    ...
195  False  False   True
196   True  False  False
197  False  False   True
198   True  False  False
199  False  False   True

[200 rows x 3 columns]


In [128]:
data_train, data_test, class_train, class_test = train_test_split(X, y, test_size=0.1)

mlp = MLPClassifier(solver = 'sgd', random_state = 42, activation = 'logistic', learning_rate_init = 0.3, batch_size = 100, hidden_layer_sizes = (15, 5), max_iter = 500)
mlp

In [129]:
y = df['Spending Category']
y = pd.get_dummies(y)
mlp.fit(data_train, class_train)

pred = mlp.predict(data_test)
pred

array([[0, 1, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 1, 0],
       [1, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 0, 0],
       [0, 1, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 0, 0],
       [0, 1, 0],
       [0, 0, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 1, 0],
       [0, 0, 0],
       [0, 1, 0]])

In [130]:
print("Accuracy: ", accuracy_score(class_test, pred))
print("Mean Square Error: ", mean_squared_error(class_test, pred))

print("Confusion Matrix for each label: ")
print(multilabel_confusion_matrix(class_test, pred))

print("Classification Report: ")
print(classification_report(class_test, pred))

Accuracy:  0.7
Mean Square Error:  0.13333333333333333
Confusion Matrix for each label: 
[[[15  0]
  [ 2  3]]

 [[ 9  1]
  [ 1  9]]

 [[14  1]
  [ 3  2]]]
Classification Report: 
              precision    recall  f1-score   support

           0       1.00      0.60      0.75         5
           1       0.90      0.90      0.90        10
           2       0.67      0.40      0.50         5

   micro avg       0.88      0.70      0.78        20
   macro avg       0.86      0.63      0.72        20
weighted avg       0.87      0.70      0.76        20
 samples avg       0.70      0.70      0.70        20

