In [12]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from kerastuner.tuners import RandomSearch
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix

In [13]:
df = pd.read_csv('../datasets/Mall_Customers.csv')

In [14]:
# Map genders to numbers
gender_map = {'Male': 0, 'Female': 1}
df['Gender'] = df['Gender'].replace(gender_map)

  df['Gender'] = df['Gender'].replace(gender_map)


In [15]:
# Define spending categories based on score thresholds
def categorize_spending(score):
    if score < 33:
        return 1
    elif score <= 66:
        return 2
    else:
        return 3

In [16]:
# Apply the categorization function to create a new column 'Spending Category'
df['Spending Category'] = df['Spending Score (1-100)'].apply(categorize_spending)
df.drop(columns=['CustomerID'], inplace=True)
df.drop(columns=['Spending Score (1-100)'], inplace=True)

In [17]:
X = df.drop('Spending Category', axis=1)
y = df['Spending Category']

# Normalize data
scaler = MinMaxScaler(feature_range=(0, 1))
X_rescaled = scaler.fit_transform(X)
X = pd.DataFrame(data=X_rescaled, columns=X.columns)

# One-hot encoding
y = pd.get_dummies(y)

In [18]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
def create_model_and_train(hp):
    model = Sequential()
    model.add(Dense(units=hp.Int('units_input', min_value=3, max_value=45, step=3),
                    activation='relu', input_shape=(X_train.shape[1],)))

    # Add hidden layers
    for i in range(hp.Int('num_layers', 1, 3)):
        model.add(Dense(units=hp.Int(f'units_{i}', min_value=3, max_value=45, step=3), activation='relu'))

    model.add(Dense(y_train.shape[1], activation='softmax'))
    model.compile(optimizer=Adam(hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
                  loss='categorical_crossentropy', metrics=['accuracy'])
    
    history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)
    
    return model

# Define tuner
tuner = RandomSearch(
    create_model_and_train,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=3,
    directory='hyperparameter_tuning',
    project_name='mall_customers'
)

# Hyperparameter tuning
tuner.search(X_train, y_train, epochs=10, validation_split=0.2)

# Retrieve best model
best_model = tuner.get_best_models(num_models=1)[0]
best_model.save('../models/nn.keras')

Reloading Tuner from hyperparameter_tuning\mall_customers\tuner0.json
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.4573 - loss: 1.0841 - val_accuracy: 0.5312 - val_loss: 0.9809
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.4479 - loss: 1.0447 - val_accuracy: 0.5625 - val_loss: 0.9666
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.4896 - loss: 0.9856 - val_accuracy: 0.5312 - val_loss: 0.8982
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5948 - loss: 0.9069 - val_accuracy: 0.4688 - val_loss: 0.9182
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5625 - loss: 0.8886 - val_accuracy: 0.5625 - val_loss: 0.8842
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5792 - loss: 0.8871 - val_accuracy: 0.5312 - val_loss: 0.8880
Epoch 7/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [20]:
print('Best model:')
print(best_model.summary())

Best model:


None


In [21]:
y_pred = best_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(np.array(y_test), axis=1)

print("Confusion Matrix:")
print(confusion_matrix(y_true_classes, y_pred_classes))

print("Classification Report:")
print(classification_report(y_true_classes, y_pred_classes, zero_division=1))

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Confusion Matrix:
[[ 8  2  3]
 [ 0 18  2]
 [ 0  1  6]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.62      0.76        13
           1       0.86      0.90      0.88        20
           2       0.55      0.86      0.67         7

    accuracy                           0.80        40
   macro avg       0.80      0.79      0.77        40
weighted avg       0.85      0.80      0.80        40

