In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
import pickle
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import BatchNormalization
import tensorflow as tf
import seaborn as sns
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import classification_report,confusion_matrix
import numpy as np
import matplotlib.pylab as plt

In [None]:
# Load the raw customer table; the path is relative to the notebook's working directory.
customer_df = pd.read_csv('customers.csv')

## Data Exploration

In [None]:
# Column labels of the raw frame.
customer_df.columns

In [None]:
# Per-column dtypes as inferred by read_csv.
customer_df.dtypes

In [None]:
# (rows, columns) before any rows are dropped.
customer_df.shape

In [None]:
# Number of missing values in each column.
customer_df.isnull().sum()

In [None]:
# Drop every row that contains at least one missing value; the raw frame is left untouched.
customer_df_cleaned = customer_df.dropna(axis=0)

In [None]:
# Re-check missing-value counts after dropna.
customer_df_cleaned.isnull().sum()

In [None]:
# (rows, columns) after dropping incomplete rows; compare with customer_df.shape above.
customer_df_cleaned.shape

In [None]:
customer_df_cleaned.dtypes

In [None]:
# Distinct values of each categorical column. The first five columns are the ones
# ordinally encoded below, so their values are what the category lists there must contain.
customer_df_cleaned['Gender'].unique()

In [None]:
customer_df_cleaned['Ever_Married'].unique()

In [None]:
customer_df_cleaned['Graduated'].unique()

In [None]:
customer_df_cleaned['Profession'].unique()

In [None]:
customer_df_cleaned['Spending_Score'].unique()

In [None]:
# Var_1 is inspected here but dropped before modelling.
customer_df_cleaned['Var_1'].unique()

In [None]:
# Segmentation is the prediction target.
customer_df_cleaned['Segmentation'].unique()

In [None]:
# Explicit category order for each ordinally-encoded column. A value's position
# in its list is the integer code the encoder assigns to it.
ordinal_categories = {
    'Gender': ['Male', 'Female'],
    'Ever_Married': ['No', 'Yes'],
    'Graduated': ['No', 'Yes'],
    'Profession': [
        'Healthcare', 'Engineer', 'Lawyer', 'Artist', 'Doctor',
        'Homemaker', 'Entertainment', 'Marketing', 'Executive',
    ],
    'Spending_Score': ['Low', 'Average', 'High'],
}

# OrdinalEncoder pairs category lists with columns by position, so the dict's
# insertion order must match the column order used when the encoder is fitted.
categories_list = list(ordinal_categories.values())
enc = OrdinalEncoder(categories=categories_list)

In [None]:
# Work on a copy so the encoding steps below do not modify customer_df_cleaned.
customers_1 = customer_df_cleaned.copy()

In [None]:
# Columns to encode, listed in the same order as the category lists given to `enc`.
ordinal_columns = ['Gender', 'Ever_Married', 'Graduated', 'Profession', 'Spending_Score']

# Replace the string categories with their ordinal codes in place.
customers_1[ordinal_columns] = enc.fit_transform(customers_1[ordinal_columns])

In [None]:
# Inspect dtypes after ordinal encoding.
customers_1.dtypes

In [None]:
# Encoder for the target column; kept so predictions can be mapped back to labels later.
le = LabelEncoder()

In [None]:
# Replace the Segmentation labels with integer class codes.
customers_1['Segmentation'] = le.fit_transform(customers_1['Segmentation'])

In [None]:
customers_1.dtypes

In [None]:
# ID and Var_1 are not among the model features selected later; remove both in one call.
customers_1 = customers_1.drop(columns=['ID', 'Var_1'])

In [None]:
# Inspect the remaining columns after dropping ID and Var_1.
customers_1.dtypes

In [None]:
# Pairwise correlations between all remaining (now numeric) columns.
corr = customers_1.corr()

# Annotated heatmap of the matrix, with column names on both axes.
axis_labels = corr.columns
sns.heatmap(corr, annot=True, cmap="BuPu",
            xticklabels=axis_labels, yticklabels=axis_labels)

In [None]:
# Grid of pairwise plots over the columns of customers_1.
sns.pairplot(customers_1)

In [None]:
# sns.distplot is deprecated in seaborn. histplot with a KDE overlay on a
# density scale is the closest replacement for the histogram-plus-curve view.
sns.histplot(customers_1['Age'], kde=True, stat='density')

In [None]:
# Number of customers per family size.
plt.figure(figsize=(10,6))
# Name the column via x= and the frame via data=. Current seaborn treats the first
# positional argument as `data`, so a positional Series is not read as the x variable.
sns.countplot(x='Family_Size', data=customers_1)

In [None]:
plt.figure(figsize=(10,6))
# Age distribution within each family size.
sns.boxplot(x='Family_Size',y='Age',data=customers_1)

In [None]:
plt.figure(figsize=(10,6))
# Spending_Score holds ordinal codes at this point, not the original strings.
sns.scatterplot(x='Family_Size',y='Spending_Score',data=customers_1)

In [None]:
plt.figure(figsize=(10,6))
# Same Family_Size/Age relationship as the boxplot, shown as individual points.
sns.scatterplot(x='Family_Size',y='Age',data=customers_1)

In [None]:
# Summary statistics of the fully encoded frame.
customers_1.describe()

In [None]:
# Integer class codes produced by the LabelEncoder for the target.
customers_1['Segmentation'].unique()

In [None]:
# Feature matrix: the eight predictor columns, in the order the network will see them.
# Age sits at index 2, which the scaling cells rely on.
feature_columns = [
    'Gender', 'Ever_Married', 'Age', 'Graduated',
    'Profession', 'Work_Experience', 'Spending_Score', 'Family_Size',
]
X = customers_1[feature_columns].values

In [None]:
# Double brackets keep the target as a 2-D (n_samples, 1) array, the shape OneHotEncoder expects.
y1 = customers_1[['Segmentation']].values

In [None]:
# One-hot encoder for the integer class codes in y1.
one_hot_enc = OneHotEncoder()

In [None]:
# Learn the set of class codes present in the target.
one_hot_enc.fit(y1)

In [None]:
y1.shape

In [None]:
# transform() returns a sparse matrix; toarray() densifies it for Keras.
y = one_hot_enc.transform(y1).toarray()

In [None]:
y.shape

In [None]:
# Compare the first integer code with its one-hot row below.
y1[0]

In [None]:
y[0]

In [None]:
X.shape

In [None]:
# Hold out 33% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=50
)

In [None]:
# First training row, still unscaled.
X_train[0]

In [None]:
X_train.shape

In [None]:
# Scaler dedicated to the Age feature (column index 2 of X).
scaler_age = MinMaxScaler()

In [None]:
# Fit on the training split only; reshape(-1,1) turns the 1-D column into the 2-D input the scaler requires.
scaler_age.fit(X_train[:,2].reshape(-1,1))

In [None]:
# Copies, so the unscaled X_train / X_test stay available.
X_train_scaled = np.copy(X_train)
X_test_scaled = np.copy(X_test)

In [None]:
# Rescale only the Age feature, applying the train-fitted scaler to both splits.
age_idx = 2  # position of 'Age' in the feature matrix
age_slice = slice(age_idx, age_idx + 1)  # 2-D column view, as the scaler requires

X_train_scaled[:, age_idx] = scaler_age.transform(X_train[:, age_slice]).ravel()
X_test_scaled[:, age_idx] = scaler_age.transform(X_test[:, age_slice]).ravel()

In [None]:
# Creating the model
# Fully connected classifier: 8 input features -> 8 (linear) -> 16 -> 24 -> 32 (ReLU)
# -> 4-unit softmax giving one probability per class.
# Fix: the 'relu' literals on the 16- and 24-unit layers were missing their closing
# quote, which made the whole cell a SyntaxError.
ai_brain = Sequential([
    Dense(8, input_shape=(8,)),        # no activation given, so this layer is linear
    Dense(16, activation='relu'),
    Dense(24, activation='relu'),
    Dense(32, activation='relu'),
    Dense(4, activation='softmax'),
])

In [None]:
# Categorical cross-entropy matches the one-hot targets and the softmax output layer.
ai_brain.compile(optimizer='adam',
                 loss='categorical_crossentropy',
                 metrics=['accuracy'])

In [None]:
# Stops training once val_loss has failed to improve for 2 consecutive epochs.
# It only takes effect when handed to fit() through the callbacks argument.
early_stop = EarlyStopping(monitor='val_loss', patience=2)

In [None]:
# Train the classifier. epochs=2000 is only an upper bound: the early_stop
# callback ends training once val_loss stops improving.
# Fix: early_stop was created but never passed to fit(), so every run trained
# for the full 2000 epochs.
# NOTE(review): the test split doubles as the validation set, so the stopping
# point is tuned on test data; consider a separate validation split.
ai_brain.fit(x=X_train_scaled,y=y_train,
             epochs=2000,batch_size=256,
             validation_data=(X_test_scaled,y_test),
             callbacks=[early_stop],
             )

In [None]:
# Per-epoch training history (loss, accuracy and their val_ counterparts) from the last fit() call.
metrics = pd.DataFrame(ai_brain.history.history)

In [None]:
metrics.head()

In [None]:
# Training vs validation loss per epoch.
metrics[['loss','val_loss']].plot()

In [None]:
# Sequential.predict_classes is deprecated, so take the arg-max of the
# predicted class probabilities instead.
test_probabilities = ai_brain.predict(X_test_scaled)
x_test_predictions = test_probabilities.argmax(axis=1)

In [None]:
x_test_predictions.shape

In [None]:
# Collapse the one-hot test targets back to integer class indices so they can be compared with the predictions.
y_test_truevalue = np.argmax(y_test,axis=1)

In [None]:
y_test_truevalue.shape

In [None]:
# Rows are true classes, columns are predicted classes.
print(confusion_matrix(y_test_truevalue,x_test_predictions))

In [None]:
# Per-class precision, recall and F1 on the test split.
print(classification_report(y_test_truevalue,x_test_predictions))

In [None]:
# Saving the Model
# Writes the trained network to an HDF5 file in the working directory.
ai_brain.save('customer_classification_model.h5')

In [None]:
# Saving the data
# Everything is stored as one positional list; the loading cell unpacks it in
# exactly this order, so the two lists must be kept in sync.
with open('customer_data.pickle', 'wb') as fh:
   pickle.dump([X_train_scaled,y_train,X_test_scaled,y_test,customers_1,customer_df_cleaned,scaler_age,enc,one_hot_enc,le], fh)

In [None]:
# Loading the Model
# Rebinds ai_brain to the model restored from disk.
ai_brain = load_model('customer_classification_model.h5')

In [None]:
# Loading the data
# NOTE: pickle.load can execute arbitrary code; only load pickle files produced by this notebook.
# The unpacking order must match the list written by the saving cell.
with open('customer_data.pickle', 'rb') as fh:
   [X_train_scaled,y_train,X_test_scaled,y_test,customers_1,customer_df_cleaned,scaler_age,enc,one_hot_enc,le]=pickle.load(fh)

# Prediction for a single input

In [None]:
# Slicing with 1:2 (rather than indexing with 1) keeps the row 2-D, as predict() needs a batch dimension.
x_single_prediction = np.argmax(ai_brain.predict(X_test_scaled[1:2,:]), axis=1)

In [None]:
# Predicted class index for that single row.
print(x_single_prediction)

In [None]:
# Map the integer class index back to the original Segmentation label.
print(le.inverse_transform(x_single_prediction))